#MDBlocal
How MongoDB 4.2 Pipeline is
Powering Queries, Updates and Views
Guillaume Meister
Principal Solutions Architect
AGGREGATION POWER++
PREVIOUSLY ...
... 2017 and before
#MDBW17
Analytics with MongoDB Aggregation Framework
@asya999 by Asya Kamsky,
Lead MongoDB Maven
PIPELINE POWER
STORE
RETRIEVE
#MDBLocal
ps ax |grep mongod |head -1
*nix command line pipe
PIPELINE
#MDBLocal
$match | $group | $sort |
Input stream {} {} {} {} Result {} {} ...
PIPELINE
MongoDB document pipeline
DATA PIPELINE
STAGES
Stage 1 Stage 2 Stage 3 Stage 4
{} {} {} {}
{} {} {} {}
DATA PIPELINE
{} {} {} {}
{"$stage":{ ... }}
START
Collection
View
Special stage
STAGES
{title: "The Great Gatsby",
language: "English",
subjects: "Long Island"}
{title: "The Great Gatsby",
language: "English",
subjects: "New York"}
{title: "The Great Gatsby",
language: "English",
subjects: "1920s"}
{title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{"$match":{"language":"English"}}
$match
{ _id:"Long Island",
count: 1 },
$group
{ _id: "New York",
count: 2 },
$unwind
{ _id: "1920s",
count: 1 },
$sort $skip $limit $project
{"$unwind":"$subjects"}
{"$group":{"_id":"$subjects", "count":{"$sum":1}}}
{ _id: "Harlem",
count: 1 },
{ _id: "Long Island",
count: 1 },
{ _id: "New York",
count: 2 },
{ _id: "1920s",
count: 1 },
{title: "Open City",
language: "English",
subjects: [
"New York",
"Harlem" ] }
{ title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{ title: "War and Peace",
language: "Russian",
subjects: [
"Russia",
"War of 1812",
"Napoleon"] },
{ title: "Open City",
language: "English",
subjects: [
"New York",
"Harlem" ] },
{title: "Open City",
language: "English",
subjects: "New York"}
{title: "Open City",
language: "English",
subjects: "Harlem"}
{ _id: "Harlem",
count: 1 },
{"$sort":{"count":-1}} {"$limit":3}
{"$project":...}
#MDBLocal
INPUT STAGE RESULTS STAGE
STREAMING RESOURCE USE
Each document is streamed through in RAM
#MDBLocal
INPUT STAGE RESULTS STAGE
BLOCKING RESOURCE USE
Everything has to be kept in RAM (or spilled to disk)
5 minute review
https://github.com/asya999/mdbw17
PREVIOUSLY ...
... 2017
PREVIOUSLY ...
... 2017 ... 2018
#MDBLocal
THE FUTURE OF AGGREGATION
Better performance & optimizations
More stages & expressions
More options for output
Compass helper for aggregate
Unify different languages
#MDBLocal
THE FUTURE OF AGGREGATION
Better performance & optimizations
More stages & expressions
More options for output
Compass helper for aggregate
Unify different languages
#MDBLocal
THE FUTURE OF AGGREGATION
Better performance & optimizations
More stages & expressions
More options for output
Compass helper for aggregate
Unify different languages
#MDBLocal
THE FUTURE OF AGGREGATION
More options for output
Unify different languages
#MDBLocal
THE PRESENT OF AGGREGATION
More options for output
Unify different languages
#MDBLocal
Unify Different Languages
#MDBLocal
Unify Different Languages
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
AGGREGATION
#MDBLocal
Unify Different Languages
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
AGGREGATION
db.c.aggregate([
{$addFields:{
numChildren:{$size:"$children"},
numDependents:{$size:{
$filter:{
input:"$children.dep",
cond: "$$this"
}
}}
}},
...
])
#MDBLocal
Unify Different Languages
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
AGGREGATION
FIND
db.c.aggregate([
{$addFields:{
numChildren:{$size:"$children"},
numDependents:{$size:{
$filter:{
input:"$children.dep",
cond: "$$this"
}
}}
}},
...
])
#MDBLocal
Unify Different Languages
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
AGGREGATION
FIND
db.c.find (
{$expr:{
$lt:[
{$size:{$filter:{
input: "$children.dep",
cond: "$$this"
}}},
2
]
}}
)
#MDBLocal
Unify Different Languages
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
AGGREGATION
FIND
UPDATE
db.c.find (
{$expr:{
$lt:[
{$size:{$filter:{
input: "$children.dep",
cond: "$$this"
}}},
2
]
}}
)
#MDBLocal
Unify Different Languages
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
AGGREGATION
FIND
UPDATE
db.c.update(
{$expr:{
$anyElementTrue:{$map:{
input:"$children",
in: {$and:[
{$lt:["$$this.dob","1997-01-22"]},
"$$this.dep"
]}
}}
}},
{$set:{ audit:true }}
)
#MDBLocal
Update
db.coll.update(
<query>,
<update>,
<options>
)
#MDBLocal
Update
db.coll.update(
<query>,
<update>,
<options>
)
#MDBLocal
Update
db.coll.update(
<query>,
<update>,
<options>
)
<update>
#MDBLocal
Update
{
f1: <value>,
f2: <value>,
...
}
{
$set: { },
$inc: { },
$...
}
<update>
#MDBLocal
Update in 4.2
{ } OR [ ]
<update>
#MDBLocal
Update in 4.2
{ <same> } [ ]
<update>
#MDBLocal
Update in 4.2
{ <same> } [ <aggregation-pipeline> ]
<update>
Updates Using Aggregation
Pipeline
#MDBLocal
{ $addFields: { } }
{ $project: { } }
{ $replaceRoot: { } }
{ $set: { } }
{ $unset: [ ] }
{ $replaceWith: { } }
#MDBLocal
db.coll.update({_id:1},
{$inc:{a:1}},
{upsert:true})
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id: 1, a: 1 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id: 1, a: 1 }
"errmsg" : "Cannot apply $inc to a value of
non-numeric type."
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id: 1, a: 1 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id: 1, a: 1 }
{ _id: 1, a: 1 }
db.coll.update({_id:1},
[ {$set:{a:{$sum:["$a",1]}}} ],
{upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id: 1, a: 1 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id: 1, a: 1 }
"errmsg" : "$add only supports
numeric or date types, not string"
db.coll.update({_id:1},
[ {$set:{a:{$add:["$a",1]}}} ],
{upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id:1, a: 21 }
db.coll.update({_id:1},
[ {$set:{a:{$ }} ],
{upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id:1, a: 21 }
db.coll.update({_id:1}, [ {$set:{a:{$cond:{
if: ,
then: , else: }}}}], {upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id:1, a: 21 }
db.coll.update({_id:1}, [ {$set:{a:{$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: , else: }}}}], {upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 101 }
{ _id:1, a: 21 }
db.coll.update({_id:1}, [ {$set:{a:{$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: 21, else: {$sum:["$a",1]} }}}}], {upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 100 }
{ _id:1, a: 21 }
db.coll.update({_id:1}, [ {$set:{a:{$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: 21, else: {$sum:["$a",1]} }}}}], {upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 100 }
{ _id:1, a: 21 }
db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: 21, else: {$sum:["$a",1]} }}]} }}], {upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 100 }
{ _id:1, a: 21 }
{ _id:1, a: 1 }
db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: 21, else: {$sum:["$a",1]} }}]} }}], {upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11 }
{ _id: 1, a: 100 }
{ _id:1, a: 21 }
{ _id:1, a: 1 }
db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: 21, else: {$sum:["$a",1]} }}]}, prev_a:"$a" }}],
{upsert:true})
#MDBLocal
{ _id: 1 }
{ _id: 1, a: 10 }
{ _id: 1, a: 100 }
---
{ _id: 1, a: "10" }
{ _id:1, a: 21 }
{ _id: 1, a: 11, prev_a: 10 }
{ _id: 1, a: 100, prev_a: 100 }
{ _id:1, a: 21 }
{ _id:1, a: 1, prev_a: "10" }
db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{
if: {$eq:[{$type:"$a"},"missing"]},
then: 21, else: {$sum:["$a",1]} }}]}, prev_a:"$a" }}],
{upsert:true})
#MDBLocal
Set Defaults
#MDBLocal
Set Defaults
{_id: 1, a: 5, b: 12}
{_id: 2, a: 15, c: "abc"}
{_id: 3, b: 99, c: "xyz"}
If a or b is missing, set it to 0; if c is missing, set it to "unset"
#MDBLocal
Set Defaults
{_id: 1, a: 5, b: 12}
{_id: 2, a: 15, c: "abc"}
{_id: 3, b: 99, c: "xyz"}
If a or b is missing, set it to 0; if c is missing, set it to "unset"
db.coll.update({}, [
{$replaceWith:{
}}
], {multi:true})
#MDBLocal
Set Defaults
{_id: 1, a: 5, b: 12}
{_id: 2, a: 15, c: "abc"}
{_id: 3, b: 99, c: "xyz"}
If a or b is missing, set it to 0; if c is missing, set it to "unset"
db.coll.update({}, [
{$replaceWith:{$mergeObjects:[
]}}
], {multi:true})
#MDBLocal
Set Defaults
{_id: 1, a: 5, b: 12}
{_id: 2, a: 15, c: "abc"}
{_id: 3, b: 99, c: "xyz"}
If a or b is missing, set it to 0; if c is missing, set it to "unset"
db.coll.update({}, [
{$replaceWith:{$mergeObjects:[
{ a:0, b:0, c:"unset" },
"$$ROOT"
]}}
], {multi:true})
#MDBLocal
Set Defaults
{_id: 1, a: 5, b: 12}
{_id: 2, a: 15, c: "abc"}
{_id: 3, b: 99, c: "xyz"}
If a or b is missing, set it to 0; if c is missing, set it to "unset"
db.coll.update({}, [
{$replaceWith:{$mergeObjects:[
{ a:0, b:0, c:"unset" },
"$$ROOT"
]}}
], {multi:true})
{_id: 1, a: 5, b: 12, c: "unset"}
{_id: 2, a: 15, b: 0, c: "abc"}
{_id: 3, a: 0, b: 99, c: "xyz"}
#MDBLocal
{ id: 1,
d: ISODate("2019-06-04T00:00:00"),
h: [
{ hour:"11", value: 296 },
{ hour:"12", value: 300 }
]}
id: X, d:Y, hour:Z, value: VAL
db.coll.update({id:X, d:Y},
[ {$set:{h:{$cond:{
if:
then:
else:
}}}}],
{upsert:true})
#MDBLocal
{ id: 1,
d: ISODate("2019-06-04T00:00:00"),
h: [
{ hour:"11", value: 296 },
{ hour:"12", value: 300 }
]}
id: X, d:Y, hour:Z, value: VAL
db.coll.update({id:X, d:Y},
[ {$set:{h:{$cond:{
if: {$in:[Z,{$ifNull:["$h.hour",[]]}]},
then:{$map:{
input:"$h",
in: {$cond:{ if:{$ne:["$$this.hour",Z]}, then:"$$this",
else: {hour: Z, value: {$sum:[ "$$this.value", VAL]}}
}}}},
else:{$concatArrays:[{$ifNull:["$h",[]]},[{hour:Z,value:VAL}]]}
}}}}],
{upsert:true})
if:
then:
else:
#MDBLocal
Recap:
Updates can be specified with aggregation pipeline
All fields from existing document can be accessed
Slightly slower, but a lot more powerful
#MDBLocal
THE FUTURE OF AGGREGATION
Better performance & optimizations
More stages & expressions
More options for output
Compass helper for aggregate
Unify different languages
#MDBLocal
THE FUTURE OF AGGREGATION
Better performance & optimizations
More stages & expressions
More options for output
Compass helper for aggregate
Unify different languages
#MDBLocal
THE FUTURE OF AGGREGATION
More options for output
#MDBLocal
More Options for Output
#MDBLocal
Prior to MongoDB 4.2
$out
coll
new_coll
$out
#MDBLocal
Prior to MongoDB 4.2
$out
coll
new_coll
$out
db.coll.aggregate( [ {pipeline}, ...
{$out: "new_coll"} ]);
#MDBLocal
Prior to MongoDB 4.2
$out
coll
new_coll
$out
db.coll.aggregate( [ {pipeline}, ...
{$out: "new_coll"} ]);
new_coll
○ must be unsharded
○ overwrites existing
New $merge stage
in aggregation pipeline
#MDBLocal
MongoDB 4.2
$merge
coll
coll2
$merge
#MDBLocal
MongoDB 4.2
$merge
db.coll.aggregate( [
{pipeline}, ...,
{$merge: { ... }}
]);
coll
coll2
$merge
#MDBLocal
MongoDB 4.2
$merge
db.coll.aggregate( [
{pipeline}, ...,
{$merge: { ... }}
]);
coll2
can exist, can be sharded
same or different 'db'
coll
coll2
$merge
#MDBLocal
coll
coll2
$merge
{ } { } { } { }
{ } { } { } { }
MongoDB 4.2
#MDBLocal
{
$merge: {
into: <target>
}
}
$merge syntax
#MDBLocal
{$merge: "collection2"}
$merge syntax
{
$merge: {
into: <target>
}
}
#MDBLocal
{$merge: {into: {db: "db2", coll: "collection2"}}}
$merge syntax
{
$merge: {
into: <target>
}
}
#MDBLocal
{
$merge: {
into: <target>
}
}
$merge syntax
#MDBLocal
{
$merge: {
into: <target>,
on: <fields>
}
}
on: "_id"
on: [ "_id", "shardkey(s)" ]
must be unique
$merge syntax
#MDBLocal
{
$merge: {
into: <target>,
on: <fields>
}
}
$merge syntax
#MDBLocal
Actions
source target
#MDBLocal
Actions
nothing matched:
source target
#MDBLocal
Actions
nothing matched: usually insert
source target
#MDBLocal
Actions
nothing matched: usually insert
document matched:
source target
#MDBLocal
Actions
nothing matched: usually insert
document matched: overwrite? update? ???
source target
#MDBLocal
Actions
nothing matched: usually insert
document matched: update
source target
#MDBLocal
Actions
nothing matched: usually insert
document matched: update (merge)
source target
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenNotMatched:
whenMatched:
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenNotMatched:"insert",
whenMatched:
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenNotMatched:"insert",
whenMatched:"merge"
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenNotMatched:"insert"|"discard"|"fail",
whenMatched:"merge"
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenNotMatched:"insert"|"discard"|"fail",
whenMatched:"merge"|"replace"|"keepExisting"|"fail"|[...]
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenMatched:[...]
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenMatched:[<custom pipeline>]
}
}
#MDBLocal
$merge example
{
$merge: {
into: <target>,
whenMatched:[
{$addFields:{
}}
]
}
}
#MDBLocal
$merge example
{
$merge: {
into: <target>,
whenMatched:[
{$addFields:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
#MDBLocal
$merge example
{
$merge: {
into: <target>,
whenMatched:[
{$set:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
#MDBLocal
$merge example
{
$merge: {
into: <target>,
whenMatched:[
{$set:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
#MDBLocal
$merge example
{
$merge: {
into: <target>,
whenMatched:[
{$set:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
Incoming Target
{
_id: "37",
total: 64,
f1: "x"
}
{
_id: "37",
total: 245,
f1: "yyy"
}
Result:
{
}
#MDBLocal
$merge example
{
$merge: {
into: <target>,
whenMatched:[
{$set:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
Incoming Target
{
_id: "37",
total: 64,
f1: "x"
}
{
_id: "37",
total: 245,
f1: "yyy"
}
Result:
{
_id: "37",
total: 309,
f1: "yyy"
}
#MDBLocal
$merge example 2
{
$merge: {
into: <target>,
whenMatched:[
{$replaceWith:{$mergeObjects:[
"$$new",
{total:{$sum:["$$new.total", "$total"]}}
]}}
]
}
}
#MDBLocal
$merge example 2
{
$merge: {
into: <target>,
whenMatched:[
{$replaceWith:{$mergeObjects:[
"$$new",
{total:{$sum:["$$new.total", "$total"]}}
]}}
]
}
}
Incoming Target
{
_id: "37",
total: 64,
f1: "x"
}
{
_id: "37",
total: 245,
f1: "yyy"
}
Result:
{
}
#MDBLocal
$merge example 2
{
$merge: {
into: <target>,
whenMatched:[
{$replaceWith:{$mergeObjects:[
"$$new",
{total:{$sum:["$$new.total", "$total"]}}
]}}
]
}
}
Incoming Target
{
_id: "37",
total: 64,
f1: "x"
}
{
_id: "37",
total: 245,
f1: "yyy"
}
Result:
{
_id: "37",
total: 309,
f1: "x"
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
whenMatched:[...]
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
let: { ... },
whenMatched:[ ...]
}
}
#MDBLocal
$merge syntax
{
$merge: {
into: <target>,
let: {new: "$$ROOT"},
whenMatched:[ ...]
}
}
#MDBLocal
{
$merge: {
into: <target>,
whenMatched:[
{$set:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
#MDBLocal
{
$merge: {
into: <target>,
let: {itotal: "$total"},
whenMatched:[
{$set:{
total:{$sum:["$total","$$itotal"]}
}}
]
}
}
{
$merge: {
into: <target>,
whenMatched:[
{$set:{
total:{$sum:["$total","$$new.total"]}
}}
]
}
}
EXAMPLES
APPEND from TEMP collection
#MDBLocal
temp
real
data
real
Using $merge to append loaded and
cleansed records into db
#MDBLocal
aggregate 'temp' and append valid records to 'data'
db.temp.aggregate( [
{ ... } /* pipeline to massage and cleanse data in temp */,
{$merge:{
into: "data",
whenMatched: "fail"
}}
]);
#MDBLocal
aggregate 'temp' and append valid records to 'data'
db.temp.aggregate( [
{ ... } /* pipeline to massage and cleanse data in temp */,
{$merge:{
into: "data",
whenMatched: "fail"
}}
]);
Similar to SQL's INSERT INTO T1 SELECT * FROM T2
EXAMPLES
Maintain Single View
#MDBLocal
mflix
users
users
mfriendbook
users
sv
Using $merge to populate/update
user fields from other services
#MDBLocal
mflix
users
users
mfriendbook
users
sv
Using $merge to populate/update
user fields from other services
sv.users
{
_id: "user253",
dob: ISODate(...),
f1: "yyy"
}
#MDBLocal
$merge updates fields from the mflix.users collection into the
sv.users collection. Our "_id" field is the unique username
mflix_pipeline = [
{ "$project" : {
"_id" : "$username",
"mflix" : "$$ROOT"
}},
{ "$merge" : {
"into" : {
"db": "sv",
"coll" : "users"
},
"whenNotMatched" : "discard"
}}
]
(in mflix)
sv.users
{
_id: "user253",
dob: ISODate(...),
f1: "yyy"
}
#MDBLocal
$merge updates fields from the mflix.users collection into the
sv.users collection. Our "_id" field is the unique username
mflix_pipeline = [
{ "$project" : {
"_id" : "$username",
"mflix" : "$$ROOT"
}},
{ "$merge" : {
"into" : {
"db": "sv",
"coll" : "users"
},
"whenNotMatched" : "discard"
}}
]
(in mflix) db.users.aggregate(mflix_pipeline)
sv.users
{
_id: "user253",
dob: ISODate(...),
f1: "yyy",
mflix: { ... }
}
#MDBLocal
$merge updates fields from the mfriendbook.users collection into the
sv.users collection. Our "_id" field is the unique username
mfriendbook_pipeline = [
{ "$project" : {
"_id" : "$username",
"mfriendbook" : "$$ROOT"
}},
{ "$merge" : {
"into" : {
"db": "sv",
"coll" : "users"
},
"whenNotMatched" : "discard"
}}
]
(in mfriendbook)
sv.users
{
_id: "user253",
dob: ISODate(...),
f1: "yyy",
mflix: { ... }
}
#MDBLocal
$merge updates fields from the mfriendbook.users collection into the
sv.users collection. Our "_id" field is the unique username
mfriendbook_pipeline = [
{ "$project" : {
"_id" : "$username",
"mfriendbook" : "$$ROOT"
}},
{ "$merge" : {
"into" : {
"db": "sv",
"coll" : "users"
},
"whenNotMatched" : "discard"
}}
]
(in mfriendbook) db.users.aggregate(mfriendbook_pipeline)
sv.users
{
_id: "user253",
dob: ISODate(...),
f1: "yyy",
mflix: { ... },
mfriendbook: { ... }
}
EXAMPLES
Populate ROLLUPS into summary table
registrations
real
regsummary
real
Using $merge to incrementally
update periodic rollups in summary
#MDBLocal
$merge to create/update periodic
rollups in summary collection (for all days)
db.regsummary.createIndex({event:1, date:1}, {unique: true});
#MDBLocal
$merge to create/update periodic
rollups in summary collection (for all days)
db.regsummary.createIndex({event:1, date:1}, {unique: true});
db.registrations.aggregate([
{$match: {event_id: "MDBW19"}},
{$group:{
_id:{$dateToString:{date:"$date",format:"%Y-%m-%d"}},
count: {$sum:1}
}},
{$project: {_id:0,event:"MDBW19",date:"$_id",total:"$count"}},
{$merge: {
into: "regsummary",
on: ["event", "date"]
}}
])
#MDBLocal
$merge to create/update periodic
rollups in summary collection (for all days)
db.regsummary.createIndex({event:1, date:1}, {unique: true});
db.registrations.aggregate([
{$match: {event_id: "MDBW19"}},
{$group:{
_id:{$dateToString:{date:"$date",format:"%Y-%m-%d"}},
count: {$sum:1}
}},
{$project: {_id:0,event:"MDBW19",date:"$_id",total:"$count"}},
{$merge: {
into: "regsummary",
on: ["event", "date"]
}}
])
{ "event" : "MDBW19", "date" : "2019-05-19", "total" : 33 }
{ "event" : "MDBW19", "date" : "2019-05-20", "total" : 15 }
{ "event" : "MDBW19", "date" : "2019-05-21", "total" : 24 }
#MDBLocal
$merge to incrementally update periodic rollups in
summary collection (for single day)
#MDBLocal
$merge to incrementally update periodic rollups in
summary collection (for single day)
db.registrations.aggregate([
{$match: {
event_id: "MDBW19",
date:{$gte:ISODate("2019-05-22"),$lt:ISODate("2019-05-23")}
}},
{$count: "total"},
{$addFields: {event:"MDBW19", "date":"2019-05-22"}},
{$merge: {
into: "regsummary",
on: ["event", "date"]
}}
])
#MDBLocal
$merge to incrementally update periodic rollups in
summary collection (for single day)
db.registrations.aggregate([
{$match: {
event_id: "MDBW19",
date:{$gte:ISODate("2019-05-22"),$lt:ISODate("2019-05-23")}
}},
{$count: "total"},
{$addFields: {event:"MDBW19", "date":"2019-05-22"}},
{$merge: {
into: "regsummary",
on: ["event", "date"]
}}
])
{ "event" : "MDBW19", "date" : "2019-05-19", "total" : 33 }
{ "event" : "MDBW19", "date" : "2019-05-20", "total" : 15 }
{ "event" : "MDBW19", "date" : "2019-05-21", "total" : 24 }
{ "event" : "MDBW19", "date" : "2019-05-22", "total" : 34 }
#MDBLocal
The aggregation framework is the main language for data
manipulation in MongoDB (unify languages)
It's now possible to update documents using the aggregation
framework and existing fields (UPDATE)
Aggregation framework output can be used to merge data with a
target collection ($merge)
Key takeaways
MongoDB .local Paris 2020: La puissance du Pipeline d'Agrégation de MongoDB
MongoDB .local Paris 2020: La puissance du Pipeline d'Agrégation de MongoDB

MongoDB .local Paris 2020: La puissance du Pipeline d'Agrégation de MongoDB

  • 1.
    #MDBlocal How MongoDB 4.2 Pipeline is Powering Queries, Updates and Views Guillaume Meister Principal Solutions Architect AGGREGATION POWER++
  • 2.
  • 3.
    #MDBW17 Analytics with MongoDB Aggregation Framework @asya999 by Asya Kamsky, Lead MongoDB Maven PIPELINE POWER
  • 4.
  • 5.
    #MDBLocal ps ax |grep mongod |head 1 *nix command line pipe PIPELINE
  • 6.
    #MDBLocal $match $group | $sort| Input stream {} {} {} {} Result {} {} ... PIPELINE MongoDB document pipeline
  • 7.
  • 8.
    Stage 1 Stage 2 Stage 3 Stage 4 {} {} {} {} {} {} {} {} DATA PIPELINE {} {} {} {} {"$stage":{ ... }} START Collection View Special stage STAGES
  • 9.
    {title: "The Great Gatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
  • 10.
    #MDBLocal INPUT STAGE RESULTS STAGE STREAMING RESOURCE USE Each document is streamed through in RAM
  • 11.
    #MDBLocal INPUT STAGE RESULTS STAGE BLOCKING RESOURCE USE Everything has to be kept in RAM (or spill)
  • 12.
  • 13.
  • 14.
  • 15.
    #MDBLocal THE FUTURE OF AGGREGATION Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages
  • 16.
    #MDBLocal THE FUTURE OF AGGREGATION Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages
  • 17.
    #MDBLocal THE FUTURE OF AGGREGATION Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages
  • 18.
    #MDBLocal THE FUTURE OF AGGREGATION More options for output Unify different languages
  • 19.
    #MDBLocal THE PRESENT OF AGGREGATION More options for output Unify different languages
  • 20.
  • 21.
    #MDBLocal Unify Different Languages {children:[ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} AGGREGATION
  • 22.
    #MDBLocal Unify Different Languages {children:[ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} AGGREGATION db.c.aggregate([ {$addFields:{ numChildren:{$size:"$children"}, numDependents:{$size:{ $filter:{ input:"$children.dep", cond: "$$this" } }} }}, ... ])
  • 23.
    #MDBLocal Unify Different Languages {children:[ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} AGGREGATION FIND db.c.aggregate([ {$addFields:{ numChildren:{$size:"$children"}, numDependents:{$size:{ $filter:{ input:"$children.dep", cond: "$$this" } }} }}, ... ])
  • 24.
    #MDBLocal Unify Different Languages {children:[ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} AGGREGATION FIND db.c.find ( {$expr:{ $lt:[ {$size:{$filter:{ input: "$children.dep", cond: "$$this" }}}, 2 ] }} )
  • 25.
    #MDBLocal Unify Different Languages {children:[ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} AGGREGATION FIND UPDATE db.c.find ( {$expr:{ $lt:[ {$size:{$filter:{ input: "$children.dep", cond: "$$this" }}}, 2 ] }} )
  • 26.
    #MDBLocal Unify Different Languages {children:[ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} AGGREGATION FIND UPDATE db.c.update( {$expr:{ $anyElementTrue:{$map:{ input:"$children", in: {$and:[ {$lt:["$$this.dob","1997-01-22"]}, "$$this.dep" ]} }} }}, {$set:{ audit:true }} )
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
    #MDBLocal Update in 4.2 {} OR [ ] <update>
  • 32.
    #MDBLocal Update in 4.2 {<same> } [ ] <update>
  • 33.
    #MDBLocal Update in 4.2 {<same> } [ <aggregation-pipeline> ] <update>
  • 34.
  • 35.
    #MDBLocal { $addFields: {} } { $project: { } } { $replaceRoot: { } } { $set: { } } { $unset: [ ] } { $replaceWith: { } }
  • 36.
    #MDBLocal db.coll.update({_id:1}, {$inc:{a:1}}, {upsert:true}) { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id: 1, a: 1 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id: 1, a: 1 } "errmsg" : "Cannot apply to a value of non-numeric type."
  • 37.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id: 1, a: 1 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id: 1, a: 1 } { _id: 1, a: 1 } db.coll.update({_id:1}, [ {$set:{a:{$sum:["$a",1]}}} ], {upsert:true})
  • 38.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id: 1, a: 1 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id: 1, a: 1 } "errmsg" : "$add only supports numeric or date types, not string" db.coll.update({_id:1}, [ {$set:{a:{$add:["$a",1]}}} ], {upsert:true})
  • 39.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id:1, a: 21 } db.coll.update({_id:1}, [ {$set:{a:{$ }} ], {upsert:true})
  • 40.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id:1, a: 21 } db.coll.update({_id:1}, [ {$set:{a:{$cond:{ if: , then: , else: }}}}], {upsert:true})
  • 41.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id:1, a: 21 } db.coll.update({_id:1}, [ {$set:{a:{$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: , else: }}}}], {upsert:true})
  • 42.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 101 } { _id:1, a: 21 } db.coll.update({_id:1}, [ {$set:{a:{$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: 21, else: {$sum:["$a",1]} }}}}], {upsert:true})
  • 43.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 100 } { _id:1, a: 21 } db.coll.update({_id:1}, [ {$set:{a:{$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: 21, else: {$sum:["$a",1]} }}}}], {upsert:true})
  • 44.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 100 } { _id:1, a: 21 } db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: 21, else: {$sum:["$a",1]} }}]} }}], {upsert:true})
  • 45.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 100 } { _id:1, a: 21 } { _id:1, a: 1 } db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: 21, else: {$sum:["$a",1]} }}]} }}], {upsert:true})
  • 46.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11 } { _id: 1, a: 100 } { _id:1, a: 21 } { _id:1, a: 1 } db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: 21, else: {$sum:["$a",1]} }}]}, prev_a:"$a" }}], {upsert:true})
  • 47.
    #MDBLocal { _id: 1} { _id: 1, a: 10 } { _id: 1, a: 100 } --- { _id: 1, a: "10" } { _id:1, a: 21 } { _id: 1, a: 11, prev_a: 10 } { _id: 1, a: 100, prev_a: 100 } { _id:1, a: 21 } { _id:1, a: 1, prev_a: "10" } db.coll.update({_id:1}, [ {$set:{a:{$min:[100, {$cond:{ if: {$eq:[{$type:"$a"},"missing"]}, then: 21, else: {$sum:["$a",1]} }}]}, prev_a:"$a" }}], {upsert:true})
  • 48.
  • 49.
    #MDBLocal Set Defaults {_id: 1,a: 5, b: 12} {_id: 2, a: 15, c: "abc"} {_id: 3, b: 99, c: "xyz"} If a or b are missing, set to 0, if c is missing -> "unset"
  • 50.
    #MDBLocal Set Defaults {_id: 1,a: 5, b: 12} {_id: 2, a: 15, c: "abc"} {_id: 3, b: 99, c: "xyz"} If a or b are missing, set to 0, if c is missing -> "unset" db.coll.update({}, [ {$replaceWith:{ }} ], {multi:true})
  • 51.
    #MDBLocal Set Defaults {_id: 1,a: 5, b: 12} {_id: 2, a: 15, c: "abc"} {_id: 3, b: 99, c: "xyz"} If a or b are missing, set to 0, if c is missing -> "unset" db.coll.update({}, [ {$replaceWith:{$mergeObjects:[ ]}} ], {multi:true})
  • 52.
    #MDBLocal Set Defaults {_id: 1,a: 5, b: 12} {_id: 2, a: 15, c: "abc"} {_id: 3, b: 99, c: "xyz"} If a or b are missing, set to 0, if c is missing -> "unset" db.coll.update({}, [ {$replaceWith:{$mergeObjects:[ { a:0, b:0, c:"unset" }, "$$ROOT" ]}} ], {multi:true})
  • 53.
    #MDBLocal Set Defaults {_id: 1,a: 5, b: 12} {_id: 2, a: 15, c: "abc"} {_id: 3, b: 99, c: "xyz"} If a or b are missing, set to 0, if c is missing -> "unset" db.coll.update({}, [ {$replaceWith:{$mergeObjects:[ { a:0, b:0, c:"unset" }, "$$ROOT" ]}} ], {multi:true}) {_id: 1, a: 5, b: 12, c: "unset"} {_id: 2, a: 15, b: 0, c: "abc"} {_id: 3, a: 0, b: 99, c: "xyz"}
  • 54.
    #MDBLocal { id: 1, d:ISODate("2019-06-04T00:00:00"), h: [ { hour:"11", value: 296 }, { hour:"12", value: 300 } ]} id: X, d:Y, hour:Z, value: VAL db.coll.update({id:X, d:Y}, [ {$set:{h:{$cond:{ if: then: else: }}}}], {upsert:true})
  • 55.
    #MDBLocal { id: 1, d:ISODate("2019-06-04T00:00:00"), h: [ { hour:"11", value: 296 }, { hour:"12", value: 300 } ]} id: X, d:Y, hour:Z, value: VAL db.coll.update({id:X, d:Y}, [ {$set:{h:{$cond:{ if: {$in:[Z,{$ifNull:["$h.hour",[]]}]}, then:{$map:{ input:"$h", in: {$cond:{ if:{$ne:["$$this.hour",Z]}, then:"$$this", else: {hour: Z, value: {$sum:[ "$$this.value", VAL]}} }}}}, else:{$concatArrays:[{$ifNull:["$h",[]]},[{hour:Z,value:VAL}]]} }}}}], {upsert:true}) if: then: else:
  • 56.
    #MDBLocal Recap: Updates can be specified with aggregation pipeline All fields from existing document can be accessed Slightly slower, but a lot more powerful
  • 57.
    #MDBLocal THE FUTURE OF AGGREGATION Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages
  • 58.
    #MDBLocal THE FUTURE OF AGGREGATION Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages
  • 59.
    #MDBLocal THE FUTURE OF AGGREGATION More options for output
  • 60.
  • 61.
    #MDBLocal Prior to MongoDB 4.2 $out coll new_coll $out
  • 62.
    #MDBLocal Prior to MongoDB 4.2 $out coll new_coll $out db.coll.aggregate( [ {pipeline}, ... {$out: "new_coll"} ]);
  • 63.
    #MDBLocal Prior to MongoDB 4.2 $out coll new_coll $out db.coll.aggregate( [ {pipeline}, ... {$out: "new_coll"} ]); new_coll ○ must be unsharded ○ overwrites existing
  • 64.
    New $merge stage in aggregation pipeline
  • 65.
  • 66.
    #MDBLocal MongoDB 4.2 $merge db.coll.aggregate( [ {pipeline},..., {$merge: { ... } ]); coll coll2 $merge
  • 67.
    #MDBLocal MongoDB 4.2 $merge db.coll.aggregate( [ {pipeline},..., {$merge: { ... } ]); coll2 can exist, can be sharded same or different 'db' coll coll2 $merge
  • 68.
    #MDBLocal coll coll2 $merge { } {} { } { } { } { } { } { } MongoDB 4.2
  • 69.
  • 70.
  • 71.
    #MDBLocal {$merge: {into: {db:"db2", coll: "collection2"}} $merge syntax { $merge: { into: <target> } }
  • 72.
  • 73.
    #MDBLocal { $merge: { into: <target>, on:<fields> } } on: "_id" on: [ "_id", "shardkey(s)" ] must be unique $merge syntax
  • 74.
  • 75.
  • 76.
  • 77.
  • 78.
    #MDBLocal Actions nothing matched: usually insert document matched: source target
  • 79.
    #MDBLocal Actions nothing matched: usually insert document matched: overwrite? update? ??? source target
  • 80.
    #MDBLocal Actions nothing matched: usually insert document matched: update source target
  • 81.
    #MDBLocal Actions nothing matched: usually insert document matched: update (merge) source target
  • 82.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenNotMatched: whenMatched: } }
  • 83.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenNotMatched:"insert", whenMatched: } }
  • 84.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenNotMatched:"insert", whenMatched:"merge" } }
  • 85.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenNotMatched:"insert"|"discard"|"fail", whenMatched:"merge" } }
  • 86.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenNotMatched:"insert"|"discard"|"fail", whenMatched:"merge"|"replace"|"keepExisting"|"fail"|[...] } }
  • 87.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenMatched:[...] } }
  • 88.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenMatched:[<custom pipeline>] } }
  • 89.
    #MDBLocal $merge example { $merge: { into:<target>, whenMatched:[ {$addFields:{ }} ] } }
  • 90.
    #MDBLocal $merge example { $merge: { into:<target>, whenMatched:[ {$addFields:{ total:{$sum:["$total","$$new.total"]} }} ] } }
  • 91.
    #MDBLocal $merge example { $merge: { into:<target>, whenMatched:[ {$set:{ total:{$sum:["$total","$$new.total"]} }} ] } }
  • 92.
    #MDBLocal $merge example { $merge: { into:<target>, whenMatched:[ {$set:{ total:{$sum:["$total","$$new.total"]} }} ] } }
  • 93.
    #MDBLocal $merge example { $merge: { into:<target>, whenMatched:[ {$set:{ total:{$sum:["$total","$$new.total"]} }} ] } } Incoming Target { _id: "37", total: 64, f1: "x" } { _id: "37", total: 245, f1: "yyy" } Result: { }
  • 94.
    #MDBLocal $merge example { $merge: { into:<target>, whenMatched:[ {$set:{ total:{$sum:["$total","$$new.total"]} }} ] } } Incoming Target { _id: "37", total: 64, f1: "x" } { _id: "37", total: 245, f1: "yyy" } Result: { _id: "37", total: 309, f1: "yyy" }
  • 95.
    #MDBLocal $merge example 2 { $merge:{ into: <target>, whenMatched:[ {$replaceWith:{$mergeObjects:[ "$$new", {total:{$sum:["$$new.total", "$total"]}} ]}} ] } }
  • 96.
    #MDBLocal $merge example 2 { $merge:{ into: <target>, whenMatched:[ {$replaceWith:{$mergeObjects:[ "$$new", {total:{$sum:["$$new.total", "$total"]}} ]}} ] } } Incoming Target { _id: "37", total: 64, f1: "x" } { _id: "37", total: 245, f1: "yyy" } Result: { }
  • 97.
    #MDBLocal $merge example 2 { $merge:{ into: <target>, whenMatched:[ {$replaceWith:{$mergeObjects:[ "$$new", {total:{$sum:["$$new.total", "$total"]}} ]}} ] } } Incoming Target { _id: "37", total: 64, f1: "x" } { _id: "37", total: 245, f1: "yyy" } Result: { _id: "37", total: 309, f1: "x" }
  • 98.
    #MDBLocal $merge syntax { $merge: { into:<target>, whenMatched:[...] } }
  • 99.
    #MDBLocal $merge syntax { $merge: { into:<target>, let: { ... }, whenMatched:[ ...] } }
  • 100.
    #MDBLocal $merge syntax { $merge: { into:<target>, let: {new: "$$ROOT"}, whenMatched:[ ...] } }
  • 101.
  • 102.
    #MDBLocal { $merge: { into: <target>, let:{itotal: "$total"}, whenMatched:[ {$set:{ total:{$sum:["$total","$$itotal"]} }} ] } } { $merge: { into: <target>, whenMatched:[ {$set:{ total:{$sum:["$total","$$new.total"]} }} ] } }
  • 103.
  • 104.
    #MDBLocal temp real data real Using $merge to append loaded and cleansed records loaded into db
  • 105.
    #MDBLocal aggregate 'temp' and append valid records to 'data' db.temp.aggregate( [ { ... } /* pipeline to massage and cleanse data in temp */, {$merge:{ into: "data", whenMatched: "fail" }} ]);
  • 106.
    #MDBLocal aggregate 'temp' and append valid records to 'data' db.temp.aggregate( [ { ... } /* pipeline to massage and cleanse data in temp */, {$merge:{ into: "data", whenMatched: "fail" }} ]); Similar to SQL's INSERT INTO T1 SELECT * from T2
  • 107.
  • 108.
    #MDBLocal mflix users users mfriendbook users sv Using $merge to populate/update user fields from other services
  • 109.
    #MDBLocal mflix users users mfriendbook users sv Using $merge to populate/update user fields from other services sv.users { _id: "user253", dob: ISODate(...), f1: "yyy" }
  • 110.
    #MDBLocal $merge updates fields from mflix.users collection into sv.users collection. Our "_id" field is unique username mflix_pipeline = [ { "$project" : { "_id" : "$username", "mflix" : "$$ROOT" }}, { "$merge" : { "into" : { "db": "sv", "collection" : "users" }, "whenNotMatched" : "discard" }} ] (in mflix) sv.users { _id: "user253", dob: ISODate(...), f1: "yyy" }
  • 111.
    #MDBLocal $merge updates fields from mflix.users collection into sv.users collection. Our "_id" field is unique username mflix_pipeline = [ { "$project" : { "_id" : "$username", "mflix" : "$$ROOT" }}, { "$merge" : { "into" : { "db": "sv", "collection" : "users" }, "whenNotMatched" : "discard" }} ] (in mflix) db.users.aggregate(mflix_pipeline) sv.users { _id: "user253", dob: ISODate(...), f1: "yyy", mflix: { ... } }
  • 112.
    #MDBLocal $merge updates fields from mfriendbook.users collection into sv.users collection. Our "_id" field is unique username mfriendbook_pipeline = [ { "$project" : { "_id" : "$username", "mfriendbook" : "$$ROOT" }}, { "$merge" : { "into" : { "db": "sv", "collection" : "users" }, "whenNotMatched" : "discard" }} ] (in mfriendbook) sv.users { _id: "user253", dob: ISODate(...), f1: "yyy", mflix: { ... } }
  • 113.
    #MDBLocal $merge updates fields from mfriendbook.users collection into sv.users collection. Our "_id" field is unique username mfriendbook_pipeline = [ { "$project" : { "_id" : "$username", "mfriendbook" : "$$ROOT" }}, { "$merge" : { "into" : { "db": "sv", "collection" : "users" }, "whenNotMatched" : "discard" }} ] (in mfriendbook) db.users.aggregate(mfriendbook_pipeline) sv.users { _id: "user253", dob: ISODate(...), f1: "yyy", mflix: { ... }, mfriendbook: { ... } }
  • 114.
  • 115.
    registrations real regsummary real Using $merge to incrementally update periodic rollups in summary
  • 116.
    #MDBLocal $merge to create/update periodic rollups in summary collection (for all days) db.regsummary.createIndex({event:1, date:1}, {unique: true});
  • 117.
    #MDBLocal $merge to create/update periodic rollups in summary collection (for all days) db.regsummary.createIndex({event:1, date:1}, {unique: true}); db.registrations.aggregate([ {$match: {event_id: "MDBW19"}}, {$group:{ _id:{$dateToString:{date:"$date",format:"%Y-%m-%d"}}, count: {$sum:1} }}, {$project: {_id:0,event:"MDBW19",date:"$_id",total:"$count"}}, {$merge: { into: "regsummary", on: ["event", "date"] }} ])
  • 118.
    #MDBLocal $merge to create/update periodic rollups in summary collection (for all days) db.regsummary.createIndex({event:1, date:1}, {unique: true}); db.registrations.aggregate([ {$match: {event_id: "MDBW19"}}, {$group:{ _id:{$dateToString:{date:"$date",format:"%Y-%m-%d"}}, count: {$sum:1} }}, {$project: {_id:0,event:"MDBW19",date:"$_id",total:"$count"}}, {$merge: { into: "regsummary", on: ["event", "date"] }} ]) { "event" : "MDBW19", "date" : "2019-05-19", "total" : 33 } { "event" : "MDBW19", "date" : "2019-05-20", "total" : 15 } { "event" : "MDBW19", "date" : "2019-05-21", "total" : 24 }
  • 119.
    #MDBLocal $merge to incrementally update periodic rollups in summary collection (for single day)
  • 120.
    #MDBLocal $merge to incrementally update periodic rollups in summary collection (for single day) db.registrations.aggregate([ {$match: { event_id: "MDBW19", date:{$gte:ISODate("2019-05-22"),$lt:ISODate("2019-05-23")} }}, {$count: "total"}, {$addFields: {event:"MDBW19", "date":"2019-05-22"}}, {$merge: { into: "regsummary", on: ["event", "date"] }} ])
  • 121.
    #MDBLocal $merge to incrementally update periodic rollups in summary collection (for single day) db.registrations.aggregate([ {$match: { event_id: "MDBW19", date:{$gte:ISODate("2019-05-22"),$lt:ISODate("2019-05-23")} }}, {$count: "total"}, {$addFields: {event:"MDBW19", "date":"2019-05-22"}}, {$merge: { into: "regsummary", on: ["event", "date"] }} ]) { "event" : "MDBW19", "date" : "2019-05-19", "total" : 33 } { "event" : "MDBW19", "date" : "2019-05-20", "total" : 15 } { "event" : "MDBW19", "date" : "2019-05-21", "total" : 24 } { "event" : "MDBW19", "date" : "2019-05-22", "total" : 34 }
  • 122.
    #MDBLocal The aggregation framework is the main language for data manipulation in MongoDB (unify languages) It's now possible to update documents using the aggregation framework and existing fields (UPDATE) Aggregation framework output can be used to merge data with a target collection ($merge) Key takeaways