Map/Confused? A practical approach to Map/Reduce with MongoDB






































{
"_id" : ObjectId("4fb9fb91d066d657de8d6f36"),
"text" : "MongoDB uses Map/Reduce #epic #win",
…
"user" : {
"friends_count" : 73,
…
"followers_count" : 102,
"id" : 53507833,
},
…
}







 mongod --rest --shardsvr --port 27017 --dbpath /tmp/shard1/ --smallfiles
 mongod --rest --shardsvr --port 27017 --dbpath /tmp/shard1/ --smallfiles
 mongod --configsvr --port 10000 --dbpath /tmp/config/ --smallfiles
 mongos --port 22222 --configdb localhost:10000


1. db.tweets.mapReduce()
2. db.tweets.group()
3. db.tweets.aggregate()
4. MongoDB-Hadoop Adapter
5. db.tweets.find()

/**
 * Invokes the given callback once and reports its result together with the
 * elapsed wall-clock time.
 *
 * @param {Function} c - Callback to run; it is invoked without arguments.
 * @returns {{results: *, duration: number}} The callback's return value and
 *   the elapsed time in milliseconds.
 */
var measure = function(c) {
  var startedAt = Date.now();
  var outcome = c.apply();
  var elapsed = Date.now() - startedAt;
  return { results: outcome, duration: elapsed };
};

function() {
if (this.user != null) {
emit("user",
{userName: this.user.name,
followers: this.user.followers_count});
}
}

function(key, values) {
var result = null;

values.forEach( function(value) {
if (result == null ||
result.followers < value.followers) {
result = value;
}
})
return result;
}

// Single-group aggregation (empty key): walk every tweet and keep the name and
// follower count of the user with the most followers in the accumulator.
db.tweets.group({
  key: {},
  initial: { name: '', followers_count: 0 },
  reduce: function(obj, prev) {
    var author = obj.user;
    // Tweets without a user cannot beat the current maximum.
    if (author == null) {
      return;
    }
    if (prev.followers_count < author.followers_count) {
      prev.name = author.name;
      prev.followers_count = author.followers_count;
    }
  }
})

// Aggregation pipeline: highest follower count per user name, then only the
// top entry, reshaped into a flat { user_name, followers_count } document.
db.tweets.aggregate([
  // One bucket per user name, remembering the largest follower count seen.
  {
    $group: {
      _id: { user_name: "$user.name" },
      followers_count: { $max: "$user.followers_count" }
    }
  },
  // Largest follower count first ...
  { $sort: { "followers_count": -1 } },
  // ... and keep just that one bucket.
  { $limit: 1 },
  // Drop the compound _id and expose its user_name at the top level.
  {
    $project: {
      _id: 0,
      user_name: "$_id.user_name",
      followers_count: "$followers_count"
    }
  }
])

#!/usr/bin/env python
# encoding: utf-8

import sys
sys.path.append(".")
from pymongo_hadoop import BSONMapper

def mapper(documents):
    """Yield one ``{'_id', 'followers'}`` record per tweet that carries a user.

    Args:
        documents: iterable of tweet documents (dict-like). A document may
            hold a ``'user'`` sub-document with ``'name'`` and
            ``'followers_count'`` entries.

    Yields:
        dict: ``'_id'`` is the user's name encoded as UTF-8 and
        ``'followers'`` is that user's ``followers_count``.
    """
    for doc in documents:
        # .get() skips tweets that have no 'user' key at all instead of
        # aborting the whole map task with a KeyError.
        user = doc.get('user')
        if user is not None:
            yield {'_id': user['name'].encode('utf-8'),
                   'followers': user['followers_count']}

# Hand the mapper generator function to pymongo_hadoop's BSONMapper
# (presumably this drives the streaming read/emit loop -- confirm against the library).
BSONMapper(mapper)
# Python 2 print statement: report completion on stderr once BSONMapper returns.
print >> sys.stderr, "Done Mapping!"

#!/usr/bin/env python
# encoding: utf-8

import sys
sys.path.append('.')
from pymongo_hadoop import BSONReducer

def reducer(key, values):
    """Collapse all mapped records for one key into its largest follower count.

    Args:
        key: the key the records were grouped under (a text string; it is
            UTF-8 encoded for the log line and for the result ``'_id'``).
        values: iterable of records, each with a ``'followers'`` entry.

    Returns:
        dict: ``{'_id': <UTF-8 encoded key>, 'count': <largest followers>}``;
        ``'count'`` is 0 when ``values`` is empty or holds no positive value.
    """
    # sys.stderr.write produces the same line as the Python 2-only
    # ``print >> sys.stderr`` statement but also runs under Python 3.
    sys.stderr.write("Processing key %s\n" % key.encode('utf-8'))
    largest = 0
    for record in values:
        followers = record['followers']
        if largest < followers:
            largest = followers
    return {"_id": key.encode('utf-8'), "count": largest}
# Hand the reducer function to pymongo_hadoop's BSONReducer
# (presumably this drives the streaming group/emit loop -- confirm against the library).
BSONReducer(reducer)
# Python 2 print statement: report completion on stderr once BSONReducer returns.
print >> sys.stderr, "Done Reducing!"

# Run the MongoDB-Hadoop streaming job: read twitter.tweets, pipe the documents
# through mapper.py / reducer.py and write the result to twitter.top_user.
# -files takes a comma-separated list without spaces; a space would split it
# into two separate arguments.
hadoop jar /usr/lib/hadoop/lib/mongo-hadoop-streaming-assembly-1.1.0-SNAPSHOT.jar \
  -files mapper.py,reducer.py \
  -inputURI mongodb://localhost:27017/twitter.tweets \
  -outputURI mongodb://localhost:27017/twitter.top_user \
  -mapper mapper.py \
  -reducer reducer.py

// Plain query alternative: order all tweets by the embedded user's follower
// count, descending, and return only the first document.
db.tweets.find().sort( {"user.followers_count": -1} ).limit(1)

db.tweets.mapReduce()

db.tweets.group()

db.tweets.aggregate()

MongoDB-Hadoop Adapter

db.tweets.find()

db.tweets.mapReduce()

db.tweets.group()

db.tweets.aggregate()

MongoDB-Hadoop Adapter

db.tweets.find()













Map/Confused? A practical approach to Map/Reduce with MongoDB

More Related Content

What's hot

Viewers also liked

Similar to Map/Confused? A practical approach to Map/Reduce with MongoDB

More from Uwe Printz

Recently uploaded

Map/Confused? A practical approach to Map/Reduce with MongoDB