Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

Introduction to mongodb for bioinformatics

3,688 views

Published on

Introduction to mongodb for bioinformatics

Published in: Technology, Health & Medicine

Introduction to mongodb for bioinformatics

  1. 1. A short introduction to MongoDB For Bioinformatics Pierre Lindenbaum PhD UMR915 – Institut du thorax Nantes, France @yokofakun http://plindenbaum.blogspot.com
  2. 2. http://www.mongodb.org/
  3. 3. Document Oriented Database
  4. 4. http://bytesforlunch.wordpress.com/2011/01/26/meet-an-agile-friend-from-the-nosql-ecosystem-mongodb-part-one/
  5. 6. null
  6. 7. true
  7. 8. 42
  8. 9. "chr22"
  9. 10. [ "rs25", null, 42 ]
  10. 11. { "name":"rs25", "chrom":"chr22", "position":1234 }
  11. 12. { "name":"rs25", "chrom":"chr22", "position":1234, "comments":[ "hello", { "lang":"fr", "msg":"Bonjour" } ] }
  12. 13. Pubmed http://plindenbaum.blogspot.com/2010/09/mongodb-and-ncbi-pubmed-inserting.html
  13. 14. article={ _id : 20665232, pmid: 20665232, created: { year: 2010, month: 8, day: 10 }, title: "Charles Darwin's beagle voyage...", issue: "2", volume: "43", pgn: "363-99", journal: { title: "Journal of the history of biology", abbr: "J Hist Biol", issn: "0022-5010" }, doi: "10.1007/s10739-009-9189-9", lang: "eng", authors: [ { firstName: "Paul D", lastName: "Brinkman" } ], mesh: [ "Animals", "Fossils", "History, 19th Century", "Natural History", "Phylogeny", "Vertebrates" ] }; article=db.articles.save(article);
  14. 15. db.articles.ensureIndex({pmid:1}, {unique: true}); db.articles.ensureIndex({created:1}); db.articles.ensureIndex({authors:1}); db.articles.ensureIndex({mesh:1}); db.articles.ensureIndex({journal:1}); Indexes
  15. 16. Retrieving Records > db.articles.find().limit(3).forEach(printjson); { "_id" : 20665232, "pmid" : 20665232, "created" : { "year" : 2010, "month" : 8, "day" : 10 }, "title" : "Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\".", "issue" : "2", "volume" : "43", "pgn" : "363-99", "journal" : { "title" : "Journal of the history of biology", "abbr" : "J Hist Biol", "issn" : "0022-5010" }, (...)
  16. 17. Count > db.articles.find().limit(20).count(); 327 > db.articles.find().limit(20).size(); 20
  17. 18. Get by pmid > db.articles.findOne({pmid:20180452}); { "_id" : 20180452, "pmid" : 20180452, "created" : { "year" : 2010, "month" : 2, "day" : 25 }, "title" : "[Darwin's hidden feeling for emotions of the species]", "issue" : "50-51", "volume" : "106", "pgn" : "3443-6",
  18. 19. Find “Lactose Intolerance[MESH]” > db.articles.find( {mesh:'Lactose Intolerance'} ).forEach(printjson); { "_id" : 17575947, "pmid" : 17575947, "created" : { "year" : 2007, "month" : 6, "day" : 19 }, "title" : "Darwin's illness: a final diagnosis.", "issue" : "1", "volume" : "61", "pgn" : "23-9", "journal" : { "title" : "Notes and records of the Royal Society of London", "abbr" : "Notes Rec R Soc Lond", "issn" : "0035-9149" }, "lang" : "eng", "authors"
  19. 20. FIND 'Evolution' AND 'Religion' AND 'History, 19th Century' >db.articles.find( { mesh:{$all:["Evolution","History, 19th Century","Religion"]}} ).limit(2).forEach(printjson); { "_id" : 20503821, "pmid" : 20503821, "created" : { "year" : 2010, "month" : 5, "day" : 27 }, "title" : "Darwin and the popularization of evolution.", "issue" : "1", "volume" : "64", "pgn" : "5-24", "journal" : { "title" : "Notes and records of the Royal Society of London", "abbr" : "Notes Re...
  20. 21. Search all, only return the title and the pmid, limit 5 > db.articles.find({},{"title":1,"pmid":1}).limit(5).forEach(printjson); { "_id" : 20665232, "pmid" : 20665232, "title" : "Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\"." }, { "_id" : 20626121, "pmid" : 20626121, "title" : "[The biomedical legacy of Charles Darwin]" }, { "_id" : 20503821, "pmid" : 20503821, "title" : "Darwin and the popularization of evolution." }
  21. 22. Using javascript $where: articles starting with 'DARWIN' > db.articles.find( {$where:"this.title.substr(0,6)==\"DARWIN\""},{title:1} ).limit(5).forEach(printjson); { "_id" : 14341734, "title" : "DARWIN AS THE SOURCE OF FREUD'S NEO-LAMARCKIANISM." } { "_id" : 14275525, "title" : "DARWIN'S ILLNESS." } { "_id" : 14248443, "title" : "DARWIN'S HEALTH IN RELATION TO HIS VOYAGE TO SOUTH AMERICA." } { "_id" : 14217140, "title" : "DARWIN'S ILLNESS." }
  22. 23. GROUP operator: the number of articles per journal having mesh='Evolution' and having a number of articles greater than 2 > db.articles.group( { key: { }, cond: { mesh: "Evolution" }, initial: { journal: { }, total: 0 }, reduce: function(object, aggregate){ var count=aggregate.journal[ object.journal.title ]; if(!count){ count=0; } count++; aggregate.journal[ object.journal.title ]=count; aggregate.total++; }, finalize: function(aggregate){ for(j in aggregate.journal){ if(aggregate.journal[ j ]<3){ delete aggregate.journal[ j ]; } } } }) [ { "journal" : { "Comptes rendus biologies" : 5, "Läkartidningen" : 6 (...)
  23. 24. Update(criteria, objNew, upsert, multi ) > db.articles.update( {"journal.title":"Nature"}, {$unset:{title:1,authors:1,created:1,mesh:1}},false,true)
  24. 25. AYE

×