Introduction to mongodb for bioinformatics

3,303 views

Published on

Introduction to mongodb for bioinformatics

Published in: Technology, Health & Medicine

Introduction to mongodb for bioinformatics

  1. 1. A short introduction to MongoDB For Bioinformatics Pierre Lindenbaum PhD UMR915 – Institut du thorax Nantes, France @yokofakun http://plindenbaum.blogspot.com
  2. 2. http://www.mongodb.org/
  3. 3. Document Oriented Database
  4. 4. http://bytesforlunch.wordpress.com/2011/01/26/meet-an-agile-friend-from-the-nosql-ecosystem-mongodb-part-one/
  5. 6. null
  6. 7. true
  7. 8. 42
  8. 9. "chr22"
  9. 10. [ "rs25", null, 42 ]
  10. 11. { "name":"rs25", "chrom":"chr22", "position":1234 }
  11. 12. { "name":"rs25", "chrom":"chr22", "position":1234, "comments":[ "hello", { "lang":"fr", "msg":"Bonjour" } ] }
  12. 13. Pubmed http://plindenbaum.blogspot.com/2010/09/mongodb-and-ncbi-pubmed-inserting.html
  13. 14. article={ _id : 20665232, pmid: 20665232, created: { year: 2010, month: 8, day: 10 }, title: "Charles Darwin's beagle voyage...", issue: "2", volume: "43", pgn: "363-99", journal: { title: "Journal of the history of biology", abbr: "J Hist Biol", issn: "0022-5010" }, doi: "10.1007/s10739-009-9189-9", lang: "eng", authors: [ { firstName: "Paul D", lastName: "Brinkman" } ], mesh: [ "Animals", "Fossils", "History, 19th Century", "Natural History", "Phylogeny", "Vertebrates" ] }; article=db.articles.save(article);
  14. 15. db.articles.ensureIndex({pmid:1}, {unique: true}); db.articles.ensureIndex({created:1}); db.articles.ensureIndex({authors:1}); db.articles.ensureIndex({mesh:1}); db.articles.ensureIndex({journal:1}); Indexes
  15. 16. Retrieving Records > db.articles.find().limit(3).forEach(printjson); { "_id" : 20665232, "pmid" : 20665232, "created" : { "year" : 2010, "month" : 8, "day" : 10 }, "title" : "Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\".", "issue" : "2", "volume" : "43", "pgn" : "363-99", "journal" : { "title" : "Journal of the history of biology", "abbr" : "J Hist Biol", "issn" : "0022-5010" }, (...)
  16. 17. Count > db.articles.find().limit(20).count(); 327 > db.articles.find().limit(20).size(); 20
  17. 18. Get by pmid > db.articles.findOne({pmid:20180452}); { "_id" : 20180452, "pmid" : 20180452, "created" : { "year" : 2010, "month" : 2, "day" : 25 }, "title" : "[Darwin's hidden feeling for emotions of the species]", "issue" : "50-51", "volume" : "106", "pgn" : "3443-6",
  18. 19. Find “Lactose Intolerance[MESH]” > db.articles.find( {mesh:'Lactose Intolerance'} ).forEach(printjson); { "_id" : 17575947, "pmid" : 17575947, "created" : { "year" : 2007, "month" : 6, "day" : 19 }, "title" : "Darwin's illness: a final diagnosis.", "issue" : "1", "volume" : "61", "pgn" : "23-9", "journal" : { "title" : "Notes and records of the Royal Society of London", "abbr" : "Notes Rec R Soc Lond", "issn" : "0035-9149" }, "lang" : "eng", "authors"
  19. 20. FIND 'Evolution' AND 'Religion' AND 'History, 19th Century' >db.articles.find( { mesh:{$all:["Evolution","History, 19th Century","Religion"]}} ).limit(2).forEach(printjson); { "_id" : 20503821, "pmid" : 20503821, "created" : { "year" : 2010, "month" : 5, "day" : 27 }, "title" : "Darwin and the popularization of evolution.", "issue" : "1", "volume" : "64", "pgn" : "5-24", "journal" : { "title" : "Notes and records of the Royal Society of London", "abbr" : "Notes Re...
  20. 21. Search all, only return the title and the pmid, limit 5 > db.articles.find({},{"title":1,"pmid":1}).limit(5).forEach(printjson); { "_id" : 20665232, "pmid" : 20665232, "title" : "Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\"." }, { "_id" : 20626121, "pmid" : 20626121, "title" : "[The biomedical legacy of Charles Darwin]" }, { "_id" : 20503821, "pmid" : 20503821, "title" : "Darwin and the popularization of evolution." }
  21. 22. Using javascript $where: articles starting with 'DARWIN' > db.articles.find( {$where:"this.title.substr(0,6)==\"DARWIN\""},{title:1} ).limit(5).forEach(printjson); { "_id" : 14341734, "title" : "DARWIN AS THE SOURCE OF FREUD'S NEO-LAMARCKIANISM." } { "_id" : 14275525, "title" : "DARWIN'S ILLNESS." } { "_id" : 14248443, "title" : "DARWIN'S HEALTH IN RELATION TO HIS VOYAGE TO SOUTH AMERICA." } { "_id" : 14217140, "title" : "DARWIN'S ILLNESS." }
  22. 23. GROUP operator: the number of articles per journal having mesh='Evolution' and having a number of articles greater than 2 > db.articles.group( { key: { }, cond: { mesh: "Evolution" }, initial: { journal: { }, total: 0 }, reduce: function(object, aggregate){ var count=aggregate.journal[ object.journal.title ]; if(!count){ count=0; } count++; aggregate.journal[ object.journal.title ]=count; aggregate.total++; }, finalize: function(aggregate){ for(j in aggregate.journal){ if(aggregate.journal[ j ]<3){ delete aggregate.journal[ j ]; } } } }) [ { "journal" : { "Comptes rendus biologies" : 5, "Läkartidningen" : 6 (...)
  23. 24. Update(criteria, objNew, upsert, multi ) > db.articles.update( {"journal.title":"Nature"}, {$unset:{title:1,authors:1,created:1,mesh:1}},false,true)
  24. 25. AYE

×