bioinfolec_10th_20071026

1. 1. 10 20 30 40 0.74 0.76 1.34 1.75 2.01 2.62 0.87 0.69 0.87 0.60 1.83 1.90 1.73 1.83 0.96 0.93 x2 x2 x x $|dx_1| + |dx_2|$ $\sqrt{dx_1^2 + dx_2^2}$ dx2 dx2 y y dx1 dx1 x1 x1 (A)
2. 2. $r = \dfrac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_{i=1}^{n} (x_i - \bar{x})^2}\,\sqrt{\sum_{i=1}^{n} (y_i - \bar{y})^2}}$ y y y x x x r≈0 r≈1 r ≈ −1
3. 3. 40454_at 753_at 33355_at 40113_at 717_at 34800_at 41275_at 1044_s_at 36452_at 37981_at 37343_at 182_at 39424_at 1988_at 1389_at 35164_at 39781_at 2059_s_at 33238_at 1134_at 266_s_at 39318_at 37579_at 41139_at 1498_at 36937_s_at 41827_f_at 39929_at 38514_at 1081_at 36203_at 32378_at 1854_at 39829_at 37475_at 38063_at 40235_at 38521_at 1007_s_at 36643_at 32063_at 32529_at 1520_s_at 39402_at 39614_at 38285_at 34897_at 35260_at 37493_at 38340_at 32872_at 37625_at 36638_at 38994_at 1914_at 39003_at 32184_at 38717_at 37471_at 40396_at 1140_at 31901_at 37033_s_at 897_at 38413_at 33412_at 37544_at 34308_at 33777_at 41470_at 37809_at 37558_at 36873_at 41401_at 1065_at 34583_at 33283_at 40365_at 40782_at 33352_at 37099_at > ALLsubset <- read.table("ALLsubset.txt", header=T) > ALLhm <- apply(ALLsubset, c(1,2), as.numeric) > library("RColorBrewer") > hmcol <- colorRampPalette(brewer.pal(10,"RdBu"))(256) > heatmap(ALLhm, col=hmcol, hclust = function(x) hclust(x, "ward"), distfun = function(x) as.dist(1-cor(t(x))) )
4. 4. 40454_at 753_at 33355_at 40113_at 717_at 34800_at 41275_at 1044_s_at 36452_at 37981_at 37343_at 182_at 39424_at 1988_at 1389_at 35164_at 39781_at 2059_s_at 33238_at 1134_at 266_s_at 39318_at 37579_at 41139_at 1498_at 36937_s_at 41827_f_at 39929_at 38514_at 1081_at 36203_at 32378_at 1854_at 39829_at 37475_at 38063_at 40235_at 38521_at 1007_s_at 36643_at 32063_at 32529_at 1520_s_at 39402_at 39614_at 38285_at 34897_at 35260_at 37493_at 38340_at 32872_at 37625_at 36638_at 38994_at 1914_at 39003_at 32184_at 38717_at 37471_at 40396_at 1140_at 31901_at 37033_s_at 897_at 38413_at 33412_at 37544_at 34308_at 33777_at 41470_at 37809_at 37558_at 36873_at 41401_at 1065_at 34583_at 33283_at 40365_at 40782_at 33352_at 37099_at
5. 5. 33412_at 36638_at 1065_at 34583_at 32184_at 38994_at 41470_at 39003_at 37544_at 38413_at 897_at 40782_at 33283_at 31901_at 40365_at 40396_at 37033_s_at 1988_at 41401_at 37099_at 38717_at 33352_at 34308_at 1140_at 33777_at 37471_at 37558_at 36873_at 1914_at 37809_at 1134_at 34897_at 38340_at 35260_at 1498_at 40235_at 38063_at 38521_at 37475_at 1389_at 36937_s_at 37579_at 41827_f_at 32378_at 36452_at 1081_at 36203_at 39929_at 41139_at 1520_s_at 39318_at 38514_at 2059_s_at 33238_at 36643_at 32529_at 39424_at 1007_s_at 41275_at 39402_at 37981_at 40113_at 37343_at 266_s_at 35164_at 39781_at 1854_at 1044_s_at 182_at 37493_at 38285_at 39829_at 32063_at 37625_at 32872_at 33355_at 40454_at 717_at 39614_at 34800_at 753_at E2A.PBX1.36001 E2A.PBX1.24019 E2A.PBX1.08018 E2A.PBX1.28003 E2A.PBX1.LAL5 ALL1.AF4.26008 ALL1.AF4.04006 ALL1.AF4.31007 ALL1.AF4.16004 ALL1.AF4.15004 ALL1.AF4.28032 ALL1.AF4.63001 ALL1.AF4.28028 ALL1.AF4.19005 ALL1.AF4.24005
6. 6. > ALLhm.d<-dist(1-cor(t(ALLhm))) > ALLhm.hc<-hclust(ALLhm.d, "ward") > ALLhm.dend <- as.dendrogram(ALLhm.hc) > plot(ALLhm.dend) > ALLhm.cl<-cutree(ALLhm.hc,h=100) > ALLhm.cl 1007_s_at 1044_s_at 1065_at 1081_at 1134_at 1140_at 1389_at 1 1 2 1 1 2 1 1498_at 1520_s_at 182_at 1854_at 1914_at 1988_at 2059_s_at 1 1 1 1 2 1 1 266_s_at 31901_at 32063_at 32184_at 32378_at 32529_at 32872_at 1 2 1 2 1 1 1 33238_at 33283_at 33352_at 33355_at 33412_at 33777_at 34308_at 1 2 2 1 2 2 2 34583_at 34800_at 34897_at 35164_at 35260_at 36203_at 36452_at 2 1 1 1 1 1 1 36638_at 36643_at 36873_at 36937_s_at 37033_s_at 37099_at 37343_at 2 1 2 1 2 2 1 37471_at 37475_at 37493_at 37544_at 37558_at 37579_at 37625_at 2 1 1 2 2 1 1 37809_at 37981_at 38063_at 38285_at 38340_at 38413_at 38514_at 2 1 1 1 1 2 1 38521_at 38717_at 38994_at 39003_at 39318_at 39402_at 39424_at 1 2 2 2 1 1 1 39614_at 39781_at 39829_at 39929_at 40113_at 40235_at 40365_at 1 1 1 1 1 1 2 40396_at 40454_at 40782_at 41139_at 41275_at 41401_at 41470_at 2 1 2 1 1 2 2 41827_f_at 717_at 753_at 897_at 1 1 1 2
7. 7. 0 100 200 300 400 500 31901_at 39003_at 1140_at 40396_at 33412_at 37544_at 38413_at 37033_s_at 897_at 33777_at 34308_at 38994_at 1914_at 36638_at 1065_at 34583_at 37558_at 36873_at 41470_at 38717_at 32184_at 37471_at 37809_at 41401_at 33352_at 37099_at 40365_at 33283_at 40782_at 40113_at 717_at 40454_at 753_at 33355_at 1044_s_at 41275_at 39402_at 1520_s_at 39614_at 32063_at 32872_at 39781_at 39424_at 37625_at 38340_at 1389_at 33238_at 1134_at 2059_s_at 34800_at 182_at 37343_at 1988_at 35164_at 266_s_at 38514_at 1081_at 36203_at 1498_at 36937_s_at 39929_at 41827_f_at 36452_at 37981_at 39318_at 37579_at 41139_at 35260_at 37493_at 34897_at 38285_at 1007_s_at 36643_at 38521_at 38063_at 40235_at 39829_at 1854_at 37475_at 32378_at 32529_at 897_at 182_at 717_at 753_at 1065_at 1140_at 1914_at 1854_at 1081_at 1498_at 1134_at 1389_at 1988_at 37099_at 33352_at 40782_at 40365_at 33283_at 34583_at 41401_at 36873_at 37558_at 37809_at 41470_at 33777_at 34308_at 37544_at 33412_at 38413_at 31901_at 40396_at 37471_at 38717_at 32184_at 39003_at 38994_at 36638_at 37625_at 32872_at 38340_at 37493_at 35260_at 34897_at 38285_at 39614_at 39402_at 32529_at 32063_at 36643_at 38521_at 40235_at 38063_at 37475_at 39829_at 32378_at 36203_at 38514_at 39929_at 41139_at 37579_at 39318_at 266_s_at 33238_at 39781_at 35164_at 39424_at 37343_at 37981_at 36452_at 41275_at 34800_at 40113_at 33355_at 40454_at 1520_s_at 1007_s_at 2059_s_at 1044_s_at 41827_f_at 37033_s_at 36937_s_at
8. 8. 36638_at 3 34583_at 1 33238_at 1 266_s_at 1 1498_at 1 1007_s_at 3 > ALLhm.cl.h5 36643_at 1 34800_at 3 33283_at 3 31901_at 2 1520_s_at 2 1044_s_at 1 0 100 200 300 400 500 31901_at 39003_at 1140_at 40396_at 33412_at 37544_at 38413_at 37033_s_at 1 34897_at 3 33352_at 2 32063_at 1 182_at 3 1065_at 3 897_at 33777_at 34308_at 38994_at >cutree(ALLhm.hc,h=5) 1914_at 36638_at 1065_at 34583_at 37558_at 36873_at 41470_at 38717_at 32184_at 37471_at 37809_at 41401_at 1 35164_at 2 33355_at 3 32184_at 1 1854_at 1 1081_at 1 33352_at 37099_at 40365_at > ALLhm.cl.h5<-cutree(ALLhm.hc,h=5) 33283_at 40782_at 40113_at 717_at 40454_at 753_at 33355_at 1044_s_at 41275_at 39402_at 1520_s_at 39614_at 32063_at 36873_at 36937_s_at 37033_s_at 1 35260_at 3 33412_at 1 32378_at 3 1914_at 1 1134_at 3 32872_at 39781_at 39424_at 37625_at 38340_at 1389_at 33238_at 1134_at 2059_s_at 34800_at 182_at 37343_at 1988_at 35164_at 266_s_at 38514_at 37099_at 1 36203_at 3 33777_at 1 32529_at 1 1988_at 3 1140_at 3 1081_at 36203_at 1498_at 36937_s_at 39929_at 41827_f_at 36452_at 37981_at 39318_at 37579_at 41139_at 35260_at 37493_at 34897_at 38285_at 1007_s_at 37343_at 1 36452_at 3 34308_at 2 32872_at 1 2059_s_at 1 1389_at 1 36643_at 38521_at 38063_at 40235_at 39829_at 1854_at 37475_at 32378_at 32529_at
9. 9. > ALLhm.cl[ALLhm.cl==2] 1065_at 1140_at 1914_at 31901_at 32184_at 33283_at 33352_at 2 2 2 2 2 2 2 33412_at 33777_at 34308_at 34583_at 36638_at 36873_at 37033_s_at 2 2 2 2 2 2 2 37099_at 37471_at 37544_at 37558_at 37809_at 38413_at 38717_at 2 2 2 2 2 2 2 38994_at 39003_at 40365_at 40396_at 40782_at 41401_at 41470_at 2 2 2 2 2 2 2 897_at 2 > names(ALLhm.cl[ALLhm.cl==2]) [1] "1065_at" "1140_at" "1914_at" "31901_at" "32184_at" "33283_at" [7] "33352_at" "33412_at" "33777_at" "34308_at" "34583_at" "36638_at" [13] "36873_at" "37033_s_at" "37099_at" "37471_at" "37544_at" "37558_at" [19] "37809_at" "38413_at" "38717_at" "38994_at" "39003_at" "40365_at" [25] "40396_at" "40782_at" "41401_at" "41470_at" "897_at" > affyids <- names(ALLhm.cl[ALLhm.cl==2]) > affyids [1] "1065_at" "1140_at" "1914_at" "31901_at" "32184_at" "33283_at" [7] "33352_at" "33412_at" "33777_at" "34308_at" "34583_at" "36638_at" [13] "36873_at" "37033_s_at" "37099_at" "37471_at" "37544_at" "37558_at" [19] "37809_at" "38413_at" "38717_at" "38994_at" "39003_at" "40365_at" [25] "40396_at" "40782_at" "41401_at" "41470_at" "897_at"
10. 10. > library("hgu95av2") > library("genefilter") > ls("package:hgu95av2") [1] "hgu95av2" "hgu95av2ACCNUM" [3] "hgu95av2CHR" "hgu95av2CHRLENGTHS" [5] "hgu95av2CHRLOC" "hgu95av2ENTREZID" [7] "hgu95av2ENZYME" "hgu95av2ENZYME2PROBE" [9] "hgu95av2GENENAME" "hgu95av2GO" [11] "hgu95av2GO2ALLPROBES" "hgu95av2GO2PROBE" [13] "hgu95av2LOCUSID" "hgu95av2MAP" [15] "hgu95av2MAPCOUNTS" "hgu95av2OMIM" [17] "hgu95av2ORGANISM" "hgu95av2PATH" [19] "hgu95av2PATH2PROBE" "hgu95av2PFAM" [21] "hgu95av2PMID" "hgu95av2PMID2PROBE" [23] "hgu95av2PROSITE" "hgu95av2QC" [25] "hgu95av2REFSEQ" "hgu95av2SUMFUNC_DEPRECATED" [27] "hgu95av2SYMBOL" "hgu95av2UNIGENE"
11. 11. > mget(affyids,env=hgu95av2GENENAME) $`1065_at` [1] "fms-related tyrosine kinase 3" $`1140_at` [1] "integrin, alpha E (antigen CD103, human mucosal lymphocyte antigen 1; alpha polypeptide)" $`1914_at` [1] "cyclin A1" ... > mget(affyids,env=hgu95av2SYMBOL) $`1065_at` [1] "FLT3" $`1140_at` [1] "ITGAE" $`1914_at` [1] "CCNA1" $`31901_at` [1] "KCNAB2"
12. 12. http://www.yeastgenome.org/help/gotutorial.html
13. 13. http://biology.plosjournals.org/perlserv/?request=get-document&doi=10.1371/journal.pbio.0000045
14. 14. > library("GO") > library("annaffy") > aafGO(affyids, "hgu95av2") An object of class "aafList" [[1]] An object of class "aafGO" [[1]][[1]] An object of class "aafGOItem" @id "GO:0006468" @name "protein amino acid phosphorylation" @type "Biological Process" @evid "IEA" [[1]][[2]] An object of class "aafGOItem" @id "GO:0007169" @name "transmembrane receptor protein tyrosine kinase signaling pathway" @type "Biological Process" @evid "TAS" [[1]][[3]] An object of class "aafGOItem" @id "GO:0008284" @name "positive regulation of cell proliferation" @type "Biological Process" @evid "TAS"
15. 15. > aafGO(affyids, "hgu95av2")[[1]] An object of class "aafGO" [[1]] An object of class "aafGOItem" @id "GO:0006468" @name "protein amino acid phosphorylation" @type "Biological Process" @evid "IEA" ... > aafGO(affyids, "hgu95av2")[[2]] An object of class "aafGO" [[1]] An object of class "aafGOItem" @id "GO:0007155" @name "cell adhesion" @type "Biological Process" @evid "IEA" ...
16. 16. > aafGO(affyids, "hgu95av2")[[1]][[1]] An object of class "aafGOItem" @id "GO:0006468" @name "protein amino acid phosphorylation" @type "Biological Process" @evid "IEA" > getURL(aafGO(affyids, "hgu95av2")[[1]][[1]]) [1] "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&query=GO:0006468"
17. 17. curl "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&retmode=xml&id=2322" <eSummaryResult> <DocSum> <Id>2322</Id> <Item Name="Name" Type="String">FLT3</Item> <Item Name="Description" Type="String">fms- ... </DocSum> </eSummaryResult>
18. 18. • element( ) • element( ) • attribute( ) 1 • element attribute • element, attribute ) DTD <shop> shop <item name="Apple"> <price>150</price> <piece>1</piece> </item> item item <item name="Orange" /> </shop> name="Orange" name="Apple" price piece 150 1
19. 19. shop item item name="Orange" price name="Apple" piece 150 1
20. 20. > mget(affyids,env=hgu95av2ENTREZID) $`1065_at` [1] "2322" $`1140_at` [1] "3682" $`1914_at` [1] "8900" ... > options(repos="http://cran.md.tsukuba.ac.jp") > install.packages("XML") URL 'http://cran.md.tsukuba.ac.jp/bin/macosx/universal/contrib/2.6/XML_1.93-2.tgz' Content type 'application/x-gzip' length 1156690 bytes (1.1 Mb) URL ================================================== downloaded 1.1 Mb The downloaded packages are in /tmp/RtmpSEwKl9/downloaded_packages > library("XML")
21. 21. > ezURL<-"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&retmode=xml&id=2322" > doc<-xmlTreeParse(ezURL, isURL=TRUE, handlers=NULL, asTree=TRUE, useInternalNodes=TRUE) > chrinfo<-getNodeSet(doc, '/eSummaryResult/DocSum/Item[@Name="GenomicInfo"]/Item/Item') > sapply(chrinfo, xmlAttrs)[1,] [1] "ChrLoc" "ChrAccVer" "ChrStart" "ChrStop" > sapply(chrinfo, xmlValue) [1] "13" "NC_000013.9" "27572704" "27475752"
22. 22. > mget(affyids,env=hgu95av2PATH) $`1065_at` [1] "04060" "04640" "05221" $`1140_at` [1] "04810" $`1914_at` [1] "04110" "05221" > sort(unlist(mget(affyids,env=hgu95av2PATH))) 40782_at1 40782_at2 37033_s_at1 33777_at 37033_s_at2 40782_at3 40782_at4 "00361" "00363" "00480" "00590" "00590" "00624" "00626" 40782_at5 40782_at6 40782_at7 33283_at1 40365_at 40396_at1 1065_at1 "00632" "00642" "00903" "04010" "04020" "04020" "04060" 34583_at1 40396_at2 1914_at1 38994_at1 1065_at2 34583_at2 33283_at2 "04060" "04080" "04110" "04630" "04640" "04640" "04740" 1140_at 38994_at2 38994_at3 37033_s_at3 1065_at3 1914_at2 34583_at3 "04810" "04910" "04930" "05030" "05221" "05221" "05221" > source("http://bioconductor.org/biocLite.R") > biocLite("KEGGSOAP") > library("KEGGSOAP") SSOAP RCurl ...
23. 23. http://www.genome.jp/kegg/pathway/hsa/hsa04060.html
24. 24. > get.genes.by.pathway("path:hsa04060") [1] "hsa:10344" "hsa:10563" "hsa:10663" "hsa:10673" "hsa:10803" "hsa:10850" [7] "hsa:10913" "hsa:11009" "hsa:115650" "hsa:116379" "hsa:1230" "hsa:1231" > get.motifs.by.gene("hsa:10663", "pfam") [[1]] $motif_id [1] "pf:7tm_1" $definition [1] "7 transmembrane receptor (rhodopsin family)" $genes_id [1] "hsa:10663" ... [[2]] $motif_id [1] "pf:Serpentine_recp" $definition [1] "Caenorhabditis serpentine receptor-like protein" ...