Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜

1,157 views

Published on

第88回R勉強会@東京 LT

Published in: Data & Analytics
  • Be the first to comment

好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜

  1. 1. 🍶
  2. 2. > me $name [1] "Takashi Kitano" $twitter [1] "@kashitan" $work_in [1] " "
  3. 3. 🎉 🆓
  # Slide 4: download the flavor-chart table from the Sakenowa data API.
  # Only the "flavorCharts" element of the parsed JSON response is kept.
  charts <- purrr::pluck(
    jsonlite::read_json(
      "https://muro.sakenowa.com/sakenowa-data/api/flavor-charts",
      simplifyVector = TRUE
    ),
    "flavorCharts"
  )
  # Slide 5: download the brand master table from the Sakenowa data API.
  # Only the "brands" element of the parsed JSON response is kept.
  brands <- purrr::pluck(
    jsonlite::read_json(
      "https://muro.sakenowa.com/sakenowa-data/api/brands",
      simplifyVector = TRUE
    ),
    "brands"
  )
  # Slide 6: pairwise cosine distance between brands, computed on the
  # flavor-chart columns.
  brands.dist.mat <- charts %>%
    # Clear any existing row names before assigning new ones.
    tibble::remove_rownames() %>%
    # Use the brand ID as row name so it labels the distance object.
    tibble::column_to_rownames(var = "brandId") %>%
    proxy::dist(method = "cosine")
  # Slide 7: show the top-left 8 x 8 corner of the distance matrix.
  as.matrix(brands.dist.mat)[1:8, 1:8]
  # Slide 8: reshape the distance matrix into a long data frame with one row
  # per ordered pair of distinct brands: brandId1, brandId2, dist.
  brands.dist.df <- brands.dist.mat %>%
    as.matrix() %>%
    # rownames = NA keeps the matrix row names on the tibble ...
    tibble::as_tibble(rownames = NA) %>%
    # ... so they can be moved into a regular column here.
    tibble::rownames_to_column(var = "brandId1") %>%
    # One row per (brandId1, brandId2) pair.
    tidyr::pivot_longer(
      cols = -brandId1,
      names_to = "brandId2",
      values_to = "dist"
    ) %>%
    # Drop the diagonal, i.e. each brand paired with itself.
    dplyr::filter(brandId1 != brandId2) %>%
    # Both ID columns are character here (former row/column names). Convert
    # them explicitly instead of using the superseded dplyr::mutate_if().
    dplyr::mutate(
      brandId1 = as.integer(brandId1),
      brandId2 = as.integer(brandId2)
    )
  # Slide 9: list the other brands ordered from nearest to farthest
  # (ascending distance) relative to one chosen brand.
  # NOTE(review): the brand name compared against `name.x` is " " in this
  # transcript; presumably the original name was lost in extraction.
  # Replace it with the brand of interest.
  brands.dist.df %>%
    # Attach brand attributes for both sides of the pair; columns that
    # clash between the two joins get the .x / .y suffixes.
    dplyr::inner_join(brands, by = c("brandId1" = "id")) %>%
    dplyr::inner_join(brands, by = c("brandId2" = "id")) %>%
    dplyr::filter(name.x == " ") %>%
    dplyr::arrange(dist) %>%
    dplyr::select(name.y, dist)
  # Slide 10: list the other brands ordered from farthest to nearest
  # (descending distance) relative to one chosen brand.
  # NOTE(review): the brand name compared against `name.x` is " " in this
  # transcript; presumably the original name was lost in extraction.
  # Replace it with the brand of interest.
  brands.dist.df %>%
    # Attach brand attributes for both sides of the pair; columns that
    # clash between the two joins get the .x / .y suffixes.
    dplyr::inner_join(brands, by = c("brandId1" = "id")) %>%
    dplyr::inner_join(brands, by = c("brandId2" = "id")) %>%
    dplyr::filter(name.x == " ") %>%
    # desc() is namespace-qualified like every other call in this script,
    # so the step also works when dplyr is not attached.
    dplyr::arrange(dplyr::desc(dist)) %>%
    dplyr::select(name.y, dist)
  # Slide 11: hierarchical clustering of the brands.
  # Number of clusters to cut the tree into.
  n <- 6

  brands.cluster <- brands.dist.mat %>%
    # Ward linkage on the brand distance object.
    stats::hclust(method = "ward.D2") %>%
    # Cut the dendrogram into n groups.
    stats::cutree(k = n) %>%
    {
      # The names of the vector are the brand IDs; the values are the
      # cluster numbers.
      tibble::tibble(
        brandId = as.integer(names(.)),
        cluster = .
      )
    }
  # Slide 12: mean value of each flavor column per cluster.
  radars <- brands.cluster %>%
    # Attach the flavor-chart values to each clustered brand.
    dplyr::inner_join(charts, by = c("brandId" = "brandId")) %>%
    dplyr::group_by(cluster) %>%
    # Average every column whose name starts with "f". across() replaces
    # the superseded summarise_at(vars(...)) form.
    dplyr::summarise(dplyr::across(dplyr::starts_with("f"), mean))
  # Slide 13: build one plotly radar chart per cluster and store it in the
  # list column `fig`.
  radars <- radars %>%
    # One row per cluster; the remaining columns are nested into `data`.
    dplyr::group_by(cluster) %>%
    tidyr::nest() %>%
    dplyr::mutate(fig = purrr::map2(data, cluster, function(x, y) {
      plotly::plot_ly(
        type = "scatterpolar",
        mode = "markers",
        # f1 is repeated at the end so the first and last points coincide.
        r = c(x$f1, x$f2, x$f3, x$f4, x$f5, x$f6, x$f1),
        # NOTE(review): the axis labels were blank (" ") in the transcript,
        # which collapses all axes into a single category. They are
        # restored here from the Sakenowa flavor-axis names for f1..f6.
        # Confirm against the API documentation.
        theta = c(
          "華やか", "芳醇", "重厚", "穏やか", "ドライ", "軽快", "華やか"
        ),
        fill = "toself",
        # Pick the palette entry matching the cluster number.
        fillcolor = RColorBrewer::brewer.pal(n = n, name = "Accent")[y],
        opacity = 0.5
      ) %>%
        plotly::layout(
          polar = list(
            angularaxis = list(
              rotation = 90,
              direction = "counterclockwise"
            )
          )
        )
    }))
  # Slides 14-19: display the per-cluster radar charts, one per slide, in
  # the order used in the talk (1, 4, 2, 6, 3, 5).
  radars[["fig"]][[1]]
  radars[["fig"]][[4]]
  radars[["fig"]][[2]]
  radars[["fig"]][[6]]
  radars[["fig"]][[3]]
  radars[["fig"]][[5]]
  # Slide 20: project the brands with classical multidimensional scaling
  # (cmdscale) and attach cluster and brand information.
  brands.mds <- brands.dist.mat %>%
    stats::cmdscale() %>%
    # Name the two coordinate columns.
    `colnames<-`(c("x", "y")) %>%
    # Move the row names (brand IDs) into a leading "brandId" column.
    tibble::as_tibble(rownames = "brandId") %>%
    dplyr::mutate(brandId = as.integer(brandId)) %>%
    # Cluster assignment of each brand.
    dplyr::inner_join(brands.cluster, by = c("brandId" = "brandId")) %>%
    # Brand attributes; the third column of `brands` is dropped by position.
    dplyr::inner_join(brands[, -3], by = c("brandId" = "id")) %>%
    # Convert cluster to a factor (used as the color mapping when plotted).
    dplyr::mutate(cluster = forcats::as_factor(cluster))
  # Slide 21: download the ranking data from the Sakenowa data API.
  # The whole parsed response is kept (no element is extracted).
  rankings <- jsonlite::read_json(
    "https://muro.sakenowa.com/sakenowa-data/api/rankings",
    simplifyVector = TRUE
  )
  # Slide 22: scatter plot of the MDS coordinates, restricted to brands
  # that appear in the overall ranking and colored by cluster.
  brands.mds %>%
    dplyr::filter(brandId %in% rankings$overall$brandId) %>%
    plotly::plot_ly(x = ~x, y = ~y) %>%
    plotly::add_markers(
      color = ~cluster,
      text = ~name,
      colors = RColorBrewer::brewer.pal(n = n, name = "Accent")
    ) %>%
    # Print each brand name above its marker.
    plotly::add_text(
      text = ~name,
      textposition = "top center"
    ) %>%
    plotly::layout(showlegend = FALSE)
  # Slide 23: download the flavor-tag master table from the Sakenowa data
  # API. Only the "tags" element of the parsed JSON response is kept.
  tags <- purrr::pluck(
    jsonlite::read_json(
      "https://muro.sakenowa.com/sakenowa-data/api/flavor-tags",
      simplifyVector = TRUE
    ),
    "tags"
  )
  # Slide 24: download the per-brand flavor tags from the Sakenowa data
  # API. Only the "flavorTags" element of the parsed JSON response is kept.
  brand_tags <- purrr::pluck(
    jsonlite::read_json(
      "https://muro.sakenowa.com/sakenowa-data/api/brand-flavor-tags",
      simplifyVector = TRUE
    ),
    "flavorTags"
  )
  # Slide 25: count how often each flavor tag occurs in each cluster.
  contingency.table <- brand_tags %>%
    # One row per (brand, tag ID) instead of a list column of tag IDs.
    tidyr::unnest(cols = tagIds) %>%
    # Cluster assignment of each brand.
    dplyr::inner_join(brands.cluster, by = c("brandId" = "brandId")) %>%
    # Tag attributes looked up by tag ID.
    dplyr::inner_join(tags, by = c("tagIds" = "id")) %>%
    dplyr::group_by(tagIds, tag, cluster) %>%
    dplyr::count()
  # Slide 26: spread the counts into a tag x cluster table, with the tag as
  # row name.
  contingency.table <- contingency.table %>%
    tidyr::pivot_wider(
      id_cols = tag,
      names_from = cluster,
      values_from = n,
      # Tag/cluster combinations that never occur get a count of 0.
      values_fill = 0
    ) %>%
    tibble::column_to_rownames(var = "tag")
  # Slide 27: correspondence analysis of the tag x cluster table.
  res.ca <- FactoMineR::CA(contingency.table, graph = FALSE)

  # Stack the first two coordinate columns of the rows (tags) and of the
  # columns (clusters) into one tibble for plotting.
  tags.biplot <- dplyr::bind_rows(
    tibble::tibble(
      type = "tag",
      x = res.ca$row$coord[, 1],
      y = res.ca$row$coord[, 2],
      label = rownames(contingency.table)
    ),
    tibble::tibble(
      type = "cluster",
      x = res.ca$col$coord[, 1],
      y = res.ca$col$coord[, 2],
      label = colnames(contingency.table)
    )
  )
  # Slide 28: draw the correspondence-analysis biplot in two ways.

  # (a) plotly: tags and clusters as markers in two colors, with labels.
  tags.biplot %>%
    plotly::plot_ly(x = ~x, y = ~y) %>%
    plotly::add_markers(
      color = ~type,
      colors = RColorBrewer::brewer.pal(3, "Set1")[1:2]
    ) %>%
    plotly::add_text(
      text = ~label,
      textposition = "top center"
    ) %>%
    plotly::layout(showlegend = FALSE)

  # (b) factoextra's biplot of the same CA result, drawn with the
  # "HiraKakuProN-W3" font family.
  factoextra::fviz_ca_biplot(
    res.ca,
    font.family = "HiraKakuProN-W3"
  )
  # Slide 29: build one word cloud of flavor tags per cluster and store it
  # in the list column `fig`.
  wordclouds <- brand_tags %>%
    # One row per (brand, tag ID) instead of a list column of tag IDs.
    tidyr::unnest(cols = tagIds) %>%
    dplyr::inner_join(brands.cluster, by = c("brandId" = "brandId")) %>%
    dplyr::inner_join(tags, by = c("tagIds" = "id")) %>%
    # Tag frequency within each cluster.
    dplyr::group_by(cluster, tag) %>%
    dplyr::count() %>%
    dplyr::group_by(cluster) %>%
    # Most frequent tags first within each cluster. desc() is
    # namespace-qualified like every other call in this script, so the step
    # also works when dplyr is not attached.
    dplyr::arrange(cluster, dplyr::desc(n)) %>%
    # One row per cluster; `data` holds that cluster's tag / n table.
    tidyr::nest() %>%
    dplyr::mutate(fig = purrr::map(
      data,
      wordcloud2::wordcloud2,
      size = 0.8,
      minSize = 10
    ))
  # Slides 30-35: for each cluster, display its radar chart followed by its
  # word cloud, in the order used in the talk (1, 4, 2, 6, 3, 5).
  radars[["fig"]][[1]]
  wordclouds[["fig"]][[1]]

  radars[["fig"]][[4]]
  wordclouds[["fig"]][[4]]

  radars[["fig"]][[2]]
  wordclouds[["fig"]][[2]]

  radars[["fig"]][[6]]
  wordclouds[["fig"]][[6]]

  radars[["fig"]][[3]]
  wordclouds[["fig"]][[3]]

  radars[["fig"]][[5]]
  wordclouds[["fig"]][[5]]

×