好みの日本酒を呑みたい！〜さけのわデータで探す自分好みの酒〜

> me
$name
[1] "Takashi Kitano"
$twitter
[1] "@kashitan"
$work_in
[1] " "

## Download the flavor-chart records and keep the "flavorCharts" element
charts <-
  "https://muro.sakenowa.com/sakenowa-data/api/flavor-charts" %>%
  jsonlite::read_json(simplifyVector = TRUE) %>%
  purrr::pluck("flavorCharts")
# ↑ ↑ ↑ ↑ ↑ ↑

## Download the brand master data and keep the "brands" element
brands <-
  jsonlite::read_json("https://muro.sakenowa.com/sakenowa-data/api/brands",
                      # Restored argument line: without it the read_json( call
                      # is never closed and the script does not parse. It
                      # mirrors the other read_json() calls in this script.
                      simplifyVector = TRUE) %>%
  purrr::pluck("brands")

## Pairwise cosine distance between brands, computed on every column of
## `charts` except brandId
brands.dist.mat <-
  proxy::dist(
    # brandId becomes the row names so the distance object is labelled by brand
    tibble::column_to_rownames(
      # Drop any existing row names first
      tibble::remove_rownames(charts),
      var = "brandId"
    ),
    method = "cosine"
  )

# Print the top-left 8 x 8 corner of the distance matrix
as.matrix(brands.dist.mat)[1:8, 1:8]

## Reshape the distance matrix into a long table: one row per ordered brand pair
brands.dist.df <-
  as.matrix(brands.dist.mat) %>%
  # Convert to a tibble while keeping the row names
  tibble::as_tibble(rownames = NA) %>%
  # Move the row names into a brandId1 column
  tibble::rownames_to_column(var = "brandId1") %>%
  # Every remaining column becomes a (brandId2, dist) row
  tidyr::pivot_longer(cols = -brandId1,
                      names_to = "brandId2",
                      values_to = "dist") %>%
  # Drop the pairs of a brand with itself
  dplyr::filter(brandId1 != brandId2) %>%
  # Both ID columns are character at this point; convert them to integer
  dplyr::mutate_if(is.character, as.integer)

# List the brands paired with one chosen brand, smallest distance first
brands.dist.df %>%
  # Attach the brand record of the first brand in each pair
  dplyr::inner_join(brands,
                    by = c("brandId1" = "id")) %>%
  # Attach the brand record of the second brand in each pair. Joining `brands`
  # a second time is what produces the suffixed name.x / name.y columns used
  # below; with a single join those columns do not exist.
  dplyr::inner_join(brands,
                    by = c("brandId2" = "id")) %>%
  # Keep the pairs whose first brand is the chosen one
  # NOTE(review): the brand-name literal is a single space, which looks like
  # lost text; confirm the intended brand name
  dplyr::filter(name.x == " ") %>%
  # Ascending distance
  dplyr::arrange(dist) %>%
  dplyr::select(name.y, dist)

# List the brands paired with one chosen brand, largest distance first
brands.dist.df %>%
  # Attach the brand record of the first brand in each pair
  dplyr::inner_join(brands,
                    by = c("brandId1" = "id")) %>%
  # Attach the brand record of the second brand in each pair. The two joins
  # together create the suffixed name.x / name.y columns that the filter and
  # select below depend on; without them those columns are undefined.
  dplyr::inner_join(brands,
                    by = c("brandId2" = "id")) %>%
  # Keep the pairs whose first brand is the chosen one
  # NOTE(review): the brand-name literal is a single space, which looks like
  # lost text; confirm the intended brand name
  dplyr::filter(name.x == " ") %>%
  # Descending distance
  dplyr::arrange(dplyr::desc(dist)) %>%
  dplyr::select(name.y, dist)

n <- 6 # number of clusters to cut the tree into
# Hierarchical clustering (Ward's method, "ward.D2") on the brand distances,
# cut into `n` groups and returned as a brandId / cluster table
brands.cluster <- local({
  membership <- cutree(hclust(brands.dist.mat, method = "ward.D2"), k = n)
  tibble::tibble(
    # cutree() names each element after the clustered item; here that is the
    # brand ID, so parse it back to integer
    brandId = as.integer(names(membership)),
    cluster = membership
  )
})

# Mean of every column whose name starts with "f", per cluster
radars <-
  dplyr::inner_join(brands.cluster, charts, by = "brandId") %>%
  dplyr::group_by(cluster) %>%
  dplyr::summarise_at(dplyr::vars(dplyr::starts_with("f")), mean)
1

# Build one radar chart per cluster and store it in a `fig` list column
radars <-
  radars %>%
  # Keep `cluster` as a regular column and nest everything else into `data`.
  # A bare nest() on an ungrouped table nests `cluster` away as well, which
  # leaves map2() below without a `cluster` column to iterate over.
  tidyr::nest(data = -cluster) %>%
  dplyr::mutate(fig = purrr::map2(data, cluster, function(x, y) {
    plotly::plot_ly(
      type = "scatterpolar", mode = "markers",
      # f1 is repeated at the end so the outline closes on its starting point
      r = c(x$f1, x$f2, x$f3, x$f4, x$f5, x$f6, x$f1),
      # One label per f1..f6 value (first label repeated to close the outline).
      # The original labels were all blank, which puts every point on the same
      # angular category.
      # NOTE(review): labels follow the f1..f6 axis names given in the Sakenowa
      # flavor-chart documentation; confirm the mapping
      theta = c("華やか", "芳醇", "重厚", "穏やか", "ドライ", "軽快", "華やか"),
      fill = 'toself',
      # Pick this cluster's colour from an n-colour "Accent" palette
      fillcolor = RColorBrewer::brewer.pal(n = n, name = "Accent")[y],
      opacity = 0.5
    ) %>%
      plotly::layout(polar = list(angularaxis = list(
        rotation = 90,
        direction = 'counterclockwise')))
  }))

# Classical multidimensional scaling of the brand distances, with the cluster
# assignment and brand record attached to each brand
brands.mds <-
  cmdscale(brands.dist.mat) %>%
  # Name the two coordinate columns
  `colnames<-`(c("x", "y")) %>%
  # Keep the row names through the conversion, then expose them as brandId
  tibble::as_tibble(rownames = NA) %>%
  tibble::rownames_to_column(var = "brandId") %>%
  dplyr::mutate(brandId = as.integer(brandId)) %>%
  # Attach the cluster of each brand
  dplyr::inner_join(brands.cluster, by = "brandId") %>%
  # Attach the brand record, leaving out the third column of `brands`
  dplyr::inner_join(brands[, -3], by = c("brandId" = "id")) %>%
  # Treat the cluster as a factor
  dplyr::mutate(cluster = forcats::as_factor(cluster))
1

## Download the rankings data (the whole response is kept)
rankings <-
  "https://muro.sakenowa.com/sakenowa-data/api/rankings" %>%
  jsonlite::read_json(simplifyVector = TRUE)

# Scatter plot of the MDS coordinates, limited to brands in the overall ranking
dplyr::filter(brands.mds, brandId %in% rankings$overall$brandId) %>%
  plotly::plot_ly(x = ~x, y = ~y) %>%
  # Markers coloured by cluster
  plotly::add_markers(color = ~cluster,
                      text = ~name,
                      colors = RColorBrewer::brewer.pal(n = n, name = "Accent")) %>%
  # Brand name printed above each marker
  plotly::add_text(text = ~name, textposition = "top center") %>%
  plotly::layout(showlegend = FALSE)

## Download the flavor-tag master data and keep the "tags" element
tags <-
  jsonlite::read_json("https://muro.sakenowa.com/sakenowa-data/api/flavor-tags",
                      # Restored argument line: without it the read_json( call
                      # is never closed and the script does not parse. It
                      # mirrors the other read_json() calls in this script.
                      simplifyVector = TRUE) %>%
  purrr::pluck("tags")

## Download the per-brand flavor tags and keep the "flavorTags" element
brand_tags <-
  "https://muro.sakenowa.com/sakenowa-data/api/brand-flavor-tags" %>%
  jsonlite::read_json(simplifyVector = TRUE) %>%
  purrr::pluck("flavorTags")

# Count how many times each tag occurs in each cluster
contingency.table <-
  brand_tags %>%
  # One row per (brand, tag ID)
  tidyr::unnest(cols = tagIds) %>%
  # Attach the cluster of each brand. The join key is spelled out instead of
  # leaving a dangling empty argument.
  # NOTE(review): assumes brand_tags carries a brandId column; confirm
  dplyr::inner_join(
    brands.cluster,
    by = "brandId"
  ) %>%
  # Attach the tag record
  dplyr::inner_join(
    tags,
    by = c("tagIds" = "id")
  ) %>%
  # count() with explicit columns returns an ungrouped table, so no grouping
  # is carried into the later reshape
  dplyr::count(tagIds, tag, cluster)
1

# Spread the counts into a tag x cluster table, with the tag as row names
contingency.table <-
  tibble::column_to_rownames(
    tidyr::pivot_wider(
      contingency.table,
      id_cols = tag,
      names_from = cluster,
      values_from = n,
      # Tag / cluster combinations that never occur count as zero
      values_fill = 0
    ),
    var = "tag"
  )

# Correspondence analysis of the tag x cluster table (no plot drawn here)
res.ca <- FactoMineR::CA(contingency.table, graph = FALSE)

# Stack the row (tag) and column (cluster) coordinates on the first two
# dimensions into one table for plotting
tags.biplot <-
  dplyr::bind_rows(
    tibble::tibble(type = "tag",
                   x = res.ca$row$coord[, 1],
                   y = res.ca$row$coord[, 2],
                   label = rownames(contingency.table)),
    tibble::tibble(type = "cluster",
                   x = res.ca$col$coord[, 1],
                   y = res.ca$col$coord[, 2],
                   label = colnames(contingency.table))
  )

# Biplot drawn with plotly: tags and clusters in two colours, each point labelled
plotly::plot_ly(tags.biplot, x = ~x, y = ~y) %>%
  plotly::add_markers(color = ~type,
                      colors = RColorBrewer::brewer.pal(3, "Set1")[1:2]) %>%
  plotly::add_text(text = ~label, textposition = "top center") %>%
  plotly::layout(showlegend = FALSE)

# The same correspondence-analysis result drawn by factoextra
factoextra::fviz_ca_biplot(res.ca, font.family = "HiraKakuProN-W3")

# Build one word cloud of tag frequencies per cluster, stored in `fig`
wordclouds <-
  brand_tags %>%
  # One row per (brand, tag ID)
  tidyr::unnest(cols = tagIds) %>%
  # Attach the cluster of each brand. The join key is spelled out instead of
  # leaving a dangling empty argument.
  # NOTE(review): assumes brand_tags carries a brandId column; confirm
  dplyr::inner_join(
    brands.cluster,
    by = "brandId"
  ) %>%
  dplyr::inner_join(tags, by = c("tagIds" = "id")) %>%
  # Tag frequency within each cluster, returned ungrouped
  dplyr::count(cluster, tag) %>%
  # Most frequent tags first within each cluster
  dplyr::arrange(cluster, dplyr::desc(n)) %>%
  # One row per cluster, with `data` holding the tag and n columns in that
  # order. Nesting while still grouped by cluster and tag would leave only `n`
  # in `data`, with no word column for wordcloud2().
  tidyr::nest(data = c(tag, n)) %>%
  dplyr::mutate(fig = purrr::map(
    data,
    wordcloud2::wordcloud2, size = 0.8, minSize = 10
  ))

# Show the first radar chart and the first word cloud. The two expressions
# need separate lines; on one line with no separator they do not parse.
radars$fig[[1]]
wordclouds$fig[[1]]

好みの日本酒を呑みたい！〜さけのわデータで探す自分好みの酒〜

好みの日本酒を呑みたい！ 〜さけのわデータで探す自分好みの酒〜

More Related Content

What's hot

More from Takashi Kitano

Recently uploaded