gt_df |>
pivot_longer(cols = COVID:M1,
names_to = "Keyword",
values_to = "Pop") |>
mutate(Keyword = if_else(Keyword == "M1", "「M-1グランプリ」",
"「新型コロナウイルス感染症」")) |>
ggplot() +
geom_line(aes(x = Date, y = Pop, color = Keyword),
linewidth = 1) +
labs(x = "日", y = "人気度 (Google Trend)", color = "キーワード") +
theme_bw() +
theme(legend.position = "bottom")
第13回講義資料
まとめ
スライド
セットアップ
order_score <- df |>
group_by(Order10) |>
summarise(Final = mean(Final) * 100,
N = n())
order_score2 <- df |>
drop_na(Order3) |>
select(No, Year, Order3, Score3) |>
group_by(Year) |>
mutate(Champ = if_else(Score3 == max(Score3), 1, 0)) |>
group_by(Order3) |>
summarise(Champ = sum(Champ)) |>
ungroup() |>
mutate(Prop = Champ / sum(Champ) * 100)
order_score |>
ggplot() +
geom_bar(aes(x = Order10, y = Final), stat = "identity") +
geom_label(aes(x = Order10, y = Final,
label = sprintf("%.0f%%", Final)),
family = "HiraginoSans-W3") +
scale_x_continuous(breaks = 1:10, labels = paste0(1:10, "番目")) +
labs(x = "出場順番", y = "ファイナル・ステージへ進出した\nコンビの割合 (%)") +
theme_bw(base_size = 12) +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())
df |>
select(ファイナルステージへの進出有無 = Final,
出場順番 = Order10,
コンビ結成からの経過年数 = Since,
決勝舞台の回数 = No_Finals,
敗者復活ダミー = Zombie) |>
descr(stats = c("mean", "sd", "min", "max", "n.valid"),
transpose = TRUE, order = "p") |>
gt(rownames_to_stub = TRUE) |>
fmt_number(columns = Mean:Std.Dev, decimals = 3) |>
cols_label("Mean" = "平均値", "Std.Dev" = "標準偏差",
"Min" = "最小値", "Max" = "最大値", "N.Valid" = "有効ケース数")
平均値 | 標準偏差 | 最小値 | 最大値 | 有効ケース数 | |
---|---|---|---|---|---|
ファイナルステージへの進出有無 | 0.313 | 0.465 | 0 | 1 | 179 |
出場順番 | 5.223 | 2.735 | 1 | 10 | 179 |
コンビ結成からの経過年数 | 7.983 | 3.399 | 1 | 15 | 179 |
決勝舞台の回数 | 0.888 | 1.323 | 0 | 8 | 179 |
敗者復活ダミー | 0.101 | 0.302 | 0 | 1 | 179 |
\[ \begin{aligned} \text{Pr(ファイナル進出)} = & \text{logistic}(y^*) = \frac{1}{1 + e^{-y^*}} \\ y^* = & \alpha + \beta_1 \text{出場順番} + \beta_2 \text{結成からの経過年数} + \beta_3 \text{決勝進出回数} + \\ & \beta_4 \text{敗者復活ダミー} + \beta_5 \text{出場順番} \cdot \text{結成からの経過年数}. \end{aligned} \]
fit <- glm(Final ~ Order10 * Since + Since + No_Finals + Zombie,
data = df, family = binomial("logit"))
fit_pred1 <- predictions(fit, newdata = datagrid(Order10 = 1:10,
Since = mean(df$Since)))
fit_ame1 <- slopes(fit, variables = "Order10", newdata = datagrid(Since = mean(df$Since)))
fit_ame2 <- slopes(fit, variables = "Order10", newdata = datagrid(Since = 8))
modelsummary(list("係数(p値)" = fit),
estimate = "{estimate} ({p.value})",
statistic = NULL,
gof_map = c("nobs", "aic"),
coef_rename = c("(Intercept)" = "切片",
"Order10" = "出場順番",
"Since" = "結成からの経過年数",
"No_Finals" = "決勝進出回数",
"Zombie" = "敗者復活ダミー",
"Order10:Since" = "出場順番 X 結成からの経過年数"),
output = "gt")
係数(p値) | |
---|---|
切片 | -2.330 (0.032) |
出場順番 | 0.143 (0.432) |
結成からの経過年数 | 0.027 (0.826) |
決勝進出回数 | 0.299 (0.024) |
敗者復活ダミー | 0.187 (0.728) |
出場順番 X 結成からの経過年数 | 0.005 (0.797) |
Num.Obs. | 179 |
AIC | 217.3 |
predictions(fit, newdata = datagrid(Order10 = 1:10,
Since = mean(df$Since))) |>
ggplot() +
geom_line(aes(x = Order10, y = estimate)) +
geom_pointrange(aes(x = Order10, y = estimate,
ymin = conf.low, ymax = conf.high)) +
scale_x_continuous(breaks = 1:10,
labels = paste0(1:10, "番目")) +
labs(x = "出場順番",
y = "ファイナル・ステージに進出する確率") +
theme_bw(base_size = 12)
slopes(fit, var = "Order10", newdata = datagrid(Since = 1:15)) |>
ggplot() +
geom_hline(yintercept = 0, color = "red") +
geom_line(aes(x = Since, y = estimate)) +
geom_pointrange(aes(x = Since, y = estimate, ymin = conf.low, ymax = conf.high)) +
scale_x_continuous(breaks = 1:15, labels = 1:15) +
labs(x = "コンビ結成からの経過年数 (年)",
y = "出場順番がファイナル・ステージ進出に\n与える影響") +
theme_bw(base_size = 12)
predictions(fit, newdata = datagrid(Order10 = 1:10,
Since = c(1, 8, 15))) |>
mutate(Since = case_when(Since == 1 ~ "コンビ結成から1年経過",
Since == 8 ~ "コンビ結成から8年経過",
Since == 15 ~ "コンビ結成から15年経過"),
Since = fct_inorder(Since)) |>
ggplot() +
geom_line(aes(x = Order10, y = estimate)) +
geom_pointrange(aes(x = Order10, y = estimate,
ymin = conf.low, ymax = conf.high)) +
scale_x_continuous(breaks = 1:10, labels = 1:10) +
labs(x = "出場順番",
y = "ファイナル・ステージに進出する確率") +
facet_wrap(~Since, nrow = 1) +
theme_bw(base_size = 12)