Something along the lines of this will help with the interpretation of success criteria, though it can probably become much clearer with some tweaking.
Messy draft code
library(tidyverse)
library(FReD)
df <- load_fred_data() %>%
filter(!is.na(es_replication) & !is.na(es_original)) %>%
group_by(id) %>%
mutate(id_unique = paste0(id, "_", row_number())) %>%
ungroup()
criteria <- tribble(
~criterion, ~name,
"significance_r", "Significance\n(replication)",
"significance_agg", "Significance\n(aggregated)",
"consistency_ci", "Consistency\n(confidence interval)",
"consistency_pi", "Consistency\n(prediction interval)",
"homogeneity", "Homogeneity",
"homogeneity_significance", "Homogeneity +\nSignificance",
"small_telescopes", "Small Telescopes"
)
all_results <- lapply(criteria$criterion, function(criterion) {
df %>%
mutate(assess_replication_outcome(es_original, n_original, es_replication, n_replication, criterion = criterion)) %>%
mutate(criterion = criterion)
})
outcomes <- all_results %>%
bind_rows()
out <- outcomes %>%
select(id_unique, criterion, outcome) %>%
pivot_wider(values_from = outcome, names_from = criterion)
long <- out %>%
select(-small_telescopes) %>%
pivot_longer(
cols = -id_unique,
names_to = "criterion",
values_to = "outcome"
)
compute pairwise agreement / disagreement rates
pair_stats <- long %>%
rename(outcome_x = outcome, crit_x = criterion) %>%
inner_join(
long %>% rename(outcome_y = outcome, crit_y = criterion),
by = "id_unique"
) %>%
filter(crit_x != crit_y) %>%
group_by(crit_x, crit_y) %>%
summarize(
agree = mean(outcome_x == outcome_y, na.rm = TRUE),
A_succ_B_fail = mean(outcome_x == "success" & outcome_y == "failure", na.rm = TRUE),
B_succ_A_fail = mean(outcome_x == "failure" & outcome_y == "success", na.rm = TRUE),
.groups = "drop"
)
criteria_plot <- sort(unique(c(pair_stats$crit_x, pair_stats$crit_y)))
plot_df <- pair_stats %>%
mutate(
x = as.numeric(factor(crit_y, levels = criteria_plot)),
y = as.numeric(factor(crit_x, levels = rev(criteria_plot)))
)
upper‐left triangle (predicted‐success/actual‐fail)
tri_ul <- plot_df %>%
rowwise() %>%
mutate(poly = list(tibble(
px = c(x - .5, x + .5, x - .5),
py = c(y + .5, y + .5, y - .5),
val = A_succ_B_fail
))) %>%
unnest(poly)
bottom‐right triangle (predicted‐fail/actual‐success)
tri_br <- plot_df %>%
rowwise() %>%
mutate(poly = list(tibble(
px = c(x + .5, x - .5, x + .5),
py = c(y - .5, y - .5, y + .5),
val = B_succ_A_fail
))) %>%
unnest(poly)
plot_df2 <- plot_df %>% filter(crit_x > crit_y)
tri_ul2 <- tri_ul %>% filter(crit_x > crit_y)
tri_br2 <- tri_br %>% filter(crit_x > crit_y)
library(ggnewscale)
p <- ggplot() +
1) Draw the two bias-triangles FIRST with the "green" scale
geom_polygon(data = tri_ul2,
aes(px, py, group = paste(crit_x, crit_y), fill = A_succ_B_fail),
color = "white") +
geom_polygon(data = tri_br2,
aes(px, py, group = paste(crit_x, crit_y), fill = B_succ_A_fail),
color = "white") +
scale_fill_gradient(low = "white", high = "green", name = "Bias") +
2) Switch to a new fill scale…
new_scale_fill() +
3) …then draw the agreement squares on top
geom_rect(data = plot_df2,
aes(xmin = x - .5, xmax = x,
ymin = y - .5, ymax = y,
fill = agree),
color = "white") +
scale_fill_gradient(low = "grey90", high = "grey10", name = "Agreement") +
4) Finally, add the three text labels per cell
geom_text(data = plot_df2,
aes(x - .25, y + .25,
label = scales::percent(A_succ_B_fail, accuracy = 1)),
size = 2) +
geom_text(data = plot_df2,
aes(x - .25, y - .25,
label = scales::percent(agree, accuracy = 1)),
size = 2) +
geom_text(data = plot_df2,
aes(x + .25, y - .25,
label = scales::percent(B_succ_A_fail, accuracy = 1)),
size = 2) +
5) Axes, fixed aspect, clean theme
coord_fixed() +
scale_x_continuous(breaks = seq_along(criteria_plot), labels = (criteria$name %>% set_names(criteria$criterion))[criteria_plot], expand = c(0, 0)) +
scale_y_continuous(breaks = seq_along(criteria_plot), labels = rev((criteria$name %>% set_names(criteria$criterion))[criteria_plot]), expand = c(0, 0)) +
theme_minimal(base_size = 10) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank()
) +
labs(x = "Success Criterion 1", y = "Success Criterion 2", title = "Comparison of success criteria",
caption = "Note: Squares show overlap, vertical arrows show\nwhen row is more liberal, horizontal arrows when column is more liberal.") +
theme(legend.position = "none") +
# draw full-cell borders
geom_tile(
data = plot_df2,
aes(x = x, y = y),
fill = NA,
color = "black",
linewidth = 0.3
)
p
Something along the lines of this will help with the interpretation of success criteria, though it can probably become much clearer with some tweaking.
Messy draft code
library(tidyverse)
library(FReD)
df <- load_fred_data() %>%
filter(!is.na(es_replication) & !is.na(es_original)) %>%
group_by(id) %>%
mutate(id_unique = paste0(id, "_", row_number())) %>%
ungroup()
criteria <- tribble(
~criterion, ~name,
"significance_r", "Significance\n(replication)",
"significance_agg", "Significance\n(aggregated)",
"consistency_ci", "Consistency\n(confidence interval)",
"consistency_pi", "Consistency\n(prediction interval)",
"homogeneity", "Homogeneity",
"homogeneity_significance", "Homogeneity +\nSignificance",
"small_telescopes", "Small Telescopes"
)
all_results <- lapply(criteria$criterion, function(criterion) {
df %>%
mutate(assess_replication_outcome(es_original, n_original, es_replication, n_replication, criterion = criterion)) %>%
mutate(criterion = criterion)
})
outcomes <- all_results %>%
bind_rows()
out <- outcomes %>%
select(id_unique, criterion, outcome) %>%
pivot_wider(values_from = outcome, names_from = criterion)
long <- out %>%
select(-small_telescopes) %>%
pivot_longer(
cols = -id_unique,
names_to = "criterion",
values_to = "outcome"
)
compute pairwise agreement / disagreement rates
pair_stats <- long %>%
rename(outcome_x = outcome, crit_x = criterion) %>%
inner_join(
long %>% rename(outcome_y = outcome, crit_y = criterion),
by = "id_unique"
) %>%
filter(crit_x != crit_y) %>%
group_by(crit_x, crit_y) %>%
summarize(
agree = mean(outcome_x == outcome_y, na.rm = TRUE),
A_succ_B_fail = mean(outcome_x == "success" & outcome_y == "failure", na.rm = TRUE),
B_succ_A_fail = mean(outcome_x == "failure" & outcome_y == "success", na.rm = TRUE),
.groups = "drop"
)
criteria_plot <- sort(unique(c(pair_stats$crit_x, pair_stats$crit_y)))
plot_df <- pair_stats %>%
mutate(
x = as.numeric(factor(crit_y, levels = criteria_plot)),
y = as.numeric(factor(crit_x, levels = rev(criteria_plot)))
)
upper‐left triangle (predicted‐success/actual‐fail)
tri_ul <- plot_df %>%
rowwise() %>%
mutate(poly = list(tibble(
px = c(x - .5, x + .5, x - .5),
py = c(y + .5, y + .5, y - .5),
val = A_succ_B_fail
))) %>%
unnest(poly)
bottom‐right triangle (predicted‐fail/actual‐success)
tri_br <- plot_df %>%
rowwise() %>%
mutate(poly = list(tibble(
px = c(x + .5, x - .5, x + .5),
py = c(y - .5, y - .5, y + .5),
val = B_succ_A_fail
))) %>%
unnest(poly)
plot_df2 <- plot_df %>% filter(crit_x > crit_y)
tri_ul2 <- tri_ul %>% filter(crit_x > crit_y)
tri_br2 <- tri_br %>% filter(crit_x > crit_y)
library(ggnewscale)
p <- ggplot() +
1) Draw the two bias-triangles FIRST with the "green" scale
geom_polygon(data = tri_ul2,
aes(px, py, group = paste(crit_x, crit_y), fill = A_succ_B_fail),
color = "white") +
geom_polygon(data = tri_br2,
aes(px, py, group = paste(crit_x, crit_y), fill = B_succ_A_fail),
color = "white") +
scale_fill_gradient(low = "white", high = "green", name = "Bias") +
2) Switch to a new fill scale…
new_scale_fill() +
3) …then draw the agreement squares on top
geom_rect(data = plot_df2,
aes(xmin = x - .5, xmax = x,
ymin = y - .5, ymax = y,
fill = agree),
color = "white") +
scale_fill_gradient(low = "grey90", high = "grey10", name = "Agreement") +
4) Finally, add the three text labels per cell
geom_text(data = plot_df2,
aes(x - .25, y + .25,
label = scales::percent(A_succ_B_fail, accuracy = 1)),
size = 2) +
geom_text(data = plot_df2,
aes(x - .25, y - .25,
label = scales::percent(agree, accuracy = 1)),
size = 2) +
geom_text(data = plot_df2,
aes(x + .25, y - .25,
label = scales::percent(B_succ_A_fail, accuracy = 1)),
size = 2) +
5) Axes, fixed aspect, clean theme
coord_fixed() +
scale_x_continuous(breaks = seq_along(criteria_plot), labels = (criteria$name %>% set_names(criteria$criterion))[criteria_plot], expand = c(0, 0)) +
scale_y_continuous(breaks = seq_along(criteria_plot), labels = rev((criteria$name %>% set_names(criteria$criterion))[criteria_plot]), expand = c(0, 0)) +
theme_minimal(base_size = 10) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank()
) +
labs(x = "Success Criterion 1", y = "Success Criterion 2", title = "Comparison of success criteria",
caption = "Note: Squares show overlap, vertical arrows show\nwhen row is more liberal, horizontal arrows when column is more liberal.") +
theme(legend.position = "none") +
# draw full-cell borders
geom_tile(
data = plot_df2,
aes(x = x, y = y),
fill = NA,
color = "black",
linewidth = 0.3
)
p