Add confusion matrix to success criteria vignette

Something along the lines of this will help with the interpretation of success criteria, though it can probably become much clearer with some tweaking.

<img width="414" alt="Image" src="https://github.com/user-attachments/assets/c4b6c3ab-51e3-4cbb-b2fa-343cd5ebcafc" />

<details> <summary>Messy draft code</summary>

library(tidyverse)
library(FReD)

df <- load_fred_data() %>% 
  filter(!is.na(es_replication) & !is.na(es_original)) %>% 
  group_by(id) %>%
  mutate(id_unique = paste0(id, "_", row_number())) %>% 
  ungroup()

criteria <- tribble(
  ~criterion,                  ~name,                             
   "significance_r",            "Significance\n(replication)",               
   "significance_agg",          "Significance\n(aggregated)",             
   "consistency_ci",            "Consistency\n(confidence interval)", 
   "consistency_pi",            "Consistency\n(prediction interval)",   
   "homogeneity",               "Homogeneity",                    
   "homogeneity_significance",  "Homogeneity +\nSignificance",
   "small_telescopes",          "Small Telescopes"
)


all_results <- lapply(criteria$criterion, function(criterion) {
  df %>% 
    mutate(assess_replication_outcome(es_original, n_original, es_replication, n_replication, criterion = criterion)) %>% 
    mutate(criterion = criterion)
})

outcomes <- all_results %>%
  bind_rows() 

out <- outcomes %>% 
  select(id_unique, criterion, outcome) %>% 
  pivot_wider(values_from = outcome, names_from = criterion)



long <- out %>% 
  select(-small_telescopes) %>% 
  pivot_longer(
    cols = -id_unique, 
    names_to  = "criterion", 
    values_to = "outcome"
  )

# compute pairwise agreement / disagreement rates
pair_stats <- long %>% 
  rename(outcome_x = outcome, crit_x = criterion) %>% 
  inner_join(
    long %>% rename(outcome_y = outcome, crit_y = criterion),
    by = "id_unique"
  ) %>% 
  filter(crit_x != crit_y) %>% 
  group_by(crit_x, crit_y) %>% 
  summarize(
    agree         = mean(outcome_x == outcome_y,                  na.rm = TRUE),
    A_succ_B_fail = mean(outcome_x == "success" & outcome_y == "failure", na.rm = TRUE),
    B_succ_A_fail = mean(outcome_x == "failure" & outcome_y == "success", na.rm = TRUE),
    .groups = "drop"
  )


criteria_plot <- sort(unique(c(pair_stats$crit_x, pair_stats$crit_y)))

plot_df <- pair_stats %>% 
  mutate(
    x = as.numeric(factor(crit_y, levels = criteria_plot)),
    y = as.numeric(factor(crit_x, levels = rev(criteria_plot)))
  )

# upper‐left triangle (predicted‐success/actual‐fail)
tri_ul <- plot_df %>%
  rowwise() %>%
  mutate(poly = list(tibble(
    px  = c(x - .5, x + .5, x - .5),
    py  = c(y + .5, y + .5, y - .5),
    val = A_succ_B_fail
  ))) %>%
  unnest(poly)

# bottom‐right triangle (predicted‐fail/actual‐success)
tri_br <- plot_df %>%
  rowwise() %>%
  mutate(poly = list(tibble(
    px  = c(x + .5, x - .5, x + .5),
    py  = c(y - .5, y - .5, y + .5),
    val = B_succ_A_fail
  ))) %>%
  unnest(poly)

plot_df2 <- plot_df %>% filter(crit_x > crit_y)
tri_ul2   <- tri_ul   %>% filter(crit_x > crit_y)
tri_br2   <- tri_br   %>% filter(crit_x > crit_y)

library(ggnewscale)
p <- ggplot() +
  
  # 1) Draw the two bias-triangles FIRST with the "green" scale
  geom_polygon(data = tri_ul2,
               aes(px, py, group = paste(crit_x, crit_y), fill = A_succ_B_fail),
               color = "white") +
  geom_polygon(data = tri_br2,
               aes(px, py, group = paste(crit_x, crit_y), fill = B_succ_A_fail),
               color = "white") +
  scale_fill_gradient(low = "white", high = "green", name = "Bias") +
  
  # 2) Switch to a new fill scale…
  new_scale_fill() +
  
  # 3) …then draw the agreement squares on top
  geom_rect(data = plot_df2,
            aes(xmin = x - .5, xmax = x,
                ymin = y - .5, ymax = y,
                fill = agree),
            color = "white") +
  scale_fill_gradient(low = "grey90", high = "grey10", name = "Agreement") +
  
  # 4) Finally, add the three text labels per cell
  geom_text(data = plot_df2,
            aes(x - .25, y + .25,
                label = scales::percent(A_succ_B_fail, accuracy = 1)),
            size = 2) +
  geom_text(data = plot_df2,
            aes(x - .25, y - .25,
                label = scales::percent(agree, accuracy = 1)),
            size = 2) +
  geom_text(data = plot_df2,
            aes(x + .25, y - .25,
                label = scales::percent(B_succ_A_fail, accuracy = 1)),
            size = 2) +
  
  # 5) Axes, fixed aspect, clean theme
  coord_fixed() +
  scale_x_continuous(breaks = seq_along(criteria_plot), labels = (criteria$name %>% set_names(criteria$criterion))[criteria_plot], expand = c(0, 0)) +
  scale_y_continuous(breaks = seq_along(criteria_plot), labels = rev((criteria$name %>% set_names(criteria$criterion))[criteria_plot]), expand = c(0, 0)) +
  theme_minimal(base_size = 10) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid  = element_blank()
  ) +
  labs(x = "Success Criterion 1", y = "Success Criterion 2", title = "Comparison of success criteria",
       caption = "Note: Squares show overlap, vertical arrows show\nwhen row is more liberal, horizontal arrows when column is more liberal.") +
  theme(legend.position = "none") +
    # draw full-cell borders
  geom_tile(
    data = plot_df2,
    aes(x = x, y = y),
    fill = NA,
    color = "black",
    linewidth  = 0.3
  )

p


</details>

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add confusion matrix to success criteria vignette #81

compute pairwise agreement / disagreement rates

upper‐left triangle (predicted‐success/actual‐fail)

bottom‐right triangle (predicted‐fail/actual‐success)

1) Draw the two bias-triangles FIRST with the "green" scale

2) Switch to a new fill scale…

3) …then draw the agreement squares on top

4) Finally, add the three text labels per cell

5) Axes, fixed aspect, clean theme

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Add confusion matrix to success criteria vignette #81

Description

compute pairwise agreement / disagreement rates

upper‐left triangle (predicted‐success/actual‐fail)

bottom‐right triangle (predicted‐fail/actual‐success)

1) Draw the two bias-triangles FIRST with the "green" scale

2) Switch to a new fill scale…

3) …then draw the agreement squares on top

4) Finally, add the three text labels per cell

5) Axes, fixed aspect, clean theme

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions