From d02ce9a0b07a49a12592ee297a5607d9fe4224bd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 26 Feb 2026 23:34:45 +0000
Subject: [PATCH 1/2] Initial plan


From 220e3373dac33a4ff7e113d6a39b73bb14c176bc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 26 Feb 2026 23:40:53 +0000
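Subject: [PATCH 2/2] Fix multiple bugs in plot_template_data.py

* select_variables_and_clean: drop rows with duplicated keys directly
  on the selected columns; the previous groupby/count version returned
  row counts in place of the target values.
* compute_oracle: re-enable dropna(axis=0) on the pivoted table.
* split_train_test: fix the `cuble` parameter typo and build the test
  split from `~train_test` instead of reusing the training mask.
* make_pipeline: exclude the target column named by the `cible`
  variable rather than the literal string "cible".
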
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 _doc/examples/ml/plot_template_data.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/_doc/examples/ml/plot_template_data.py b/_doc/examples/ml/plot_template_data.py
index 47f328a..7052ec8 100644
--- a/_doc/examples/ml/plot_template_data.py
+++ b/_doc/examples/ml/plot_template_data.py
@@ -53,11 +53,9 @@ def select_variables_and_clean(df):
     assert set(keys) & set(columns) == set(
         keys
     ), f"Missing columns {set(keys) - set(keys) & set(columns)} in {sorted(df.columns)}"
-    groups = df[[*keys, cible]].groupby(keys).count()
-    filtered = groups[groups[cible] > 1].reset_index(drop=False)
-
-    mask = filtered.duplicated(subset=keys, keep=False)
-    return filtered[~mask][[*keys, cible]], cible
+    subset = df[[*keys, cible]]
+    mask = subset.duplicated(subset=keys, keep=False)
+    return subset[~mask].reset_index(drop=True), cible
 
 
 def compute_oracle(table, cible):
@@ -72,13 +70,13 @@ def compute_oracle(table, cible):
             columns="Session",
             values=cible,
         )
-        # .dropna(axis=0)  # fails
+        .dropna(axis=0)
         .sort_index()
     )
     return mean_absolute_error(piv[2025], piv[2024])
 
 
-def split_train_test(table, cuble):
+def split_train_test(table, cible):
     X, y = table.drop(cible, axis=1), table[cible]
 
     train_test = X["Session"] < 2025
@@ -87,13 +85,13 @@ def split_train_test(table, cuble):
 
     train_X = X[train_test].drop(drop, axis=1)
     train_y = y[train_test]
-    test_X = X[train_test].drop(drop, axis=1)
-    test_y = y[train_test]
+    test_X = X[~train_test].drop(drop, axis=1)
+    test_y = y[~train_test]
     return train_X, test_X, train_y, test_y
 
 
 def make_pipeline(table, cible):
-    vars = [c for c in table.columns if c != "cible"]
+    vars = [c for c in table.columns if c != cible]
     num_cols = ["Capacité de l’établissement par formation"]
     cat_cols = [c for c in vars if c not in num_cols]