From d02ce9a0b07a49a12592ee297a5607d9fe4224bd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 23:34:45 +0000 Subject: [PATCH 1/2] Initial plan From 220e3373dac33a4ff7e113d6a39b73bb14c176bc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 23:40:53 +0000 Subject: [PATCH 2/2] Fix multiple bugs in plot_template_data.py Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _doc/examples/ml/plot_template_data.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/_doc/examples/ml/plot_template_data.py b/_doc/examples/ml/plot_template_data.py index 47f328a..7052ec8 100644 --- a/_doc/examples/ml/plot_template_data.py +++ b/_doc/examples/ml/plot_template_data.py @@ -53,11 +53,9 @@ def select_variables_and_clean(df): assert set(keys) & set(columns) == set( keys ), f"Missing columns {set(keys) - set(keys) & set(columns)} in {sorted(df.columns)}" - groups = df[[*keys, cible]].groupby(keys).count() - filtered = groups[groups[cible] > 1].reset_index(drop=False) - - mask = filtered.duplicated(subset=keys, keep=False) - return filtered[~mask][[*keys, cible]], cible + subset = df[[*keys, cible]] + mask = subset.duplicated(subset=keys, keep=False) + return subset[~mask].reset_index(drop=True), cible def compute_oracle(table, cible): @@ -72,13 +70,13 @@ def compute_oracle(table, cible): columns="Session", values=cible, ) - # .dropna(axis=0) # fails + .dropna(axis=0) .sort_index() ) return mean_absolute_error(piv[2025], piv[2024]) -def split_train_test(table, cuble): +def split_train_test(table, cible): X, y = table.drop(cible, axis=1), table[cible] train_test = X["Session"] < 2025 @@ -87,13 +85,13 @@ def split_train_test(table, cuble): train_X = X[train_test].drop(drop, axis=1) train_y = y[train_test] - test_X = X[train_test].drop(drop, axis=1) - test_y = y[train_test] + test_X = X[~train_test].drop(drop, axis=1) + test_y = y[~train_test] return train_X, test_X, train_y, test_y def make_pipeline(table, cible): - vars = [c for c in table.columns if c != "cible"] + vars = [c for c in table.columns if c != cible] num_cols = ["Capacité de l’établissement par formation"] cat_cols = [c for c in vars if c not in num_cols]