From 20cc0fd40faea5d21347c9cb610c756146fcdd80 Mon Sep 17 00:00:00 2001 From: TechTide AI Date: Thu, 21 May 2026 18:02:38 -0400 Subject: [PATCH] fix: pass missing `gts` argument to `_dump_generations` call The `RayPPOTrainer._dump_generations()` method requires a `gts` (ground truths) positional argument, but both `AgentLightningTrainer._train_step` and `EnvAgentLightningTrainer._train_step` omit it, causing a TypeError at runtime when `rollout_data_dir` is configured. Pass `gts=None` since ground truth is not available in agent mode training. Also remove a leftover `print(batch.batch.keys())` debug statement from both call sites. Fixes #492 --- agentlightning/verl/trainer.py | 2 +- contrib/agentlightning/contrib/algorithm/env_verl/trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agentlightning/verl/trainer.py b/agentlightning/verl/trainer.py index 413a0a1cf..12cf2bdaf 100644 --- a/agentlightning/verl/trainer.py +++ b/agentlightning/verl/trainer.py @@ -414,13 +414,13 @@ def _train_step(self, batch_dict: dict) -> dict: rollout_data_dir = self.config.trainer.get("rollout_data_dir", None) if rollout_data_dir: with _timer("dump_rollout_generations", timing_raw): - print(batch.batch.keys()) inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True) outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True) scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist() self._dump_generations( inputs=inputs, outputs=outputs, + gts=None, scores=scores, reward_extra_infos_dict=reward_extra_infos_dict, dump_path=rollout_data_dir, diff --git a/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py b/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py index b4574bd6f..c45e130be 100644 --- a/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py +++ b/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py @@ -514,13 +514,13 @@ def _train_step(self, batch_dict: dict) -> dict: rollout_data_dir = self.config.trainer.get("rollout_data_dir", None) if rollout_data_dir: with _timer("dump_rollout_generations", timing_raw): - print(batch.batch.keys()) inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True) outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True) scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist() self._dump_generations( inputs=inputs, outputs=outputs, + gts=None, scores=scores, reward_extra_infos_dict=reward_extra_infos_dict, dump_path=rollout_data_dir,