diff --git a/agentlightning/verl/trainer.py b/agentlightning/verl/trainer.py index 413a0a1cf..12cf2bdaf 100644 --- a/agentlightning/verl/trainer.py +++ b/agentlightning/verl/trainer.py @@ -414,13 +414,13 @@ def _train_step(self, batch_dict: dict) -> dict: rollout_data_dir = self.config.trainer.get("rollout_data_dir", None) if rollout_data_dir: with _timer("dump_rollout_generations", timing_raw): - print(batch.batch.keys()) inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True) outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True) scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist() self._dump_generations( inputs=inputs, outputs=outputs, + gts=None, scores=scores, reward_extra_infos_dict=reward_extra_infos_dict, dump_path=rollout_data_dir, diff --git a/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py b/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py index b4574bd6f..c45e130be 100644 --- a/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py +++ b/contrib/agentlightning/contrib/algorithm/env_verl/trainer.py @@ -514,13 +514,13 @@ def _train_step(self, batch_dict: dict) -> dict: rollout_data_dir = self.config.trainer.get("rollout_data_dir", None) if rollout_data_dir: with _timer("dump_rollout_generations", timing_raw): - print(batch.batch.keys()) inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True) outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True) scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist() self._dump_generations( inputs=inputs, outputs=outputs, + gts=None, scores=scores, reward_extra_infos_dict=reward_extra_infos_dict, dump_path=rollout_data_dir,