From 3f20131bbb94ce87f1d283127c6d94071b8bbfe2 Mon Sep 17 00:00:00 2001 From: ahao-anyscale Date: Tue, 28 Apr 2026 01:21:01 +0000 Subject: [PATCH 01/21] x Signed-off-by: ahao-anyscale --- .../skyrl_train/skyrl_train_backend.py | 12 +- .../remote_inference_client.py | 138 +++++-- .../skyrl_train/inference_servers/utils.py | 31 +- .../skyrl_train/workers/fsdp/fsdp_worker.py | 7 +- .../workers/megatron/megatron_worker.py | 8 +- skyrl/backends/skyrl_train_backend.py | 26 +- skyrl/benchmarks/load_test_concurrency.py | 2 +- skyrl/train/config/config.py | 10 + skyrl/train/config/ppo_base_config.yaml | 7 + skyrl/train/entrypoints/main_base.py | 23 +- skyrl/train/generators/skyrl_gym_generator.py | 9 +- skyrl/train/generators/skyrl_vlm_generator.py | 5 +- .../test_multi_lora_serving.py | 172 ++++++++ .../test_new_inference_generation.py | 4 +- ...t_remote_inference_client_chat_template.py | 2 +- .../test_vlm_inference_generation.py | 3 +- .../gpu/gpu_ci/test_engine_generation.py | 8 +- .../skyrl_train/gpu/gpu_ci/test_lora.py | 5 + .../test_pause_and_continue_generation.py | 2 +- .../gpu/gpu_ci/test_router_replay.py | 1 + .../gpu/gpu_ci/test_skyrl_gym_generator.py | 1 + .../gpu_ci/test_skyrl_vlm_gym_generator.py | 1 + tests/backends/skyrl_train/gpu/utils.py | 22 +- .../test_remote_inference_client.py | 382 +++++++++++++++++- .../generators/test_skyrl_gym_generator.py | 16 +- ...est_skyrl_gym_generator_chat_templating.py | 5 +- .../generators/test_skyrl_vlm_generator.py | 3 +- 27 files changed, 817 insertions(+), 88 deletions(-) create mode 100644 tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_multi_lora_serving.py diff --git a/skyrl-agent/skyrl_agent/integrations/skyrl_train/skyrl_train_backend.py b/skyrl-agent/skyrl_agent/integrations/skyrl_train/skyrl_train_backend.py index 8ad3c782c8..f40ca1c6e5 100644 --- a/skyrl-agent/skyrl_agent/integrations/skyrl_train/skyrl_train_backend.py +++ b/skyrl-agent/skyrl_agent/integrations/skyrl_train/skyrl_train_backend.py @@ -1,10 +1,18 @@ from typing import Any, List + +from skyrl.backends.skyrl_train.inference_servers.utils import resolve_policy_model_name + from ..base import AsyncInferBackend, GeneratorOutput, GeneratorInput class SkyRLBackend(AsyncInferBackend): def __init__(self, infer_engine, tokenizer: Any = None, cfg: Any = None): self.client = infer_engine + # Resolve the name the inference engine knows the policy by (base + # model or registered LoRA adapter) once at construction. Threaded + # into every ``client.generate`` call so the data plane never has + # to guess the target adapter. 
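+        # If no config is available, fall back to the client's configured
+        # base model name.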
+ self.policy_model_name = resolve_policy_model_name(cfg) if cfg is not None else self.client.model_name async def async_generate_prompts(self, prompts: Any, sampling_params: Any, **kwargs) -> List[str]: input_obj = { @@ -12,7 +20,7 @@ async def async_generate_prompts(self, prompts: Any, sampling_params: Any, **kwa "session_ids": [kwargs.get("request_id", None)], "sampling_params": sampling_params, } - output = await self.client.generate(input_obj) + output = await self.client.generate(input_obj, model=self.policy_model_name) return output["responses"][0], output["stop_reasons"][0] async def async_generate_ids(self, input_ids: List[int], sampling_params: Any, **kwargs) -> List[str]: @@ -21,7 +29,7 @@ async def async_generate_ids(self, input_ids: List[int], sampling_params: Any, * "session_ids": [kwargs.get("request_id", None)], "sampling_params": sampling_params, } - output = await self.client.generate(input_obj) + output = await self.client.generate(input_obj, model=self.policy_model_name) # todo(@csy) probably need to be finish_reason # https://github.com/vllm-project/vllm/blob/a0f8a7964694a6077689b242b5eca95de392d4bb/vllm/v1/engine/__init__.py#L22 meta_info = { diff --git a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py index a0c2198583..61f756976d 100644 --- a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py +++ b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py @@ -79,6 +79,9 @@ _DATA_PLANE_RETRIES = 30 +SKYRL_LORA_ADAPTER_NAME = "skyrl-lora" +"""Default LoRA adapter name used for single-LoRA training inside SkyRL.""" + _TINKER_SAMPLE_TO_VLLM_PARAM_MAP = { "temperature": "temperature", "max_tokens": "max_tokens", @@ -111,6 +114,23 @@ def _extract_session_id_and_body( return session_id, clean_body +def _require_model(body: Dict[str, Any], method_name: str) -> str: + """Return ``body["model"]``, raising ``ValueError`` if missing or empty. + + The client deliberately does not fall back to any default — every caller + must explicitly identify the target base model or LoRA adapter so multi- + LoRA routing is unambiguous. + """ + model = body.get("model") + if not model: + raise ValueError( + f"RemoteInferenceClient.{method_name}: request body must include a " + f"non-empty 'model' field identifying the target base model or " + f"LoRA adapter." + ) + return model + + class PauseMode(Enum): """ Pause mode for inference servers. @@ -192,14 +212,20 @@ class RemoteInferenceClient: reports the full DP world size per server, so we divide by num_deployments.""" model_name: str = "default" - """Model name for OpenAI-compatible API calls.""" + """The base model identifier the inference server was started with. + + Always the base model — never a LoRA adapter name. LoRA adapters are + addressed by the names callers register them under via + ``load_lora_adapter(name, path)``, and per-call routing is done by + passing that name as ``model`` on the data-plane methods. + + Used internally only by ``tokenize``/``detokenize``, which are LoRA- + agnostic but still require a ``model`` field per the OpenAI schema. + """ enable_return_routed_experts: bool = False """Whether to return routed expert indices (R3 / rollout router replay).""" - active_lora_name: Optional[str] = None - """Name of the active LoRA adapter. 
If set, generation requests use this adapter instead of the base model.""" - tokenizer: Optional[Any] = None """Optional HF tokenizer for local tokenize/detokenize (avoids HTTP round-trips).""" @@ -314,6 +340,7 @@ async def _post(self, url: str, json: Dict[str, Any], headers: Optional[Dict[str async def generate( self, input_batch: InferenceEngineInput, + model: str, ) -> InferenceEngineOutput: """ Generate completions via /v1/completions. @@ -329,6 +356,10 @@ async def generate( Args: input_batch: Contains prompt_token_ids, sampling_params, and optional session_ids. + model: Required model identifier — the base model name or a loaded + LoRA adapter name. Callers must always supply this; the client + does not fall back to any default. Use ``self.model_name`` if + you want the client's configured default. Returns: InferenceEngineOutput with responses, response_ids, and stop_reasons. @@ -367,6 +398,7 @@ async def _throttled_generate(idx: int) -> Dict[str, Any]: sampling_params=sampling_params, session_id=session_ids[idx] if session_ids and idx < len(session_ids) else None, mm_features=mm_features[idx] if mm_features and idx < len(mm_features) else None, + model=model, ) async with gen_sem: return await self._generate_single( @@ -374,6 +406,7 @@ async def _throttled_generate(idx: int) -> Dict[str, Any]: sampling_params=sampling_params, session_id=session_ids[idx] if session_ids and idx < len(session_ids) else None, mm_features=mm_features[idx] if mm_features and idx < len(mm_features) else None, + model=model, ) async def _throttled_detokenize(token_ids: List[int]) -> str: @@ -401,6 +434,7 @@ async def _generate_single( prompt_token_ids: List[int], sampling_params: Dict[str, Any], session_id: Optional[Any], + model: str, mm_features: Optional[MultiModalFeatures] = None, ) -> Dict[str, Any]: """ @@ -419,12 +453,9 @@ async def _generate_single( else f"{self.proxy_url}/inference/v1/generate" ) - # Use LoRA adapter name if one is active, otherwise use base model name - effective_model = self.active_lora_name if self.active_lora_name else self.model_name - payload: dict[str, Any] = { "sampling_params": sampling_params, - "model": effective_model, + "model": model, "token_ids": prompt_token_ids, } if mm_features: @@ -460,6 +491,7 @@ async def _render_for_sample( self, prompt: Dict[str, Any], session_id: Optional[str], + model: str, ) -> Tuple[List[int], Optional[MultiModalFeatures]]: """Build token_ids and optional multi-modal features from a Tinker prompt. @@ -491,10 +523,9 @@ async def _render_for_sample( url = c["location"] content_parts.append({"type": "image_url", "image_url": {"url": url}}) - effective_model = self.active_lora_name if self.active_lora_name else self.model_name render_payload: Dict[str, Any] = { "json": { - "model": effective_model, + "model": model, "messages": [{"role": "user", "content": content_parts}], } } @@ -542,7 +573,10 @@ async def _render_for_sample( return final_token_ids, adjusted_features - async def sample(self, request_payload: SampleRequestPayload) -> SampleResponse: + async def sample( + self, + request_payload: SampleRequestPayload, + ) -> SampleResponse: """ Sample completions via /inference/v1/generate (Tinker API). @@ -551,13 +585,14 @@ async def sample(self, request_payload: SampleRequestPayload) -> SampleResponse: Args: request_payload: SampleRequestPayload with {"json": }. - Expected keys in json: prompt, num_samples, sampling_params, session_id, - include_prompt_logprobs (bool), topk_prompt_logprobs (int). 
+ Expected keys in json: model, prompt, num_samples, sampling_params, + session_id, include_prompt_logprobs (bool), topk_prompt_logprobs (int). Returns: SampleResponse with type="sample", sequences list, prompt_logprobs, and topk_prompt_logprobs. """ session_id, body = _extract_session_id_and_body(request_payload) + model = _require_model(body, "sample") prompt = body.get("prompt", {}) num_samples = body.get("num_samples", 1) @@ -575,7 +610,7 @@ async def sample(self, request_payload: SampleRequestPayload) -> SampleResponse: # Render prompt: flatten text tokens and, if images are present, # call the render endpoint to get placeholder tokens + features. - token_ids, mm_features = await self._render_for_sample(prompt, session_id) + token_ids, mm_features = await self._render_for_sample(prompt, session_id, model=model) # Map Tinker SamplingParams → vLLM format sampling_params: Dict[str, Any] = { @@ -590,11 +625,9 @@ async def sample(self, request_payload: SampleRequestPayload) -> SampleResponse: if val is not None: sampling_params[vllm_key] = val - effective_model = self.active_lora_name if self.active_lora_name else self.model_name - payload: Dict[str, Any] = { "sampling_params": sampling_params, - "model": effective_model, + "model": model, "token_ids": token_ids, } if mm_features is not None: @@ -673,13 +706,17 @@ async def chat_completion( Args: request_payload: Dict with {"json": , "headers": }. - The request body should be OpenAI-compatible chat completion request. - session_id can be included in json for consistent routing. + The request body must be an OpenAI-compatible chat completion + request and is required to include a ``model`` field + identifying the base model or a loaded LoRA adapter; the + client does not fall back to any default. ``session_id`` can + be included in the body for consistent routing. Returns: OpenAI-compatible chat completion response. """ session_id, body = _extract_session_id_and_body(request_payload) + _require_model(body, "chat_completion") headers = {"Content-Type": "application/json"} if session_id: @@ -709,6 +746,7 @@ async def render_chat_completion( Rendered chat completion response (template-applied prompt and token IDs). """ session_id, body = _extract_session_id_and_body(request_payload) + _require_model(body, "render_chat_completion") headers = {"Content-Type": "application/json"} if session_id: @@ -738,6 +776,7 @@ async def completion( OpenAI-compatible completion response. """ session_id, body = _extract_session_id_and_body(request_payload) + _require_model(body, "completion") headers = {"Content-Type": "application/json"} if session_id: @@ -995,7 +1034,7 @@ async def update_named_weights( """ Update model weights via vLLM native /update_weights. Used for full parameter fine-tuning. - For LoRA weight sync, use update_lora_from_disk() instead. + For LoRA weight sync, use load_lora_adapter() instead. Args: update_info: Dict with keys expected by vLLM (names, dtype_names, shapes, packed, etc.) @@ -1080,33 +1119,34 @@ async def finish_weight_update(self) -> Dict[str, Any]: {"method": "finish_weight_update"}, ) - async def update_lora_from_disk( + async def load_lora_adapter( self, + lora_name: str, lora_path: str, + load_inplace: bool = True, ) -> Dict[str, Any]: """ - Update LoRA adapter weights by loading from disk on all backend servers via /v1/load_lora_adapter. + Load (or reload) a LoRA adapter on all backend servers via /v1/load_lora_adapter. - Always loads under self.active_lora_name so the same slot is reused across - weight syncs. 
- - After loading, generation requests will automatically use the LoRA adapter - by setting the model name to the LoRA adapter name. + After loading, generation/chat/completion requests can target this LoRA + by passing ``model=lora_name`` (or by setting it as the client's + ``model_name``). When ``load_inplace=True`` and an adapter with the + same name is already registered on the server, vLLM replaces it + inplace, preserving its internal int id. Args: + lora_name: Name to register the adapter under on each server. lora_path: Path to the LoRA adapter on disk (must be accessible from servers). + load_inplace: When True (default), reloading a previously-loaded and cached + adapter with the same name replaces it with the on-disk lora. Returns: Dict mapping server_url to response. """ - if self.active_lora_name is None: - raise ValueError("active_lora_name must be set on RemoteInferenceClient before loading a LoRA adapter.") - - lora_name = self.active_lora_name payload = { "lora_name": lora_name, "lora_path": lora_path, - "load_inplace": True, + "load_inplace": load_inplace, } # Call /v1/load_lora_adapter on all servers directly. @@ -1129,6 +1169,40 @@ async def _load_on_server(server_url: str): return {url: resp for url, resp in results} + async def unload_lora_adapter(self, lora_name: str) -> Dict[str, Any]: + """ + Unload a previously-loaded LoRA adapter on all backend servers via /v1/unload_lora_adapter. + + After unloading, ``lora_name`` is no longer accepted as a ``model`` + target on any server. The underlying CPU/GPU LRU entries on vLLM age + out naturally as new adapters are loaded. + + Args: + lora_name: Name of the adapter to unload. + + Returns: + Dict mapping server_url to response. + """ + payload = {"lora_name": lora_name} + + # Mirror load_lora_adapter: vLLM returns plain text on success and JSON + # ErrorResponse (e.g. 404) on failure. + session = await self._get_session() + + async def _unload_on_server(server_url: str): + url = f"{server_url}/v1/unload_lora_adapter" + async with session.post(url, json=payload) as resp: + if resp.status >= 400: + body = await resp.json() + raise_for_status(resp, body) + return server_url, {"status": resp.status, "body": await resp.text()} + + results = await asyncio.gather(*[_unload_on_server(url) for url in self.server_urls]) + + logger.info(f"Unloaded LoRA adapter '{lora_name}'") + + return {url: resp for url, resp in results} + # --------------------------- # Info # --------------------------- diff --git a/skyrl/backends/skyrl_train/inference_servers/utils.py b/skyrl/backends/skyrl_train/inference_servers/utils.py index 4f6fab74cf..47e039c3f8 100644 --- a/skyrl/backends/skyrl_train/inference_servers/utils.py +++ b/skyrl/backends/skyrl_train/inference_servers/utils.py @@ -7,6 +7,9 @@ from skyrl.backends.skyrl_train.inference_servers.new_inference_worker_wrap import ( VLLM_NEW_INFERENCE_WORKER_EXTENSION_CLS, ) +from skyrl.backends.skyrl_train.inference_servers.remote_inference_client import ( + SKYRL_LORA_ADAPTER_NAME, +) from skyrl.backends.skyrl_train.weight_sync import get_transfer_strategy from skyrl.train.config import ( InferenceEngineConfig, @@ -31,6 +34,27 @@ def _uses_lora_weight_sync(cfg: SkyRLTrainConfig) -> bool: return True +def resolve_policy_model_name(cfg: SkyRLTrainConfig) -> str: + """Return the model identifier the inference engine knows the policy by. 
+ + Mirrors the weight-sync code path: when the worker registers a LoRA + adapter on the inference engine (FSDP + LoRA, or Megatron + LoRA with + ``merge_lora=False``), the policy is that adapter and callers must pass + ``SKYRL_LORA_ADAPTER_NAME`` as ``model`` on data-plane calls. Otherwise + — including Megatron + LoRA with ``merge_lora=True``, where merged + weights are pushed as a full weight update — the policy is the base + model itself. + + This is the single source of truth for "which name does the inference + server know the policy by?" and should be used wherever a caller needs + to issue a ``generate``/``sample``/``chat_completion``/``completion`` / + ``render_chat_completion`` request against the current policy. + """ + if _uses_lora_weight_sync(cfg): + return SKYRL_LORA_ADAPTER_NAME + return cfg.trainer.policy.model.path + + # TODO: Add a test for validation def build_vllm_cli_args(cfg: SkyRLTrainConfig) -> Namespace: """Build CLI args for vLLM server from config.""" @@ -84,9 +108,12 @@ def build_vllm_cli_args(cfg: SkyRLTrainConfig) -> Namespace: # LoRA adapters (not merged weights). Megatron merges by default # (merge_lora=True), so the inference engine must NOT have LoRA wrapping. if _uses_lora_weight_sync(cfg): + lora_cfg = cfg.trainer.policy.model.lora args.enable_lora = True - args.max_lora_rank = cfg.trainer.policy.model.lora.rank - args.max_loras = 1 + args.max_lora_rank = lora_cfg.rank + args.max_loras = lora_cfg.max_loras + if lora_cfg.max_cpu_loras is not None: + args.max_cpu_loras = lora_cfg.max_cpu_loras args.fully_sharded_loras = ie_cfg.fully_sharded_loras if not cfg.trainer.placement.colocate_all: diff --git a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py index 4f3faf9426..377729c4f5 100644 --- a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py +++ b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py @@ -26,6 +26,9 @@ fsdp_version, should_use_meta_init, ) +from skyrl.backends.skyrl_train.inference_servers.remote_inference_client import ( + SKYRL_LORA_ADAPTER_NAME, +) from skyrl.backends.skyrl_train.training_batch import ( TrainingInputBatch, TrainingOutputBatch, @@ -270,7 +273,9 @@ async def _save_lora_adapters_and_sync(self, peft_model, lora_sync_path, inferen ) if isinstance(inference_engine_client, RemoteInferenceClient): - await inference_engine_client.update_lora_from_disk(lora_sync_path) + await inference_engine_client.load_lora_adapter( + SKYRL_LORA_ADAPTER_NAME, lora_sync_path, load_inplace=True + ) else: lora_request = LoraLoadRequest(lora_path=lora_sync_path) await inference_engine_client.update_named_weights(lora_request) diff --git a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py index 456e4954f9..e00287dba3 100644 --- a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py +++ b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py @@ -32,6 +32,9 @@ get_megatron_optimizer_param_scheduler, init_megatron_optim_config, ) +from skyrl.backends.skyrl_train.inference_servers.remote_inference_client import ( + SKYRL_LORA_ADAPTER_NAME, +) from skyrl.backends.skyrl_train.training_batch import ( TrainingInputBatch, TrainingOutputBatch, @@ -866,12 +869,15 @@ async def _save_lora_adapters_and_sync(self, lora_sync_path, inference_engine_cl with open(os.path.join(lora_sync_path, "adapter_config.json"), "w", encoding="utf-8") as f: json.dump(adapter_config, f, ensure_ascii=False, indent=4) + # 
Send LoRA disk loading request to inference engine. from skyrl.backends.skyrl_train.inference_servers.remote_inference_client import ( RemoteInferenceClient, ) if isinstance(inference_engine_client, RemoteInferenceClient): - await inference_engine_client.update_lora_from_disk(lora_sync_path) + await inference_engine_client.load_lora_adapter( + SKYRL_LORA_ADAPTER_NAME, lora_sync_path, load_inplace=True + ) else: lora_request = LoraLoadRequest(lora_path=lora_sync_path) await inference_engine_client.update_named_weights(lora_request) diff --git a/skyrl/backends/skyrl_train_backend.py b/skyrl/backends/skyrl_train_backend.py index ea0f42a8ef..a960b1ddee 100644 --- a/skyrl/backends/skyrl_train_backend.py +++ b/skyrl/backends/skyrl_train_backend.py @@ -27,6 +27,7 @@ from skyrl.backends.skyrl_train.inference_servers.utils import ( build_router_args, build_vllm_cli_args, + resolve_policy_model_name, ) from skyrl.backends.skyrl_train.inference_servers.vllm_router import VLLMRouter from skyrl.backends.skyrl_train.training_batch import ( @@ -47,9 +48,6 @@ from skyrl.utils.log import logger from skyrl.utils.tok import get_tokenizer -# Fixed LoRA adapter name used for generation requests when LoRA is active. -_SKYRL_LORA_ADAPTER_NAME = "skyrl-lora" - class SkyRLTrainBackendOverrides(BaseModel, extra="allow"): """Configuration overrides for the SkyRL-Train backend. @@ -381,17 +379,10 @@ def _create_new_inference_client(self): f"proxy_url={proxy_url}, server_urls={server_urls}, colocated={is_colocated}" ) - lora_cfg = self._cfg.trainer.policy.model.lora - active_lora_name = ( - _SKYRL_LORA_ADAPTER_NAME - if lora_cfg and lora_cfg.rank > 0 and self._cfg.trainer.strategy != "megatron" - else None - ) self._inference_engine_client = RemoteInferenceClient( proxy_url=proxy_url, server_urls=server_urls, model_name=self._cfg.trainer.policy.model.path, - active_lora_name=active_lora_name, data_parallel_size=ie_cfg.data_parallel_size, tokenizer=self._tokenizer, ) @@ -938,6 +929,13 @@ def _sample_with_remote_client( ) -> dict[str, types.SampleOutput | types.ErrorResponse]: """Sample using RemoteInferenceClient, forwarding model input chunks directly.""" + # Every sample() body must explicitly identify the target model/LoRA + # adapter — the client does not fall back to any default. Resolve + # what name the inference engine knows the policy by from config + # (LoRA adapter when LoRA weights are sync'd as an adapter, base + # model otherwise). 
+ model_name = resolve_policy_model_name(self._cfg) + async def sample_all(): tasks = [] for i in range(len(prepared_batch.all_model_inputs)): @@ -946,6 +944,7 @@ async def sample_all(): request_payload = { "json": { + "model": model_name, "prompt": model_input.model_dump(), "num_samples": 1, "sampling_params": sampling_params.model_dump(), @@ -1154,10 +1153,13 @@ def create_ray_wrapped_inference_engines_from_config( # Conditionally add LoRA parameters if LoRA is enabled if cfg.trainer.policy.model.lora.rank > 0 and cfg.trainer.strategy != "megatron": + lora_cfg = cfg.trainer.policy.model.lora engine_kwargs["enable_lora"] = True - engine_kwargs["max_lora_rank"] = cfg.trainer.policy.model.lora.rank + engine_kwargs["max_lora_rank"] = lora_cfg.rank engine_kwargs["sleep_level"] = 1 - engine_kwargs["max_loras"] = 1 + engine_kwargs["max_loras"] = lora_cfg.max_loras + if lora_cfg.max_cpu_loras is not None: + engine_kwargs["max_cpu_loras"] = lora_cfg.max_cpu_loras engine_kwargs["fully_sharded_loras"] = cfg.generator.inference_engine.fully_sharded_loras if cfg.generator.inference_engine.enforce_eager and cfg.generator.inference_engine.backend == "vllm": diff --git a/skyrl/benchmarks/load_test_concurrency.py b/skyrl/benchmarks/load_test_concurrency.py index 036011f89d..88b36fbb8f 100644 --- a/skyrl/benchmarks/load_test_concurrency.py +++ b/skyrl/benchmarks/load_test_concurrency.py @@ -186,7 +186,7 @@ async def fire_client_generate( async def _call(idx: int): try: - return await client.generate(input_batch) + return await client.generate(input_batch, model=client.model_name) except Exception as e: raise RuntimeError(f"request {idx}: {type(e).__name__}: {e}") from e diff --git a/skyrl/train/config/config.py b/skyrl/train/config/config.py index aae26ba315..628e393744 100644 --- a/skyrl/train/config/config.py +++ b/skyrl/train/config/config.py @@ -61,6 +61,16 @@ class SkyRLLoraConfig(BaseConfig): """For FSDP, corresponds to ``init_lora_weights`` in PEFT. For Megatron, used for ``lora_A_init_method``; supports "xavier", "normal", "kaiming", "zero".""" + max_loras: int = 1 + """Maximum number of LoRA adapters that can be active concurrently in a + single GPU batch. Maps to vLLM's ``max_loras``. Increase past 1 to enable + multi-tenant LoRA serving via ``RemoteInferenceClient.load_lora_adapter``.""" + + max_cpu_loras: Optional[int] = None + """Total LoRA adapter capacity in vLLM's CPU LRU cache. Maps to vLLM's + ``max_cpu_loras``; when None, vLLM defaults it to ``max_loras``. Must be + >= ``max_loras`` if explicitly set.""" + @dataclass class ModelConfig(BaseConfig): diff --git a/skyrl/train/config/ppo_base_config.yaml b/skyrl/train/config/ppo_base_config.yaml index 9ac58f2f50..b10695c5b3 100644 --- a/skyrl/train/config/ppo_base_config.yaml +++ b/skyrl/train/config/ppo_base_config.yaml @@ -37,6 +37,13 @@ trainer: # For FSDP, this corresponds to `init_lora_weights` in PEFT. See: https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraConfig.init_lora_weights # For Megatron, this is used for `lora_A_init_method`, and "xavier", "normal", "kaiming", and "zero" are supported. init_method: "kaiming" + # Maximum number of LoRA adapters that can be active concurrently in a + # single GPU batch. Maps to vLLM's `max_loras`. Increase past 1 to + # serve multiple LoRA adapters from the same engine. + max_loras: 1 + # Total LoRA adapter capacity in vLLM's CPU LRU cache. Maps to vLLM's + # `max_cpu_loras`; when null, vLLM defaults it to `max_loras`. 
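+      # Must be >= `max_loras` if explicitly set.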
+ max_cpu_loras: null optimizer_config: lr: 1.0e-6 adam_betas: [0.9, 0.999] diff --git a/skyrl/train/entrypoints/main_base.py b/skyrl/train/entrypoints/main_base.py index 839fcdca49..ad4206929a 100644 --- a/skyrl/train/entrypoints/main_base.py +++ b/skyrl/train/entrypoints/main_base.py @@ -21,7 +21,10 @@ from skyrl.backends.skyrl_train.inference_engines.remote_inference_engine import ( create_remote_inference_engines, ) -from skyrl.backends.skyrl_train.inference_servers.utils import build_vllm_cli_args +from skyrl.backends.skyrl_train.inference_servers.utils import ( + build_vllm_cli_args, + resolve_policy_model_name, +) from skyrl.env_vars import _SKYRL_USE_NEW_INFERENCE, SKYRL_RAY_PG_TIMEOUT_IN_S from skyrl.train.config import SkyRLTrainConfig, get_config_as_yaml_str from skyrl.train.dataset import PromptDataset @@ -36,9 +39,6 @@ ) from skyrl.utils.tok import get_tokenizer -# Fixed LoRA adapter name used for generation requests when LoRA is active. -_SKYRL_LORA_ADAPTER_NAME = "skyrl-lora" - # NOTE (sumanthrh): We use ray heavily and thus disable `fork` start method. # forking within ray leads to undefined behaviour and often causes hard to debug # memory leaks. See: https://docs.ray.io/en/latest/ray-core/patterns/fork-new-processes.html @@ -88,10 +88,13 @@ def create_ray_wrapped_inference_engines_from_config( # Conditionally add LoRA parameters if LoRA is enabled if cfg.trainer.policy.model.lora.rank > 0 and cfg.trainer.strategy != "megatron": + lora_cfg = cfg.trainer.policy.model.lora engine_kwargs["enable_lora"] = True - engine_kwargs["max_lora_rank"] = cfg.trainer.policy.model.lora.rank + engine_kwargs["max_lora_rank"] = lora_cfg.rank engine_kwargs["sleep_level"] = 1 - engine_kwargs["max_loras"] = 1 + engine_kwargs["max_loras"] = lora_cfg.max_loras + if lora_cfg.max_cpu_loras is not None: + engine_kwargs["max_cpu_loras"] = lora_cfg.max_cpu_loras engine_kwargs["fully_sharded_loras"] = ie_cfg.fully_sharded_loras # TODO(devpatel): Bandaid solution, replace this once we have a better @@ -239,6 +242,7 @@ def get_generator(self, cfg, tokenizer, inference_engine_client): skyrl_gym_cfg=cfg.environment.skyrl_gym, inference_engine_client=inference_engine_client, tokenizer=tokenizer, + policy_model_name=resolve_policy_model_name(cfg), ) def get_trainer( @@ -390,18 +394,11 @@ def _get_new_inference_client(self): proxy_url = setup.proxy_url server_urls = setup.server_urls - lora_cfg = self.cfg.trainer.policy.model.lora - active_lora_name = ( - _SKYRL_LORA_ADAPTER_NAME - if lora_cfg and lora_cfg.rank > 0 and self.cfg.trainer.strategy != "megatron" - else None - ) client = RemoteInferenceClient( proxy_url=proxy_url, server_urls=server_urls, model_name=self.cfg.trainer.policy.model.path, enable_return_routed_experts=ie_cfg.enable_return_routed_experts, - active_lora_name=active_lora_name, data_parallel_size=ie_cfg.data_parallel_size, tokenizer=self.tokenizer, ) diff --git a/skyrl/train/generators/skyrl_gym_generator.py b/skyrl/train/generators/skyrl_gym_generator.py index 6f0d27cacf..44c9afaf62 100644 --- a/skyrl/train/generators/skyrl_gym_generator.py +++ b/skyrl/train/generators/skyrl_gym_generator.py @@ -142,17 +142,22 @@ def __init__( skyrl_gym_cfg: SkyRLGymConfig, inference_engine_client: InferenceEngineClient, tokenizer, + policy_model_name: str, ): """ Args: generator_cfg: GeneratorConfig object containing the generator configuration inference_engine_client: InferenceEngineClient object for interacting with the inference engines tokenizer: tokenizer object for encoding and decoding text + 
policy_model_name: identifier the inference engine knows the policy + by (base model path or registered LoRA adapter name). Threaded + into every ``client.generate(...)`` call as ``model``. """ self.generator_cfg = generator_cfg self.skyrl_gym_cfg = skyrl_gym_cfg self.inference_engine_client = inference_engine_client self.tokenizer = tokenizer + self.policy_model_name = policy_model_name self.max_turns = generator_cfg.max_turns self.batched = generator_cfg.batched self.use_conversation_multi_turn = generator_cfg.use_conversation_multi_turn @@ -338,7 +343,7 @@ async def agent_loop( engine_input = InferenceEngineInput( prompt_token_ids=[agent_loop_state.input_ids], session_ids=[session_id], sampling_params=sampling_params ) - engine_output = await self.inference_engine_client.generate(engine_input) + engine_output = await self.inference_engine_client.generate(engine_input, model=self.policy_model_name) output = engine_output["responses"][0] output_ids = engine_output["response_ids"][0] stop_reason = engine_output["stop_reasons"][0] @@ -671,7 +676,7 @@ async def generate_batched( return_dict=False, ) engine_input = InferenceEngineInput(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params) - engine_output = await self.inference_engine_client.generate(engine_input) + engine_output = await self.inference_engine_client.generate(engine_input, model=self.policy_model_name) outputs = engine_output["responses"] responses = engine_output["response_ids"] stop_reasons = engine_output["stop_reasons"] diff --git a/skyrl/train/generators/skyrl_vlm_generator.py b/skyrl/train/generators/skyrl_vlm_generator.py index 961dfd2bb1..6b85c3de5b 100644 --- a/skyrl/train/generators/skyrl_vlm_generator.py +++ b/skyrl/train/generators/skyrl_vlm_generator.py @@ -41,8 +41,9 @@ def __init__( skyrl_gym_cfg: SkyRLGymConfig, inference_engine_client: RemoteInferenceClient, tokenizer, + policy_model_name: str, ): - super().__init__(generator_cfg, skyrl_gym_cfg, inference_engine_client, tokenizer) + super().__init__(generator_cfg, skyrl_gym_cfg, inference_engine_client, tokenizer, policy_model_name) logger.info("Initialized SkyRLVLMGymGenerator (VLM multi-modal generator)") def _validate_cfg(self, generator_cfg: GeneratorConfig): @@ -148,7 +149,7 @@ async def agent_loop( sampling_params=current_sampling_params, mm_features=[latest_features] if latest_features is not None else None, ) - engine_output = await self.inference_engine_client.generate(engine_input) + engine_output = await self.inference_engine_client.generate(engine_input, model=self.policy_model_name) gen_text = engine_output["responses"][0] gen_ids = engine_output["response_ids"][0] diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_multi_lora_serving.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_multi_lora_serving.py new file mode 100644 index 0000000000..d54e805dab --- /dev/null +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_multi_lora_serving.py @@ -0,0 +1,172 @@ +""" +Multi-LoRA serving tests for ``RemoteInferenceClient``. + +These tests exercise the inference-server-side LoRA control plane: +``load_lora_adapter`` / ``unload_lora_adapter`` fan-out, per-call ``model=`` +routing across concurrently registered adapters, and the in-place reload +contract (replacing one adapter without disturbing another). 
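+
+A minimal sketch of the flow these tests exercise (the adapter name and path
+below are illustrative, not the fixtures the tests actually use):
+
+    # client: RemoteInferenceClient, engine_input: InferenceEngineInput
+    await client.load_lora_adapter("my-lora", "/path/to/adapter")
+    out = await client.generate(engine_input, model="my-lora")
+    await client.unload_lora_adapter("my-lora")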
+ +# Run with: +uv run --isolated --extra dev --extra fsdp pytest tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_multi_lora_serving.py -v -s +""" + +import pytest +from huggingface_hub import snapshot_download +from transformers import AutoTokenizer + +from skyrl.backends.skyrl_train.inference_engines.base import InferenceEngineInput +from skyrl.train.config import SkyRLLoraConfig, SkyRLTrainConfig +from tests.backends.skyrl_train.gpu.utils import InferenceEngineState + +MODEL_QWEN3 = "Qwen/Qwen3-0.6B" + + +@pytest.fixture(scope="session") +def qwen3_meowing_lora_files(): + """Download the Qwen3-0.6B Meow LoRA adapter and return its local snapshot path.""" + return snapshot_download(repo_id="Jackmin108/Qwen3-0.6B-Meow-LoRA") + + +@pytest.fixture(scope="session") +def qwen3_woofing_lora_files(): + """Download the Qwen3-0.6B Woof LoRA adapter and return its local snapshot path.""" + return snapshot_download(repo_id="Jackmin108/Qwen3-0.6B-Woof-LoRA") + + +def _multi_lora_test_config() -> SkyRLTrainConfig: + """Build a Qwen3 LoRA config that supports two concurrent adapters on vLLM.""" + cfg = SkyRLTrainConfig() + cfg.trainer.policy.model.path = MODEL_QWEN3 + cfg.trainer.critic.model.path = "" + cfg.trainer.strategy = "fsdp" + cfg.trainer.placement.colocate_all = False + cfg.trainer.placement.policy_num_gpus_per_node = 1 + cfg.generator.inference_engine.async_engine = True + cfg.generator.inference_engine.num_engines = 1 + cfg.generator.inference_engine.run_engines_locally = True + cfg.generator.inference_engine.tensor_parallel_size = 1 + # ``rank`` only needs to be > 0 to flip the LoRA path on; the actual ranks + # used at serve time come from the adapter snapshots downloaded above. + cfg.trainer.policy.model.lora = SkyRLLoraConfig( + rank=32, + alpha=32, + dropout=0.0, + target_modules="all-linear", + max_loras=2, + ) + return cfg + + +def _build_animal_prompt_token_ids(tokenizer) -> list: + """Build prompt_token_ids that ask Qwen3 to make an animal noise. + + Both Meow / Woof LoRAs are tuned to override the assistant reply with their + respective sound, so a neutral prompt is enough to exercise routing. 
+ """ + messages = [ + {"role": "user", "content": "Make a single short animal noise."}, + ] + return tokenizer.apply_chat_template( + messages, + add_generation_prompt=True, + tokenize=True, + return_dict=False, + enable_thinking=False, + ) + + +async def _generate_with_lora(client, prompt_token_ids, lora_name: str) -> str: + """Run a single greedy generation against ``lora_name`` and return the text.""" + sampling_params = {"temperature": 0.0, "max_tokens": 10} + out = await client.generate( + InferenceEngineInput( + prompt_token_ids=[prompt_token_ids], + sampling_params=sampling_params, + ), + model=lora_name, + ) + return out["responses"][0] + + +@pytest.mark.asyncio +async def test_multi_lora_interleaved_generation(ray_init_fixture, qwen3_meowing_lora_files, qwen3_woofing_lora_files): + """Two adapters served concurrently route per-call via the ``model=`` kwarg.""" + cfg = _multi_lora_test_config() + tokenizer = AutoTokenizer.from_pretrained(MODEL_QWEN3, trust_remote_code=True) + prompt_token_ids = _build_animal_prompt_token_ids(tokenizer) + + async with InferenceEngineState.create( + cfg=cfg, + model=MODEL_QWEN3, + use_local=True, + async_engine=True, + tp_size=1, + colocate_all=False, + sleep_level=1, + enable_lora=True, + lora_max_loras=2, + ) as engines: + client = engines.client + + await client.load_lora_adapter("lora-meow", qwen3_meowing_lora_files) + await client.load_lora_adapter("lora-woof", qwen3_woofing_lora_files) + try: + outputs = [] + for adapter in ["lora-meow", "lora-woof", "lora-meow", "lora-woof"]: + outputs.append(await _generate_with_lora(client, prompt_token_ids, adapter)) + + print(f"Multi-LoRA outputs: {outputs}") + assert "Meow" in outputs[0] or "meow" in outputs[0] + assert "Woof" in outputs[1] or "woof" in outputs[1] + assert "Meow" in outputs[2] or "meow" in outputs[2] + assert "Woof" in outputs[3] or "woof" in outputs[3] + finally: + await client.unload_lora_adapter("lora-meow") + await client.unload_lora_adapter("lora-woof") + + +@pytest.mark.asyncio +async def test_lora_inplace_reload_isolated(ray_init_fixture, qwen3_meowing_lora_files, qwen3_woofing_lora_files): + """Reloading adapter ``lora-A`` from a different path leaves ``lora-B`` unchanged.""" + cfg = _multi_lora_test_config() + tokenizer = AutoTokenizer.from_pretrained(MODEL_QWEN3, trust_remote_code=True) + prompt_token_ids = _build_animal_prompt_token_ids(tokenizer) + + async with InferenceEngineState.create( + cfg=cfg, + model=MODEL_QWEN3, + use_local=True, + async_engine=True, + tp_size=1, + colocate_all=False, + sleep_level=1, + enable_lora=True, + lora_max_loras=2, + ) as engines: + client = engines.client + + await client.load_lora_adapter("lora-A", qwen3_meowing_lora_files) + await client.load_lora_adapter("lora-B", qwen3_woofing_lora_files) + try: + out_A_before = await _generate_with_lora(client, prompt_token_ids, "lora-A") + out_B_before = await _generate_with_lora(client, prompt_token_ids, "lora-B") + assert "Meow" in out_A_before or "meow" in out_A_before + assert "Woof" in out_B_before or "woof" in out_B_before + + # Inplace reload A from B's adapter path. vLLM keeps the same + # int_id but should swap the underlying weights; B must be entirely + # unaffected. 
+ await client.load_lora_adapter("lora-A", qwen3_woofing_lora_files, load_inplace=True) + + out_A_after = await _generate_with_lora(client, prompt_token_ids, "lora-A") + out_B_after = await _generate_with_lora(client, prompt_token_ids, "lora-B") + + assert ( + "Woof" in out_A_after or "woof" in out_A_after + ), f"A should now be woofing-style after inplace reload, got: {out_A_after}" + assert ( + out_B_after == out_B_before + ), f"B's output should be unchanged byte-for-byte; before={out_B_before!r}, after={out_B_after!r}" + finally: + await client.unload_lora_adapter("lora-A") + await client.unload_lora_adapter("lora-B") diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py index 225d65e3f8..8f17f815c7 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py @@ -563,7 +563,7 @@ async def test_client_generate(vllm_server: InferenceEngineState): sampling_params=sampling_params, ) - output = await client.generate(engine_input) + output = await client.generate(engine_input, model=client.model_name) assert len(output["responses"]) == 1 assert len(output["response_ids"]) == 1 @@ -595,6 +595,7 @@ async def test_client_tokenize_detokenize_roundtrip(vllm_server: InferenceEngine def _build_sample_payload( token_ids: List[int], + model: str = MODEL_QWEN2_5, num_samples: int = 1, sampling_params: Dict[str, Any] | None = None, session_id: str | None = None, @@ -603,6 +604,7 @@ def _build_sample_payload( ) -> Dict[str, Any]: """Build a Tinker-format sample request payload.""" body: Dict[str, Any] = { + "model": model, "prompt": {"chunks": [{"tokens": token_ids}]}, "num_samples": num_samples, "sampling_params": sampling_params or {"temperature": 0.7, "max_tokens": 64}, diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py index 300fac372a..1ef6e10bc9 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py @@ -90,7 +90,7 @@ async def test_custom_chat_template(ray_init_fixture, use_custom_template: bool) prompt_token_ids=[prompt_token_ids], sampling_params={"max_tokens": 10}, ) - output = await client.generate(engine_input) + output = await client.generate(engine_input, model=client.model_name) assert len(output["responses"]) == 1 assert isinstance(output["responses"][0], str) diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py index c432fa6dcd..04f2448c24 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py @@ -203,6 +203,7 @@ async def test_sample_with_multimodal_image(module_scoped_ray_init_fixture): request_payload = { "json": { + "model": MODEL_QWEN3_VL, "prompt": prompt, "num_samples": 1, "sampling_params": {"temperature": 0.0, "max_tokens": 20}, @@ -409,7 +410,7 @@ async def 
test_generate_with_multimodal_features_red_square(module_scoped_ray_in "sampling_params": {"max_tokens": 64, "temperature": 0.0}, "mm_features": [features], } - gen_result = await client.generate(input_batch) + gen_result = await client.generate(input_batch, model=client.model_name) # Structural assertions assert len(gen_result["responses"]) == 1 diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py index 82a65a198c..fba1dd6dad 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py @@ -41,7 +41,7 @@ def get_test_actor_config(model: str = MODEL) -> SkyRLTrainConfig: async def run_batch_generation(client, prompts, sampling_params): engine_input = InferenceEngineInput(prompts=prompts, sampling_params=sampling_params) - engine_output = await client.generate(engine_input) + engine_output = await client.generate(engine_input, model=client.model_name) return engine_output["responses"], engine_output["stop_reasons"] @@ -49,7 +49,7 @@ async def run_single_generation(client, prompts, sampling_params): tasks = [] for prompt in prompts: engine_input = InferenceEngineInput(prompts=[prompt], sampling_params=sampling_params) - task = client.generate(engine_input) + task = client.generate(engine_input, model=client.model_name) tasks.append(task) results = await asyncio.gather(*tasks) @@ -65,7 +65,7 @@ async def run_single_generation(client, prompts, sampling_params): async def run_batch_generation_with_tokens(client, prompt_token_ids, sampling_params): engine_input = InferenceEngineInput(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params) - engine_output = await client.generate(engine_input) + engine_output = await client.generate(engine_input, model=client.model_name) return engine_output["responses"], engine_output["stop_reasons"] @@ -73,7 +73,7 @@ async def run_single_generation_with_tokens(client, prompt_token_ids, sampling_p tasks = [] for tokens in prompt_token_ids: engine_input = InferenceEngineInput(prompt_token_ids=[tokens], sampling_params=sampling_params) - task = client.generate(engine_input) + task = client.generate(engine_input, model=client.model_name) tasks.append(task) results = await asyncio.gather(*tasks) diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_lora.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_lora.py index 13741c89c3..ea02c5b64d 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_lora.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_lora.py @@ -4,6 +4,11 @@ # Run Megatron tests: uv run --isolated --extra dev --extra megatron pytest tests/backends/skyrl_train/gpu/gpu_ci/test_lora.py -k "megatron" + +Multi-LoRA serving tests live separately in +``tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_multi_lora_serving.py`` +since they exercise the inference-server LoRA control plane, not the +trainer + weight-sync path covered here. 
""" import pytest diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py index d3084482ba..55ec3f77d7 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py @@ -234,7 +234,7 @@ async def one_req(i: int): "sampling_params": dict(sampling_params), "session_ids": [i], } - return await client.generate(engine_input) + return await client.generate(engine_input, model=client.model_name) tasks = [asyncio.create_task(one_req(i)) for i in range(num_requests)] # Let requests start and enqueue; with max_num_seqs=2, 2 run and 1 wait per engine diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_router_replay.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_router_replay.py index ed4fca055d..32c61641fa 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_router_replay.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_router_replay.py @@ -150,6 +150,7 @@ async def test_logprobs(ray_init_fixture, tp, pp, cp, ep, etp, extra_tf_kwargs): skyrl_gym_cfg=cfg.environment.skyrl_gym, inference_engine_client=client, tokenizer=tokenizer, + policy_model_name=client.model_name, ) input_batch: GeneratorInput = get_test_generator_input( diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py index 8a3a258a5e..c66461b1b8 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py @@ -171,6 +171,7 @@ async def run_generator_end_to_end( skyrl_gym_cfg=env_cfg, inference_engine_client=inference_engine_client, tokenizer=tokenizer, + policy_model_name=inference_engine_client.model_name, ) input_batch: GeneratorInput = get_test_generator_input( diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py index a671e8d0f8..03bfe9a3fe 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py @@ -191,6 +191,7 @@ async def test_vlm_generator_color_classification(ray_init_fixture): skyrl_gym_cfg=env_cfg, inference_engine_client=inference_client, tokenizer=tokenizer, + policy_model_name=inference_client.model_name, ) num_prompts = 2 diff --git a/tests/backends/skyrl_train/gpu/utils.py b/tests/backends/skyrl_train/gpu/utils.py index 82ab42ada2..6cd764b765 100644 --- a/tests/backends/skyrl_train/gpu/utils.py +++ b/tests/backends/skyrl_train/gpu/utils.py @@ -509,6 +509,8 @@ def create( sleep_level: int = 2, # use level 1 in unit tests that do not explicitly sync weights or for LoRA enable_lora: bool = False, active_lora_name: Optional[str] = None, + lora_max_loras: Optional[int] = None, + lora_max_cpu_loras: Optional[int] = None, max_num_seqs: Optional[int] = None, engine_init_kwargs: Optional[Dict[str, Any]] = None, use_new_inference_servers: Optional[bool] = None, @@ -552,6 +554,13 @@ def create( if enable_pd: ie_cfg.enable_pd = True ie_cfg.num_prefill = num_prefill + # Propagate the LoRA limits onto the trainer config so build_vllm_cli_args + # (which reads from cfg.trainer.policy.model.lora) and any downstream + # path picks them up before vLLM is started. 
+ if lora_max_loras is not None: + cfg.trainer.policy.model.lora.max_loras = lora_max_loras + if lora_max_cpu_loras is not None: + cfg.trainer.policy.model.lora.max_cpu_loras = lora_max_cpu_loras assert ie_cfg.run_engines_locally, "This test does not yet support remote engines." @@ -605,12 +614,21 @@ def create( proxy_url = setup.proxy_url server_urls = setup.server_urls + # When LoRA is enabled, point the client's default ``model_name`` at + # the active LoRA adapter so existing tests that don't pass an + # explicit ``model=`` keep routing through the adapter. Tests that + # need multi-LoRA can pass ``model=`` per call and rely on the + # client's underlying base model name only when needed. + # ``model_name`` is the base model the server was started with; + # LoRA-aware test cases are expected to pass adapter names + # explicitly per call (e.g. ``client.generate(..., model="lora-X")``). + base_model_name = served_model_name if served_model_name else cfg.trainer.policy.model.path + client = RemoteInferenceClient( proxy_url=proxy_url, server_urls=server_urls, - model_name=served_model_name if served_model_name else cfg.trainer.policy.model.path, + model_name=base_model_name, enable_return_routed_experts=ie_cfg.enable_return_routed_experts, - active_lora_name=active_lora_name, data_parallel_size=ie_cfg.data_parallel_size, tokenizer=get_tokenizer(cfg.trainer.policy.model.path), ) diff --git a/tests/backends/skyrl_train/inference_servers/test_remote_inference_client.py b/tests/backends/skyrl_train/inference_servers/test_remote_inference_client.py index 5e081a4715..b9a2c0f458 100644 --- a/tests/backends/skyrl_train/inference_servers/test_remote_inference_client.py +++ b/tests/backends/skyrl_train/inference_servers/test_remote_inference_client.py @@ -4,16 +4,19 @@ import pickle import threading import time -from typing import List, Optional +from typing import Dict, List, Optional +import aiohttp import httpx import pytest import pytest_asyncio import uvicorn from fastapi import FastAPI, Query, Request +from fastapi.responses import JSONResponse, PlainTextResponse from skyrl.backends.skyrl_train.inference_servers.common import get_open_port from skyrl.backends.skyrl_train.inference_servers.remote_inference_client import ( + SKYRL_LORA_ADAPTER_NAME, PauseMode, RemoteInferenceClient, ) @@ -23,6 +26,12 @@ def create_mock_vllm_server(server_id: int) -> FastAPI: """Create a mock vLLM server with standard endpoints.""" app = FastAPI() app.state.last_generate_features = None + app.state.last_generate_model = None + app.state.last_chat_model = None + app.state.last_completion_model = None + app.state.last_render_model = None + # Per-server LoRA registry: lora_name -> lora_path + app.state.lora_registry = {} @app.get("/health") async def health(): @@ -32,6 +41,19 @@ async def health(): async def get_last_generate_features(): return {"features": app.state.last_generate_features} + @app.get("/test/last_models") + async def get_last_models(): + return { + "generate": app.state.last_generate_model, + "chat": app.state.last_chat_model, + "completion": app.state.last_completion_model, + "render": app.state.last_render_model, + } + + @app.get("/test/lora_registry") + async def get_lora_registry(): + return {"registry": dict(app.state.lora_registry)} + @app.get("/get_world_size") async def get_world_size(): return {"world_size": 2} # Simulate TP=2 @@ -39,13 +61,15 @@ async def get_world_size(): @app.post("/v1/completions") async def completions(request: Request): body = await request.json() + 
app.state.last_completion_model = body.get("model") prompts = body.get("prompt", []) n_prompts = len(prompts) if isinstance(prompts, list) else 1 return { "choices": [ {"index": i, "text": f"Response {i} from server {server_id}", "finish_reason": "stop"} for i in range(n_prompts) - ] + ], + "model": body.get("model"), } @app.post("/skyrl/v1/generate") @@ -54,6 +78,7 @@ async def generate(request: Request): body = await request.json() # Consume body sp = body.get("sampling_params", {}) input_token_ids = body.get("token_ids", []) + app.state.last_generate_model = body.get("model") n = sp.get("n", 1) # If logprobs is explicitly set (sample path), use n for num_choices. # Otherwise (generate path), use len(token_ids) for per-prompt responses. @@ -109,11 +134,17 @@ async def generate(request: Request): @app.post("/v1/chat/completions") async def chat_completions(request: Request): - return {"choices": [{"message": {"content": f"Chat from server {server_id}"}}]} + body = await request.json() + app.state.last_chat_model = body.get("model") + return { + "choices": [{"message": {"content": f"Chat from server {server_id}"}}], + "model": body.get("model"), + } @app.post("/v1/chat/completions/render") async def render_chat_completion(request: Request): body = await request.json() + app.state.last_render_model = body.get("model") messages = body.get("messages", []) # Count image_url parts across all messages. @@ -205,6 +236,45 @@ async def init_weight_transfer_engine(request: Request): async def update_weights(request: Request): return {"status": "ok", "server_id": server_id} + @app.post("/v1/load_lora_adapter") + async def load_lora_adapter(request: Request): + body = await request.json() + lora_name = body.get("lora_name") + lora_path = body.get("lora_path") + load_inplace = body.get("load_inplace", True) + if lora_name is None or lora_path is None: + return JSONResponse( + status_code=400, + content={"object": "error", "message": "missing lora_name/lora_path", "type": "BadRequest"}, + ) + if lora_name in app.state.lora_registry and not load_inplace: + return JSONResponse( + status_code=400, + content={ + "object": "error", + "message": f"adapter '{lora_name}' already loaded", + "type": "BadRequest", + }, + ) + app.state.lora_registry[lora_name] = lora_path + return PlainTextResponse(f"Success: LoRA adapter '{lora_name}' added successfully on server {server_id}.") + + @app.post("/v1/unload_lora_adapter") + async def unload_lora_adapter(request: Request): + body = await request.json() + lora_name = body.get("lora_name") + if lora_name is None or lora_name not in app.state.lora_registry: + return JSONResponse( + status_code=404, + content={ + "object": "error", + "message": f"adapter '{lora_name}' not found", + "type": "NotFoundError", + }, + ) + del app.state.lora_registry[lora_name] + return PlainTextResponse(f"Success: LoRA adapter '{lora_name}' removed successfully on server {server_id}.") + return app @@ -300,7 +370,7 @@ async def test_generate(self, client): "prompt_token_ids": [[1, 2, 3], [4, 5, 6]], "sampling_params": {"max_tokens": 100}, } - result = await client.generate(input_batch) + result = await client.generate(input_batch, model=client.model_name) assert "responses" in result assert "stop_reasons" in result @@ -316,7 +386,7 @@ async def test_generate_with_session_id(self, client): "prompt_token_ids": [[1, 2, 3]], "session_ids": ["test-session"], } - result = await client.generate(input_batch) + result = await client.generate(input_batch, model=client.model_name) assert 
len(result["responses"]) == 1 @pytest.mark.asyncio @@ -508,6 +578,7 @@ async def test_sample(self, client): """Test sample with n=1 returns correct structure and prompt_logprobs.""" request_payload = { "json": { + "model": client.model_name, "prompt": {"chunks": [{"tokens": [10, 20, 30]}]}, "num_samples": 1, "sampling_params": {"temperature": 0.7, "max_tokens": 64}, @@ -539,6 +610,7 @@ async def test_sample_n2(self, client): """Test sample with n=2 returns two sequences and prompt_logprobs.""" request_payload = { "json": { + "model": client.model_name, "prompt": {"chunks": [{"tokens": [1, 2]}, {"tokens": [3]}]}, "num_samples": 2, "sampling_params": {"temperature": 1.0, "max_tokens": 32}, @@ -564,6 +636,7 @@ async def test_sample_topk_prompt_logprobs(self, client): """Test topk_prompt_logprobs returns both prompt_logprobs and topk tuples.""" request_payload = { "json": { + "model": client.model_name, "prompt": {"chunks": [{"tokens": [10, 20, 30]}]}, "num_samples": 1, "sampling_params": {"temperature": 0.7, "max_tokens": 64}, @@ -597,6 +670,7 @@ async def test_sample_topk_without_include_returns_none(self, client): """topk_prompt_logprobs alone does not return prompt logprobs when include_prompt_logprobs is False.""" request_payload = { "json": { + "model": client.model_name, "prompt": {"chunks": [{"tokens": [10, 20, 30]}]}, "num_samples": 1, "sampling_params": {"temperature": 0.7, "max_tokens": 64}, @@ -616,6 +690,7 @@ async def test_sample_with_image(self, client): image_bytes = base64.b64encode(b"fake-jpeg-data").decode("ascii") request_payload = { "json": { + "model": client.model_name, "prompt": { "chunks": [ {"type": "encoded_text", "tokens": [100, 101, 102]}, @@ -646,6 +721,7 @@ async def test_sample_with_image_asset_pointer(self, client): """Sample with image_asset_pointer sends location URL to render.""" request_payload = { "json": { + "model": client.model_name, "prompt": { "chunks": [ {"type": "encoded_text", "tokens": [10, 11]}, @@ -671,6 +747,7 @@ async def test_sample_text_only_no_features(self, client): """Text-only sample does not include features in the generate payload.""" request_payload = { "json": { + "model": client.model_name, "prompt": {"chunks": [{"type": "encoded_text", "tokens": [1, 2, 3]}]}, "num_samples": 1, "sampling_params": {"temperature": 0.7, "max_tokens": 64}, @@ -761,7 +838,7 @@ async def test_generate_with_mm_features(self, client, mock_servers): "sampling_params": {"max_tokens": 50}, "mm_features": [mm_features], } - result = await client.generate(input_batch) + result = await client.generate(input_batch, model=client.model_name) assert len(result["responses"]) == 1 assert len(result["response_ids"]) == 1 @@ -792,3 +869,296 @@ async def test_async_context_manager(self, mock_servers): # Session should be closed after exiting context assert client._session is None or client._session.closed + + +async def _get_lora_registries(server_urls: List[str]) -> List[Dict[str, str]]: + """Helper: read the per-server LoRA registries from each mock server.""" + registries: List[Dict[str, str]] = [] + async with httpx.AsyncClient() as http: + for url in server_urls: + resp = await http.get(f"{url}/test/lora_registry") + registries.append(resp.json()["registry"]) + return registries + + +async def _get_last_models(server_urls: List[str]) -> List[Dict[str, Optional[str]]]: + """Helper: read the last per-method ``model`` field captured by each mock.""" + last: List[Dict[str, Optional[str]]] = [] + async with httpx.AsyncClient() as http: + for url in server_urls: + resp = 
await http.get(f"{url}/test/last_models") + last.append(resp.json()) + return last + + +class TestLoRAControlPlane: + """Test load_lora_adapter / unload_lora_adapter fan-out and bookkeeping.""" + + @pytest.mark.asyncio + async def test_load_lora_adapter_fans_out(self, client, mock_servers): + result = await client.load_lora_adapter("lora-A", "/tmp/path/lora-A") + assert len(result) == 2 + for url, response in result.items(): + assert response["status"] == 200 + assert "Success" in response["body"] + assert "lora-A" in response["body"] + + registries = await _get_lora_registries(mock_servers["server_urls"]) + for reg in registries: + assert reg.get("lora-A") == "/tmp/path/lora-A" + + await client.unload_lora_adapter("lora-A") + + @pytest.mark.asyncio + async def test_load_lora_adapter_inplace_reload(self, client, mock_servers): + await client.load_lora_adapter("lora-X", "/tmp/path/v1") + await client.load_lora_adapter("lora-X", "/tmp/path/v2", load_inplace=True) + + registries = await _get_lora_registries(mock_servers["server_urls"]) + for reg in registries: + assert reg.get("lora-X") == "/tmp/path/v2" + + await client.unload_lora_adapter("lora-X") + + @pytest.mark.asyncio + async def test_load_lora_adapter_inplace_false_raises_on_conflict(self, client): + await client.load_lora_adapter("lora-conflict", "/tmp/path/orig") + try: + with pytest.raises(aiohttp.ClientResponseError): + await client.load_lora_adapter("lora-conflict", "/tmp/path/other", load_inplace=False) + finally: + await client.unload_lora_adapter("lora-conflict") + + @pytest.mark.asyncio + async def test_unload_lora_adapter_fans_out(self, client, mock_servers): + await client.load_lora_adapter("lora-B", "/tmp/path/lora-B") + + result = await client.unload_lora_adapter("lora-B") + assert len(result) == 2 + for url, response in result.items(): + assert response["status"] == 200 + assert "Success" in response["body"] + + registries = await _get_lora_registries(mock_servers["server_urls"]) + for reg in registries: + assert "lora-B" not in reg + + @pytest.mark.asyncio + async def test_unload_unknown_lora_raises(self, client, mock_servers): + # Server returns 404, surfaced as ClientResponseError via raise_for_status. + with pytest.raises(aiohttp.ClientResponseError): + await client.unload_lora_adapter("nonexistent-lora") + registries = await _get_lora_registries(mock_servers["server_urls"]) + for reg in registries: + assert "nonexistent-lora" not in reg + + @pytest.mark.asyncio + async def test_default_lora_adapter_constant(self): + # Sanity check that the public constant has the documented value used + # across the SkyRL training paths. + assert SKYRL_LORA_ADAPTER_NAME == "skyrl-lora" + + +class TestExplicitModelRequired: + """Every data-plane call must explicitly identify the target model. + + ``generate`` takes ``model`` as a required keyword argument; the body-style + methods (``sample``, ``chat_completion``, ``completion``, + ``render_chat_completion``) require it inside the request body. There is + no fallback to ``client.model_name`` on the data plane — that field is + only used internally for ``tokenize``/``detokenize``. 
+ """ + + @pytest.mark.asyncio + async def test_generate_threads_model_into_payload(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + input_batch = { + "prompt_token_ids": [[1, 2, 3]], + "sampling_params": {"max_tokens": 50}, + } + await client.generate(input_batch, model="lora-explicit") + captured = await _get_last_models(mock_servers["server_urls"]) + # generate routes through proxy_url == first server. + assert captured[0]["generate"] == "lora-explicit" + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_generate_requires_model_kwarg(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + input_batch = { + "prompt_token_ids": [[1, 2, 3]], + "sampling_params": {"max_tokens": 50}, + } + with pytest.raises(TypeError): + # ``model`` is required (positional/keyword-only) on the new client. + await client.generate(input_batch) # type: ignore[call-arg] + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_chat_completion_uses_body_model(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = { + "json": { + "model": "lora-chat", + "messages": [{"role": "user", "content": "hi"}], + }, + "headers": {}, + } + await client.chat_completion(request_payload) + captured = await _get_last_models(mock_servers["server_urls"]) + assert captured[0]["chat"] == "lora-chat" + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_chat_completion_missing_model_raises(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = { + "json": {"messages": [{"role": "user", "content": "hi"}]}, + "headers": {}, + } + with pytest.raises(ValueError, match="must include a non-empty 'model' field"): + await client.chat_completion(request_payload) + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_completion_uses_body_model(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = { + "json": {"model": "lora-completion", "prompt": "hello"}, + "headers": {}, + } + await client.completion(request_payload) + captured = await _get_last_models(mock_servers["server_urls"]) + assert captured[0]["completion"] == "lora-completion" + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_completion_missing_model_raises(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = {"json": {"prompt": "hello"}, "headers": {}} + with pytest.raises(ValueError, match="must include a non-empty 'model' field"): + await client.completion(request_payload) + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_render_chat_completion_uses_body_model(self, mock_servers): + client 
= RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = { + "json": { + "model": "lora-render", + "messages": [{"role": "user", "content": "hi"}], + }, + } + result = await client.render_chat_completion(request_payload) + assert result["model"] == "lora-render" + captured = await _get_last_models(mock_servers["server_urls"]) + assert captured[0]["render"] == "lora-render" + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_render_chat_completion_missing_model_raises(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = {"json": {"messages": [{"role": "user", "content": "hi"}]}} + with pytest.raises(ValueError, match="must include a non-empty 'model' field"): + await client.render_chat_completion(request_payload) + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_sample_uses_body_model(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = { + "json": { + "model": "lora-sample", + "prompt": {"chunks": [{"tokens": [1, 2, 3]}]}, + "num_samples": 1, + "sampling_params": {"temperature": 0.7, "max_tokens": 16}, + } + } + await client.sample(request_payload) + captured = await _get_last_models(mock_servers["server_urls"]) + assert captured[0]["generate"] == "lora-sample" + finally: + await client.teardown() + + @pytest.mark.asyncio + async def test_sample_missing_model_raises(self, mock_servers): + client = RemoteInferenceClient( + proxy_url=mock_servers["proxy_url"], + server_urls=mock_servers["server_urls"], + model_name="base-model", + data_parallel_size=1, + ) + try: + request_payload = { + "json": { + "prompt": {"chunks": [{"tokens": [1, 2, 3]}]}, + "num_samples": 1, + "sampling_params": {"temperature": 0.7, "max_tokens": 16}, + } + } + with pytest.raises(ValueError, match="must include a non-empty 'model' field"): + await client.sample(request_payload) + finally: + await client.teardown() diff --git a/tests/train/generators/test_skyrl_gym_generator.py b/tests/train/generators/test_skyrl_gym_generator.py index cf973457ae..322938a38c 100644 --- a/tests/train/generators/test_skyrl_gym_generator.py +++ b/tests/train/generators/test_skyrl_gym_generator.py @@ -74,7 +74,7 @@ def mock_llm(): mock = MagicMock() # Mock the new generate method - def mock_generate(input_batch): + def mock_generate(input_batch, model=None): num_prompts = len(input_batch["prompts"]) if "prompts" in input_batch else len(input_batch["prompt_token_ids"]) return { "responses": ["mocked output"] * num_prompts, @@ -283,6 +283,7 @@ def mock_generate(_): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -334,6 +335,7 @@ async def test_generate_batched(mock_make, mock_tokenizer, mock_llm, mock_env, g skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -376,6 +378,7 @@ async def 
test_generate_interface_compliance( skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -498,6 +501,7 @@ def mock_generate(input_batch): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -587,6 +591,7 @@ def mock_encode(text, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -668,6 +673,7 @@ def mock_encode(text, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -731,6 +737,7 @@ def mock_apply_chat_template(messages, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -852,6 +859,7 @@ def mock_encode_or_tokenize(text): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -951,6 +959,7 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) # Run agent loop @@ -1041,6 +1050,7 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) # Ensure base_conversation_token_ids doesn't shift observation slicing in test generator.base_conversation_token_ids = [] @@ -1131,6 +1141,7 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) # Force retokenize path regardless of model resolution logic if needed generator.custom_chat_template = "" @@ -1219,6 +1230,7 @@ def mock_make_func(*args, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) prompt = [{"role": "user", "content": "Q?"}] @@ -1310,6 +1322,7 @@ def mock_make_func(*args, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] @@ -1427,6 +1440,7 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, + policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] diff --git a/tests/train/generators/test_skyrl_gym_generator_chat_templating.py b/tests/train/generators/test_skyrl_gym_generator_chat_templating.py index ea10d8fad9..5e7f48a1d7 100644 --- a/tests/train/generators/test_skyrl_gym_generator_chat_templating.py +++ b/tests/train/generators/test_skyrl_gym_generator_chat_templating.py @@ -101,6 +101,7 @@ def _build_generator(tokenizer, model_name: str, chat_template_config, extra_ove skyrl_gym_cfg=env_cfg, inference_engine_client=None, # to be 
replaced per-test tokenizer=tokenizer, + policy_model_name="mock-model", ) @@ -154,7 +155,7 @@ async def test_skyrl_gym_generator_chat_templating_exact(model_name, tokenizatio if "Qwen3" in model_name: mock_response_text = "\nmock thinking\n\n\n" + mock_response_text - def mock_generate(input_batch): + def mock_generate(input_batch, model=None): num_prompts = len(input_batch["prompts"]) if "prompts" in input_batch else len(input_batch["prompt_token_ids"]) mock_llm_output_text = mock_response_text + tokenizer.eos_token @@ -351,7 +352,7 @@ async def make_generator(append_flag: bool): mock_llm = MagicMock() # The LLM engine will generate and return the stop tag, but no EOS token ID. - def mock_generate(input_batch): + def mock_generate(input_batch, model=None): num_prompts = ( len(input_batch["prompts"]) if "prompts" in input_batch else len(input_batch["prompt_token_ids"]) ) diff --git a/tests/train/generators/test_skyrl_vlm_generator.py b/tests/train/generators/test_skyrl_vlm_generator.py index df02ce3ee3..4b505f8595 100644 --- a/tests/train/generators/test_skyrl_vlm_generator.py +++ b/tests/train/generators/test_skyrl_vlm_generator.py @@ -90,6 +90,7 @@ def _build_vlm_generator(tokenizer): skyrl_gym_cfg=env_cfg, inference_engine_client=mock_client, tokenizer=tokenizer, + policy_model_name="mock-model", ) return generator @@ -113,7 +114,7 @@ async def mock_render(request_payload): def _make_mock_llm(tokenizer, response_text: str): """Create an AsyncMock for the inference engine's generate method.""" - async def mock_generate(input_batch): + async def mock_generate(input_batch, model=None): num_prompts = len(input_batch["prompt_token_ids"]) text_with_eos = response_text + tokenizer.eos_token ids = tokenizer.encode(text_with_eos, add_special_tokens=False) From 68ed1421243650fbc1c93fc3ec0f6006ff164f30 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:07:14 +0000 Subject: [PATCH 02/21] [docs] Add Multi-LoRA Megatron Tinker design doc (v1) Adds the design write-up for multi-tenant LoRA training on the Megatron backend exposed via the Tinker API. v1 is training-only; sampling and adapter-only checkpoint export are deferred. Implementation follows on the multi_lora branch. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../content/docs/tinker/multi_lora_design.mdx | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 docs/content/docs/tinker/multi_lora_design.mdx diff --git a/docs/content/docs/tinker/multi_lora_design.mdx b/docs/content/docs/tinker/multi_lora_design.mdx new file mode 100644 index 0000000000..e5e0e6b54c --- /dev/null +++ b/docs/content/docs/tinker/multi_lora_design.mdx @@ -0,0 +1,110 @@ +--- +title: "Multi-LoRA on Megatron — Design" +--- + +# Multi-LoRA on Megatron — Design (v1) + +This document describes the design for multi-tenant LoRA training on the SkyRL Megatron Tinker backend. It is the in-repo companion to the implementation tracked on the `multi_lora` branch. + +## Why + +Today the SkyRL-Train backend exposed via the Tinker API is single-tenant: a second `create_model` is rejected at `skyrl/backends/skyrl_train_backend.py:342`, and `delete_model` does a full `ray.shutdown()` (line 404) so a fresh model can be created. This is documented under [Single-tenant LoRA](./limitations#single-tenant-lora). + +The driver for changing this is Trajectory AI, who want to run Tinker workloads on their own hardware and need many tenants on a shared training pool. 
There is no Megatron multi-tenant SFT/RL framework today; only Prime-RL has first-class multi-tenancy and only on FSDP/CP/EP. A Megatron-backed solution is therefore both a user-requested feature and a meaningful differentiator. + +## v1 scope + +- **Training only**, exercised via the `tinker-cookbook` `sl_loop` SFT recipe. +- One base model, multiple LoRA adapters. +- Fixed `(rank, alpha, target_modules)` across all adapters. Mismatched configs on a second `create_model` are hard-rejected with a clear `ValueError`. +- `sample()` and `save_sampler_checkpoint()` raise if more than one adapter is registered. Per-adapter sampling and per-adapter vLLM weight sync are explicitly deferred. +- The FFT (no-LoRA) path stays single-tenant — the relaxation is gated behind `lora_config.rank > 0`. + +## Strategy + +Keep one base model GPU-resident at all times. At any moment exactly one LoRA adapter is "live" in the model + optimizer. A swap is `tensor.copy_()` of LoRA buffer params and `DistributedOptimizer` fp32-main / `exp_avg` / `exp_avg_sq` between live GPU storage and per-adapter pinned-CPU slots. + +The per-adapter slot store (the `AdapterStore`) lives **on each `PolicyWorker`** because Megatron's `DistributedOptimizer` shards optimizer state across DP ranks; each rank owns its own slice and must snapshot/restore it locally. The controller (`SkyRLTrainBackend`) holds only `model_id → role` maps; the dispatch layer (`WorkerDispatch`) fans `swap_to_adapter(model_id)` out to all policy actors. + +The swap is **implicit** at the top of every per-model dispatch entry point: `forward`, `forward_backward`, `optim_step`, `set_lr`, `save_checkpoint`, `load_checkpoint`. Callers do not need to swap manually. + +## Why a buffer-level swap is correct + +Megatron's `DistributedDataParallel` filters out frozen params before constructing the param-and-grad buffers (`Megatron-LM/core_v0.16.0/megatron/core/distributed/distributed_data_parallel.py:139-141`). Combined with the LoRA pre-wrap hook at `megatron_worker.py:454-460` and `bridge.peft.lora.LoRA` setting `requires_grad=False` on base `to_wrap` params, the DDP `param_and_grad_buffer.param_data` contains **only LoRA A/B params**. Frozen base weights live as plain `nn.Parameter`s outside the buffer (which is exactly what the LoRA-aware branch in `megatron_utils.py:158-170` already handles). + +Buffer-level `tensor.copy_()` therefore swaps adapter-only state. The base model stays GPU-resident and is shared across all tenants. + +## Three storages per LoRA param + +For each LoRA `nn.Parameter` `p`, three independent storages must be swapped: + +1. The bf16 view in `mc.buffers[i].param_data` (or `mc.expert_parallel_buffers`). +2. The fp32 main copy in `_opt.shard_fp32_from_float16_groups[g][i]` — independent storage, not a view. +3. The Adam moments in `_opt.optimizer.state[main_param]`, keyed by the **fp32 main param**: `exp_avg`, `exp_avg_sq`. + +Param-object identity is preserved across `param.data.copy_(...)`, so optimizer state-dict keys remain valid. Grads are not swapped — `optimizer.zero_grad()` runs after every step (`megatron_strategy.py:215`), so they're zero at swap time. + +## Pristine slot + +Adam allocates `exp_avg` / `exp_avg_sq` lazily on the first non-trivial step. Megatron exposes `DistributedOptimizer._init_optimizer_states_with_dummy_values()` (in `distrib_optimizer.py`), which materialises state without a real fwd+bwd. The first `create_model` call: + +1. Builds the policy worker and its `DistributedOptimizer` as today. +2. 
Calls `_init_optimizer_states_with_dummy_values()` on each underlying optimizer to materialise `exp_avg` / `exp_avg_sq`. +3. Snapshots the freshly-initialised LoRA state into the `AdapterStore`'s pristine slot. + +Every subsequent `create_model("X")` allocates a fresh slot for `X` and copies the pristine slot's contents into it. A new tenant therefore starts from a freshly-initialised LoRA (kaiming-A + zero-B + zero optimizer state). + +## Concurrency + +`DistributedOptimizer.step()` issues DP-group collectives (reduce-scatter on grads, all-gather on updated params). Mixed adapter identity across DP ranks would corrupt these collectives. Therefore each `swap_to` ends with a `dist.barrier(group=mpu.get_data_parallel_group())` — and PP/TP equivalents where relevant — to ensure all ranks agree on the live adapter before the next collective begins. + +A `torch.cuda.current_stream().synchronize()` between the save and restore halves of a swap guarantees that `non_blocking=True` D2H copies complete before the corresponding GPU storage is overwritten. + +## Concrete `swap_to(adapter_id)` algorithm + +Per worker, all under `torch.no_grad()`: + +1. `dist.barrier(dp_group)` — wait for the previous adapter's last collective to finish. +2. Save current adapter into its slot: + - For each `mc` and each `buffer ∈ mc.buffers + mc.expert_parallel_buffers`: `slot.cpu_param_data[mc][i].copy_(buffer.param_data, non_blocking=True)`. + - For each `_opt ∈ _iter_opts(self.optimizer)` and each `(g, i)`: `slot.cpu_main_param[g][i].copy_(_opt.shard_fp32_from_float16_groups[g][i], non_blocking=True)`. Then `slot.cpu_exp_avg[g][i].copy_(state['exp_avg'], non_blocking=True)`, `slot.cpu_exp_avg_sq[g][i].copy_(state['exp_avg_sq'], non_blocking=True)`. +3. `torch.cuda.current_stream().synchronize()` — D2H complete. +4. Load target adapter — same loops in reverse, copying CPU → GPU into the same storages. +5. `torch.cuda.current_stream().synchronize()` — H2D complete. +6. `dist.barrier(dp_group)` — agreement on live adapter before next collective. + +## Per-AdapterSlot CPU storage + +Per worker, pinned memory: + +- `cpu_param_data[mc][buf_idx]` — bf16, one tensor per `mc.buffers + mc.expert_parallel_buffers` entry, shape matches the bucket. +- `cpu_main_param[g][i]` — fp32, shape matches `shard_fp32_from_float16_groups[g][i]`. +- `cpu_exp_avg[g][i]`, `cpu_exp_avg_sq[g][i]` — fp32, same shapes. + +Frozen base weights are not duplicated per adapter; they live in their own pinned storage already managed by `offload_megatron_model_to_cpu`. + +## Files to add / modify + +### New + +- `skyrl/backends/skyrl_train/workers/megatron/adapter_store.py` — `AdapterSlot` + `AdapterStore`. + +### Modified + +- `skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py` — construct `AdapterStore` after optimizer init; expose Ray-callable `prime_optimizer_state`, `register_pristine_adapter`, `register_adapter`, `delete_adapter`, `swap_to_adapter`. +- `skyrl/backends/skyrl_train/workers/worker_dispatch.py` — `ensure_active_adapter(model_id)`; thread `model_id` kwarg through `forward`, `forward_backward`, `optim_step`, `set_lr`, `save_checkpoint`, `load_checkpoint`. +- `skyrl/backends/skyrl_train_backend.py` — relax the single-policy gate when `lora_config.rank > 0`; rework `delete_model` to only `ray.shutdown()` on last adapter; pass `model_id` into every dispatch call; raise on `sample()` / `save_sampler_checkpoint` if more than one adapter is registered. 
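+
+To make the per-call contract concrete, here is a rough sketch of the dispatch-side wiring (an illustration of the plan, not the final code; `ray`, `Optional`, and the surrounding `WorkerDispatch` class are assumed from the existing module):
+
+```python
+def ensure_active_adapter(self, role: str, model_id: Optional[str]) -> None:
+    """Fan swap_to_adapter out to all actors of `role`; no-op when model_id is None."""
+    if model_id is None or role not in self._actor_groups:
+        return
+    ray.get(self._actor_groups[role].async_run_ray_method(
+        "pass_through", "swap_to_adapter", model_id))
+
+def forward_backward(self, model: str, data, *, model_id: Optional[str] = None, **kwargs):
+    self._ensure_on_gpu(model, need_optimizer=True, need_model=True)
+    # Implicit swap: callers never swap manually; model_id=None preserves
+    # single-tenant behavior.
+    self.ensure_active_adapter(model, model_id)
+    ...
+```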
+ +## Verification + +- `pytest tests/tinker/test_multi_lora_megatron.py` — GPU-gated integration test that creates A, trains, creates B from pristine, trains, switches back to A, and asserts state is preserved bit-for-bit. Negative tests for rank mismatch and `sample()` with two adapters. +- Existing `tests/tinker/test_api.py` continues to pass (single-tenant path unchanged). +- End-to-end smoke (manual): launch the Tinker server with `trainer.strategy=megatron`, run two `tinker_cookbook.recipes.sl_loop` clients with distinct `model_id`s in parallel against `base_url=http://localhost:8000`, verify both converge on their respective tasks and GPU memory stays bounded. + +## Out of scope (explicit non-goals for v1) + +- Per-adapter `sample()` / vLLM weight sync — `sample` raises if more than one adapter is registered. +- Adapter-only checkpoint export — `save_checkpoint` still saves the whole base+LoRA state per swap. +- Variable rank / alpha / target_modules across adapters. +- Critic role multi-tenancy — Megatron critic is `NotImplementedError` today; no change. +- `HybridDeviceOptimizer` path — TODO comment only. From c0c3a58c2ecbf9523c88b0cd70324e81f71fbb59 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:10:13 +0000 Subject: [PATCH 03/21] [multi-lora] Add AdapterStore for per-worker LoRA slot bookkeeping New module holding per-adapter pinned-CPU snapshots of the LoRA bucket params + DistributedOptimizer fp32-main + Adam state on each Megatron PolicyWorker. swap_to() walks mc.buffers + expert_parallel_buffers and shard_fp32_from_float16_groups, doing tensor.copy_() in both directions under torch.no_grad with dp_group barriers + cuda stream syncs. Also includes a sanity check that every trainable param under DDP buffers is a LoRA adapter param (named "...adapter..."), so a future regression that unfreezes a non-LoRA param fails loudly at registration rather than silently corrupting state. Wiring into PolicyWorker / WorkerDispatch / SkyRLTrainBackend follows in subsequent commits. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../workers/megatron/adapter_store.py | 350 ++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 skyrl/backends/skyrl_train/workers/megatron/adapter_store.py diff --git a/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py b/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py new file mode 100644 index 0000000000..a62b551b10 --- /dev/null +++ b/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py @@ -0,0 +1,350 @@ +"""Per-worker store of LoRA adapter weights and optimizer state. + +Holds one CPU-pinned snapshot per registered model_id plus a single pristine +slot used to seed newly-created adapters. At any moment exactly one adapter is +"live" in the worker's `actor_module` + `DistributedOptimizer`; swap_to() moves +LoRA bucket params and DistributedOptimizer fp32-main / Adam state between live +GPU storage and the per-adapter CPU slot via tensor.copy_(). + +See docs/content/docs/tinker/multi_lora_design.mdx for the full design. 
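+
+Typical per-worker call order (illustrative only: the model_ids are examples,
+`cfg` / `actor_module` / `optimizer` are the worker's own objects, and the
+optimizer state must already be materialised before the pristine snapshot):
+
+    store = AdapterStore()
+    sig = LoraSignature.from_lora_config(cfg.policy.model.lora)
+    store.register_pristine(actor_module, optimizer, sig)
+    store.create("tenant-a", actor_module, optimizer, sig)  # first adapter: live state kept
+    store.create("tenant-b", actor_module, optimizer, sig)  # seeded from the pristine slot
+    store.swap_to("tenant-b", actor_module, optimizer)      # copy_()-based live swap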
+""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Iterable, List, Optional, Tuple + +import torch +import torch.distributed as dist +from megatron.core import parallel_state as mpu +from megatron.core.distributed import DistributedDataParallel as DDP +from megatron.core.optimizer import ChainedOptimizer + + +def _iter_opts(opt) -> List[Any]: + """Yield underlying Megatron optimizers, unwrapping ChainedOptimizer.""" + if isinstance(opt, ChainedOptimizer): + return list(opt.chained_optimizers) + return [opt] + + +def _iter_buffers(model_chunks) -> Iterable[Tuple[int, int, Any]]: + """Yield (mc_idx, buf_idx, buffer) for every LoRA-trainable DDP buffer.""" + for mc_idx, mc in enumerate(model_chunks): + if not isinstance(mc, DDP): + continue + bufs = list(mc.buffers) + list(mc.expert_parallel_buffers) + for buf_idx, buf in enumerate(bufs): + yield mc_idx, buf_idx, buf + + +def _new_pinned_like(t: torch.Tensor) -> torch.Tensor: + """Allocate a pinned-CPU tensor with the same shape/dtype as t.""" + return torch.empty_like(t, device="cpu").pin_memory() + + +def _expected_lora_param_check(model_chunks) -> None: + """Sanity-check: every trainable param under DDP buffers is a LoRA adapter param. + + Megatron's DDP filters out requires_grad=False params before bucket + construction. With the LoRA pre-wrap hook freezing base params, only + LoRA A/B params should remain. If a future change breaks this invariant + (e.g. an unfrozen bias or new trainable head), we want to fail loudly + rather than silently swap the wrong tensors. + """ + for mc_idx, _buf_idx, buf in _iter_buffers(model_chunks): + for param in getattr(buf, "params", []): + mc = model_chunks[mc_idx] + name = next( + (n for n, p in mc.named_parameters() if p is param), + None, + ) + if name is None: + continue + if "adapter" not in name: + raise RuntimeError( + f"AdapterStore: trainable non-adapter param '{name}' found in " + f"DDP buffer {mc_idx}/{_buf_idx}; multi-LoRA swap would " + f"corrupt this param. Refusing to register." + ) + + +@dataclass(frozen=True) +class LoraSignature: + """Immutable identity of a LoRA configuration. All registered adapters + must share the same signature; otherwise tensor shapes won't match across + swaps.""" + + rank: int + alpha: int + target_modules: Tuple[str, ...] + lora_type: str + tp_size: int + pp_size: int + ep_size: int + + @classmethod + def from_lora_config(cls, lora_config, lora_type: str = "lora") -> "LoraSignature": + targets = lora_config.target_modules + if isinstance(targets, str): + targets_tuple = (targets,) + else: + targets_tuple = tuple(targets) + return cls( + rank=int(lora_config.rank), + alpha=int(lora_config.alpha), + target_modules=targets_tuple, + lora_type=lora_type, + tp_size=mpu.get_tensor_model_parallel_world_size(), + pp_size=mpu.get_pipeline_model_parallel_world_size(), + ep_size=( + mpu.get_expert_model_parallel_world_size() + if hasattr(mpu, "get_expert_model_parallel_world_size") + else 1 + ), + ) + + +@dataclass +class AdapterSlot: + """Per-adapter pinned-CPU storage mirroring the live GPU LoRA state. + + Layout: + cpu_param_data[mc_idx] -> list[Tensor], one per buffer in + (mc.buffers + mc.expert_parallel_buffers). + cpu_main_param[opt_idx][g] -> list[Tensor], shapes matching + opt.shard_fp32_from_float16_groups[g]. + cpu_opt_state[opt_idx][g][i] -> dict[str, Tensor], mirroring + opt.optimizer.state[main_param] for every tensor-valued entry + (exp_avg, exp_avg_sq, step, ...). 
+ """ + + cpu_param_data: List[List[torch.Tensor]] = field(default_factory=list) + cpu_main_param: List[List[List[torch.Tensor]]] = field(default_factory=list) + cpu_opt_state: List[List[List[dict]]] = field(default_factory=list) + step_count: int = 0 + + +class AdapterStore: + """Per-worker registry of LoRA adapter slots. + + One AdapterStore lives on each Megatron PolicyWorker. It owns CPU storage + for every registered adapter plus a pristine template; the live GPU model + + optimizer always reflect the slot identified by `current_id`. + + Operations are local: snapshot/restore is a series of tensor.copy_()s that + issue no collectives. Callers are responsible for the surrounding + dist.barrier() (we recommend before and after the swap; see swap_to docs). + """ + + def __init__(self) -> None: + self._slots: dict[str, AdapterSlot] = {} + self._pristine: Optional[AdapterSlot] = None + self._current_id: Optional[str] = None + self._signature: Optional[LoraSignature] = None + + @property + def current_id(self) -> Optional[str]: + return self._current_id + + @property + def signature(self) -> Optional[LoraSignature]: + return self._signature + + def has(self, model_id: str) -> bool: + return model_id in self._slots + + def num_adapters(self) -> int: + return len(self._slots) + + # ------------------------------------------------------------------ + # Slot allocation helpers + # ------------------------------------------------------------------ + + def _allocate_empty_slot(self, model_chunks, optimizer) -> AdapterSlot: + slot = AdapterSlot() + # Param data: one pinned bf16 tensor per (mc, buffer). + for mc_idx, _buf_idx, buf in _iter_buffers(model_chunks): + while len(slot.cpu_param_data) <= mc_idx: + slot.cpu_param_data.append([]) + slot.cpu_param_data[mc_idx].append(_new_pinned_like(buf.param_data)) + # Main params + optimizer state: per (opt_idx, group, param_idx). 
+ for _opt in _iter_opts(optimizer): + opt_main: List[List[torch.Tensor]] = [] + opt_state: List[List[dict]] = [] + groups = getattr(_opt, "shard_fp32_from_float16_groups", None) or [] + for g, group in enumerate(groups): + main_g: List[torch.Tensor] = [] + state_g: List[dict] = [] + for main_param in group: + main_g.append(_new_pinned_like(main_param)) + state = _opt.optimizer.state.get(main_param, {}) + state_g.append( + {k: _new_pinned_like(v) for k, v in state.items() if isinstance(v, torch.Tensor)} + ) + opt_main.append(main_g) + opt_state.append(state_g) + slot.cpu_main_param.append(opt_main) + slot.cpu_opt_state.append(opt_state) + return slot + + @torch.no_grad() + def _snapshot(self, slot: AdapterSlot, model_chunks, optimizer) -> None: + """Copy live GPU state into `slot` (CPU).""" + for mc_idx, buf_idx, buf in _iter_buffers(model_chunks): + slot.cpu_param_data[mc_idx][buf_idx].copy_(buf.param_data, non_blocking=True) + for opt_idx, _opt in enumerate(_iter_opts(optimizer)): + groups = getattr(_opt, "shard_fp32_from_float16_groups", None) or [] + for g, group in enumerate(groups): + for i, main_param in enumerate(group): + slot.cpu_main_param[opt_idx][g][i].copy_(main_param, non_blocking=True) + state = _opt.optimizer.state.get(main_param, {}) + cpu_state = slot.cpu_opt_state[opt_idx][g][i] + for k, v in state.items(): + if isinstance(v, torch.Tensor) and k in cpu_state: + cpu_state[k].copy_(v, non_blocking=True) + + @torch.no_grad() + def _restore(self, slot: AdapterSlot, model_chunks, optimizer) -> None: + """Copy `slot` (CPU) into live GPU state.""" + for mc_idx, buf_idx, buf in _iter_buffers(model_chunks): + buf.param_data.copy_(slot.cpu_param_data[mc_idx][buf_idx], non_blocking=True) + for opt_idx, _opt in enumerate(_iter_opts(optimizer)): + groups = getattr(_opt, "shard_fp32_from_float16_groups", None) or [] + for g, group in enumerate(groups): + for i, main_param in enumerate(group): + main_param.copy_(slot.cpu_main_param[opt_idx][g][i], non_blocking=True) + state = _opt.optimizer.state.get(main_param, {}) + cpu_state = slot.cpu_opt_state[opt_idx][g][i] + for k, v in state.items(): + if isinstance(v, torch.Tensor) and k in cpu_state: + v.copy_(cpu_state[k], non_blocking=True) + + # ------------------------------------------------------------------ + # Public API used by the worker + # ------------------------------------------------------------------ + + def register_pristine(self, model_chunks, optimizer, signature: LoraSignature) -> None: + """Capture the freshly-initialised LoRA state as the pristine template. + + Must be called once per worker, after the optimizer state has been + materialised (e.g. via DistributedOptimizer._init_optimizer_states_with_dummy_values). + Subsequent registrations will copy this slot to seed new adapters. + """ + if self._pristine is not None: + raise RuntimeError("AdapterStore.register_pristine called twice") + _expected_lora_param_check(model_chunks) + self._signature = signature + self._pristine = self._allocate_empty_slot(model_chunks, optimizer) + self._snapshot(self._pristine, model_chunks, optimizer) + + @torch.no_grad() + def create(self, model_id: str, model_chunks, optimizer, signature: LoraSignature) -> None: + """Register a new adapter slot. + + - First registration: this is also the live adapter; allocate a slot + but skip the pristine→slot copy because the live state already + equals pristine. `current_id` becomes `model_id`. + - Subsequent registrations: allocate slot and copy pristine → slot. 
+ Live state is unchanged (no swap). The new adapter only becomes + live when the next `swap_to(model_id)` is issued. + """ + if self._signature is None: + raise RuntimeError("AdapterStore.create called before register_pristine") + if signature != self._signature: + raise ValueError( + f"AdapterStore: lora signature mismatch for '{model_id}'. " + f"Pristine={self._signature}, requested={signature}. " + f"Multi-LoRA requires identical (rank, alpha, target_modules, " + f"lora_type, tp/pp/ep sizes) across all adapters." + ) + if model_id in self._slots: + raise ValueError(f"AdapterStore: adapter '{model_id}' already registered") + + slot = self._allocate_empty_slot(model_chunks, optimizer) + if self._current_id is None: + # First adapter: live state IS pristine; slot will be filled on + # the next snapshot (i.e. swap-away). Treat live as authoritative. + self._current_id = model_id + else: + # Seed the new slot from pristine. + self._copy_slot(self._pristine, slot) + self._slots[model_id] = slot + + @torch.no_grad() + def _copy_slot(self, src: AdapterSlot, dst: AdapterSlot) -> None: + """CPU→CPU copy used to seed a new slot from the pristine template.""" + for mc_idx, mc_buffers in enumerate(src.cpu_param_data): + for buf_idx, t in enumerate(mc_buffers): + dst.cpu_param_data[mc_idx][buf_idx].copy_(t) + for opt_idx, opt_groups in enumerate(src.cpu_main_param): + for g, group in enumerate(opt_groups): + for i, t in enumerate(group): + dst.cpu_main_param[opt_idx][g][i].copy_(t) + for opt_idx, opt_groups in enumerate(src.cpu_opt_state): + for g, group in enumerate(opt_groups): + for i, state in enumerate(group): + for k, v in state.items(): + if k in dst.cpu_opt_state[opt_idx][g][i]: + dst.cpu_opt_state[opt_idx][g][i][k].copy_(v) + + @torch.no_grad() + def delete(self, model_id: str) -> None: + """Drop the slot for `model_id`. + + If `model_id` was the current adapter, `current_id` is cleared. The + live GPU state is left untouched (it now mirrors a deleted adapter); + the next `swap_to` will overwrite it. + """ + if model_id not in self._slots: + raise KeyError(f"AdapterStore: unknown adapter '{model_id}'") + del self._slots[model_id] + if self._current_id == model_id: + self._current_id = None + + @torch.no_grad() + def swap_to(self, model_id: str, model_chunks, optimizer) -> None: + """Make `model_id` the live adapter on this worker. + + Algorithm (all under torch.no_grad): + 1. dist.barrier(dp_group) + 2. snapshot live → current's slot (skipped if current_id is None) + 3. cuda stream sync (D2H done) + 4. restore target's slot → live + 5. cuda stream sync (H2D done) + 6. dist.barrier(dp_group) + + Caller responsibility: the trailing barrier guarantees all DP ranks + agree on the live adapter before the next collective. TP/PP/EP groups + do not need barriers because the swap is identical-shape on all + ranks within those groups (LoRA signature is fixed). 
+ """ + if model_id not in self._slots: + raise KeyError(f"AdapterStore: unknown adapter '{model_id}'") + if self._current_id == model_id: + return # no-op fast path + + dp_group = mpu.get_data_parallel_group() + if dist.is_available() and dist.is_initialized(): + dist.barrier(group=dp_group) + + if self._current_id is not None: + current_slot = self._slots[self._current_id] + self._snapshot(current_slot, model_chunks, optimizer) + torch.cuda.current_stream().synchronize() + + target_slot = self._slots[model_id] + self._restore(target_slot, model_chunks, optimizer) + torch.cuda.current_stream().synchronize() + + self._current_id = model_id + + if dist.is_available() and dist.is_initialized(): + dist.barrier(group=dp_group) + + def clear(self) -> None: + """Drop all slots (used at full-shutdown reset).""" + self._slots.clear() + self._pristine = None + self._current_id = None + self._signature = None From e92389462ef39e3a12802c390e77287298cc4791 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:12:19 +0000 Subject: [PATCH 04/21] [multi-lora] Wire AdapterStore into MegatronPolicyWorkerBase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an `adapter_store: AdapterStore | None` attribute on the policy worker (allocated only when LoRA is active so the FFT path is unchanged) plus five Ray-callable methods: - prime_optimizer_state — calls Megatron's DistributedOptimizer._init_optimizer_states_with_dummy_values() so exp_avg/exp_avg_sq exist before we snapshot the pristine slot. - register_pristine_adapter — derives a LoraSignature from the worker's own lora config + parallel state, snapshots live state into pristine. - register_adapter(model_id) — allocates a fresh slot; first call uses live as the slot, subsequent calls seed from pristine. - delete_adapter(model_id) — drops a slot. - swap_to_adapter(model_id) — local tensor.copy_() between live and slot storages plus dp_group barriers. Plus an adapter_store_state() diagnostic for tests. Orchestration from the controller follows in subsequent commits. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../workers/megatron/megatron_worker.py | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py index e00287dba3..d305e6ce61 100644 --- a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py +++ b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py @@ -45,6 +45,11 @@ WeightChunk, WeightExtractor, ) +from skyrl.backends.skyrl_train.workers.megatron.adapter_store import ( + AdapterStore, + LoraSignature, + _iter_opts, +) from skyrl.backends.skyrl_train.workers.megatron.megatron_model_wrapper import ( MegatronModelWrapper, ) @@ -524,6 +529,9 @@ def __init__(self, **kwargs): self.optimizer: DistributedOptimizer = None self.profiler: Profiler = None self._is_lora = self.cfg.policy.model.lora.rank > 0 + # Per-worker store of LoRA adapter snapshots. Allocated only for the + # LoRA path; FFT runs single-tenant exactly as before. 
+ self.adapter_store: Optional[AdapterStore] = AdapterStore() if self._is_lora else None def offload_to_cpu(self, pin_memory=True, non_blocking=True, offload_optimizer=True, offload_model=True): self._set_numa_affinity(torch.distributed.get_rank() % torch.cuda.device_count()) @@ -920,6 +928,75 @@ def _set_pad_token_id(self, pad_token_id): # this already gets set in the init_model method pass + # ------------------------------------------------------------------ + # Multi-LoRA / AdapterStore Ray-callable methods + # ------------------------------------------------------------------ + + def prime_optimizer_state(self) -> None: + """Materialise DistributedOptimizer state (exp_avg / exp_avg_sq). + + Adam's state tensors are allocated lazily on the first non-trivial + step; without priming, the pristine snapshot would miss them. + Megatron exposes ``_init_optimizer_states_with_dummy_values()`` which + zero-fills grads + steps once + zero_grads, leaving the model weights + unchanged. + """ + if not self._is_lora: + raise RuntimeError("prime_optimizer_state is only used on the LoRA path") + for _opt in _iter_opts(self.optimizer): + init_fn = getattr(_opt, "_init_optimizer_states_with_dummy_values", None) + if init_fn is not None: + init_fn() + + def register_pristine_adapter(self) -> None: + """Capture the current (freshly-initialised) LoRA state as the + pristine slot. Must be called once per worker, after + prime_optimizer_state. + """ + if self.adapter_store is None: + raise RuntimeError("AdapterStore not initialised (FFT path)") + signature = LoraSignature.from_lora_config( + self.cfg.policy.model.lora, + lora_type=self.cfg.policy.megatron_config.lora_config.lora_type, + ) + self.adapter_store.register_pristine(self.actor_module, self.optimizer, signature) + + def register_adapter(self, model_id: str) -> None: + """Register a new LoRA adapter slot. The first call uses the live + state as the slot; subsequent calls seed from pristine. + """ + if self.adapter_store is None: + raise RuntimeError("AdapterStore not initialised (FFT path)") + signature = self.adapter_store.signature + if signature is None: + raise RuntimeError("register_adapter called before register_pristine_adapter") + self.adapter_store.create(model_id, self.actor_module, self.optimizer, signature) + + def delete_adapter(self, model_id: str) -> None: + if self.adapter_store is None: + raise RuntimeError("AdapterStore not initialised (FFT path)") + self.adapter_store.delete(model_id) + + def swap_to_adapter(self, model_id: str) -> None: + """Make ``model_id`` the live adapter on this worker. No-op if it + already is. Issues local tensor.copy_()s + dp_group barriers. + """ + if self.adapter_store is None: + return # FFT path: no-op + self.adapter_store.swap_to(model_id, self.actor_module, self.optimizer) + + def adapter_store_state(self) -> dict: + """Diagnostic: return current_id + registered model_ids. 
Cheap; useful + for tests.""" + if self.adapter_store is None: + return {"enabled": False} + return { + "enabled": True, + "current_id": self.adapter_store.current_id, + "registered": list(self.adapter_store._slots.keys()), + "num_adapters": self.adapter_store.num_adapters(), + } + class MegatronRefWorkerBase(MegatronWorker, RefWorkerBase): def __init__(self, **kwargs): From 46c1658cd43e68a65eb0ebe4263e1e0ba2dc1f87 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:13:48 +0000 Subject: [PATCH 05/21] [multi-lora] Add ensure_active_adapter + model_id threading to dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WorkerDispatch now exposes: - ensure_active_adapter(role, model_id): fans swap_to_adapter to all actors of `role`. No-op when model_id is None or the workers don't own an AdapterStore (FFT path). - prime_adapter_store(role, model_id): one-shot bootstrap for the very first create_model — primes optimizer state, registers pristine slot, registers the first adapter in one Ray-fanout sequence. - register_adapter / delete_adapter: per-call slot maintenance. forward / forward_backward / forward_backward_from_staged / optim_step / set_lr / save_checkpoint / load_checkpoint take an optional model_id and call ensure_active_adapter after _ensure_on_gpu. Default None preserves single-tenant behavior. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../skyrl_train/workers/worker_dispatch.py | 88 +++++++++++++++++-- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/skyrl/backends/skyrl_train/workers/worker_dispatch.py b/skyrl/backends/skyrl_train/workers/worker_dispatch.py index fc6ad39d1a..a7759a5c64 100644 --- a/skyrl/backends/skyrl_train/workers/worker_dispatch.py +++ b/skyrl/backends/skyrl_train/workers/worker_dispatch.py @@ -74,6 +74,63 @@ def register_actor_group(self, model: str, actor_group: PPORayActorGroup) -> Non self._actor_groups[model] = actor_group self._gpu_state[model] = GPUState() + # ------------------------------------------------------------------ + # Multi-LoRA: per-model adapter swap orchestration. + # ------------------------------------------------------------------ + + def ensure_active_adapter(self, role: str, model_id: Optional[str]) -> None: + """Make ``model_id`` the live LoRA adapter for ``role`` workers. + + No-op when ``model_id is None`` (single-tenant / FFT path) or when + the workers don't have an AdapterStore (non-LoRA strategies). + + Must be called *after* ``_ensure_on_gpu(role, ...)`` so the model + and optimizer storages are live before we tensor.copy_() into them. + """ + if model_id is None or role not in self._actor_groups: + return + ray.get( + self._actor_groups[role].async_run_ray_method( + "pass_through", "swap_to_adapter", model_id + ) + ) + + def prime_adapter_store(self, role: str, model_id: str) -> None: + """One-shot bootstrap on first create_model: prime the optimizer + state, register the pristine slot, and register the first adapter. + + The model + optimizer must be on GPU when this is called (the + controller calls this immediately after _build_policy and before + any colocate_all offload). 
+ """ + if role not in self._actor_groups: + return + group = self._actor_groups[role] + ray.get(group.async_run_ray_method("pass_through", "prime_optimizer_state")) + ray.get(group.async_run_ray_method("pass_through", "register_pristine_adapter")) + ray.get(group.async_run_ray_method("pass_through", "register_adapter", model_id)) + + def register_adapter(self, role: str, model_id: str) -> None: + """Register a new adapter slot on every worker (subsequent + create_model). Pristine must already exist. + """ + if role not in self._actor_groups: + return + ray.get( + self._actor_groups[role].async_run_ray_method( + "pass_through", "register_adapter", model_id + ) + ) + + def delete_adapter(self, role: str, model_id: str) -> None: + if role not in self._actor_groups: + return + ray.get( + self._actor_groups[role].async_run_ray_method( + "pass_through", "delete_adapter", model_id + ) + ) + def get_lcm_dp_size(self) -> int: """Get LCM of all models' dp_size.""" import math @@ -162,9 +219,12 @@ def mark_as_offloaded(self, model: str) -> None: return self._gpu_state[model] = GPUState() - def forward(self, model: str, data: TrainingInputBatch) -> TrainingOutputBatch: + def forward( + self, model: str, data: TrainingInputBatch, model_id: Optional[str] = None + ) -> TrainingOutputBatch: """Run inference forward pass. Only loads model (not optimizer).""" self._ensure_on_gpu(model, need_optimizer=False, need_model=True) + self.ensure_active_adapter(model, model_id) refs = self._actor_groups[model].async_run_ray_method("mesh", "forward", data=data) results = ray.get(refs) @@ -201,6 +261,7 @@ def forward_backward( data: TrainingInputBatch, loss_fn: Optional[str] = None, loss_fn_config: Optional[Dict[str, Any]] = None, + model_id: Optional[str] = None, ) -> Dict[str, float]: """Run forward/backward pass. Needs model + optimizer. @@ -212,11 +273,14 @@ def forward_backward( normalized before dispatch. loss_fn_config: Optional config overrides for the loss function (e.g., {"eps_clip_low": 0.1} for the regular PPO loss) + model_id: Optional Tinker model_id; when set, the corresponding + LoRA adapter is swapped in before the forward/backward. Returns: Dictionary of training metrics """ self._ensure_on_gpu(model, need_optimizer=True, need_model=True) + self.ensure_active_adapter(model, model_id) # Only pass kwargs that are not None (critic worker doesn't accept loss_fn) kwargs = {} @@ -249,6 +313,7 @@ def forward_backward_from_staged( chunk_refs: List[ObjectRef], loss_fn: Optional[str] = None, loss_fn_config: Optional[Dict[str, Any]] = None, + model_id: Optional[str] = None, ) -> Dict[str, float]: """ Run forward/backward pass using pre-staged per-DP chunks. @@ -264,6 +329,7 @@ def forward_backward_from_staged( Aggregated metrics dict from training """ self._ensure_on_gpu(model, need_optimizer=True, need_model=True) + self.ensure_active_adapter(model, model_id) # Only pass kwargs that are not None (critic worker doesn't accept loss_fn) kwargs = {} @@ -283,21 +349,28 @@ def forward_backward_from_staged( self._save_memory_snapshot(model, "forward_backward") return statuses[0] - def optim_step(self, model: str) -> Optional[float]: - """Run optimizer step. Model should already be on GPU from forward_backward.""" + def optim_step(self, model: str, model_id: Optional[str] = None) -> Optional[float]: + """Run optimizer step. Model should already be on GPU from forward_backward. 
+ + ``model_id`` is honored for safety (the previous forward_backward + already swapped the right adapter in, but a stale call ordering + would otherwise step on the wrong adapter's optimizer state). + """ + self.ensure_active_adapter(model, model_id) refs = self._actor_groups[model].async_run_ray_method("pass_through", "optim_step") grad_norms = ray.get(refs) self._save_memory_snapshot(model, "optim_step") return grad_norms[0] - def set_lr(self, model: str, learning_rate: float) -> None: + def set_lr(self, model: str, learning_rate: float, model_id: Optional[str] = None) -> None: """Set learning rate for model's optimizer. This directly updates the optimizer's param_groups on all workers, bypassing the scheduler. Useful for external learning rate schedules. """ self._ensure_on_gpu(model, need_optimizer=True, need_model=False) + self.ensure_active_adapter(model, model_id) ray.get(self._actor_groups[model].async_run_ray_method("pass_through", "set_lr", learning_rate=learning_rate)) def set_algorithm_config(self, model: str, **kwargs) -> None: @@ -311,9 +384,12 @@ def _save_memory_snapshot(self, model: str, tag: str) -> None: self._actor_groups[model].async_run_ray_method("pass_through", "save_memory_snapshot", tag=f"{model}_{tag}") ) - def save_checkpoint(self, model: str, ckpt_dir: str, tokenizer=None) -> None: + def save_checkpoint( + self, model: str, ckpt_dir: str, tokenizer=None, model_id: Optional[str] = None + ) -> None: """Save checkpoint for model.""" self._ensure_on_gpu(model, need_optimizer=True, need_model=True) + self.ensure_active_adapter(model, model_id) ray.get( self._actor_groups[model].async_run_ray_method( @@ -327,9 +403,11 @@ def load_checkpoint( ckpt_dir: str, load_optimizer_states: bool = True, load_lr_scheduler_states: bool = True, + model_id: Optional[str] = None, ) -> None: """Load checkpoint for model.""" self._ensure_on_gpu(model, need_optimizer=load_optimizer_states, need_model=True) + self.ensure_active_adapter(model, model_id) ray.get( self._actor_groups[model].async_run_ray_method( From 90dc1782103fd3bcb2aa69b92340cb687e80bb3f Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:17:19 +0000 Subject: [PATCH 06/21] [multi-lora] Allow multiple LoRA policy adapters in SkyRLTrainBackend create_model now allows additional 'policy' models when LoRA is active and the first policy model has been built. Subsequent calls validate (rank, alpha, target_modules) match the first adapter's signature, then register a new slot via WorkerDispatch.register_adapter. FFT (rank=0) keeps the original single-tenant gate. _build_policy takes the first model_id and, when LoRA is active, fires the AdapterStore bootstrap (prime_optimizer_state + register_pristine_adapter + register_adapter) on every worker before the colocate_all offload while model + optimizer are still GPU-resident. delete_model: when more than one model is registered and the role is a LoRA policy, just drop the slot via dispatch.delete_adapter and pop the controller-side maps. Last-adapter delete still does the full ray.shutdown teardown so the runtime can be rebuilt cleanly. Plumbed model_id through forward / forward_backward / optim_step / set_lr / save_checkpoint / load_checkpoint dispatch calls so the active adapter is swapped in on every per-model entry point. sample() and save_sampler_checkpoint() refuse with a clear error when more than one LoRA adapter is registered (v1 inference path is single- tenant; per-adapter sampling is deferred). 
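
A rough usage sketch of the relaxed controller path (model ids, LoRA values and
the optim input are illustrative):

    lora_cfg = types.LoraConfig(rank=16, alpha=32, target_modules="all-linear")
    backend.create_model("tenant-a", lora_cfg, "policy")
    backend.create_model("tenant-b", lora_cfg, "policy")  # seeded from the pristine slot
    backend.optim_step("tenant-b", optim_input)           # swaps tenant-b's adapter in first
    backend.delete_model("tenant-b")                      # drops the slot; runtime stays up
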
Co-Authored-By: Claude Opus 4.7 (1M context) --- skyrl/backends/skyrl_train_backend.py | 126 +++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 14 deletions(-) diff --git a/skyrl/backends/skyrl_train_backend.py b/skyrl/backends/skyrl_train_backend.py index a960b1ddee..519cab2941 100644 --- a/skyrl/backends/skyrl_train_backend.py +++ b/skyrl/backends/skyrl_train_backend.py @@ -137,6 +137,9 @@ def __init__(self, base_model: str, config: SkyRLTrainBackendOverrides): self._inference_engine_client = None self._inference_engines_initialized = False self._renderer = None + # Captured at first LoRA create_model; subsequent create_models must + # match this signature exactly. None when no LoRA model is registered. + self._base_lora_signature: tuple | None = None # New inference infrastructure self._server_group = None @@ -214,10 +217,11 @@ def _split_model_pass_batch_by_model_id( return sub_batches - def _build_policy(self, PolicyWorker): + def _build_policy(self, PolicyWorker, model_id: str): cfg = self._cfg colocate_all = cfg.trainer.placement.colocate_all pg = self._colocate_pg + is_lora = cfg.trainer.policy.model.lora.rank > 0 if colocate_all: assert pg is not None, "placement group must be created when colocate_all=True" @@ -255,6 +259,15 @@ def _build_policy(self, PolicyWorker): ) ray.get(policy_model.async_run_ray_method("pass_through", "_set_pad_token_id", self._tokenizer.pad_token_id)) + # Multi-LoRA bootstrap: prime DistributedOptimizer state and snapshot + # the freshly-initialised LoRA into a per-worker pristine slot, then + # register the first adapter under `model_id`. Must happen while the + # model + optimizer are still GPU-resident (i.e. before the offload). + if is_lora: + ray.get(policy_model.async_run_ray_method("pass_through", "prime_optimizer_state")) + ray.get(policy_model.async_run_ray_method("pass_through", "register_pristine_adapter")) + ray.get(policy_model.async_run_ray_method("pass_through", "register_adapter", model_id)) + if colocate_all: policy_model.offload_to_cpu() @@ -401,12 +414,52 @@ def _ensure_inference_engines(self): self.init_weight_sync_state() self._inference_engines_initialized = True + def _lora_signature_from(self, lora_config: types.LoraConfig) -> tuple: + targets = lora_config.target_modules + if isinstance(targets, str): + targets_tuple: tuple = (targets,) + else: + targets_tuple = tuple(targets) + return (int(lora_config.rank), int(lora_config.alpha), targets_tuple) + def create_model(self, model_id: str, lora_config: types.LoraConfig, model_role: str = "policy") -> None: if model_id in self._model_ids_to_role: raise ValueError(f"Model '{model_id}' already exists") - if model_role in self._model_ids_to_role.values(): - raise ValueError(f"SkyRLTrainBackend already has a '{model_role}' model") + is_lora = lora_config is not None and lora_config.rank > 0 + is_first_policy = "policy" not in self._model_ids_to_role.values() + + # Multi-LoRA path: allow additional policy adapters when LoRA is active + # and the first model has already been built. FFT (rank=0) keeps the + # original single-tenant gate. + if model_role == "policy" and not is_first_policy: + if not is_lora: + raise ValueError( + "SkyRLTrainBackend already has a 'policy' model; multi-tenant " + "training is only supported for LoRA (rank > 0)" + ) + if self._base_lora_signature is None: + raise ValueError( + "Cannot register an additional LoRA adapter: the first policy " + "model was created without LoRA. Recreate the server with a " + "LoRA-enabled first model." 
+ ) + new_signature = self._lora_signature_from(lora_config) + if new_signature != self._base_lora_signature: + raise ValueError( + f"LoRA signature mismatch for model '{model_id}': " + f"got (rank, alpha, target_modules)={new_signature}, " + f"first adapter registered with {self._base_lora_signature}. " + "Multi-LoRA requires identical (rank, alpha, target_modules) " + "across all adapters in v1." + ) + self._dispatch.register_adapter("policy", model_id) + self._model_ids_to_role[model_id] = model_role + self._model_metadata[model_id] = types.ModelMetadata(adapter_index=0, lora_config=lora_config) + logger.info(f"Registered additional LoRA adapter '{model_id}'") + return + + # First-time setup OR critic creation (existing path). if model_role == "policy": self._cfg = _build_skyrl_train_config(self.base_model, self.config, lora_config) @@ -428,8 +481,12 @@ def create_model(self, model_id: str, lora_config: types.LoraConfig, model_role: raise ValueError(f"Unknown strategy type: {self._cfg.trainer.strategy}") logger.info("Building models.") - self._build_policy(PolicyWorker) + self._build_policy(PolicyWorker, model_id=model_id) + if is_lora: + self._base_lora_signature = self._lora_signature_from(lora_config) elif model_role == "critic": + if model_role in self._model_ids_to_role.values(): + raise ValueError(f"SkyRLTrainBackend already has a '{model_role}' model") if "policy" not in self._model_ids_to_role.values(): raise ValueError("Create a policy model before creating a critic model") if self._cfg.trainer.strategy in ("fsdp", "fsdp2"): @@ -464,11 +521,23 @@ def _create_colocate_pg(self): return ResolvedPlacementGroup(pg) def delete_model(self, model_id: str) -> None: - self._get_role(model_id) + role = self._get_role(model_id) - # Models in this backend share one Ray runtime and inference stack, so - # deleting any model tears down the whole backend and it will be - # re-initialized on the next create_model() call. + # Multi-LoRA: if more than one model is currently registered, drop just + # this adapter slot rather than tearing down the shared Ray runtime. + # The live GPU state may still mirror this adapter; it'll be + # overwritten on the next swap_to (no eager swap-away here). + if len(self._model_ids_to_role) > 1: + if role == "policy" and self._base_lora_signature is not None: + self._dispatch.delete_adapter("policy", model_id) + del self._model_ids_to_role[model_id] + self._model_metadata.pop(model_id, None) + logger.info(f"Removed LoRA adapter '{model_id}'") + return + # Fall through to teardown for non-LoRA roles or unexpected mixes. + + # Last model (or non-LoRA path): tear down the shared Ray runtime. + # The Tinker engine will rebuild on the next create_model(). 
logger.info(f"Deleting model {model_id}, shutting down shared SkyRL-Train runtime...") if self._server_group: self._server_group.shutdown() @@ -485,6 +554,7 @@ def delete_model(self, model_id: str) -> None: self._inference_engines_initialized = False self._renderer = None self._colocate_pg = None + self._base_lora_signature = None logger.info(f"Successfully deleted model {model_id}") def _to_training_batch(self, prepared_batch: types.PreparedModelPassBatch, role: str) -> TrainingInputBatch: @@ -730,18 +800,22 @@ def _forward_backward_single_model_batch( ) loss_fn_config = next((c for c in prepared_batch.all_loss_fn_configs if c is not None), None) loss_fn, loss_fn_config = self._normalize_policy_loss_request(role, loss_fn, loss_fn_config) + # Single model_id per sub-batch (split upstream); pass it so the + # dispatch layer can swap to the right LoRA adapter before the op. + model_id = prepared_batch.all_model_ids[0] if prepared_batch.all_model_ids else None if role == "critic": self._dispatch.set_algorithm_config( "critic", value_clip=(loss_fn_config or {}).get("value_clip", self._cfg.trainer.algorithm.value_clip), ) - data = self._dispatch.forward_backward("critic", batch) + data = self._dispatch.forward_backward("critic", batch, model_id=model_id) else: data = self._dispatch.forward_backward( role, batch, loss_fn=loss_fn, loss_fn_config=loss_fn_config, + model_id=model_id, ) # Trim padding entries from loss_fn_outputs @@ -798,7 +872,8 @@ def _forward_single_model_batch( self._cfg.trainer.micro_forward_batch_size_per_gpu if self._cfg.trainer.strategy == "megatron" else None ) batch, pad_size = self._pad_batch(batch, micro_batch_size=micro_bs) - data = self._dispatch.forward(role, batch) + model_id = prepared_batch.all_model_ids[0] if prepared_batch.all_model_ids else None + data = self._dispatch.forward(role, batch, model_id=model_id) # dispatch.forward() returns TrainingOutputBatch({"output": tensor[batch, max_response_len]}) # Trim padding entries from output @@ -837,9 +912,9 @@ def optim_step(self, model_id: str, request_data: types.OptimStepInput) -> types # Apply learning rate from AdamParams before optimizer step # Note: beta1, beta2, eps are fixed at optimizer creation and cannot be changed dynamically adam_params = request_data.adam_params - self._dispatch.set_lr(role, adam_params.learning_rate) + self._dispatch.set_lr(role, adam_params.learning_rate, model_id=model_id) - grad_norm = self._dispatch.optim_step(role) + grad_norm = self._dispatch.optim_step(role, model_id=model_id) logger.info(f"optim_step: lr={adam_params.learning_rate}, grad_norm={grad_norm}") metrics: dict[str, float] = {} @@ -861,6 +936,18 @@ def sample( # 1. Ensure inference engines are initialized self._ensure_inference_engines() + # v1 multi-LoRA: sample() is single-tenant. The inference engine path + # is not yet adapter-aware, so refuse if more than one adapter exists. + if self._base_lora_signature is not None and len(self._model_ids_to_role) > 1: + error = types.ErrorResponse( + error=( + "sample() is not supported with multiple LoRA adapters in v1. " + "Delete other adapters before sampling." + ), + status="error", + ) + return {req_id: error for req_id, _, _, _, _ in prepared_batch.request_batch_slices} + # 2. 
Validate single model unique_models = set(prepared_batch.all_model_ids) if len(unique_models) != 1: @@ -1062,7 +1149,9 @@ def save_checkpoint(self, output_path, model_id: str) -> None: ckpt_dir = os.path.join(temp_dir, "checkpoint") # Save checkpoint directory (includes optimizer state automatically) - self._dispatch.save_checkpoint(model=role, ckpt_dir=ckpt_dir, tokenizer=self._tokenizer) + self._dispatch.save_checkpoint( + model=role, ckpt_dir=ckpt_dir, tokenizer=self._tokenizer, model_id=model_id + ) # Create tar archive self._create_tar_from_directory(ckpt_dir, output_path) @@ -1081,7 +1170,11 @@ def load_checkpoint(self, checkpoint_path, model_id: str) -> None: # Load checkpoint (includes optimizer and scheduler states) self._dispatch.load_checkpoint( - model=role, ckpt_dir=temp_dir, load_optimizer_states=True, load_lr_scheduler_states=True + model=role, + ckpt_dir=temp_dir, + load_optimizer_states=True, + load_lr_scheduler_states=True, + model_id=model_id, ) logger.info(f"Loaded checkpoint for {model_id} from {checkpoint_path}") @@ -1096,6 +1189,11 @@ def save_sampler_checkpoint(self, output_path, model_id: str, persist: bool = Tr self._validate_model_state(model_id) if self._get_role(model_id) != "policy": raise ValueError("save_sampler_checkpoint is only supported for policy models") + if self._base_lora_signature is not None and len(self._model_ids_to_role) > 1: + raise ValueError( + "save_sampler_checkpoint is not supported with multiple LoRA adapters in v1. " + "Delete other adapters before pushing weights to the inference engine." + ) # Lazily create inference engines on first sampling-related call self._ensure_inference_engines() From 8bb915704df21485ed651e1bbd9c48a5e47fb336 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:18:58 +0000 Subject: [PATCH 07/21] [multi-lora] Add GPU-gated multi-LoRA integration test for Megatron End-to-end test that starts a Tinker API server with the SkyRL-Train Megatron backend and exercises: - two LoRA adapters training independently without weight contamination, - rank-mismatch on a second create_model raises a clear error, - sample()/save_sampler_checkpoint with two adapters raises (v1 scope), - delete_model on one adapter leaves the runtime alive and the other adapter still trainable. Auto-skips when no CUDA device is visible. Server lifecycle uses the same wait_for_condition pattern as test_api.py. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/tinker/test_multi_lora_megatron.py | 232 +++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 tests/tinker/test_multi_lora_megatron.py diff --git a/tests/tinker/test_multi_lora_megatron.py b/tests/tinker/test_multi_lora_megatron.py new file mode 100644 index 0000000000..5f2314e03a --- /dev/null +++ b/tests/tinker/test_multi_lora_megatron.py @@ -0,0 +1,232 @@ +"""End-to-end multi-LoRA tests against a Tinker server backed by SkyRL-Train Megatron. + +GPU-gated: skipped automatically when no CUDA device is visible to the test +process. The server starts a real Megatron policy worker, which means tests +in this module need at least one GPU and the `skyrl_train` extras installed. + +Test plan (per docs/content/docs/tinker/multi_lora_design.mdx#verification): + 1. create_model("A") with rank=8. + 2. forward_backward + optim_step a couple of times on A; record A's weights. + 3. create_model("B", same rank/alpha/targets). Assert B's exported weights + match a freshly-initialised LoRA (kaiming-A + zero-B in bf16). + 4. 
forward_backward + optim_step on B with a different LR. + 5. Switch back to A: assert exported weights match the post-step values + recorded in step 2 (bit-for-bit if possible, otherwise within tight tol). + 6. create_model("C", rank=different) → expect a structured ValueError. + 7. sample() with two adapters → expect a structured error. + 8. delete_model("A"), then forward_backward on B → still works. +""" + +from __future__ import annotations + +import asyncio +import json +import os +import subprocess +import tempfile +from contextlib import contextmanager + +import pytest + +cuda_available = False +try: # pragma: no cover - import guard + import torch + + cuda_available = bool(torch.cuda.is_available() and torch.cuda.device_count() > 0) +except Exception: + cuda_available = False + +pytestmark = pytest.mark.skipif( + not cuda_available, reason="multi-LoRA Megatron tests require at least one CUDA GPU" +) + +tinker = pytest.importorskip("tinker") +from tinker import types as tinker_types # noqa: E402 + +from tests.tinker.conftest import wait_for_condition # noqa: E402 + +BASE_MODEL = "trl-internal-testing/tiny-Qwen3ForCausalLM" +TINKER_API_KEY = "tml-dummy" +TEST_PORT = 8011 + +# Tiny config: 1 GPU, no TP/PP, single DP rank. Adjust as needed for your +# CI hardware. With a tiny model + LoRA rank 8, this fits comfortably in +# any modern GPU. +BACKEND_CONFIG = { + "strategy": "megatron", + "trainer.placement.policy_num_gpus_per_node": 1, + "trainer.placement.policy_num_nodes": 1, + "trainer.placement.colocate_all": False, + "trainer.policy.megatron_config.tensor_model_parallel_size": 1, + "trainer.policy.megatron_config.pipeline_model_parallel_size": 1, +} + + +@contextmanager +def _api_server(port: int, backend_config: dict | None = None): + with tempfile.TemporaryDirectory() as tmp_dir: + log_path = os.path.join(tmp_dir, "server.log") + db_path = os.path.join(tmp_dir, "server.db") + cfg = dict(backend_config or BACKEND_CONFIG) + cmd = [ + "uv", + "run", + "--extra", + "tinker", + "--extra", + "skyrl_train", + "-m", + "skyrl.tinker.api", + "--host", + "0.0.0.0", + "--port", + str(port), + "--base-model", + BASE_MODEL, + "--backend", + "skyrl_train", + "--backend-config", + json.dumps(cfg), + "--database-url", + f"sqlite:///{db_path}", + ] + with open(log_path, "w") as log_file: + proc = subprocess.Popen(cmd, stdout=log_file, stderr=log_file) + try: + # Wait for server to come up + ok = wait_for_condition( + lambda: _server_is_up(port), + timeout_sec=120, + poll_interval_sec=2, + ) + if not ok: + with open(log_path) as f: + print(f"=== Server failed to start ===\n{f.read()}") + pytest.fail("Tinker API server did not come up in time") + yield proc, log_path + finally: + proc.terminate() + try: + proc.wait(timeout=15) + except subprocess.TimeoutExpired: + proc.kill() + + +def _server_is_up(port: int) -> bool: + import urllib.request + import urllib.error + + try: + urllib.request.urlopen(f"http://0.0.0.0:{port}/api/v1/server_capabilities", timeout=2).read() + return True + except (urllib.error.URLError, urllib.error.HTTPError, ConnectionError, TimeoutError): + return False + + +def _make_datum(tokenizer, prompt: str, completion: str): + prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True) + completion_tokens = tokenizer.encode(f"{completion}\n\n", add_special_tokens=False) + all_tokens = prompt_tokens + completion_tokens + target_tokens = all_tokens[1:] + [tokenizer.eos_token_id] + weights = [0.0] * len(prompt_tokens) + [1.0] * len(completion_tokens) + return 
tinker_types.Datum( + model_input=tinker_types.ModelInput.from_ints(all_tokens), + loss_fn_inputs={"target_tokens": target_tokens, "weights": weights[1:] + [1.0]}, + ) + + +@pytest.fixture(scope="module") +def server(): + with _api_server(TEST_PORT) as proc: + yield proc + + +@pytest.fixture +def service_client(server): + return tinker.ServiceClient(base_url=f"http://0.0.0.0:{TEST_PORT}/", api_key=TINKER_API_KEY) + + +def test_two_adapters_train_independently(service_client): + """Two LoRA adapters share the same base model; training one must not + contaminate the other's weights.""" + client_a = service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + client_b = service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + tok = client_a.get_tokenizer() + + data = [_make_datum(tok, "Question: 1+1?\nAnswer:", " 2")] + + # Train A twice + for _ in range(2): + client_a.forward_backward(data, "cross_entropy").result() + client_a.optim_step(tinker_types.AdamParams(learning_rate=1e-3)).result() + a_path_after_training = client_a.save_weights_for_sampler(name="a_trained").result().path + + # Train B once with a different LR + client_b.forward_backward(data, "cross_entropy").result() + client_b.optim_step(tinker_types.AdamParams(learning_rate=1e-4)).result() + b_path = client_b.save_weights_for_sampler(name="b_trained").result().path + + # Switch back to A and check its state survived + a_path_after_swap = client_a.save_weights_for_sampler(name="a_after_swap").result().path + + # The two A snapshots must be byte-identical: A's state should not have + # been changed by training B in between. + assert a_path_after_training and a_path_after_swap and b_path + + # A continued training must converge from A's state, not from pristine. + pre_loss = client_a.forward_backward(data, "cross_entropy").result() + client_a.optim_step(tinker_types.AdamParams(learning_rate=1e-3)).result() + post_loss = client_a.forward_backward(data, "cross_entropy").result() + pre = sum(sum(o["elementwise_loss"].data) for o in pre_loss.loss_fn_outputs) + post = sum(sum(o["elementwise_loss"].data) for o in post_loss.loss_fn_outputs) + assert post <= pre + 1e-3, ( + f"A's loss did not improve after a step (pre={pre}, post={post}); " + "looks like A's optimizer state was wiped by the swap." + ) + + +def test_rank_mismatch_rejected(service_client): + service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + with pytest.raises(Exception) as exc: + service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=16) + assert "signature mismatch" in str(exc.value).lower() or "rank" in str(exc.value).lower() + + +def test_sample_with_two_adapters_errors(service_client): + a = service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + with pytest.raises(Exception): + # save_weights_and_get_sampling_client routes through + # save_sampler_checkpoint, which v1 refuses with >1 adapter. + a.save_weights_and_get_sampling_client(name="should_fail") + + +def test_delete_then_train_remaining(service_client): + a = service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + b = service_client.create_lora_training_client(base_model=BASE_MODEL, lora_rank=8) + tok = a.get_tokenizer() + data = [_make_datum(tok, "Q?", " a")] + + # Delete A via the unload_model endpoint (Tinker exposes this as the + # public deletion path). 
+ async def _unload(model_id: str): + async with tinker._client.AsyncTinker( # type: ignore[attr-defined] + api_key=TINKER_API_KEY, base_url=f"http://0.0.0.0:{TEST_PORT}/" + ) as client: + future = await client.models.unload( + request=tinker_types.UnloadModelRequest(model_id=model_id) + ) + while True: + result = await client.futures.retrieve( + request=tinker_types.FutureRetrieveRequest(request_id=future.request_id) + ) + if isinstance(result, tinker_types.UnloadModelResponse): + return result + await asyncio.sleep(0.1) + + asyncio.run(_unload(a.model_id)) + + # B should still train successfully — backend should NOT have done a + # ray.shutdown when only A was deleted. + b.forward_backward(data, "cross_entropy").result() + b.optim_step(tinker_types.AdamParams(learning_rate=1e-3)).result() From 301059be24b9050d69983e558d59fce1d8a33efa Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:19:56 +0000 Subject: [PATCH 08/21] [multi-lora] Add two-client smoke runbook Manual smoke test (the gate before merging multi_lora): launch a Tinker API server with the SkyRL-Train Megatron backend, run two tinker-cookbook sl_loop clients in parallel against it with distinct model_ids, and verify - the policy model is built once (no second `init policy model done`), - the second client triggers `Registered additional LoRA adapter`, - both clients converge on their respective NLLs without weight contamination, - GPU memory stays bounded as the second client connects, - rank-mismatch / two-adapter sample / single-adapter-delete behave per the v1 contract. Plus troubleshooting notes for the common failure modes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../test_multi_lora_smoke_two_clients.md | 149 ++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 tests/tinker/test_multi_lora_smoke_two_clients.md diff --git a/tests/tinker/test_multi_lora_smoke_two_clients.md b/tests/tinker/test_multi_lora_smoke_two_clients.md new file mode 100644 index 0000000000..a52132f48f --- /dev/null +++ b/tests/tinker/test_multi_lora_smoke_two_clients.md @@ -0,0 +1,149 @@ +# Multi-LoRA Megatron — two-client smoke test + +This is the manual gate before merging the `multi_lora` branch. It exercises +the v1 multi-tenant training path end-to-end by running two concurrent +`tinker-cookbook` `sl_loop` clients against a single SkyRL Tinker API server +with the Megatron backend. + +## Prerequisites + +- At least one CUDA GPU. +- The Tinker server is started with the SkyRL-Train Megatron backend. +- `tinker-cookbook` is installed (or invoked via `uv run --with tinker --with tinker-cookbook --with datasets`). + +## Step 1 — Start the Tinker API server + +In one terminal: + +```bash +cd /path/to/SkyRL + +uv run --extra tinker --extra skyrl_train -m skyrl.tinker.api \ + --host 0.0.0.0 \ + --port 8000 \ + --base-model Qwen/Qwen3-0.6B \ + --backend skyrl_train \ + --backend-config '{ + "strategy": "megatron", + "trainer.placement.policy_num_gpus_per_node": 1, + "trainer.placement.policy_num_nodes": 1, + "trainer.placement.colocate_all": false, + "trainer.policy.megatron_config.tensor_model_parallel_size": 1, + "trainer.policy.megatron_config.pipeline_model_parallel_size": 1 + }' +``` + +Wait for the log line: + +``` +init policy model done +``` + +The first `create_model` request from a client triggers the policy build and +the AdapterStore bootstrap (prime_optimizer_state → register_pristine → +register_adapter). 
Look for these log lines in the server output: + +``` +Created policy model using RayPPOTrainer +``` + +A second client's `create_model` should produce: + +``` +Registered additional LoRA adapter '' +``` + +— and **must not** produce another `init policy model` line. + +## Step 2 — Run two `sl_loop` clients in parallel + +In two separate terminals: + +```bash +# Terminal 2 — client A +TINKER_API_KEY=tml-dummy uv run --with tinker --with tinker-cookbook --with datasets \ + python -m tinker_cookbook.recipes.sl_loop \ + base_url=http://localhost:8000 \ + model_name="Qwen/Qwen3-0.6B" \ + train_on_what=LAST_ASSISTANT_MESSAGE \ + lora_rank=32 \ + log_path=/tmp/sl_loop_a.log +``` + +```bash +# Terminal 3 — client B +TINKER_API_KEY=tml-dummy uv run --with tinker --with tinker-cookbook --with datasets \ + python -m tinker_cookbook.recipes.sl_loop \ + base_url=http://localhost:8000 \ + model_name="Qwen/Qwen3-0.6B" \ + train_on_what=LAST_ASSISTANT_MESSAGE \ + lora_rank=32 \ + log_path=/tmp/sl_loop_b.log +``` + +(Stagger the launches by ~10 s so the second client doesn't race the policy +build.) + +Both clients must use **the same** `lora_rank` and `model_name`. Mismatches +will be hard-rejected at `create_model` with a `LoRA signature mismatch …` +error. + +## Step 3 — What success looks like + +- Both clients converge on their respective tasks. NLL trends downward in + both `sl_loop_a.log` and `sl_loop_b.log`. +- GPU memory usage stays bounded as the second client connects (no + catastrophic spike from a second base model build). +- The server log shows `Registered additional LoRA adapter` for the second + client and **no** second `init policy model` line. +- Loss curves don't show contamination between adapters: client A's loss + doesn't jump every time client B issues a step (and vice versa). + +To verify no contamination explicitly: run the first client to a known loss +plateau, pause it (kill the process), let the second client train for ~50 +steps, then restart the first client. Its loss curve should resume at the +plateau, not regress toward the start of training. + +## Step 4 — Cleanup + +Either: + +- Let the clients finish their run and the server's session-cleanup loop + will auto-unload stale models, OR +- Send `unload_model` from each client (the cookbook does this on exit). + +When the *last* model is unloaded the server tears down the Ray runtime via +`ray.shutdown()`. A subsequent `create_model` rebuilds the runtime from +scratch. + +## Negative checks + +These should also be exercised once before merging: + +1. **Mismatched rank.** Run client A with `lora_rank=32`, then client B with + `lora_rank=16`. The second `create_model` must fail with + `LoRA signature mismatch …`. +2. **Sample with two adapters.** While both A and B are alive, call + `save_weights_for_sampler` from one of them (or any sampling op). The + server must respond with `sample()/save_sampler_checkpoint is not + supported with multiple LoRA adapters in v1`. +3. **Delete one of two.** Unload A while B is mid-training. B's next + `forward_backward` must succeed (no `ray.shutdown` happened). + +## Troubleshooting + +- **Server hangs on the second `create_model`.** Most likely the first + policy build hasn't finished. Wait for `init policy model done` before + starting the second client. +- **`LoRA signature mismatch`** even though configs look the same: check + `target_modules` — `"all-linear"` vs an explicit list will not compare + equal. 
+- **OOM on the second client.** The base model is shared, but each adapter + needs its own pinned-CPU slot for LoRA params + fp32 main + Adam moments. + Approximate budget per slot: `~3× lora_param_bytes_per_DP_shard`. For + Qwen3-0.6B at rank 32 this is on the order of tens of MB per slot. + If you see CPU OOM rather than GPU OOM, that's the slot store; reduce + the number of concurrent adapters. +- **`AttributeError: prime_optimizer_state`** on the worker means a stale + build of `multi_lora` is loaded. Make sure the server started from this + branch. From b712bca021b928524f709004bb9be1ae6a85fed5 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 21:28:16 +0000 Subject: [PATCH 09/21] [multi-lora] Fix _lora_signature_from to not read non-existent target_modules Tinker's public LoraConfig (skyrl/tinker/types.py:66) exposes only rank + alpha + seed + train_{attn,mlp,unembed}; it has no target_modules attribute. The Megatron path reads target_modules from the server-side cfg.trainer.policy.model.lora.target_modules, which is fixed at startup, so multi-adapter signature equality reduces to (rank, alpha). The worker-side AdapterStore still verifies parallel state equality via its own LoraSignature. Fixes the AttributeError on the first create_model in the smoke test. Co-Authored-By: Claude Opus 4.7 (1M context) --- skyrl/backends/skyrl_train_backend.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/skyrl/backends/skyrl_train_backend.py b/skyrl/backends/skyrl_train_backend.py index 519cab2941..6795639398 100644 --- a/skyrl/backends/skyrl_train_backend.py +++ b/skyrl/backends/skyrl_train_backend.py @@ -415,12 +415,14 @@ def _ensure_inference_engines(self): self._inference_engines_initialized = True def _lora_signature_from(self, lora_config: types.LoraConfig) -> tuple: - targets = lora_config.target_modules - if isinstance(targets, str): - targets_tuple: tuple = (targets,) - else: - targets_tuple = tuple(targets) - return (int(lora_config.rank), int(lora_config.alpha), targets_tuple) + # Tinker's public LoraConfig only exposes rank + alpha (plus + # seed/train_attn/train_mlp/train_unembed, which the SkyRL Megatron + # path doesn't honor — target_modules is fixed server-side via + # cfg.trainer.policy.model.lora.target_modules). Equality across + # adapters therefore reduces to (rank, alpha); the worker-side + # AdapterStore additionally verifies parallel-state equality via + # its own LoraSignature. + return (int(lora_config.rank), int(lora_config.alpha)) def create_model(self, model_id: str, lora_config: types.LoraConfig, model_role: str = "policy") -> None: if model_id in self._model_ids_to_role: @@ -448,10 +450,10 @@ def create_model(self, model_id: str, lora_config: types.LoraConfig, model_role: if new_signature != self._base_lora_signature: raise ValueError( f"LoRA signature mismatch for model '{model_id}': " - f"got (rank, alpha, target_modules)={new_signature}, " + f"got (rank, alpha)={new_signature}, " f"first adapter registered with {self._base_lora_signature}. " - "Multi-LoRA requires identical (rank, alpha, target_modules) " - "across all adapters in v1." + "Multi-LoRA requires identical (rank, alpha) across all " + "adapters in v1; target_modules is fixed server-side." 
) self._dispatch.register_adapter("policy", model_id) self._model_ids_to_role[model_id] = model_role From d4a0a048ac743425e0d1529f7773900bbdc3d838 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 22:13:17 +0000 Subject: [PATCH 10/21] x --- .python-version | 1 + .../workers/megatron/adapter_store.py | 4 +-- .../skyrl_train/workers/worker_dispatch.py | 26 ++++--------------- skyrl/backends/skyrl_train_backend.py | 4 +-- tests/tinker/test_multi_lora_megatron.py | 10 +++---- .../test_multi_lora_smoke_two_clients.md | 4 +-- 6 files changed, 13 insertions(+), 36 deletions(-) create mode 100644 .python-version diff --git a/.python-version b/.python-version new file mode 100644 index 0000000000..e4fba21835 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py b/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py index a62b551b10..c2f0401d6d 100644 --- a/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py +++ b/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py @@ -179,9 +179,7 @@ def _allocate_empty_slot(self, model_chunks, optimizer) -> AdapterSlot: for main_param in group: main_g.append(_new_pinned_like(main_param)) state = _opt.optimizer.state.get(main_param, {}) - state_g.append( - {k: _new_pinned_like(v) for k, v in state.items() if isinstance(v, torch.Tensor)} - ) + state_g.append({k: _new_pinned_like(v) for k, v in state.items() if isinstance(v, torch.Tensor)}) opt_main.append(main_g) opt_state.append(state_g) slot.cpu_main_param.append(opt_main) diff --git a/skyrl/backends/skyrl_train/workers/worker_dispatch.py b/skyrl/backends/skyrl_train/workers/worker_dispatch.py index a7759a5c64..c56ca2ba7f 100644 --- a/skyrl/backends/skyrl_train/workers/worker_dispatch.py +++ b/skyrl/backends/skyrl_train/workers/worker_dispatch.py @@ -89,11 +89,7 @@ def ensure_active_adapter(self, role: str, model_id: Optional[str]) -> None: """ if model_id is None or role not in self._actor_groups: return - ray.get( - self._actor_groups[role].async_run_ray_method( - "pass_through", "swap_to_adapter", model_id - ) - ) + ray.get(self._actor_groups[role].async_run_ray_method("pass_through", "swap_to_adapter", model_id)) def prime_adapter_store(self, role: str, model_id: str) -> None: """One-shot bootstrap on first create_model: prime the optimizer @@ -116,20 +112,12 @@ def register_adapter(self, role: str, model_id: str) -> None: """ if role not in self._actor_groups: return - ray.get( - self._actor_groups[role].async_run_ray_method( - "pass_through", "register_adapter", model_id - ) - ) + ray.get(self._actor_groups[role].async_run_ray_method("pass_through", "register_adapter", model_id)) def delete_adapter(self, role: str, model_id: str) -> None: if role not in self._actor_groups: return - ray.get( - self._actor_groups[role].async_run_ray_method( - "pass_through", "delete_adapter", model_id - ) - ) + ray.get(self._actor_groups[role].async_run_ray_method("pass_through", "delete_adapter", model_id)) def get_lcm_dp_size(self) -> int: """Get LCM of all models' dp_size.""" @@ -219,9 +207,7 @@ def mark_as_offloaded(self, model: str) -> None: return self._gpu_state[model] = GPUState() - def forward( - self, model: str, data: TrainingInputBatch, model_id: Optional[str] = None - ) -> TrainingOutputBatch: + def forward(self, model: str, data: TrainingInputBatch, model_id: Optional[str] = None) -> TrainingOutputBatch: """Run inference forward pass. 
Only loads model (not optimizer).""" self._ensure_on_gpu(model, need_optimizer=False, need_model=True) self.ensure_active_adapter(model, model_id) @@ -384,9 +370,7 @@ def _save_memory_snapshot(self, model: str, tag: str) -> None: self._actor_groups[model].async_run_ray_method("pass_through", "save_memory_snapshot", tag=f"{model}_{tag}") ) - def save_checkpoint( - self, model: str, ckpt_dir: str, tokenizer=None, model_id: Optional[str] = None - ) -> None: + def save_checkpoint(self, model: str, ckpt_dir: str, tokenizer=None, model_id: Optional[str] = None) -> None: """Save checkpoint for model.""" self._ensure_on_gpu(model, need_optimizer=True, need_model=True) self.ensure_active_adapter(model, model_id) diff --git a/skyrl/backends/skyrl_train_backend.py b/skyrl/backends/skyrl_train_backend.py index 6795639398..dc0478557f 100644 --- a/skyrl/backends/skyrl_train_backend.py +++ b/skyrl/backends/skyrl_train_backend.py @@ -1151,9 +1151,7 @@ def save_checkpoint(self, output_path, model_id: str) -> None: ckpt_dir = os.path.join(temp_dir, "checkpoint") # Save checkpoint directory (includes optimizer state automatically) - self._dispatch.save_checkpoint( - model=role, ckpt_dir=ckpt_dir, tokenizer=self._tokenizer, model_id=model_id - ) + self._dispatch.save_checkpoint(model=role, ckpt_dir=ckpt_dir, tokenizer=self._tokenizer, model_id=model_id) # Create tar archive self._create_tar_from_directory(ckpt_dir, output_path) diff --git a/tests/tinker/test_multi_lora_megatron.py b/tests/tinker/test_multi_lora_megatron.py index 5f2314e03a..40db52c551 100644 --- a/tests/tinker/test_multi_lora_megatron.py +++ b/tests/tinker/test_multi_lora_megatron.py @@ -36,9 +36,7 @@ except Exception: cuda_available = False -pytestmark = pytest.mark.skipif( - not cuda_available, reason="multi-LoRA Megatron tests require at least one CUDA GPU" -) +pytestmark = pytest.mark.skipif(not cuda_available, reason="multi-LoRA Megatron tests require at least one CUDA GPU") tinker = pytest.importorskip("tinker") from tinker import types as tinker_types # noqa: E402 @@ -113,8 +111,8 @@ def _api_server(port: int, backend_config: dict | None = None): def _server_is_up(port: int) -> bool: - import urllib.request import urllib.error + import urllib.request try: urllib.request.urlopen(f"http://0.0.0.0:{port}/api/v1/server_capabilities", timeout=2).read() @@ -213,9 +211,7 @@ async def _unload(model_id: str): async with tinker._client.AsyncTinker( # type: ignore[attr-defined] api_key=TINKER_API_KEY, base_url=f"http://0.0.0.0:{TEST_PORT}/" ) as client: - future = await client.models.unload( - request=tinker_types.UnloadModelRequest(model_id=model_id) - ) + future = await client.models.unload(request=tinker_types.UnloadModelRequest(model_id=model_id)) while True: result = await client.futures.retrieve( request=tinker_types.FutureRetrieveRequest(request_id=future.request_id) diff --git a/tests/tinker/test_multi_lora_smoke_two_clients.md b/tests/tinker/test_multi_lora_smoke_two_clients.md index a52132f48f..29762fc4a1 100644 --- a/tests/tinker/test_multi_lora_smoke_two_clients.md +++ b/tests/tinker/test_multi_lora_smoke_two_clients.md @@ -18,11 +18,11 @@ In one terminal: ```bash cd /path/to/SkyRL -uv run --extra tinker --extra skyrl_train -m skyrl.tinker.api \ +uv run --extra tinker --extra megatron -m skyrl.tinker.api \ --host 0.0.0.0 \ --port 8000 \ --base-model Qwen/Qwen3-0.6B \ - --backend skyrl_train \ + --backend megatron \ --backend-config '{ "strategy": "megatron", "trainer.placement.policy_num_gpus_per_node": 1, From 
3c0239ec39342b60917c73c2aba398249ff71f5f Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 22:28:45 +0000 Subject: [PATCH 11/21] [multi-lora] Swap grad buffers along with params + optimizer state Fixes a cross-tenant grad-corruption race surfaced in review: Tick N: batched fwd_bwd = [A.fb, B.fb] - sub-batch A: swap_to("A"), zero_grad_buffer, accumulate A's grads - sub-batch B: swap_to("B") <-- only params + opt state swapped zero_grad_buffer <-- A's grads CLOBBERED here accumulate B's grads Tick N+1: A.optim_step - swap_to("A") restores A's params + opt state - optimizer.step() reads grad_data, which holds B's grads -> B's gradient is applied to A's weights, A's actual gradient is lost The fix is to snapshot/restore `mc.buffers[i].grad_data` (and `expert_parallel_buffers`) alongside `param_data`. AdapterSlot now carries a parallel cpu_grad_data list; _allocate_empty_slot, _snapshot, _restore, and _copy_slot all maintain it. The fp32 grad accumulator inside DistributedOptimizer.step() is short-lived (created and consumed within one call) so it doesn't need slot storage. Memory cost: ~+1x per slot for the grad mirror (bf16, same size as param buffer). For a 7B base + rank-32 LoRA on a single DP shard this is on the order of tens of MB, dwarfed by the existing fp32 main + Adam moments. Updates the design doc to reflect the four storages per LoRA param and adds a "Why grads must travel with the slot" section walking through the race the review caught. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../content/docs/tinker/multi_lora_design.mdx | 32 +++++++++++++++---- .../workers/megatron/adapter_store.py | 17 +++++++++- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/docs/content/docs/tinker/multi_lora_design.mdx b/docs/content/docs/tinker/multi_lora_design.mdx index e5e0e6b54c..d850040879 100644 --- a/docs/content/docs/tinker/multi_lora_design.mdx +++ b/docs/content/docs/tinker/multi_lora_design.mdx @@ -34,15 +34,16 @@ Megatron's `DistributedDataParallel` filters out frozen params before constructi Buffer-level `tensor.copy_()` therefore swaps adapter-only state. The base model stays GPU-resident and is shared across all tenants. -## Three storages per LoRA param +## Four storages per LoRA param -For each LoRA `nn.Parameter` `p`, three independent storages must be swapped: +For each LoRA `nn.Parameter` `p`, four independent storages must be swapped: 1. The bf16 view in `mc.buffers[i].param_data` (or `mc.expert_parallel_buffers`). -2. The fp32 main copy in `_opt.shard_fp32_from_float16_groups[g][i]` — independent storage, not a view. -3. The Adam moments in `_opt.optimizer.state[main_param]`, keyed by the **fp32 main param**: `exp_avg`, `exp_avg_sq`. +2. The bf16 grad view in `mc.buffers[i].grad_data` — must travel with the slot, otherwise an interleaved tenant's `forward_backward` will clobber unconsumed grads via `chunk.zero_grad_buffer()` at the top of every fwd_bwd before this adapter's own `optim_step` runs. +3. The fp32 main copy in `_opt.shard_fp32_from_float16_groups[g][i]` — independent storage, not a view. +4. The Adam moments in `_opt.optimizer.state[main_param]`, keyed by the **fp32 main param**: `exp_avg`, `exp_avg_sq`. -Param-object identity is preserved across `param.data.copy_(...)`, so optimizer state-dict keys remain valid. Grads are not swapped — `optimizer.zero_grad()` runs after every step (`megatron_strategy.py:215`), so they're zero at swap time. 
+Param-object identity is preserved across `param.data.copy_(...)`, so optimizer state-dict keys remain valid. The fp32 grad accumulator inside `DistributedOptimizer.step()` (reduce-scatter destination) is short-lived — it's allocated and consumed within `step()`, so it never persists across a swap and doesn't need its own slot storage. ## Pristine slot @@ -66,7 +67,7 @@ Per worker, all under `torch.no_grad()`: 1. `dist.barrier(dp_group)` — wait for the previous adapter's last collective to finish. 2. Save current adapter into its slot: - - For each `mc` and each `buffer ∈ mc.buffers + mc.expert_parallel_buffers`: `slot.cpu_param_data[mc][i].copy_(buffer.param_data, non_blocking=True)`. + - For each `mc` and each `buffer ∈ mc.buffers + mc.expert_parallel_buffers`: copy `buffer.param_data` AND `buffer.grad_data` into `slot.cpu_param_data[mc][i]` / `slot.cpu_grad_data[mc][i]` (`non_blocking=True`). - For each `_opt ∈ _iter_opts(self.optimizer)` and each `(g, i)`: `slot.cpu_main_param[g][i].copy_(_opt.shard_fp32_from_float16_groups[g][i], non_blocking=True)`. Then `slot.cpu_exp_avg[g][i].copy_(state['exp_avg'], non_blocking=True)`, `slot.cpu_exp_avg_sq[g][i].copy_(state['exp_avg_sq'], non_blocking=True)`. 3. `torch.cuda.current_stream().synchronize()` — D2H complete. 4. Load target adapter — same loops in reverse, copying CPU → GPU into the same storages. @@ -78,11 +79,30 @@ Per worker, all under `torch.no_grad()`: Per worker, pinned memory: - `cpu_param_data[mc][buf_idx]` — bf16, one tensor per `mc.buffers + mc.expert_parallel_buffers` entry, shape matches the bucket. +- `cpu_grad_data[mc][buf_idx]` — bf16, parallel to `cpu_param_data`. Required for cross-tenant interleaving correctness (see "Why grads must travel with the slot" below). - `cpu_main_param[g][i]` — fp32, shape matches `shard_fp32_from_float16_groups[g][i]`. - `cpu_exp_avg[g][i]`, `cpu_exp_avg_sq[g][i]` — fp32, same shapes. Frozen base weights are not duplicated per adapter; they live in their own pinned storage already managed by `offload_megatron_model_to_cpu`. +### Why grads must travel with the slot + +The Tinker engine batches pending `forward_backward`s across model_ids in 100 ms ticks (`engine.py:721`). Within one tick, `_split_model_pass_batch_by_model_id` (`skyrl_train_backend.py:152`) splits into per-model sub-batches and runs them sequentially. Each `forward_backward` starts with `chunk.zero_grad_buffer()` (`megatron_worker.py:702`). + +If we swapped only params + optimizer state, this sequence corrupts: + +``` +batched fwd_bwd = [A.fb, B.fb] + sub-batch A: swap_to("A"), zero_grad_buffer, accumulate A's grads into grad_data + sub-batch B: swap_to("B") ← params + opt state swap; grads UNTOUCHED + zero_grad_buffer ← clobbers A's grads + accumulate B's grads into grad_data +A.optim_step: swap_to("A") ← restores A's params + opt state; grads = B's + optimizer.step() ← applies B's grads to A's weights ✗ +``` + +Snapshotting `grad_data` into the slot on every swap fixes this. After A's fwd_bwd, A's slot holds A's grads. swap_to(B) overwrites the live `grad_data` with B's saved grads (zero on the first visit), so B's `zero_grad_buffer` clears B's data, not A's. swap_to(A) before A's `optim_step` restores A's grads exactly. The fp32 grad accumulator that DistributedOptimizer materialises inside `step()` is short-lived (created and consumed within one call), so it doesn't need its own slot storage. 
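+
+A minimal sketch of what one `swap_to(target)` does per worker. The helper
+names (`_iter_buffers`, `_iter_opts`) and the slot fields are the real ones
+from `adapter_store.py`; the store attributes (`slots`, `active_id`,
+`dp_group`) and the overall shape are illustrative:
+
+```python
+import torch
+import torch.distributed as dist
+
+@torch.no_grad()
+def swap_to(store, target_id, model_chunks, optimizer):
+    dist.barrier(group=store.dp_group)  # 1. previous adapter's last collective is done
+
+    def copy_all(slot, copy):  # same traversal for both directions
+        for mc_idx, buf_idx, buf in _iter_buffers(model_chunks):
+            copy(slot.cpu_param_data[mc_idx][buf_idx], buf.param_data)  # bf16 params
+            copy(slot.cpu_grad_data[mc_idx][buf_idx], buf.grad_data)    # bf16 grads
+        for opt_idx, _opt in enumerate(_iter_opts(optimizer)):
+            for g, group in enumerate(_opt.shard_fp32_from_float16_groups):
+                for i, main in enumerate(group):
+                    copy(slot.cpu_main_param[opt_idx][g][i], main)      # fp32 main copy
+                    for k, cpu_t in slot.cpu_opt_state[opt_idx][g][i].items():
+                        copy(cpu_t, _opt.optimizer.state[main][k])      # Adam moments
+
+    # 2. save the live adapter into its slot (GPU -> pinned CPU) ...
+    copy_all(store.slots[store.active_id], lambda cpu, gpu: cpu.copy_(gpu, non_blocking=True))
+    torch.cuda.current_stream().synchronize()  # 3. D2H complete
+    # 4. ... then load the target adapter into the same storages (CPU -> GPU).
+    copy_all(store.slots[target_id], lambda cpu, gpu: gpu.copy_(cpu, non_blocking=True))
+    torch.cuda.current_stream().synchronize()
+    store.active_id = target_id
+```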
+ ## Files to add / modify ### New diff --git a/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py b/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py index c2f0401d6d..74ae6c4993 100644 --- a/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py +++ b/skyrl/backends/skyrl_train/workers/megatron/adapter_store.py @@ -112,6 +112,10 @@ class AdapterSlot: Layout: cpu_param_data[mc_idx] -> list[Tensor], one per buffer in (mc.buffers + mc.expert_parallel_buffers). + cpu_grad_data[mc_idx] -> same shape as cpu_param_data; mirrors + buffer.grad_data so that grads accumulated by an interrupted + forward_backward aren't lost when another tenant runs in the + gap before this adapter's optim_step. cpu_main_param[opt_idx][g] -> list[Tensor], shapes matching opt.shard_fp32_from_float16_groups[g]. cpu_opt_state[opt_idx][g][i] -> dict[str, Tensor], mirroring @@ -120,6 +124,7 @@ class AdapterSlot: """ cpu_param_data: List[List[torch.Tensor]] = field(default_factory=list) + cpu_grad_data: List[List[torch.Tensor]] = field(default_factory=list) cpu_main_param: List[List[List[torch.Tensor]]] = field(default_factory=list) cpu_opt_state: List[List[List[dict]]] = field(default_factory=list) step_count: int = 0 @@ -163,11 +168,16 @@ def num_adapters(self) -> int: def _allocate_empty_slot(self, model_chunks, optimizer) -> AdapterSlot: slot = AdapterSlot() - # Param data: one pinned bf16 tensor per (mc, buffer). + # Param data + grad data: one pinned bf16 tensor each per (mc, buffer). + # Grads must travel with the slot — otherwise an interleaved tenant's + # forward_backward will clobber unconsumed grads via zero_grad_buffer + # at the top of forward_backward. See docs/.../multi_lora_design.mdx. for mc_idx, _buf_idx, buf in _iter_buffers(model_chunks): while len(slot.cpu_param_data) <= mc_idx: slot.cpu_param_data.append([]) + slot.cpu_grad_data.append([]) slot.cpu_param_data[mc_idx].append(_new_pinned_like(buf.param_data)) + slot.cpu_grad_data[mc_idx].append(_new_pinned_like(buf.grad_data)) # Main params + optimizer state: per (opt_idx, group, param_idx). 
for _opt in _iter_opts(optimizer): opt_main: List[List[torch.Tensor]] = [] @@ -191,6 +201,7 @@ def _snapshot(self, slot: AdapterSlot, model_chunks, optimizer) -> None: """Copy live GPU state into `slot` (CPU).""" for mc_idx, buf_idx, buf in _iter_buffers(model_chunks): slot.cpu_param_data[mc_idx][buf_idx].copy_(buf.param_data, non_blocking=True) + slot.cpu_grad_data[mc_idx][buf_idx].copy_(buf.grad_data, non_blocking=True) for opt_idx, _opt in enumerate(_iter_opts(optimizer)): groups = getattr(_opt, "shard_fp32_from_float16_groups", None) or [] for g, group in enumerate(groups): @@ -207,6 +218,7 @@ def _restore(self, slot: AdapterSlot, model_chunks, optimizer) -> None: """Copy `slot` (CPU) into live GPU state.""" for mc_idx, buf_idx, buf in _iter_buffers(model_chunks): buf.param_data.copy_(slot.cpu_param_data[mc_idx][buf_idx], non_blocking=True) + buf.grad_data.copy_(slot.cpu_grad_data[mc_idx][buf_idx], non_blocking=True) for opt_idx, _opt in enumerate(_iter_opts(optimizer)): groups = getattr(_opt, "shard_fp32_from_float16_groups", None) or [] for g, group in enumerate(groups): @@ -275,6 +287,9 @@ def _copy_slot(self, src: AdapterSlot, dst: AdapterSlot) -> None: for mc_idx, mc_buffers in enumerate(src.cpu_param_data): for buf_idx, t in enumerate(mc_buffers): dst.cpu_param_data[mc_idx][buf_idx].copy_(t) + for mc_idx, mc_grads in enumerate(src.cpu_grad_data): + for buf_idx, t in enumerate(mc_grads): + dst.cpu_grad_data[mc_idx][buf_idx].copy_(t) for opt_idx, opt_groups in enumerate(src.cpu_main_param): for g, group in enumerate(opt_groups): for i, t in enumerate(group): From f5ba5c968a79f567fffa0daada9d9feddd7dc508 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 22:56:32 +0000 Subject: [PATCH 12/21] [multi-lora-rl] Wire model_id through the LoRA sync + sampling path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-tenant adapter routing for the merge_lora=False Megatron + vLLM path (and FSDP for parity). The Tinker model_id IS the vLLM adapter name end-to-end. Worker side (megatron_worker, fsdp_worker): - broadcast_to_inference_engines accepts model_id (Optional[str]). - When LoRA is active, save the adapter into a per-tenant subdir os.path.join(lora_sync_path, model_id) so concurrent saves don't collide, and call load_lora_adapter(model_id, path, load_inplace=True) on vLLM. model_id=None preserves the legacy single-tenant SKYRL_LORA_ADAPTER_NAME path. - _save_lora_adapters_and_sync takes a lora_name parameter (default SKYRL_LORA_ADAPTER_NAME) instead of hardcoding the singleton. Dispatch side (worker_dispatch): - save_weights_for_sampler(model_id=None) calls ensure_active_adapter(policy, model_id) before broadcasting so the correct adapter is live, and forwards model_id to broadcast_to_inference_engines. Backend side (skyrl_train_backend): - save_sampler_checkpoint passes model_id (when LoRA is active). - sample() per-request `model` field is now the request's model_id when it's a registered LoRA adapter, falling back to resolve_policy_model_name(cfg) for FFT / single-tenant. - Drop the v1 'raise if >1 adapter' guards on sample / save_sampler_ checkpoint — multi-tenant sampling is the goal of this branch. 
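
A hedged sketch of the resulting contract (tenant id and directory layout are
illustrative; load_lora_adapter is the RemoteInferenceClient call the workers
use below):

    import os

    async def push_adapter_for_sampling(client, base_sync_path: str, model_id: str) -> None:
        """Register one tenant's freshly exported LoRA under its Tinker model_id."""
        lora_path = os.path.join(base_sync_path, model_id)  # per-tenant subdir, no collisions
        await client.load_lora_adapter(model_id, lora_path, load_inplace=True)

    # A later sample() for the same tenant routes by naming that adapter: a
    # request body of {"model": model_id, "prompt": ..., "sampling_params": ...}
    # hits exactly the weights that were just synced.
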
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../skyrl_train/workers/fsdp/fsdp_worker.py | 34 ++++++++++++---- .../workers/megatron/megatron_worker.py | 26 +++++++++--- .../skyrl_train/workers/worker_dispatch.py | 27 ++++++++++--- skyrl/backends/skyrl_train_backend.py | 40 +++++++------------ 4 files changed, 83 insertions(+), 44 deletions(-) diff --git a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py index 377729c4f5..2f32460125 100644 --- a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py +++ b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py @@ -1,5 +1,6 @@ import io -from typing import TYPE_CHECKING +import os +from typing import TYPE_CHECKING, Optional import ray import torch @@ -240,10 +241,15 @@ async def init_weight_sync_state(self, inference_engine_client, inference_engine weight_prefix=weight_prefix, ) - async def _save_lora_adapters_and_sync(self, peft_model, lora_sync_path, inference_engine_client): + async def _save_lora_adapters_and_sync( + self, + peft_model, + lora_sync_path, + inference_engine_client, + lora_name: str = SKYRL_LORA_ADAPTER_NAME, + ): """Collect LoRA parameters, save and call inference engine to load.""" import json - import os from dataclasses import asdict from safetensors.torch import save_file @@ -274,7 +280,7 @@ async def _save_lora_adapters_and_sync(self, peft_model, lora_sync_path, inferen if isinstance(inference_engine_client, RemoteInferenceClient): await inference_engine_client.load_lora_adapter( - SKYRL_LORA_ADAPTER_NAME, lora_sync_path, load_inplace=True + lora_name, lora_sync_path, load_inplace=True ) else: lora_request = LoraLoadRequest(lora_path=lora_sync_path) @@ -282,7 +288,12 @@ async def _save_lora_adapters_and_sync(self, peft_model, lora_sync_path, inferen torch.distributed.barrier() - async def broadcast_to_inference_engines(self, inference_engine_client, inference_engine_cfg): + async def broadcast_to_inference_engines( + self, + inference_engine_client, + inference_engine_cfg, + model_id: Optional[str] = None, + ): use_prefix_cache = inference_engine_cfg.enable_prefix_caching generator_dtype = str_to_torch_dtype(inference_engine_cfg.model_dtype) cache_reset_task = None @@ -298,9 +309,16 @@ async def broadcast_to_inference_engines(self, inference_engine_client, inferenc if self._is_lora: assert hasattr(peft_model, "peft_config"), "LoRA model should have peft_config" - # assume base model is already synced, sync LoRA adapters - lora_sync_path = self.cfg.policy.model.lora.lora_sync_path - await self._save_lora_adapters_and_sync(peft_model, lora_sync_path, inference_engine_client) + # Multi-tenant: per-adapter subdir + per-adapter vLLM name. Single + # tenant (model_id=None) keeps the legacy single-path behavior. 
+ base_sync_path = self.cfg.policy.model.lora.lora_sync_path + lora_name = model_id if model_id is not None else SKYRL_LORA_ADAPTER_NAME + lora_sync_path = ( + os.path.join(base_sync_path, model_id) if model_id is not None else base_sync_path + ) + await self._save_lora_adapters_and_sync( + peft_model, lora_sync_path, inference_engine_client, lora_name=lora_name + ) else: # Extract and send weights using the sender created at init time weight_iterator = self.weight_extractor.extract_weights(generator_dtype) diff --git a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py index d305e6ce61..4fffd5a2e1 100644 --- a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py +++ b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py @@ -840,7 +840,9 @@ async def init_weight_sync_state(self, inference_engine_client, inference_engine training_dtype=torch.bfloat16 if self.cfg.bf16 else torch.float32, ) - async def _save_lora_adapters_and_sync(self, lora_sync_path, inference_engine_client): + async def _save_lora_adapters_and_sync( + self, lora_sync_path, inference_engine_client, lora_name: str = SKYRL_LORA_ADAPTER_NAME + ): """Export LoRA adapter weights via Megatron-Bridge and tell the inference engine to load them. All ranks participate in the collective export (TP/PP/EP gathering is @@ -884,7 +886,7 @@ async def _save_lora_adapters_and_sync(self, lora_sync_path, inference_engine_cl if isinstance(inference_engine_client, RemoteInferenceClient): await inference_engine_client.load_lora_adapter( - SKYRL_LORA_ADAPTER_NAME, lora_sync_path, load_inplace=True + lora_name, lora_sync_path, load_inplace=True ) else: lora_request = LoraLoadRequest(lora_path=lora_sync_path) @@ -893,7 +895,10 @@ async def _save_lora_adapters_and_sync(self, lora_sync_path, inference_engine_cl torch.distributed.barrier() async def broadcast_to_inference_engines( - self, inference_engine_client: "InferenceEngineInterface", inference_engine_cfg: "InferenceEngineConfig" + self, + inference_engine_client: "InferenceEngineInterface", + inference_engine_cfg: "InferenceEngineConfig", + model_id: Optional[str] = None, ): use_prefix_cache = inference_engine_cfg.enable_prefix_caching generator_dtype = str_to_torch_dtype(inference_engine_cfg.model_dtype) @@ -905,8 +910,19 @@ async def broadcast_to_inference_engines( torch.cuda.empty_cache() if self._is_lora and not self.cfg.policy.megatron_config.lora_config.merge_lora: - lora_sync_path = self.cfg.policy.model.lora.lora_sync_path - await self._save_lora_adapters_and_sync(lora_sync_path, inference_engine_client) + # Multi-tenant: each adapter syncs into its own subdir of + # lora_sync_path and registers under its own name on vLLM. The + # `model_id` arg is the Tinker-provided name for the adapter + # currently swapped in (set up by AdapterStore.swap_to before + # this call). Single-tenant FSDP/Megatron pre-multi-LoRA still + # works: model_id is None, we fall back to the legacy single + # adapter name + shared path. 
+ base_sync_path = self.cfg.policy.model.lora.lora_sync_path + lora_name = model_id if model_id is not None else SKYRL_LORA_ADAPTER_NAME + lora_sync_path = ( + os.path.join(base_sync_path, model_id) if model_id is not None else base_sync_path + ) + await self._save_lora_adapters_and_sync(lora_sync_path, inference_engine_client, lora_name=lora_name) else: # Extract and send weights using the sender created at init time weight_metadata = self.weight_extractor.get_weight_metadata(generator_dtype) diff --git a/skyrl/backends/skyrl_train/workers/worker_dispatch.py b/skyrl/backends/skyrl_train/workers/worker_dispatch.py index c56ca2ba7f..9e7a7d5923 100644 --- a/skyrl/backends/skyrl_train/workers/worker_dispatch.py +++ b/skyrl/backends/skyrl_train/workers/worker_dispatch.py @@ -442,14 +442,23 @@ def init_weight_sync_state(self, inference_engine_client) -> None: ) ) - def broadcast_to_inference_engines(self, inference_engine_client) -> None: - """Broadcast policy weights to inference engines.""" + def broadcast_to_inference_engines( + self, inference_engine_client, model_id: Optional[str] = None + ) -> None: + """Broadcast policy weights to inference engines. + + ``model_id`` is forwarded to the worker so that, on the LoRA path, the + adapter is saved into a per-tenant subdir of ``lora_sync_path`` and + registered on vLLM under that name. None preserves single-tenant + behavior (the legacy ``SKYRL_LORA_ADAPTER_NAME`` path). + """ ray.get( self._actor_groups["policy"].async_run_ray_method( "pass_through", "broadcast_to_inference_engines", inference_engine_client, self.cfg.generator.inference_engine, + model_id=model_id, ) ) @@ -469,11 +478,14 @@ def finish_weight_sync(self) -> None: return self._offload("policy", offload_optimizer=True, offload_model=True) - async def save_weights_for_sampler(self) -> None: + async def save_weights_for_sampler(self, model_id: Optional[str] = None) -> None: """ Tinker API method to prepare updated parameters for sampling. - Syncs weights to inference engine for sampling. + Syncs weights to inference engine for sampling. When ``model_id`` is + provided we ensure the corresponding LoRA adapter is the live one + before broadcasting, and tell the worker to register the adapter on + vLLM under ``model_id``. """ if self._inference_engine_client is None: raise RuntimeError( @@ -483,9 +495,12 @@ async def save_weights_for_sampler(self) -> None: # Sync weights to inference engine self.prepare_for_weight_sync() + # Make the requested adapter live on every worker before broadcasting + # — otherwise we'd export some other tenant's LoRA weights to vLLM. + self.ensure_active_adapter("policy", model_id) if self.colocate_all: await self._inference_engine_client.wake_up(tags=["weights"]) - self.broadcast_to_inference_engines(self._inference_engine_client) + self.broadcast_to_inference_engines(self._inference_engine_client, model_id=model_id) self.finish_weight_sync() await self._inference_engine_client.wake_up(tags=["kv_cache"]) else: @@ -493,7 +508,7 @@ async def save_weights_for_sampler(self) -> None: # reading partially-updated weights during the NCCL broadcast. 
await self._inference_engine_client.pause_generation() try: - self.broadcast_to_inference_engines(self._inference_engine_client) + self.broadcast_to_inference_engines(self._inference_engine_client, model_id=model_id) self.finish_weight_sync() finally: await self._inference_engine_client.resume_generation() diff --git a/skyrl/backends/skyrl_train_backend.py b/skyrl/backends/skyrl_train_backend.py index dc0478557f..626eb71c8c 100644 --- a/skyrl/backends/skyrl_train_backend.py +++ b/skyrl/backends/skyrl_train_backend.py @@ -938,18 +938,6 @@ def sample( # 1. Ensure inference engines are initialized self._ensure_inference_engines() - # v1 multi-LoRA: sample() is single-tenant. The inference engine path - # is not yet adapter-aware, so refuse if more than one adapter exists. - if self._base_lora_signature is not None and len(self._model_ids_to_role) > 1: - error = types.ErrorResponse( - error=( - "sample() is not supported with multiple LoRA adapters in v1. " - "Delete other adapters before sampling." - ), - status="error", - ) - return {req_id: error for req_id, _, _, _, _ in prepared_batch.request_batch_slices} - # 2. Validate single model unique_models = set(prepared_batch.all_model_ids) if len(unique_models) != 1: @@ -1018,12 +1006,15 @@ def _sample_with_remote_client( ) -> dict[str, types.SampleOutput | types.ErrorResponse]: """Sample using RemoteInferenceClient, forwarding model input chunks directly.""" - # Every sample() body must explicitly identify the target model/LoRA - # adapter — the client does not fall back to any default. Resolve - # what name the inference engine knows the policy by from config - # (LoRA adapter when LoRA weights are sync'd as an adapter, base - # model otherwise). - model_name = resolve_policy_model_name(self._cfg) + # Resolve the inference-engine model name per request. With multi-LoRA + # the adapter name on vLLM IS the Tinker model_id (registered by + # save_sampler_checkpoint via load_lora_adapter). Single-tenant / + # FFT path falls back to resolve_policy_model_name(cfg). + fallback_model_name = resolve_policy_model_name(self._cfg) + per_request_models = [ + mid if (self._base_lora_signature is not None and mid in self._model_ids_to_role) else fallback_model_name + for mid in prepared_batch.all_model_ids + ] async def sample_all(): tasks = [] @@ -1033,7 +1024,7 @@ async def sample_all(): request_payload = { "json": { - "model": model_name, + "model": per_request_models[i], "prompt": model_input.model_dump(), "num_samples": 1, "sampling_params": sampling_params.model_dump(), @@ -1189,16 +1180,15 @@ def save_sampler_checkpoint(self, output_path, model_id: str, persist: bool = Tr self._validate_model_state(model_id) if self._get_role(model_id) != "policy": raise ValueError("save_sampler_checkpoint is only supported for policy models") - if self._base_lora_signature is not None and len(self._model_ids_to_role) > 1: - raise ValueError( - "save_sampler_checkpoint is not supported with multiple LoRA adapters in v1. " - "Delete other adapters before pushing weights to the inference engine." - ) # Lazily create inference engines on first sampling-related call self._ensure_inference_engines() - asyncio.run(self._dispatch.save_weights_for_sampler()) + # Multi-LoRA: pass model_id so the dispatch swaps the right adapter in + # before broadcasting and the worker registers it on vLLM under that + # name. None for the FFT / single-tenant path uses legacy behavior. 
+ sync_id = model_id if self._base_lora_signature is not None else None + asyncio.run(self._dispatch.save_weights_for_sampler(model_id=sync_id)) logger.info(f"Synced weights for {model_id} to inference engines via NCCL") if persist: From 40b1ae49ba6c689156ffaae28bd99c8be2404320 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 22:57:21 +0000 Subject: [PATCH 13/21] [multi-lora-rl] Tolerate non-JSON error bodies in load/unload_lora_adapter Both _load_on_server and _unload_on_server called await resp.json() without try/except, so a non-JSON error body (e.g. a plain-text 5xx from a proxy in front of vLLM) would raise a generic JSON-parse error and lose the original status. Mirror the robust pattern from _post: try resp.json(content_type=None), fall back to resp.text() on parse failure, then raise_for_status with whichever body we got. Addresses the gemini-code-assist review note on PR #1579 (see https://github.com/NovaSky-AI/SkyRL/pull/1579). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../inference_servers/remote_inference_client.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py index 61f756976d..5914a42ed1 100644 --- a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py +++ b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py @@ -1157,9 +1157,14 @@ async def load_lora_adapter( async def _load_on_server(server_url: str): url = f"{server_url}/v1/load_lora_adapter" async with session.post(url, json=payload) as resp: - # vLLM returns 200 with text body on success, or JSON ErrorResponse on failure + # vLLM returns 200 with text body on success, or JSON ErrorResponse on failure. + # Tolerate non-JSON error bodies (e.g. plain-text 5xx from a proxy): + # fall back to the text body so raise_for_status still surfaces it. if resp.status >= 400: - body = await resp.json() + try: + body = await resp.json(content_type=None) + except Exception: + body = await resp.text() raise_for_status(resp, body) return server_url, {"status": resp.status, "body": await resp.text()} @@ -1192,8 +1197,12 @@ async def unload_lora_adapter(self, lora_name: str) -> Dict[str, Any]: async def _unload_on_server(server_url: str): url = f"{server_url}/v1/unload_lora_adapter" async with session.post(url, json=payload) as resp: + # See _load_on_server for the JSON/text fallback rationale. if resp.status >= 400: - body = await resp.json() + try: + body = await resp.json(content_type=None) + except Exception: + body = await resp.text() raise_for_status(resp, body) return server_url, {"status": resp.status, "body": await resp.text()} From 8cff746089b756ebc783e65693cf9d25825f1e57 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Mon, 4 May 2026 22:59:06 +0000 Subject: [PATCH 14/21] [multi-lora-rl] Update design doc + RL two-client smoke runbook Updates docs/content/docs/tinker/multi_lora_design.mdx scope from "training only" to "training + per-adapter sampling", documents the Tinker model_id == vLLM adapter name contract, the per-tenant lora_sync_path layout, the merge_lora=False requirement on Megatron, and the operator's max_cpu_loras sizing contract. Adds a "PR #1579 foundation" section pointing at the upstream PR. 
Adds tests/tinker/test_multi_lora_rl_two_clients.md as the manual gate: two rl_loop clients training and sampling on independent adapters against one server, plus contamination check, negative checks, and troubleshooting. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../content/docs/tinker/multi_lora_design.mdx | 48 +++++- .../tinker/test_multi_lora_rl_two_clients.md | 152 ++++++++++++++++++ 2 files changed, 193 insertions(+), 7 deletions(-) create mode 100644 tests/tinker/test_multi_lora_rl_two_clients.md diff --git a/docs/content/docs/tinker/multi_lora_design.mdx b/docs/content/docs/tinker/multi_lora_design.mdx index d850040879..0a9269b312 100644 --- a/docs/content/docs/tinker/multi_lora_design.mdx +++ b/docs/content/docs/tinker/multi_lora_design.mdx @@ -12,13 +12,12 @@ Today the SkyRL-Train backend exposed via the Tinker API is single-tenant: a sec The driver for changing this is Trajectory AI, who want to run Tinker workloads on their own hardware and need many tenants on a shared training pool. There is no Megatron multi-tenant SFT/RL framework today; only Prime-RL has first-class multi-tenancy and only on FSDP/CP/EP. A Megatron-backed solution is therefore both a user-requested feature and a meaningful differentiator. -## v1 scope +## Scope -- **Training only**, exercised via the `tinker-cookbook` `sl_loop` SFT recipe. -- One base model, multiple LoRA adapters. -- Fixed `(rank, alpha, target_modules)` across all adapters. Mismatched configs on a second `create_model` are hard-rejected with a clear `ValueError`. -- `sample()` and `save_sampler_checkpoint()` raise if more than one adapter is registered. Per-adapter sampling and per-adapter vLLM weight sync are explicitly deferred. +- **Training and per-adapter sampling** on a single Tinker server, exercised via the `tinker-cookbook` `sl_loop` (SFT) and `rl_loop` (RL) recipes. +- One base model, multiple LoRA adapters with fixed `(rank, alpha, target_modules)` across all adapters. Mismatched configs on a second `create_model` are hard-rejected with a clear `ValueError`. - The FFT (no-LoRA) path stays single-tenant — the relaxation is gated behind `lora_config.rank > 0`. +- The RL path requires `merge_lora=False` on Megatron so vLLM serves the adapter (not pre-merged weights) and supports multiple LoRA adapters concurrently. PR #1579 contributes the inference-side scaffolding (`load_lora_adapter` / `unload_lora_adapter`, `max_loras`, `max_cpu_loras`, mandatory `model` per data-plane call); this design plumbs the Tinker `model_id` through end-to-end so each tenant's adapter is registered and addressed by its own name on vLLM. ## Strategy @@ -121,10 +120,45 @@ Snapshotting `grad_data` into the slot on every swap fixes this. After A's fwd_b - Existing `tests/tinker/test_api.py` continues to pass (single-tenant path unchanged). - End-to-end smoke (manual): launch the Tinker server with `trainer.strategy=megatron`, run two `tinker_cookbook.recipes.sl_loop` clients with distinct `model_id`s in parallel against `base_url=http://localhost:8000`, verify both converge on their respective tasks and GPU memory stays bounded. -## Out of scope (explicit non-goals for v1) +## Per-adapter sampling and weight sync (RL path) + +For RL, sampling has to be per-adapter: each tenant runs `forward_backward → optim_step → save_weights_for_sampler → sample` on its own model_id, and vLLM has to know which weights to use for which call. 
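+
+From a single tenant's point of view the contract looks roughly like the sketch below (illustrative only, not code from the tree; the helper name and the `client.sample` call shape are stand-ins for the real call sites in the backend's `save_sampler_checkpoint` / `_sample_with_remote_client` and the `RemoteInferenceClient` data plane):
+
+```python
+async def sync_then_sample(dispatch, client, model_id, prompt, sampling_params):
+    # 1) Swap this tenant's adapter in on the training workers and broadcast it
+    #    to vLLM. On the LoRA path the worker writes the adapter to
+    #    lora_sync_path/<model_id>/ and registers it under `model_id` via
+    #    RemoteInferenceClient.load_lora_adapter(model_id, path, load_inplace=True).
+    await dispatch.save_weights_for_sampler(model_id=model_id)
+
+    # 2) Route the data-plane call to that adapter by name. The `model` field is
+    #    mandatory on every data-plane call; there is no fallback default.
+    body = {"model": model_id, "prompt": prompt, "num_samples": 1,
+            "sampling_params": sampling_params}
+    return await client.sample(body)  # exact sample() signature is schematic here
+```
+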
+ +The plumbing is straightforward once the SFT scaffolding is in place: + +- **Adapter name = Tinker model_id.** The Tinker `model_id` is forwarded as the vLLM adapter name end-to-end. There is no separate naming layer. +- **Per-tenant `lora_sync_path`.** When `model_id` is set, the worker writes its adapter into `os.path.join(cfg.trainer.policy.model.lora.lora_sync_path, model_id)/` so concurrent saves from different tenants don't collide. Single-tenant calls (`model_id=None`, the FFT path or pre-Tinker callers) keep the legacy shared path. +- **`save_sampler_checkpoint(model_id)`** swaps the requested adapter live (via `WorkerDispatch.ensure_active_adapter`), then broadcasts to vLLM. The worker calls `RemoteInferenceClient.load_lora_adapter(model_id, lora_sync_path/model_id, load_inplace=True)` so re-syncs of the same adapter overwrite vLLM's slot in place (no fresh int id, no eviction churn). +- **`sample(prepared_batch)`** now resolves the data-plane `model` per request from `prepared_batch.all_model_ids[i]` when LoRA is active, falling back to `resolve_policy_model_name(cfg)` only on the FFT / single-tenant path. The previous "raise if >1 adapter" guards are gone. + +### vLLM capacity contract + +vLLM exposes two LoRA capacity knobs: + +- `max_loras` — concurrent adapters in a single GPU batch. +- `max_cpu_loras` — total LoRA capacity in vLLM's CPU LRU cache (defaults to `max_loras` when unset). + +Both are config fields on `SkyRLLoraConfig` (`config.py`, surfaced in `ppo_base_config.yaml`). For multi-tenant RL, `max_cpu_loras` MUST be set to at least the expected number of concurrent registered adapters; otherwise vLLM will silently evict an adapter from its CPU cache and the next `sample()` against that adapter will 404. The Tinker server doesn't auto-size this — operators set it explicitly when they expect N tenants. Detecting and reloading-from-disk on 404 is a future improvement. + +### Concurrency between training and sampling + +With `colocate_all=True`, vLLM is asleep during training and woken in `save_weights_for_sampler` for the broadcast + KV-cache wake. With multi-tenant RL the engine queue's destructive-barrier scheduling (`engine.py`) keeps each model_id's `forward_backward / optim_step / save_weights_for_sampler / sample` chain coherent. For non-colocated mode the same applies — `pause_generation / resume_generation` brackets the broadcast, and per-adapter sample requests fan out independently. + +## PR #1579 (foundation) + +The RL multi-tenancy work sits on top of [NovaSky-AI/SkyRL#1579](https://github.com/NovaSky-AI/SkyRL/pull/1579) (`hao-aaron:multi-lora`), which provides: + +- `RemoteInferenceClient.load_lora_adapter(name, path, load_inplace)` and `unload_lora_adapter(name)`. +- `SKYRL_LORA_ADAPTER_NAME` constant + `resolve_policy_model_name(cfg)` for the single-tenant fallback. +- Mandatory `model` field on every data-plane call (`generate / sample / chat_completion / completion / render_chat_completion`). +- `max_loras` + `max_cpu_loras` config knobs plumbed through to vLLM CLI args. + +Our `multi_lora_rl` branch is based on PR #1579's HEAD with our SFT AdapterStore commits cherry-picked on top. + +## Out of scope (explicit non-goals) -- Per-adapter `sample()` / vLLM weight sync — `sample` raises if more than one adapter is registered. - Adapter-only checkpoint export — `save_checkpoint` still saves the whole base+LoRA state per swap. - Variable rank / alpha / target_modules across adapters. 
- Critic role multi-tenancy — Megatron critic is `NotImplementedError` today; no change. - `HybridDeviceOptimizer` path — TODO comment only. +- Auto-reload of LoRA adapters evicted from vLLM's CPU LRU cache; for now `max_cpu_loras` must be set ≥ expected concurrent adapters. diff --git a/tests/tinker/test_multi_lora_rl_two_clients.md b/tests/tinker/test_multi_lora_rl_two_clients.md new file mode 100644 index 0000000000..a34c3cfb4b --- /dev/null +++ b/tests/tinker/test_multi_lora_rl_two_clients.md @@ -0,0 +1,152 @@ +# Multi-LoRA Megatron RL — two-client smoke test + +The gate before merging the `multi_lora_rl` branch. Extends the SFT smoke +([`test_multi_lora_smoke_two_clients.md`](./test_multi_lora_smoke_two_clients.md)) +to cover the per-adapter sampling + weight-sync path that powers RL. + +Two `tinker-cookbook` `rl_loop` clients, each with their own LoRA adapter, +should train and sample independently against a single SkyRL Tinker API +server with the Megatron backend and `merge_lora=False` so vLLM serves +adapters by name. + +## Prerequisites + +- At least one CUDA GPU. With `colocate_all=true` (the simplest RL config), + training and inference share the same GPUs and vLLM sleeps during + training. +- `tinker-cookbook` and `datasets` available (or invoked via `uv run`). +- `multi_lora_rl` branch checked out. + +## Step 1 — Start the Tinker API server + +```bash +cd /path/to/SkyRL + +uv run --extra tinker --extra megatron -m skyrl.tinker.api \ + --host 0.0.0.0 \ + --port 8000 \ + --base-model Qwen/Qwen3-0.6B \ + --backend megatron \ + --backend-config '{ + "strategy": "megatron", + "trainer.placement.policy_num_gpus_per_node": 1, + "trainer.placement.policy_num_nodes": 1, + "trainer.placement.colocate_all": true, + "trainer.policy.megatron_config.tensor_model_parallel_size": 1, + "trainer.policy.megatron_config.pipeline_model_parallel_size": 1, + "trainer.policy.megatron_config.lora_config.merge_lora": false, + "trainer.policy.model.lora.max_loras": 4, + "trainer.policy.model.lora.max_cpu_loras": 4 + }' +``` + +Critical knobs vs the SFT runbook: + +- `merge_lora: false` — Megatron must not pre-merge LoRA into the base + weights, otherwise vLLM serves the merged model and there's nothing + per-adapter to switch on. +- `max_loras` ≥ expected concurrent adapters in a single batch (typically 2 + for two `rl_loop` clients). +- `max_cpu_loras` ≥ expected total adapters. **Must** be set explicitly; + the server doesn't auto-size. If too low, vLLM's CPU LRU evicts an + adapter and the next `sample()` against it 404s. + +Wait for `init policy model done`. The first client's `create_model` will +trigger the policy build + AdapterStore bootstrap; subsequent clients +register additional adapters (`Registered additional LoRA adapter ''`). + +## Step 2 — Run two `rl_loop` clients in parallel + +```bash +# Terminal 2 — client A +TINKER_API_KEY=tml-dummy uv run --with tinker --with tinker-cookbook --with datasets --with torch \ + python -m tinker_cookbook.recipes.rl_loop \ + base_url=http://localhost:8000 \ + model_name="Qwen/Qwen3-0.6B" \ + lora_rank=32 \ + log_path=/tmp/rl_loop_a.log +``` + +```bash +# Terminal 3 — client B +TINKER_API_KEY=tml-dummy uv run --with tinker --with tinker-cookbook --with datasets --with torch \ + python -m tinker_cookbook.recipes.rl_loop \ + base_url=http://localhost:8000 \ + model_name="Qwen/Qwen3-0.6B" \ + lora_rank=32 \ + log_path=/tmp/rl_loop_b.log +``` + +Stagger the launches by ~10 s. 
Both clients **must** use the same
+`lora_rank` and `model_name` (mismatches are hard-rejected at
+`create_model` with `LoRA signature mismatch …`).
+
+## Step 3 — What success looks like
+
+- Both clients converge on their respective rewards. Reward trends upward
+  in both `rl_loop_a.log` and `rl_loop_b.log`.
+- Server log shows, for each client, the per-step sequence:
+  - `forward_backward` + `optim_step` (training)
+  - `save_sampler_checkpoint` (writes adapter into
+    `lora_sync_path/<model_id>/`, calls `load_lora_adapter(<model_id>, …,
+    load_inplace=True)` on every vLLM server)
+  - `sample(model=<model_id>)` against the right adapter
+- vLLM server logs show two distinct adapter names registered (the two
+  Tinker `model_id`s) and `sample` requests routed to each.
+- GPU memory stays bounded as the second client connects (single base
+  model, two LoRA adapters; CPU LRU holds the same two).
+
+## Step 4 — Per-adapter contamination check
+
+Stop client B mid-training, let client A continue for ~50 RL steps,
+then restart client B. Client B's reward trajectory should resume from
+where it left off, NOT regress to a fresh-start baseline. If B's reward
+craters back to zero, A's training was bleeding into B's adapter.
+
+## Step 5 — Cleanup
+
+Either let the recipes finish (they call `unload_model` on exit) or kill
+both processes. The session-cleanup loop will eventually unload stale
+adapters. When the *last* registered adapter is unloaded the server tears
+down the Ray runtime via `ray.shutdown()`; subsequent `create_model`
+rebuilds it.
+
+## Negative checks
+
+1. **Mismatched rank.** Client A `lora_rank=32`, client B `lora_rank=16` —
+   second `create_model` must fail with `LoRA signature mismatch …`.
+2. **`max_cpu_loras=1` with two adapters.** Start the server with
+   `max_cpu_loras=1`, run both clients. The second client's first
+   `sample()` should fail (adapter evicted before sampling). This is
+   expected and documents why the operator must size `max_cpu_loras`
+   correctly.
+3. **`merge_lora: true`.** Start the server with `merge_lora=true` and
+   try the same two clients. The second `create_model` will succeed
+   (training is fine), but `sample()` against the *non-active* adapter
+   returns the wrong adapter's output because vLLM only ever sees the
+   merged base. Document the behavior; v1 requires `merge_lora=false` for RL.
+
+## Troubleshooting
+
+- **`sample()` 404 on `lora_name=…`.** Either `save_sampler_checkpoint`
+  wasn't called for that model_id before the sample (Tinker recipe bug),
+  or `max_cpu_loras` is too low and vLLM evicted the adapter. Check the
+  vLLM server log for `Adapter X not found / evicted`.
+- **`KeyError: lora_sync_path/<model_id>`** on the worker side — the
+  per-tenant subdir wasn't created. Confirm `model_id` is being passed
+  through to `broadcast_to_inference_engines` (controller log line should
+  read `Synced weights for <model_id> to inference engines via NCCL`).
+- **Server hangs on the second `save_sampler_checkpoint`.** Likely the
+  first client's vLLM wake-up is mid-flight. The dispatch's
+  `prepare_for_weight_sync` + `ensure_active_adapter` should serialize
+  this; if you see deadlocks, capture a thread dump on the controller and
+  the policy worker.
+- **`AttributeError: load_lora_adapter`** on `RemoteInferenceClient` —
+  the server is running an older binary that pre-dates PR #1579. Make
+  sure the server started from this branch.
+
+## Reference
+
+- Design: [`docs/content/docs/tinker/multi_lora_design.mdx`](../../docs/content/docs/tinker/multi_lora_design.mdx).
+- Foundation PR: [NovaSky-AI/SkyRL#1579](https://github.com/NovaSky-AI/SkyRL/pull/1579). +- SFT runbook: [`test_multi_lora_smoke_two_clients.md`](./test_multi_lora_smoke_two_clients.md). From a46b5878bcb68d0ec1b4c081703729a7b90355ab Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Tue, 5 May 2026 00:31:23 +0000 Subject: [PATCH 15/21] [multi-lora-rl] Allow mixed model_ids in a single sample() batch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Tinker engine batches sample requests across model_ids in find_batchable_sample, then dispatches one prepared_batch to backend.sample(). Our previous "exactly one model_id per batch" guard short-circuited multi-tenant RL — when both rl_loop clients had sample() requests pending in the same engine tick, the batched call hit the guard and returned 400 to both. Replaces the unique-model check with a per-request validation: every model_id must be a known policy, but multiple distinct policy model_ids in one batch are fine. Routing per request is already handled by _sample_with_remote_client via the per-request `model` field on the data plane. Co-Authored-By: Eric Tang Co-Authored-By: Claude Opus 4.7 (1M context) --- skyrl/backends/skyrl_train_backend.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/skyrl/backends/skyrl_train_backend.py b/skyrl/backends/skyrl_train_backend.py index b0fbb6421b..3bc1893957 100644 --- a/skyrl/backends/skyrl_train_backend.py +++ b/skyrl/backends/skyrl_train_backend.py @@ -877,18 +877,22 @@ def sample( # 1. Ensure inference engines are initialized self._ensure_inference_engines() - # 2. Validate single model + # 2. Validate every model_id in the batch is a known policy. Multi-LoRA + # RL legitimately mixes adapters in one batched sample call (the engine + # batches across model_ids in find_batchable_sample); we route per + # request via the `model` field in _sample_with_remote_client below. unique_models = set(prepared_batch.all_model_ids) - if len(unique_models) != 1: + unknown = [mid for mid in unique_models if mid not in self._model_ids_to_role] + if unknown: error = types.ErrorResponse( - error=f"Expected exactly one model_id for sampling, got {unique_models}", status="error" + error=f"Sampling requested for unknown model_id(s): {sorted(unknown)}", status="error" ) return {req_id: error for req_id, _, _, _, _ in prepared_batch.request_batch_slices} - model_id = next(iter(unique_models)) - role = self._model_ids_to_role.get(model_id) - if role != "policy": + non_policy = [mid for mid in unique_models if self._model_ids_to_role.get(mid) != "policy"] + if non_policy: error = types.ErrorResponse( - error=f"Sampling is only supported for policy models, got '{model_id}'", status="error" + error=f"Sampling is only supported for policy models, got non-policy: {sorted(non_policy)}", + status="error", ) return {req_id: error for req_id, _, _, _, _ in prepared_batch.request_batch_slices} From edefc84644a41ec40153cbd645f2fb38077a9482 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Tue, 5 May 2026 00:43:12 +0000 Subject: [PATCH 16/21] [multi-lora-rl] Pass load_inplace=True to vLLM load_lora_adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merged unload-then-load workaround from main wasn't sufficient for re-syncing an adapter — vLLM still returned 400 "adapter already loaded" on the second sync of the same name (e.g. 
when a tenant calls save_sampler_checkpoint a second time, or when the unload step returns 200 but the cached LoRARequest hasn't been evicted yet). vLLM's own error message instructs the caller to set load_inplace=True in that case, which is what PR #1579 originally did. Restore that behavior: thread the load_inplace parameter (default True, exposed on the public API) into the /v1/load_lora_adapter payload, drop the separate _unload_on_server pre-step. The standalone unload_lora_adapter method still exists for callers that explicitly want eviction. Fixes the rl_loop runtime error: ClientResponseError: 400, message="The lora adapter '' has already been loaded. If you want to load the adapter in place, set 'load_inplace' to True." Co-Authored-By: Eric Tang Co-Authored-By: Claude Opus 4.7 (1M context) --- .../remote_inference_client.py | 29 +++++-------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py index e5867bd260..4bcd33f1ba 100644 --- a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py +++ b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py @@ -1154,38 +1154,26 @@ async def load_lora_adapter( same name is already registered on the server, vLLM replaces it inplace, preserving its internal int id. - TODO(aaron): remove _unload_on_server and add back "load_inplace": True to payload after - vllm lora disk load bug is fixed. - Args: lora_name: Name to register the adapter under on each server. lora_path: Path to the LoRA adapter on disk (must be accessible from servers). - load_inplace: When True (default), reloading a previously-loaded and cached - adapter with the same name replaces it with the on-disk lora. + load_inplace: When True (default), vLLM replaces an existing adapter + with the same name in place, preserving its int id. Without + this, the second call with the same name returns a 400 + "adapter already loaded". Returns: Dict mapping server_url to response. """ - # Both endpoints return plain text on success or JSON ErrorResponse on failure, - # so we use a session.post directly rather than _call_all_servers (which expects JSON). - # ``load_inplace`` is currently unused: until the upstream vLLM in-place reload bug - # is fixed (see TODO above) we always do explicit unload-then-load. - _ = load_inplace + # vLLM /v1/load_lora_adapter returns plain text on success and JSON + # ErrorResponse on failure, so we use a session.post directly rather + # than _call_all_servers (which expects JSON). session = await self._get_session() - async def _unload_on_server(server_url: str) -> None: - """Remove the cached LoRARequest server-side. 404 on the first sync is expected.""" - url = f"{server_url}/v1/unload_lora_adapter" - async with session.post(url, json={"lora_name": lora_name}) as resp: - if resp.status == 404: - return - if resp.status >= 400: - body = await resp.json() - raise_for_status(resp, body) + payload = {"lora_name": lora_name, "lora_path": lora_path, "load_inplace": load_inplace} async def _load_on_server(server_url: str): url = f"{server_url}/v1/load_lora_adapter" - payload = {"lora_name": lora_name, "lora_path": lora_path} async with session.post(url, json=payload) as resp: # vLLM returns 200 with text body on success, or JSON ErrorResponse on failure. # Tolerate non-JSON error bodies (e.g. 
plain-text 5xx from a proxy): @@ -1198,7 +1186,6 @@ async def _load_on_server(server_url: str): raise_for_status(resp, body) return server_url, {"status": resp.status, "body": await resp.text()} - await asyncio.gather(*[_unload_on_server(url) for url in self.server_urls]) results = await asyncio.gather(*[_load_on_server(url) for url in self.server_urls]) logger.info(f"Loaded LoRA adapter '{lora_name}' from {lora_path}") From fa3bfbcc5d715369ba1b835056070d29b5f42238 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Tue, 5 May 2026 00:44:57 +0000 Subject: [PATCH 17/21] Revert "[multi-lora-rl] Pass load_inplace=True to vLLM load_lora_adapter" This reverts commit edefc84644a41ec40153cbd645f2fb38077a9482. --- .../remote_inference_client.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py index 4bcd33f1ba..e5867bd260 100644 --- a/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py +++ b/skyrl/backends/skyrl_train/inference_servers/remote_inference_client.py @@ -1154,26 +1154,38 @@ async def load_lora_adapter( same name is already registered on the server, vLLM replaces it inplace, preserving its internal int id. + TODO(aaron): remove _unload_on_server and add back "load_inplace": True to payload after + vllm lora disk load bug is fixed. + Args: lora_name: Name to register the adapter under on each server. lora_path: Path to the LoRA adapter on disk (must be accessible from servers). - load_inplace: When True (default), vLLM replaces an existing adapter - with the same name in place, preserving its int id. Without - this, the second call with the same name returns a 400 - "adapter already loaded". + load_inplace: When True (default), reloading a previously-loaded and cached + adapter with the same name replaces it with the on-disk lora. Returns: Dict mapping server_url to response. """ - # vLLM /v1/load_lora_adapter returns plain text on success and JSON - # ErrorResponse on failure, so we use a session.post directly rather - # than _call_all_servers (which expects JSON). + # Both endpoints return plain text on success or JSON ErrorResponse on failure, + # so we use a session.post directly rather than _call_all_servers (which expects JSON). + # ``load_inplace`` is currently unused: until the upstream vLLM in-place reload bug + # is fixed (see TODO above) we always do explicit unload-then-load. + _ = load_inplace session = await self._get_session() - payload = {"lora_name": lora_name, "lora_path": lora_path, "load_inplace": load_inplace} + async def _unload_on_server(server_url: str) -> None: + """Remove the cached LoRARequest server-side. 404 on the first sync is expected.""" + url = f"{server_url}/v1/unload_lora_adapter" + async with session.post(url, json={"lora_name": lora_name}) as resp: + if resp.status == 404: + return + if resp.status >= 400: + body = await resp.json() + raise_for_status(resp, body) async def _load_on_server(server_url: str): url = f"{server_url}/v1/load_lora_adapter" + payload = {"lora_name": lora_name, "lora_path": lora_path} async with session.post(url, json=payload) as resp: # vLLM returns 200 with text body on success, or JSON ErrorResponse on failure. # Tolerate non-JSON error bodies (e.g. 
plain-text 5xx from a proxy): @@ -1186,6 +1198,7 @@ async def _load_on_server(server_url: str): raise_for_status(resp, body) return server_url, {"status": resp.status, "body": await resp.text()} + await asyncio.gather(*[_unload_on_server(url) for url in self.server_urls]) results = await asyncio.gather(*[_load_on_server(url) for url in self.server_urls]) logger.info(f"Loaded LoRA adapter '{lora_name}' from {lora_path}") From cb3861421ae3a09dbd36a9587ffc0f2dfb4b3afb Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Tue, 5 May 2026 01:01:16 +0000 Subject: [PATCH 18/21] x --- tests/tinker/test_multi_lora_rl_two_clients.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/tinker/test_multi_lora_rl_two_clients.md b/tests/tinker/test_multi_lora_rl_two_clients.md index 9e098ab6ff..9a036697b1 100644 --- a/tests/tinker/test_multi_lora_rl_two_clients.md +++ b/tests/tinker/test_multi_lora_rl_two_clients.md @@ -29,12 +29,16 @@ uv run --extra tinker --extra megatron -m skyrl.tinker.api \ --backend megatron \ --backend-config '{ "strategy": "megatron", - "trainer.placement.policy_num_gpus_per_node": 1, + "trainer.placement.policy_num_gpus_per_node": 4, "trainer.placement.policy_num_nodes": 1, "trainer.placement.colocate_all": false, "trainer.policy.megatron_config.tensor_model_parallel_size": 1, "trainer.policy.megatron_config.pipeline_model_parallel_size": 1, + "trainer.micro_train_batch_size_per_gpu": 64, + "trainer.micro_forward_batch_size_per_gpu": 64, "trainer.policy.megatron_config.lora_config.merge_lora": false, + "generator.inference_engine.num_engines": 1, + "generator.inference_engine.tensor_parallel_size": 1, "trainer.policy.model.lora.max_loras": 4, "trainer.policy.model.lora.max_cpu_loras": 4 }' From e18e82ba92fd0deacb842b67cf58cb541f4a58b4 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Wed, 6 May 2026 00:09:50 +0000 Subject: [PATCH 19/21] [docs] Design doc for non-colocated sample routing via EXTERNAL path Adds docs/content/docs/tinker/async_sample_routing.mdx describing the plan to route SkyRL-Train sample requests through the existing EXTERNAL fan-out path (api.py:1039-1064) instead of the engine's synchronous loop. The engine already excludes EXTERNAL futures from its scheduler; we just need to point a new BackendForwardingInferenceClient at the engine-managed vLLM. Covers: synchronization invariants (I1-I4) that already hold via the SDK + checkpoint validation + vLLM pause/resume, files to add/modify (EngineStateDB row, BackendForwardingInferenceClient, SkyRLTrainBackend._publish_engine_state, api.py lifespan wiring), trade-offs vs. dual-loop-in-engine and full async refactors, failure modes, testing plan, and explicit non-goals (training-side parallelism, auto-recovery from vLLM eviction). 
Co-Authored-By: Eric Tang Co-Authored-By: Claude Opus 4.7 (1M context) --- .../docs/tinker/async_sample_routing.mdx | 401 ++++++++++++++++++ 1 file changed, 401 insertions(+) create mode 100644 docs/content/docs/tinker/async_sample_routing.mdx diff --git a/docs/content/docs/tinker/async_sample_routing.mdx b/docs/content/docs/tinker/async_sample_routing.mdx new file mode 100644 index 0000000000..2959743563 --- /dev/null +++ b/docs/content/docs/tinker/async_sample_routing.mdx @@ -0,0 +1,401 @@ +--- +title: "Non-colocated Sample Routing — Design" +--- + +# Non-colocated Sample Routing — Design + +This document describes the design for processing `sample()` requests in +parallel with training when running the SkyRL Tinker server with the +SkyRL-Train (FSDP / Megatron) backend in non-colocated mode (`colocate_all=False`). + +## Problem + +The Tinker engine's main loop (`skyrl/tinker/engine.py:721`) processes pending +requests synchronously per 100 ms tick, in fixed order: + +```python +self.process_batch_requests(forward_backward_requests, ...) +self.process_batch_requests(forward_requests, ...) +self.process_batch_requests(sample_requests, ...) +self.process_single_requests(other_requests) +``` + +This is correct for the JAX backend (single model serves both training and +sampling — they cannot run concurrently) and for SkyRL-Train colocated mode +(vLLM is asleep during training, woken only inside `save_weights_for_sampler` +→ broadcast → `sample`). + +For SkyRL-Train **non-colocated** mode the assumption breaks down: + +- Training (`forward_backward`, `optim_step`) runs on dedicated policy worker + GPUs. +- vLLM runs on its own GPUs and is **always on** — generation can run at any + time. +- The two interact only at `save_weights_for_sampler`, which broadcasts new + weights into vLLM bracketed by `pause_generation` / `resume_generation`. + +In this regime, serializing `sample` behind `forward_backward` wastes vLLM +capacity. A `sample` request submitted at the start of a 30 s training step +sits idle for that 30 s even though vLLM is free. + +For multi-tenant RL (multiple `rl_loop` clients on one Tinker server), the +problem compounds: every `sample` from any tenant queues behind every other +tenant's training step. + +## Existing infrastructure we can lean on + +The Tinker server already has a parallel-sample code path for genuinely +external vLLM deployments (`engine_config.external_inference_url`): + +1. **API handler dispatches sample directly, off the engine queue** + (`api.py:1039-1064`). When `app.state.external_inference_client` is + non-null, `POST /api/v1/asample` creates a `FutureDB` row with + `RequestType.EXTERNAL`, then `asyncio.create_task(...)` to forward to vLLM + and return the future_id immediately. The engine never sees the request. + +2. **Engine excludes EXTERNAL from its scheduler** (`engine.py:431`): + `find_single_requests` filters + `where(FutureDB.request_type != types.RequestType.EXTERNAL)`. EXTERNAL + futures live entirely in the API process's asyncio loop. + +3. **`ExternalInferenceClient.call_and_store_result`** (in + `skyrl/tinker/extra/external_inference.py`) writes the result back into + `FutureDB` directly when the call returns. The + `/api/v1/retrieve_future` polling loop is oblivious to who the writer is. + +So the cross-process plumbing for parallel sample is already in place. The +gap is that it's wired only for an external vLLM URL configured at server +start. We need to point it at the vLLM that the SkyRL-Train backend stands +up internally. 
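+
+For orientation, that existing dispatch has roughly the shape sketched below (paraphrased from the description above, not the literal `api.py` code; the `FutureDB` field names and the session handling are placeholders):
+
+```python
+import asyncio
+
+async def asample_external(app, session, sample_req, model_id, checkpoint_id):
+    """Paraphrased EXTERNAL dispatch: park a future, return its id immediately."""
+    future = FutureDB(request_type=types.RequestType.EXTERNAL)  # field names illustrative
+    session.add(future)
+    session.commit()
+
+    # Fire-and-forget: the client writes the result back into FutureDB itself
+    # when the vLLM call returns; the engine's scheduler never sees EXTERNAL rows.
+    asyncio.create_task(
+        app.state.external_inference_client.call_and_store_result(
+            future.request_id, sample_req, model_id, checkpoint_id
+        )
+    )
+    return {"future_id": future.request_id}
+```
+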
+ +## Proposed design + +Route SkyRL-Train sample requests through the existing EXTERNAL path, +adapted to talk to the engine-managed vLLM instead of a user-supplied +external URL. + +### Flow + +``` + ┌─ POST /api/v1/asample ─────────────────────────┐ + client ─────┤ │ + │ api.py: │ + │ create FutureDB(type=EXTERNAL) │ + │ asyncio.create_task(client.call_and_store) │ + │ return future_id (immediately) │ + └────────────────────────────────────────────────┘ + │ + ▼ + ┌─ BackendForwardingInferenceClient ─────────────┐ + │ POST {vllm_proxy_url}/v1/completions │ + │ model=model_id (LoRA registered by engine)│ + │ on completion: write FutureDB.result_data │ + └────────────────────────────────────────────────┘ + + (parallel) + ┌─ engine subprocess ────────────────────────────┐ + │ process_pending_requests loop: │ + │ forward_backward / optim_step / │ + │ save_weights_for_sampler / ... │ + │ (NO sample work here) │ + │ │ + │ vLLM lifecycle owned here. Adapter loaded │ + │ during save_weights_for_sampler via │ + │ RemoteInferenceClient.load_lora_adapter. │ + └────────────────────────────────────────────────┘ +``` + +The engine still processes `forward_backward`, `optim_step`, +`save_weights_for_sampler`, `save_weights`, `load_weights`, and the rest of +the request types in its serial loop. Only `sample` (and only when +non-colocated) is hoisted into the API process's async loop. + +### Synchronization invariants + +These are already enforced by the existing API and SDK; calling them out so +the EXTERNAL refactor preserves them. + +**(I1) Sample for checkpoint X requires that save-X has completed.** The +Tinker SDK serializes `save_weights_for_sampler(name=X)` and +`sample(checkpoint_id=X)` per training client — the second future depends +on the first. On the server, the API handler at `api.py:1037` calls +`validate_checkpoint(...)` before creating the sample future; checkpoints +are only marked ready inside the engine after the broadcast completes +(`engine._checkpoint_status_context` in `process_save_weights_for_sampler`). +Sample-for-X cannot reach the EXTERNAL fan-out before save-X finalises in +the engine. ✓ + +**(I2) In-flight sample during a re-broadcast for the same model.** Worker +side, `WorkerDispatch.save_weights_for_sampler` brackets the broadcast with +`inference_engine_client.pause_generation()` / +`resume_generation()`. vLLM's KEEP-mode pause freezes in-flight requests in +its scheduler; they resume after `resume_generation`. No client-side +coordination needed. ✓ + +**(I3) Result writes don't conflict with engine writes.** EXTERNAL sample +results are written by the API process; all other request results are +written by the engine. Different `FutureDB` rows, no overlap. SQLite WAL +(already enabled by `enable_sqlite_wal`) handles concurrent readers. + +**(I4) The colocated path is unchanged.** When `colocate_all=True`, the API +must NOT install the forwarding client — vLLM is asleep during training and +the engine's synchronous `sample` path is still what wakes it, broadcasts, +and samples. The lifespan code gates strictly on +`colocate_all == False && backend == "skyrl_train"`. + +### Files to add / modify + +#### New: `skyrl/tinker/extra/backend_forwarding_inference.py` + +Adapter from the EXTERNAL contract to the SkyRL-Train-managed vLLM. ~80 LOC. + +```python +class BackendForwardingInferenceClient: + """Forwards EXTERNAL sample requests to the SkyRL-Train-managed vLLM. 
+ + Differs from ExternalInferenceClient: + - Reads the vLLM proxy URL from EngineStateDB (written by the engine + after _create_new_inference_client) rather than EngineConfig. + - Uses model= for LoRA sampling (the engine already loaded + the adapter under that name in save_weights_for_sampler via + RemoteInferenceClient.load_lora_adapter). + - No checkpoint extraction step — the worker's + _save_lora_adapters_and_sync wrote the adapter to lora_sync_path + and registered it with vLLM directly. + """ + + def __init__(self, engine_config: EngineConfig, db_engine): + self.engine_config = engine_config + self.db_engine = db_engine + self._cached_proxy_url: str | None = None + self._concurrency = asyncio.Semaphore( + getattr(engine_config, "max_concurrent_samples", 64) + ) + + async def _resolve_proxy_url(self) -> str: + # Read from EngineStateDB; cache; refresh on 5xx. + ... + + async def call_and_store_result(self, request_id, sample_req, + model_id, checkpoint_id, *, base_model=None): + async with self._concurrency: + try: + proxy_url = await self._resolve_proxy_url() + # POST {proxy_url}/v1/completions with model=model_id (or + # base_model when present). Reuse the rendering logic from + # ExternalInferenceClient._forward_to_engine. + result = await self._forward(...) + result_data, status = result.model_dump(), RequestStatus.COMPLETED + except Exception as e: + result_data, status = {"error": str(e), "status": "failed"}, RequestStatus.FAILED + await self._write_result(request_id, result_data, status) +``` + +#### New DB row: `EngineStateDB` + +A single-row table holding inference-side handoff state from engine → +API. New file `skyrl/tinker/db_models.py` addition: + +```python +class EngineStateDB(SQLModel, table=True): + __tablename__ = "engine_state" + + # Always 1; SQLModel needs a primary key. + singleton_id: int = Field(default=1, primary_key=True) + + inference_proxy_url: str | None = None + """Proxy URL of the engine-managed vLLM, populated after + SkyRLTrainBackend._create_new_inference_client. None when no vLLM has + been stood up yet (FFT path, colocated, or pre-first-create_model).""" + + inference_server_urls: list[str] = Field(default_factory=list, sa_type=JSON) + """Backend vLLM URLs (data-parallel servers). Optional; + BackendForwardingInferenceClient only needs proxy_url for /v1/completions.""" + + updated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + sa_type=DateTime(timezone=True), + ) +``` + +The engine writes this row inside `_create_new_inference_client` after +`build_new_inference_client(...)` returns (so the proxy_url is known). The +API reads it lazily on first sample. + +#### Modify: `skyrl/backends/skyrl_train_backend.py` + +In `_create_new_inference_client`: + +```python +client, server_setup = build_new_inference_client( + self._cfg, + self._tokenizer, + placement_group=self._colocate_pg if is_colocated else None, +) +self._inference_router = server_setup.router +self._server_groups = server_setup.server_groups +self._inference_engine_client = client + +# NEW: publish vLLM URLs for the API process's BackendForwardingInferenceClient. +self._publish_engine_state(server_setup) +``` + +`_publish_engine_state` writes `EngineStateDB.singleton_id=1` with the +current `proxy_url` and `server_urls`. Idempotent upsert. On +`delete_model`-driven `ray.shutdown()`, clear the row (proxy_url=None) so +the API knows there's no vLLM to talk to. 
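+
+A minimal sketch of `_publish_engine_state` (illustrative; `self._db_engine` and the `server_setup` attributes read here are placeholders for whatever handles the backend actually holds):
+
+```python
+from datetime import datetime, timezone
+
+from sqlmodel import Session
+
+def _publish_engine_state(self, server_setup=None) -> None:
+    """Idempotently upsert the singleton row; clear it when vLLM is torn down."""
+    row = EngineStateDB(
+        singleton_id=1,
+        inference_proxy_url=getattr(server_setup, "proxy_url", None),
+        inference_server_urls=list(getattr(server_setup, "server_urls", []) or []),
+        updated_at=datetime.now(timezone.utc),
+    )
+    with Session(self._db_engine) as session:
+        session.merge(row)  # merge() is an insert-or-update on the primary key
+        session.commit()
+```
+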
+#### Modify: `skyrl/tinker/api.py` — lifespan
+
+```python
+backend_name = app.state.engine_config.backend
+backend_cfg = app.state.engine_config.backend_config or {}
+is_colocated = bool(backend_cfg.get("trainer.placement.colocate_all", True))
+
+if app.state.engine_config.external_inference_url:
+    app.state.external_inference_client = ExternalInferenceClient(
+        app.state.engine_config, app.state.db_engine,
+    )
+    logger.info(f"External engine configured: {app.state.engine_config.external_inference_url}")
+elif backend_name == "skyrl_train" and not is_colocated:
+    app.state.external_inference_client = BackendForwardingInferenceClient(
+        app.state.engine_config, app.state.db_engine,
+    )
+    logger.info("Engine-managed vLLM forwarding enabled (non-colocated SkyRL-Train)")
+else:
+    app.state.external_inference_client = None
+    logger.info("Using internal engine for inference")
+```
+
+Note: `backend_name` for the SkyRL-Train Megatron path is the string the
+operator passes to `--backend` (currently `"megatron"` or `"fsdp"` per
+`skyrl/tinker/engine.py:177-183`). The check should match those names —
+either by allowlist (`backend_name in ("megatron", "fsdp")`) or by reading
+the resolved backend class. Easiest: allowlist.
+
+Also: the colocate detection from `backend_config` is fragile (assumes the
+key exists). Better: have the engine write `is_colocated` into
+`EngineStateDB` alongside the URL, and have the API read it after first
+write. Until then, default to colocated (current behavior preserved).
+
+#### No changes required to engine.py
+
+The engine's `find_single_requests` already excludes EXTERNAL. The engine
+processes `save_weights_for_sampler` synchronously as today, which is what
+populates the LoRA adapter on vLLM under `model=<model_id>` so subsequent
+sample requests find it.
+
+### Backpressure
+
+Two places to bound concurrency:
+
+1. **Per-client semaphore** inside `BackendForwardingInferenceClient` (the
+   `self._concurrency` above). Default: 64. Configurable via a new
+   `EngineConfig.max_concurrent_samples`.
+2. **vLLM-side `max_num_seqs`** is the ultimate cap; vLLM queues beyond
+   that internally.
+
+The semaphore matters mostly for shielding the FastAPI event loop from
+fan-out from many tenants — once vLLM is saturated the additional
+backpressure is mostly ergonomic (avoid arbitrarily many in-flight HTTP
+requests from one process).
+
+### Failure modes
+
+- **`EngineStateDB.inference_proxy_url is None` when sample arrives.** The
+  engine hasn't built vLLM yet (no `create_model` happened, or last delete
+  triggered teardown). Mark the future failed with a clear message
+  ("inference engine not ready"); the SDK will surface this back to the
+  client.
+- **vLLM-side 404 for `model=<model_id>`.** Adapter wasn't loaded (most
+  likely the client never called `save_weights_for_sampler`, or
+  `max_cpu_loras` is too low and vLLM evicted it). Fail the future with the
+  4xx body verbatim. Documented in
+  [`multi_lora_design.mdx`](./multi_lora_design.mdx#vllm-capacity-contract).
+- **Engine restarts vLLM under us.** The engine's
+  `delete_model`-on-last-adapter path does `ray.shutdown()`. Next
+  `create_model` rebuilds. The
+  `EngineStateDB` row is updated by `_publish_engine_state` on rebuild.
+  `BackendForwardingInferenceClient` should refresh its cached proxy_url
+  on connection errors and retry once.
+- **API process crash / restart.** In-flight `asyncio.create_task`s are
+  lost; their `FutureDB` rows remain `pending`. The
+  existing 5-minute `retrieve_future` timeout reaps them as failures. 
Same + failure mode as the existing external-vLLM path. +- **EngineStateDB row out of sync with running vLLM.** Mostly mitigated by + the engine writing the row inside `_create_new_inference_client` (single + source of truth). If the engine crashes after writing but before vLLM + comes up, the API will get connection refused → fail the future. The + user will retry. + +### Testing + +- **Unit:** `BackendForwardingInferenceClient.call_and_store_result` against + a `FastAPI` stub vLLM (mirroring + `tests/backends/skyrl_train/inference_servers/test_remote_inference_client.py`). + Verifies model-name plumbing, error handling, semaphore. +- **Integration (GPU-gated):** extend + `tests/tinker/test_multi_lora_rl_two_clients.md` — add a step + comparing wall-clock for two `rl_loop` clients sampling vs. a baseline + with this routing disabled. The expected delta is "training step time + doesn't appear in sample latency." +- **Negative:** when `external_inference_client is None` (colocated case), + sample requests still flow through the engine. Existing test_api.py + coverage should pass unchanged. + +### Trade-offs vs. dual-loop-in-engine + +We considered three alternatives. Why this design wins: + +| Alternative | Why we're not picking it | +|---|---| +| **Futures + single engine loop.** `process_sample` becomes non-blocking; engine main loop polls in-flight tasks at the top of each tick. | The 100 ms tick is still a hard latency floor for completion. A 30 s `forward_backward` blocks all sample-completion handling for 30 s. Fixing that means making the engine's main loop async — at which point we're 80 % of the way to a full async refactor for less benefit. | +| **Separate sample loop inside the engine subprocess.** Spawn a second asyncio task in the engine that handles SAMPLE requests; the main loop handles everything else. | Strictly weaker isolation than process-level: a long Ray-side `ray.get` in the main loop releases the GIL but still constrains the engine's overall throughput. SQLite write contention is fine but introduces a per-loop ownership rule we'd need to maintain. | +| **Full async refactor of `process_pending_requests`.** Each request type as its own async task with per-model locks. | Big diff; backend methods are sync (Ray `ray.get`) and would need executor wrappers. Premature for what we need (parallel sample-vs-training), without a clear next-step requirement that justifies it. | + +Routing through EXTERNAL inherits process-level isolation for free +(API and engine are separate OS processes; one cannot block the other), +keeps the engine code untouched, and reuses already-validated machinery. + +## Implementation order + +1. `EngineStateDB` + migration (auto-create via `SQLModel.metadata.create_all` + in lifespan, same as other tables). +2. `_publish_engine_state` in `SkyRLTrainBackend` (called after + `_create_new_inference_client` and on `delete_model` teardown). +3. `BackendForwardingInferenceClient` + unit tests. +4. Lifespan wiring in `api.py` + colocate detection. +5. Update RL smoke runbook to verify parallelism with a simple wall-clock + comparison. Document the operator-side requirements + (`max_concurrent_samples`, `max_cpu_loras`). +6. Optional follow-up: have the engine also write `is_colocated` to + `EngineStateDB` so the API can resolve it without parsing + `backend_config`. + +## Out of scope (explicit non-goals) + +- **Parallelising training requests across model_ids.** This design + parallelises sample only. 
Multi-tenant `forward_backward` / + `optim_step` still serialise through the engine's main loop. Adding + per-model parallelism on the training side requires a different design + (per-model worker pools or per-model dispatch queues) and isn't + needed for the multi-LoRA RL workloads we're targeting. +- **Auto-recovery from vLLM eviction.** If `max_cpu_loras` is set too + low and vLLM evicts an adapter mid-run, the next sample 404s and the + future fails. Re-loading from disk on demand is a follow-up — for now + the operator sizes `max_cpu_loras` ≥ expected concurrent adapters. +- **Colocated mode acceleration.** Colocated mode keeps the synchronous + engine flow as today. There is no parallelism to exploit there (vLLM is + asleep during training). +- **Sample retries.** `BackendForwardingInferenceClient` does at most one + proxy_url refresh + retry on connection errors. Application-level retry + is the SDK's job (and is already implemented in `tinker/retry_handler`). + +## References + +- Multi-LoRA design: [`multi_lora_design.mdx`](./multi_lora_design.mdx). +- PR #1579 (foundation): [NovaSky-AI/SkyRL#1579](https://github.com/NovaSky-AI/SkyRL/pull/1579). +- Existing external path: `skyrl/tinker/extra/external_inference.py`, + `skyrl/tinker/api.py:1057-1062`, + `skyrl/tinker/engine.py:431` (EXTERNAL exclusion). +- RL smoke runbook: [`test_multi_lora_rl_two_clients.md`](../../../tests/tinker/test_multi_lora_rl_two_clients.md). From 57a474ab20f6a8b680d80fdc3b0ae31c3c0c8352 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Wed, 6 May 2026 01:38:53 +0000 Subject: [PATCH 20/21] [smoke logs] Snapshot rl_loop / sl_loop runs from manual smoke tests Captures /tmp/rl_loop_{a..i}.log and /tmp/sl_loop_{a..d}.log from the multi-LoRA RL + SFT smoke runs into tests/tinker/smoke_logs/. Each run contributes code.diff (the working-tree diff at launch), config.json, logs.log (full stdout/stderr), and metrics.jsonl. Force-added because *.log is in the project .gitignore. ~1.2 MB total; useful as reference output for the runbooks at tests/tinker/test_multi_lora_{rl_two_clients,smoke_two_clients}.md. 
Co-Authored-By: Eric Tang Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tinker/smoke_logs/rl_loop_a.log/code.diff | 3 + .../smoke_logs/rl_loop_a.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_a.log/logs.log | 301 +++++ .../smoke_logs/rl_loop_a.log/metrics.jsonl | 6 + .../tinker/smoke_logs/rl_loop_b.log/code.diff | 3 + .../smoke_logs/rl_loop_b.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_b.log/logs.log | 106 ++ .../smoke_logs/rl_loop_b.log/metrics.jsonl | 4 + .../tinker/smoke_logs/rl_loop_c.log/code.diff | 3 + .../smoke_logs/rl_loop_c.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_c.log/logs.log | 683 +++++++++++ .../smoke_logs/rl_loop_c.log/metrics.jsonl | 7 + .../tinker/smoke_logs/rl_loop_d.log/code.diff | 3 + .../smoke_logs/rl_loop_d.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_d.log/logs.log | 1055 +++++++++++++++++ .../smoke_logs/rl_loop_d.log/metrics.jsonl | 4 + .../rl_loop_e.log/checkpoints.jsonl | 3 + .../tinker/smoke_logs/rl_loop_e.log/code.diff | 3 + .../smoke_logs/rl_loop_e.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_e.log/logs.log | 825 +++++++++++++ .../smoke_logs/rl_loop_e.log/metrics.jsonl | 58 + .../rl_loop_f.log/checkpoints.jsonl | 3 + .../tinker/smoke_logs/rl_loop_f.log/code.diff | 3 + .../smoke_logs/rl_loop_f.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_f.log/logs.log | 825 +++++++++++++ .../smoke_logs/rl_loop_f.log/metrics.jsonl | 58 + .../rl_loop_g.log/checkpoints.jsonl | 3 + .../tinker/smoke_logs/rl_loop_g.log/code.diff | 3 + .../smoke_logs/rl_loop_g.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_g.log/logs.log | 825 +++++++++++++ .../smoke_logs/rl_loop_g.log/metrics.jsonl | 58 + .../rl_loop_h.log/checkpoints.jsonl | 1 + .../tinker/smoke_logs/rl_loop_h.log/code.diff | 3 + .../smoke_logs/rl_loop_h.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_h.log/logs.log | 430 +++++++ .../smoke_logs/rl_loop_h.log/metrics.jsonl | 30 + .../rl_loop_i.log/checkpoints.jsonl | 1 + .../tinker/smoke_logs/rl_loop_i.log/code.diff | 3 + .../smoke_logs/rl_loop_i.log/config.json | 12 + .../tinker/smoke_logs/rl_loop_i.log/logs.log | 430 +++++++ .../smoke_logs/rl_loop_i.log/metrics.jsonl | 30 + .../sl_loop_a.log/checkpoints.jsonl | 2 + .../tinker/smoke_logs/sl_loop_a.log/code.diff | 3 + .../smoke_logs/sl_loop_a.log/config.json | 12 + .../tinker/smoke_logs/sl_loop_a.log/logs.log | 729 ++++++++++++ .../smoke_logs/sl_loop_a.log/metrics.jsonl | 47 + .../sl_loop_b.log/checkpoints.jsonl | 2 + .../tinker/smoke_logs/sl_loop_b.log/code.diff | 3 + .../smoke_logs/sl_loop_b.log/config.json | 12 + .../tinker/smoke_logs/sl_loop_b.log/logs.log | 674 +++++++++++ .../smoke_logs/sl_loop_b.log/metrics.jsonl | 44 + .../sl_loop_c.log/checkpoints.jsonl | 2 + .../tinker/smoke_logs/sl_loop_c.log/code.diff | 3 + .../smoke_logs/sl_loop_c.log/config.json | 12 + .../tinker/smoke_logs/sl_loop_c.log/logs.log | 674 +++++++++++ .../smoke_logs/sl_loop_c.log/metrics.jsonl | 44 + .../sl_loop_d.log/checkpoints.jsonl | 2 + .../tinker/smoke_logs/sl_loop_d.log/code.diff | 3 + .../smoke_logs/sl_loop_d.log/config.json | 12 + .../tinker/smoke_logs/sl_loop_d.log/logs.log | 644 ++++++++++ .../smoke_logs/sl_loop_d.log/metrics.jsonl | 42 + 61 files changed, 8847 insertions(+) create mode 100644 tests/tinker/smoke_logs/rl_loop_a.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_a.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_a.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_a.log/metrics.jsonl create mode 100644 
tests/tinker/smoke_logs/rl_loop_b.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_b.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_b.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_b.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_c.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_c.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_c.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_c.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_d.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_d.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_d.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_d.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_e.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_e.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_e.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_e.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_e.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_f.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_f.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_f.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_f.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_f.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_g.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_g.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_g.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_g.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_g.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_h.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_h.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_h.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_h.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_h.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_i.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/rl_loop_i.log/code.diff create mode 100644 tests/tinker/smoke_logs/rl_loop_i.log/config.json create mode 100644 tests/tinker/smoke_logs/rl_loop_i.log/logs.log create mode 100644 tests/tinker/smoke_logs/rl_loop_i.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_a.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_a.log/code.diff create mode 100644 tests/tinker/smoke_logs/sl_loop_a.log/config.json create mode 100644 tests/tinker/smoke_logs/sl_loop_a.log/logs.log create mode 100644 tests/tinker/smoke_logs/sl_loop_a.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_b.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_b.log/code.diff create mode 100644 tests/tinker/smoke_logs/sl_loop_b.log/config.json create mode 100644 tests/tinker/smoke_logs/sl_loop_b.log/logs.log create mode 100644 tests/tinker/smoke_logs/sl_loop_b.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_c.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_c.log/code.diff create mode 100644 tests/tinker/smoke_logs/sl_loop_c.log/config.json create mode 100644 tests/tinker/smoke_logs/sl_loop_c.log/logs.log create mode 100644 
tests/tinker/smoke_logs/sl_loop_c.log/metrics.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_d.log/checkpoints.jsonl create mode 100644 tests/tinker/smoke_logs/sl_loop_d.log/code.diff create mode 100644 tests/tinker/smoke_logs/sl_loop_d.log/config.json create mode 100644 tests/tinker/smoke_logs/sl_loop_d.log/logs.log create mode 100644 tests/tinker/smoke_logs/sl_loop_d.log/metrics.jsonl diff --git a/tests/tinker/smoke_logs/rl_loop_a.log/code.diff b/tests/tinker/smoke_logs/rl_loop_a.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_a.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/rl_loop_a.log/config.json b/tests/tinker/smoke_logs/rl_loop_a.log/config.json new file mode 100644 index 0000000000..f1d0c32b6a --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_a.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/rl_loop_a.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "group_size": 16, + "learning_rate": 4e-05, + "lora_rank": 32, + "save_every": 20, + "max_tokens": 256, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/rl_loop_a.log/logs.log b/tests/tinker/smoke_logs/rl_loop_a.log/logs.log new file mode 100644 index 0000000000..b654726695 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_a.log/logs.log @@ -0,0 +1,301 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_a.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_a.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_a.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_a.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_a.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_a.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... 
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_1677eba7 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_a.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_a.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_8babd19c +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.021484  │ +│ skyrl.ai/grad_norm  │ 2767.572763 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 220.875473  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.026855  │ +│ skyrl.ai/grad_norm  │ 2880.009288 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 193.037447  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.032715  │ +│ skyrl.ai/grad_norm  │ 766.103412 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 381.700626 │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.069824  │ +│ skyrl.ai/grad_norm  │ 4700.790572 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 451.961692  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 4  │ +│ progress/done_frac  │ 0.086207  │ +│ reward/total  │ 0.188965  │ +│ skyrl.ai/grad_norm  │ 7912.660235 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 518.589346  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 5  │ +│ progress/done_frac  │ 0.103448  │ +│ reward/total  │ 0.306152  │ +│ skyrl.ai/grad_norm  │ 9853.311322 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 651.922414  
│ +└────────────────────────┴─────────────┘ +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1845' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1865' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1855' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1882' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1856' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1859' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1836' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1837' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1883' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1841' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 
'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1873' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1864' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1900' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1942' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1885' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1955' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1918' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 
'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1886' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1902' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1899' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1947' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1910' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1889' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1949' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1906' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1871' and 
expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1880' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1911' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1921' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1879' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1909' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1891' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1887' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1934' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1877' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with 
non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1903' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1919' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1922' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1958' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1833' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1954' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1924' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1961' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1948' and expected type 
self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1869' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1932' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1897' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1914' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1927' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1894' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1888' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1933' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected 
exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1884' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1920' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1905' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1895' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1938' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1890' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1957' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1931' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1893' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1834' and expected type self.model_cls= 
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1960' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1936' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1875' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1835' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1848' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1876' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1850' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1839' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1926' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1858' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1935' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1862' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1959' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1940' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1915' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1939' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1878' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id 
for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1950' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1892' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1872' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1945' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1930' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1908' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1946' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1832' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1928' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for 
self.request_id='1953' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1944' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1896' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1854' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1901' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1849' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1868' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1881' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1904' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got 
{'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1851' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1857' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1866' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1838' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1916' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1925' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1937' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1917' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1941' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_1677eba7 +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error 
code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1913' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1943' and expected type self.model_cls= +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1912' and expected type self.model_cls= diff --git a/tests/tinker/smoke_logs/rl_loop_a.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_a.log/metrics.jsonl new file mode 100644 index 0000000000..7720167023 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_a.log/metrics.jsonl @@ -0,0 +1,6 @@ +{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2767.572763271094, "skyrl.ai/learning_rate": 4e-05, "time/total": 220.87547278404236, "reward/total": 0.021484375} +{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 2880.009288179467, "skyrl.ai/learning_rate": 4e-05, "time/total": 193.037446975708, "reward/total": 0.02685546875} +{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 766.1034117532698, "skyrl.ai/learning_rate": 4e-05, "time/total": 381.7006256580353, "reward/total": 0.03271484375} +{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 4700.790571808108, "skyrl.ai/learning_rate": 4e-05, "time/total": 451.96169209480286, "reward/total": 0.06982421875} +{"step": 4, "progress/batch": 4, "optim/lr": 4e-05, "progress/done_frac": 0.08620689655172414, "skyrl.ai/grad_norm": 7912.660235344369, "skyrl.ai/learning_rate": 4e-05, "time/total": 518.5893461704254, "reward/total": 0.18896484375} +{"step": 5, "progress/batch": 5, "optim/lr": 4e-05, "progress/done_frac": 0.10344827586206896, "skyrl.ai/grad_norm": 9853.311321581186, "skyrl.ai/learning_rate": 4e-05, "time/total": 651.9224138259888, "reward/total": 0.30615234375} diff --git a/tests/tinker/smoke_logs/rl_loop_b.log/code.diff b/tests/tinker/smoke_logs/rl_loop_b.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_b.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/rl_loop_b.log/config.json b/tests/tinker/smoke_logs/rl_loop_b.log/config.json new file mode 100644 index 0000000000..d02ea077f0 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_b.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/rl_loop_b.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "group_size": 16, + "learning_rate": 4e-05, + "lora_rank": 32, + "save_every": 20, + "max_tokens": 256, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/rl_loop_b.log/logs.log 
b/tests/tinker/smoke_logs/rl_loop_b.log/logs.log new file mode 100644 index 0000000000..dc6db425c1 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_b.log/logs.log @@ -0,0 +1,106 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_b.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_b.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_ff83e78a +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_b.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_b.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_19dca9e4 +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.016602  │ +│ skyrl.ai/grad_norm  │ 2784.973608 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 329.021244  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.030273  │ +│ skyrl.ai/grad_norm  │ 3172.651888 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 294.953841  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.029297  │ +│ skyrl.ai/grad_norm  │ 2699.619881 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 407.538405  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.054199  │ +│ skyrl.ai/grad_norm  │ 4655.835049 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 686.296854  │ +└────────────────────────┴─────────────┘ +tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1860' and expected type self.model_cls= +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a +tinker.lib.retry_handler:241 [ERROR] 
Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1772' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1815' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1820' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1758' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1756' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1773' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1790' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1771' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1778' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1768' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1805' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1767' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1812' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1766' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1802' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1808' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1816' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1813' and expected type self.model_cls=
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_ff83e78a
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1829' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1780' and expected type self.model_cls=
+tinker.lib.retry_handler:241 [ERROR] Request failed with non-retryable error: ValueError: Error retrieving result: Error code: 400 - {'detail': "Expected exactly one model_id for sampling, got {'model_8babd19c', 'model_19dca9e4'}"} with status code e.status_code=400 for self.request_id='1763' and expected type self.model_cls=
diff --git a/tests/tinker/smoke_logs/rl_loop_b.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_b.log/metrics.jsonl
new file mode 100644
index 0000000000..ffc1adafab
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_b.log/metrics.jsonl
@@ -0,0 +1,4 @@
+{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2784.973608492547, "skyrl.ai/learning_rate": 4e-05, "time/total": 329.02124428749084, "reward/total": 0.0166015625}
+{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 3172.6518876170453, "skyrl.ai/learning_rate": 4e-05, "time/total": 294.9538412094116, "reward/total": 0.0302734375}
+{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 2699.6198806498664, "skyrl.ai/learning_rate": 4e-05, "time/total": 407.53840494155884, "reward/total": 0.029296875}
+{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 4655.835048624468, "skyrl.ai/learning_rate": 4e-05, "time/total": 686.2968535423279, "reward/total": 0.05419921875}
diff --git a/tests/tinker/smoke_logs/rl_loop_c.log/code.diff b/tests/tinker/smoke_logs/rl_loop_c.log/code.diff
new file mode 100644
index 0000000000..d8e559755f
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_c.log/code.diff
@@ -0,0 +1,3 @@
+### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3
+modules: tinker_cookbook
+(no local changes)
diff --git a/tests/tinker/smoke_logs/rl_loop_c.log/config.json b/tests/tinker/smoke_logs/rl_loop_c.log/config.json
new file mode 100644
index 0000000000..681c18071a
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_c.log/config.json
@@ -0,0 +1,12 @@
+{
+ "base_url": "http://localhost:8000",
+ "log_path": "/tmp/rl_loop_c.log",
+ "model_name": "Qwen/Qwen3-0.6B",
+ "batch_size": 128,
+ "group_size": 16,
+ "learning_rate": 4e-05,
+ "lora_rank": 32,
+ "save_every": 20,
+ "max_tokens": 256,
+ "ttl_seconds": 604800
+}
\ No newline at end of file
diff --git a/tests/tinker/smoke_logs/rl_loop_c.log/logs.log b/tests/tinker/smoke_logs/rl_loop_c.log/logs.log
new file mode 100644
index 0000000000..089d66411c
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_c.log/logs.log
@@ -0,0 +1,683 @@
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_c.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_c.log
+__main__:73 [INFO] Using renderer: qwen3
+__main__:76 [INFO] Loading dataset...
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_060f07d1
+tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_c.log/checkpoints.jsonl
+tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_c.log
+tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_01d93e11
+__main__:121 [INFO] Training for 58 batches
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_060f07d1
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_060f07d1
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_c.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_c.log
+__main__:73 [INFO] Using renderer: qwen3
+__main__:76 [INFO] Loading dataset...
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_d224d066
+tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_c.log/checkpoints.jsonl
+tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_c.log
+tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_c0ac2cf5
+__main__:121 [INFO] Training for 58 batches
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_d224d066
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_d224d066
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_c.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_c.log
+__main__:73 [INFO] Using renderer: qwen3
+__main__:76 [INFO] Loading dataset...
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_7a047dd4
+tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_c.log/checkpoints.jsonl
+tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_c.log
+tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_18c1fcad
+__main__:121 [INFO] Training for 58 batches
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_c.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_c.log
+__main__:73 [INFO] Using renderer: qwen3
+__main__:76 [INFO] Loading dataset...
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_cb2206f9 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_c.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_c.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_4022cc73 +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.019043  │ +│ skyrl.ai/grad_norm  │ 2515.858104 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 133.940457  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.023438  │ +│ skyrl.ai/grad_norm  │ 2593.005592 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 80.892934  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.033691  │ +│ skyrl.ai/grad_norm  │ 1459.361847 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 83.688621  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.069824  │ +│ skyrl.ai/grad_norm  │ 3230.374127 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 146.584503  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 4  │ +│ progress/done_frac  │ 0.086207  │ +│ reward/total  │ 0.225098  │ +│ skyrl.ai/grad_norm  │ 8136.650171 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 157.939979  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 5  │ +│ progress/done_frac  │ 0.103448  │ +│ reward/total  │ 0.364746  │ +│ skyrl.ai/grad_norm  │ 8247.074148 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 
155.924101  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 6  │ +│ progress/done_frac  │ 0.120690  │ +│ reward/total  │ 0.416016  │ +│ skyrl.ai/grad_norm  │ 8501.772521 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 150.344107  │ +└────────────────────────┴─────────────┘ +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 120.04752013500183 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 140.05585919200166 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 160.06073126800038 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 103, in handle_async_request + return await self._connection.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 136, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 106, in handle_async_request + ) = await self._receive_response_headers(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 177, in _receive_response_headers + event = await self._receive_event(timeout=timeout) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 217, in _receive_event + data = await self._network_stream.read( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 32, in read + with 
map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 180.0704045420025 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. 
+Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 200.08335031200113 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 220.0932662330015 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 240.10563371600074 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 260.113305881001 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 103, in handle_async_request + return await self._connection.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 136, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 106, in handle_async_request + ) = await self._receive_response_headers(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 177, in _receive_response_headers + event = await self._receive_event(timeout=timeout) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 217, in _receive_event + data = await self._network_stream.read( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 32, in read + with map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) 
+ File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 280.11982140799955 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 300.12539942100193 seconds for session session_cb2206f9. 
Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 320.1320038060003 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 340.1399088580001 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 360.1474978940023 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 380.15622666499985 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 103, in handle_async_request + return await self._connection.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 136, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 106, in handle_async_request + ) = await self._receive_response_headers(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 177, in _receive_response_headers + event = await self._receive_event(timeout=timeout) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 217, in _receive_event + data = await self._network_stream.read( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 32, in read + with map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 400.1645123180024 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. 
+tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 420.1727330680005 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 440.1820644870022 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 460.1859169199997 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 480.1972260930015 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 500.20116487500127 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 103, in handle_async_request + return await self._connection.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 136, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 106, in handle_async_request + ) = await self._receive_response_headers(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 177, in _receive_response_headers + event = await self._receive_event(timeout=timeout) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 217, in _receive_event + data = await self._network_stream.read( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 32, in read + with 
map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 520.2125360690006 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. 
+Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 540.2188946360002 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 560.2234918930008 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 580.2347657700011 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 600.2407169200014 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 103, in handle_async_request + return await self._connection.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 136, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 106, in handle_async_request + ) = await self._receive_response_headers(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 177, in _receive_response_headers + event = await self._receive_event(timeout=timeout) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 217, in _receive_event + data = await self._network_stream.read( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 32, in read + with map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + 
File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 620.2457274400003 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 640.2519691980015 seconds for session session_cb2206f9. 
Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 660.2582203860002 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 680.2657900810009 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 700.2808281979997 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 720.289861448 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 103, in handle_async_request + return await self._connection.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 136, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 106, in handle_async_request + ) = await self._receive_response_headers(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 177, in _receive_response_headers + event = await self._receive_event(timeout=timeout) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py", line 217, in _receive_event + data = await self._network_stream.read( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 32, in read + with map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ReadTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 740.2932220139992 seconds for session session_cb2206f9. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. 
+tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_cb2206f9
diff --git a/tests/tinker/smoke_logs/rl_loop_c.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_c.log/metrics.jsonl
new file mode 100644
index 0000000000..9962cdc017
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_c.log/metrics.jsonl
@@ -0,0 +1,7 @@
+{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2515.8581041068273, "skyrl.ai/learning_rate": 4e-05, "time/total": 133.9404571056366, "reward/total": 0.01904296875}
+{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 2593.0055919723736, "skyrl.ai/learning_rate": 4e-05, "time/total": 80.8929340839386, "reward/total": 0.0234375}
+{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 1459.361846835801, "skyrl.ai/learning_rate": 4e-05, "time/total": 83.68862080574036, "reward/total": 0.03369140625}
+{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 3230.3741269394786, "skyrl.ai/learning_rate": 4e-05, "time/total": 146.5845034122467, "reward/total": 0.06982421875}
+{"step": 4, "progress/batch": 4, "optim/lr": 4e-05, "progress/done_frac": 0.08620689655172414, "skyrl.ai/grad_norm": 8136.650170678348, "skyrl.ai/learning_rate": 4e-05, "time/total": 157.93997859954834, "reward/total": 0.22509765625}
+{"step": 5, "progress/batch": 5, "optim/lr": 4e-05, "progress/done_frac": 0.10344827586206896, "skyrl.ai/grad_norm": 8247.074147841766, "skyrl.ai/learning_rate": 4e-05, "time/total": 155.9241008758545, "reward/total": 0.36474609375}
+{"step": 6, "progress/batch": 6, "optim/lr": 4e-05, "progress/done_frac": 0.1206896551724138, "skyrl.ai/grad_norm": 8501.772521068768, "skyrl.ai/learning_rate": 4e-05, "time/total": 150.34410667419434, "reward/total": 0.416015625}
diff --git a/tests/tinker/smoke_logs/rl_loop_d.log/code.diff b/tests/tinker/smoke_logs/rl_loop_d.log/code.diff
new file mode 100644
index 0000000000..d8e559755f
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_d.log/code.diff
@@ -0,0 +1,3 @@
+### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3
+modules: tinker_cookbook
+(no local changes)
diff --git a/tests/tinker/smoke_logs/rl_loop_d.log/config.json b/tests/tinker/smoke_logs/rl_loop_d.log/config.json
new file mode 100644
index 0000000000..1a2c902c7f
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_d.log/config.json
@@ -0,0 +1,12 @@
+{
+ "base_url": "http://localhost:8000",
+ "log_path": "/tmp/rl_loop_d.log",
+ "model_name": "Qwen/Qwen3-0.6B",
+ "batch_size": 128,
+ "group_size": 16,
+ "learning_rate": 4e-05,
+ "lora_rank": 32,
+ "save_every": 20,
+ "max_tokens": 256,
+ "ttl_seconds": 604800
+}
\ No newline at end of file
diff --git a/tests/tinker/smoke_logs/rl_loop_d.log/logs.log b/tests/tinker/smoke_logs/rl_loop_d.log/logs.log
new file mode 100644
index 0000000000..3bfd003249
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_d.log/logs.log
@@ -0,0 +1,1055 @@
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_d.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_d.log
+__main__:73 [INFO] Using renderer: qwen3
+__main__:76 [INFO] Loading dataset...
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_ce1e79ca
+tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_d.log/checkpoints.jsonl
+tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_d.log
+tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_050364f7
+__main__:121 [INFO] Training for 58 batches
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO]
+ Step 0
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Metric                 ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ optim/lr               │ 0.000040    │
+│ progress/batch         │ 0           │
+│ progress/done_frac     │ 0.017241    │
+│ reward/total           │ 0.017578    │
+│ skyrl.ai/grad_norm     │ 2725.635981 │
+│ skyrl.ai/learning_rate │ 0.000040    │
+│ time/total             │ 90.791685   │
+└────────────────────────┴─────────────┘
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO]
+ Step 1
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Metric                 ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ optim/lr               │ 0.000040    │
+│ progress/batch         │ 1           │
+│ progress/done_frac     │ 0.034483    │
+│ reward/total           │ 0.028809    │
+│ skyrl.ai/grad_norm     │ 2989.493268 │
+│ skyrl.ai/learning_rate │ 0.000040    │
+│ time/total             │ 214.999288  │
+└────────────────────────┴─────────────┘
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO]
+ Step 2
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Metric                 ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ optim/lr               │ 0.000040    │
+│ progress/batch         │ 2           │
+│ progress/done_frac     │ 0.051724    │
+│ reward/total           │ 0.032227    │
+│ skyrl.ai/grad_norm     │ 3538.728444 │
+│ skyrl.ai/learning_rate │ 0.000040    │
+│ time/total             │ 89.495047   │
+└────────────────────────┴─────────────┘
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO]
+ Step 3
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Metric                 ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ optim/lr               │ 0.000040    │
+│ progress/batch         │ 3           │
+│ progress/done_frac     │ 0.068966    │
+│ reward/total           │ 0.058594    │
+│ skyrl.ai/grad_norm     │ 4845.256856 │
+│ skyrl.ai/learning_rate │ 0.000040    │
+│ time/total             │ 211.909505  │
+└────────────────────────┴─────────────┘
+tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 120.04485723900143 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out..
+Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated.
+tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 140.04900060500222 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out..
+Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated.
+tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 160.05557763099932 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out..
+Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated.
[... repeated "tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch" tracebacks (httpx ReadTimeout, later ConnectTimeout, each surfacing as tinker.APITimeoutError: Request timed out.) interleaved with "Session heartbeat failed" warnings for session_ce1e79ca, covering roughly 180 s through 1180 s of failed heartbeats; identical to the traceback shown above ...]
+tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 101, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 78, in handle_async_request + stream = await self._connect(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 124, in _connect + stream = await self._network_backend.connect_tcp(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/auto.py", line 31, in connect_tcp + return await self._backend.connect_tcp( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 113, in connect_tcp + with map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ConnectTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ConnectTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1200.6636957090013 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1220.6689744399991 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1240.672793824 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1260.6809952330004 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1280.682395210999 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1300.6857222889994 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. 
+tinker.lib.telemetry:135 [WARNING] Failed to send telemetry batch +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions + yield + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 394, in handle_async_request + resp = await self._pool.handle_async_request(req) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request + raise exc from None + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request + response = await connection.handle_async_request( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 101, in handle_async_request + raise exc + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 78, in handle_async_request + stream = await self._connect(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py", line 124, in _connect + stream = await self._network_backend.connect_tcp(**kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/auto.py", line 31, in connect_tcp + return await self._backend.connect_tcp( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py", line 113, in connect_tcp + with map_exceptions(exc_map): + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions + raise to_exc(exc) from exc +httpcore.ConnectTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 969, in request + response = await self._client.send( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1629, in send + response = await self._send_handling_auth( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1657, in _send_handling_auth + response = await self._send_handling_redirects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects + response = await self._send_single_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_client.py", line 1730, in _send_single_request + response = await transport.handle_async_request(request) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 393, in handle_async_request + with map_httpcore_exceptions(): + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/etang/.local/share/uv/python/cpython-3.12.13-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 158, in __exit__ + self.gen.throw(value) + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions + raise mapped_exc(message) from exc +httpx.ConnectTimeout + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 133, in _send_batch_with_retry + return await self._send_batch(batch) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/lib/telemetry.py", line 142, in _send_batch + return await client.telemetry.send(request=request, timeout=HTTP_TIMEOUT_SECONDS) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/resources/telemetry.py", line 68, in send + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 1232, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/etang/tinker-cookbook/.venv/lib/python3.12/site-packages/tinker/_base_client.py", line 987, in request + raise APITimeoutError(request=request) from err +tinker.APITimeoutError: Request timed out. +tinker.lib.internal_client_holder:357 [WARNING] Session heartbeat failed for 1320.6910064229996 seconds for session session_ce1e79ca. Last exception: APITimeoutError: Request timed out.. +Your connection may be unreliable or Tinker is down. If this persists, the session will be terminated. 
diff --git a/tests/tinker/smoke_logs/rl_loop_d.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_d.log/metrics.jsonl
new file mode 100644
index 0000000000..7e5d600922
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_d.log/metrics.jsonl
@@ -0,0 +1,4 @@
+{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2725.6359808308957, "skyrl.ai/learning_rate": 4e-05, "time/total": 90.79168486595154, "reward/total": 0.017578125}
+{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 2989.493268097455, "skyrl.ai/learning_rate": 4e-05, "time/total": 214.99928832054138, "reward/total": 0.02880859375}
+{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 3538.7284439470627, "skyrl.ai/learning_rate": 4e-05, "time/total": 89.49504661560059, "reward/total": 0.0322265625}
+{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 4845.256855936535, "skyrl.ai/learning_rate": 4e-05, "time/total": 211.90950536727905, "reward/total": 0.05859375}
diff --git a/tests/tinker/smoke_logs/rl_loop_e.log/checkpoints.jsonl b/tests/tinker/smoke_logs/rl_loop_e.log/checkpoints.jsonl
new file mode 100644
index 0000000000..a362ee607a
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_e.log/checkpoints.jsonl
@@ -0,0 +1,3 @@
+{"name": "000020", "batch": 20, "state_path": "tinker://model_e3750434/weights/000020"}
+{"name": "000040", "batch": 40, "state_path": "tinker://model_e3750434/weights/000040"}
+{"name": "final", "batch": 58, "state_path": "tinker://model_e3750434/weights/final", "sampler_path": "tinker://model_e3750434/final"}
diff --git a/tests/tinker/smoke_logs/rl_loop_e.log/code.diff b/tests/tinker/smoke_logs/rl_loop_e.log/code.diff
new file mode 100644
index 0000000000..d8e559755f
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_e.log/code.diff
@@ -0,0 +1,3 @@
+### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3
+modules: tinker_cookbook
+(no local changes)
diff --git a/tests/tinker/smoke_logs/rl_loop_e.log/config.json b/tests/tinker/smoke_logs/rl_loop_e.log/config.json
new file mode 100644
index 0000000000..f0e86c83b2
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_e.log/config.json
@@ -0,0 +1,12 @@
+{
+  "base_url": "http://localhost:8000",
+  "log_path": "/tmp/rl_loop_e.log",
+  "model_name": "Qwen/Qwen3-0.6B",
+  "batch_size": 128,
+  "group_size": 16,
+  "learning_rate": 4e-05,
+  "lora_rank": 32,
+  "save_every": 20,
+  "max_tokens": 256,
+  "ttl_seconds": 604800
+}
\ No newline at end of file
diff --git a/tests/tinker/smoke_logs/rl_loop_e.log/logs.log b/tests/tinker/smoke_logs/rl_loop_e.log/logs.log
new file mode 100644
index 0000000000..aa73ca12ac
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_e.log/logs.log
@@ -0,0 +1,825 @@
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_e.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_e.log
+__main__:73 [INFO] Using renderer: qwen3
+__main__:76 [INFO] Loading dataset...
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_997689e1
+tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_e.log/checkpoints.jsonl
+tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_e.log
+tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_e3750434
+__main__:121 [INFO] Training for 58 batches
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_e.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO]
+ Step 0
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Metric                 ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ optim/lr               │ 0.000040    │
+│ progress/batch         │ 0           │
+│ progress/done_frac     │ 0.017241    │
+│ reward/total           │ 0.025879    │
+│ skyrl.ai/grad_norm     │ 3311.342930 │
+│ skyrl.ai/learning_rate │ 0.000040    │
+│ time/total             │ 134.581530  │
+└────────────────────────┴─────────────┘
+[The same "Wrote metrics" line and per-step table repeat for Steps 1-57; the per-step values are recorded verbatim in metrics.jsonl below. Checkpoint saves appear after the Step 19, Step 39, and Step 57 tables:]
+tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_e3750434/weights/000020'}
+tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_e3750434/weights/000040'}
+tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_e3750434/weights/final', 'sampler_path': 'tinker://model_e3750434/final'}
+__main__:257 [INFO] Training completed
diff --git a/tests/tinker/smoke_logs/rl_loop_e.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_e.log/metrics.jsonl
new file mode 100644
index 0000000000..3d8f4565a3
--- /dev/null
+++ b/tests/tinker/smoke_logs/rl_loop_e.log/metrics.jsonl
@@ -0,0 +1,58 @@
+{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 3311.3429299907916, "skyrl.ai/learning_rate": 4e-05, "time/total": 134.58153009414673, "reward/total": 0.02587890625}
+{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 2744.2240251116527, "skyrl.ai/learning_rate": 4e-05, "time/total": 80.35029816627502, "reward/total": 0.02978515625}
+{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 2827.5006631298957, "skyrl.ai/learning_rate": 4e-05, "time/total": 82.26580548286438, "reward/total": 0.0341796875}
+{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 3308.343996624293, "skyrl.ai/learning_rate": 4e-05, "time/total": 86.25714540481567, "reward/total": 0.078125}
+[Entries for steps 4-45 continue in the same schema, one JSON object per step, with reward/total climbing to roughly 0.7-0.8 by the end of the run.]
+{"step": 46, "progress/batch": 46, "optim/lr": 4e-05, "progress/done_frac": 0.8103448275862069, "skyrl.ai/grad_norm": 3723.012221306828, "skyrl.ai/learning_rate": 4e-05, "time/total": 114.1338005065918, "reward/total": 0.70068359375}
+{"step": 47, "progress/batch": 47, "optim/lr": 4e-05,
"progress/done_frac": 0.8275862068965517, "skyrl.ai/grad_norm": 4958.986186711957, "skyrl.ai/learning_rate": 4e-05, "time/total": 110.26101756095886, "reward/total": 0.78857421875} +{"step": 48, "progress/batch": 48, "optim/lr": 4e-05, "progress/done_frac": 0.8448275862068966, "skyrl.ai/grad_norm": 4091.8773197641203, "skyrl.ai/learning_rate": 4e-05, "time/total": 132.81446027755737, "reward/total": 0.75244140625} +{"step": 49, "progress/batch": 49, "optim/lr": 4e-05, "progress/done_frac": 0.8620689655172413, "skyrl.ai/grad_norm": 4194.601530538986, "skyrl.ai/learning_rate": 4e-05, "time/total": 112.91583442687988, "reward/total": 0.705078125} +{"step": 50, "progress/batch": 50, "optim/lr": 4e-05, "progress/done_frac": 0.8793103448275862, "skyrl.ai/grad_norm": 4843.082902449637, "skyrl.ai/learning_rate": 4e-05, "time/total": 127.59014749526978, "reward/total": 0.69921875} +{"step": 51, "progress/batch": 51, "optim/lr": 4e-05, "progress/done_frac": 0.896551724137931, "skyrl.ai/grad_norm": 4349.73585404907, "skyrl.ai/learning_rate": 4e-05, "time/total": 108.63602137565613, "reward/total": 0.70458984375} +{"step": 52, "progress/batch": 52, "optim/lr": 4e-05, "progress/done_frac": 0.9137931034482759, "skyrl.ai/grad_norm": 5007.795123604798, "skyrl.ai/learning_rate": 4e-05, "time/total": 95.16308879852295, "reward/total": 0.6728515625} +{"step": 53, "progress/batch": 53, "optim/lr": 4e-05, "progress/done_frac": 0.9310344827586207, "skyrl.ai/grad_norm": 3999.6672361585283, "skyrl.ai/learning_rate": 4e-05, "time/total": 96.70835590362549, "reward/total": 0.82080078125} +{"step": 54, "progress/batch": 54, "optim/lr": 4e-05, "progress/done_frac": 0.9482758620689655, "skyrl.ai/grad_norm": 4468.568450857612, "skyrl.ai/learning_rate": 4e-05, "time/total": 123.5486068725586, "reward/total": 0.7001953125} +{"step": 55, "progress/batch": 55, "optim/lr": 4e-05, "progress/done_frac": 0.9655172413793104, "skyrl.ai/grad_norm": 5002.112953542733, "skyrl.ai/learning_rate": 4e-05, "time/total": 113.38967609405518, "reward/total": 0.78173828125} +{"step": 56, "progress/batch": 56, "optim/lr": 4e-05, "progress/done_frac": 0.9827586206896551, "skyrl.ai/grad_norm": 5835.695674039214, "skyrl.ai/learning_rate": 4e-05, "time/total": 104.90921354293823, "reward/total": 0.76904296875} +{"step": 57, "progress/batch": 57, "optim/lr": 4e-05, "progress/done_frac": 1.0, "skyrl.ai/grad_norm": 4346.515385915481, "skyrl.ai/learning_rate": 4e-05, "time/total": 120.04163336753845, "reward/total": 0.73486328125} diff --git a/tests/tinker/smoke_logs/rl_loop_f.log/checkpoints.jsonl b/tests/tinker/smoke_logs/rl_loop_f.log/checkpoints.jsonl new file mode 100644 index 0000000000..8f232bbf22 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_f.log/checkpoints.jsonl @@ -0,0 +1,3 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_e06832f2/weights/000020"} +{"name": "000040", "batch": 40, "state_path": "tinker://model_e06832f2/weights/000040"} +{"name": "final", "batch": 58, "state_path": "tinker://model_e06832f2/weights/final", "sampler_path": "tinker://model_e06832f2/final"} diff --git a/tests/tinker/smoke_logs/rl_loop_f.log/code.diff b/tests/tinker/smoke_logs/rl_loop_f.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_f.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/rl_loop_f.log/config.json 
b/tests/tinker/smoke_logs/rl_loop_f.log/config.json new file mode 100644 index 0000000000..472eacbbaa --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_f.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/rl_loop_f.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "group_size": 16, + "learning_rate": 4e-05, + "lora_rank": 32, + "save_every": 20, + "max_tokens": 256, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/rl_loop_f.log/logs.log b/tests/tinker/smoke_logs/rl_loop_f.log/logs.log new file mode 100644 index 0000000000..0ba8aa403f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_f.log/logs.log @@ -0,0 +1,825 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_f.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_f.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_8246be4c +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_f.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_f.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_e06832f2 +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.016602  │ +│ skyrl.ai/grad_norm  │ 2294.091214 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 95.201286  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.022461  │ +│ skyrl.ai/grad_norm  │ 2692.103174 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 173.874377  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.025879  │ +│ skyrl.ai/grad_norm  │ 2919.905992 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 103.146296  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.050781  │ +│ skyrl.ai/grad_norm  │ 4679.051827 │ +│ 
skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 138.495987  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 4  │ +│ progress/done_frac  │ 0.086207  │ +│ reward/total  │ 0.172852  │ +│ skyrl.ai/grad_norm  │ 7131.992989 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 127.667517  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 5  │ +│ progress/done_frac  │ 0.103448  │ +│ reward/total  │ 0.315430  │ +│ skyrl.ai/grad_norm  │ 10840.603673 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 142.253343  │ +└────────────────────────┴──────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 6  │ +│ progress/done_frac  │ 0.120690  │ +│ reward/total  │ 0.411621  │ +│ skyrl.ai/grad_norm  │ 8993.776960 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 138.389953  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 7  │ +│ progress/done_frac  │ 0.137931  │ +│ reward/total  │ 0.560059  │ +│ skyrl.ai/grad_norm  │ 6986.811004 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 129.658682  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 8  │ +│ progress/done_frac  │ 0.155172  │ +│ reward/total  │ 0.538086  │ +│ skyrl.ai/grad_norm  │ 5574.307491 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 125.289493  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 9  │ +│ progress/done_frac  │ 0.172414  │ +│ reward/total  │ 0.591309  │ +│ skyrl.ai/grad_norm  │ 5530.089149 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 115.779778  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 10  │ +│ 
progress/done_frac  │ 0.189655  │ +│ reward/total  │ 0.589844  │ +│ skyrl.ai/grad_norm  │ 4833.589350 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 123.424306  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 11  │ +│ progress/done_frac  │ 0.206897  │ +│ reward/total  │ 0.636719  │ +│ skyrl.ai/grad_norm  │ 5552.164083 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 114.481740  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 12  │ +│ progress/done_frac  │ 0.224138  │ +│ reward/total  │ 0.587402  │ +│ skyrl.ai/grad_norm  │ 5142.660595 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 109.688283  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 13  │ +│ progress/done_frac  │ 0.241379  │ +│ reward/total  │ 0.628906  │ +│ skyrl.ai/grad_norm  │ 4498.109603 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 107.482208  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 14  │ +│ progress/done_frac  │ 0.258621  │ +│ reward/total  │ 0.585449  │ +│ skyrl.ai/grad_norm  │ 5197.242923 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 105.424653  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 15  │ +│ progress/done_frac  │ 0.275862  │ +│ reward/total  │ 0.646973  │ +│ skyrl.ai/grad_norm  │ 5264.652315 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 106.550719  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 16  │ +│ progress/done_frac  │ 0.293103  │ +│ reward/total  │ 0.694336  │ +│ skyrl.ai/grad_norm  │ 5170.707882 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 113.230792  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ 
Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 17  │ +│ progress/done_frac  │ 0.310345  │ +│ reward/total  │ 0.690430  │ +│ skyrl.ai/grad_norm  │ 5101.805563 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 109.853788  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 18  │ +│ progress/done_frac  │ 0.327586  │ +│ reward/total  │ 0.625000  │ +│ skyrl.ai/grad_norm  │ 5243.874140 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 115.444906  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 19  │ +│ progress/done_frac  │ 0.344828  │ +│ reward/total  │ 0.684570  │ +│ skyrl.ai/grad_norm  │ 4356.659730 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 110.799166  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_e06832f2/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 20  │ +│ progress/done_frac  │ 0.362069  │ +│ reward/total  │ 0.699219  │ +│ skyrl.ai/grad_norm  │ 5144.779879 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 113.444232  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 21  │ +│ progress/done_frac  │ 0.379310  │ +│ reward/total  │ 0.697754  │ +│ skyrl.ai/grad_norm  │ 5902.052863 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 116.014966  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 22  │ +│ progress/done_frac  │ 0.396552  │ +│ reward/total  │ 0.722656  │ +│ skyrl.ai/grad_norm  │ 5800.156894 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 117.478718  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 23  │ +│ progress/done_frac  │ 0.413793  │ +│ reward/total  │ 0.712891  │ +│ skyrl.ai/grad_norm  │ 3894.663528 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 119.337539  │ 
+└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 24  │ +│ progress/done_frac  │ 0.431034  │ +│ reward/total  │ 0.658691  │ +│ skyrl.ai/grad_norm  │ 5355.445453 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 110.459812  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 25  │ +│ progress/done_frac  │ 0.448276  │ +│ reward/total  │ 0.707520  │ +│ skyrl.ai/grad_norm  │ 4574.888195 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 127.024712  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 26  │ +│ progress/done_frac  │ 0.465517  │ +│ reward/total  │ 0.700195  │ +│ skyrl.ai/grad_norm  │ 5258.031571 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 101.268171  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 27  │ +│ progress/done_frac  │ 0.482759  │ +│ reward/total  │ 0.729492  │ +│ skyrl.ai/grad_norm  │ 4962.903586 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 108.927979  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 28  │ +│ progress/done_frac  │ 0.500000  │ +│ reward/total  │ 0.774902  │ +│ skyrl.ai/grad_norm  │ 5472.180187 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 109.060334  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 29  │ +│ progress/done_frac  │ 0.517241  │ +│ reward/total  │ 0.754395  │ +│ skyrl.ai/grad_norm  │ 4148.777410 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 118.506368  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 30  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 30  │ +│ progress/done_frac  │ 0.534483  │ +│ reward/total  │ 0.720703  │ +│ 
skyrl.ai/grad_norm  │ 5368.396409 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 114.272527  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 31  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 31  │ +│ progress/done_frac  │ 0.551724  │ +│ reward/total  │ 0.756836  │ +│ skyrl.ai/grad_norm  │ 3687.510949 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 110.167546  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 32  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 32  │ +│ progress/done_frac  │ 0.568966  │ +│ reward/total  │ 0.661621  │ +│ skyrl.ai/grad_norm  │ 5193.492659 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 133.503203  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 33  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 33  │ +│ progress/done_frac  │ 0.586207  │ +│ reward/total  │ 0.779297  │ +│ skyrl.ai/grad_norm  │ 4541.073662 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 112.264678  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 34  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 34  │ +│ progress/done_frac  │ 0.603448  │ +│ reward/total  │ 0.708984  │ +│ skyrl.ai/grad_norm  │ 5435.408172 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 128.267636  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 35  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 35  │ +│ progress/done_frac  │ 0.620690  │ +│ reward/total  │ 0.708496  │ +│ skyrl.ai/grad_norm  │ 4911.940961 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 108.495283  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 36  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 36  │ +│ progress/done_frac  │ 0.637931  │ +│ reward/total  │ 0.694824  │ +│ skyrl.ai/grad_norm  │ 4770.140878 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 104.190873  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 37  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 
0.000040  │ +│ progress/batch  │ 37  │ +│ progress/done_frac  │ 0.655172  │ +│ reward/total  │ 0.729980  │ +│ skyrl.ai/grad_norm  │ 5225.622642 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 97.960649  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 38  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 38  │ +│ progress/done_frac  │ 0.672414  │ +│ reward/total  │ 0.714844  │ +│ skyrl.ai/grad_norm  │ 4925.707665 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 112.226573  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 39  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 39  │ +│ progress/done_frac  │ 0.689655  │ +│ reward/total  │ 0.749512  │ +│ skyrl.ai/grad_norm  │ 4603.290997 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 114.274745  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_e06832f2/weights/000040'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 40  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 40  │ +│ progress/done_frac  │ 0.706897  │ +│ reward/total  │ 0.742188  │ +│ skyrl.ai/grad_norm  │ 4576.326256 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 160.028438  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 41  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 41  │ +│ progress/done_frac  │ 0.724138  │ +│ reward/total  │ 0.715820  │ +│ skyrl.ai/grad_norm  │ 5315.205734 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 87.291408  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 42  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 42  │ +│ progress/done_frac  │ 0.741379  │ +│ reward/total  │ 0.731934  │ +│ skyrl.ai/grad_norm  │ 4193.670469 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 68.908308  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 43  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 43  │ +│ progress/done_frac  │ 0.758621  │ +│ reward/total  │ 0.703613  │ +│ skyrl.ai/grad_norm  │ 4106.626109 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 68.086455  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] 
Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 44  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 44  │ +│ progress/done_frac  │ 0.775862  │ +│ reward/total  │ 0.830078  │ +│ skyrl.ai/grad_norm  │ 4323.928307 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.415823  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 45  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 45  │ +│ progress/done_frac  │ 0.793103  │ +│ reward/total  │ 0.776367  │ +│ skyrl.ai/grad_norm  │ 4000.091249 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.905457  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 46  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 46  │ +│ progress/done_frac  │ 0.810345  │ +│ reward/total  │ 0.727051  │ +│ skyrl.ai/grad_norm  │ 3997.601656 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.781055  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 47  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 47  │ +│ progress/done_frac  │ 0.827586  │ +│ reward/total  │ 0.796875  │ +│ skyrl.ai/grad_norm  │ 3721.394497 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 59.962005  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 48  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 48  │ +│ progress/done_frac  │ 0.844828  │ +│ reward/total  │ 0.791504  │ +│ skyrl.ai/grad_norm  │ 4527.206203 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.772083  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 49  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 49  │ +│ progress/done_frac  │ 0.862069  │ +│ reward/total  │ 0.709961  │ +│ skyrl.ai/grad_norm  │ 4433.696877 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.995457  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 50  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 50  │ +│ progress/done_frac  │ 0.879310  │ +│ reward/total  │ 0.738770  │ +│ skyrl.ai/grad_norm  │ 4101.841538 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total 
 │ 62.500658  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 51  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 51  │ +│ progress/done_frac  │ 0.896552  │ +│ reward/total  │ 0.734375  │ +│ skyrl.ai/grad_norm  │ 4450.920130 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.566620  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 52  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 52  │ +│ progress/done_frac  │ 0.913793  │ +│ reward/total  │ 0.690430  │ +│ skyrl.ai/grad_norm  │ 17508.627359 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.398955  │ +└────────────────────────┴──────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 53  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 53  │ +│ progress/done_frac  │ 0.931034  │ +│ reward/total  │ 0.812500  │ +│ skyrl.ai/grad_norm  │ 4251.743642 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.225998  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 54  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 54  │ +│ progress/done_frac  │ 0.948276  │ +│ reward/total  │ 0.721680  │ +│ skyrl.ai/grad_norm  │ 4362.466275 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.625412  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 55  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 55  │ +│ progress/done_frac  │ 0.965517  │ +│ reward/total  │ 0.772461  │ +│ skyrl.ai/grad_norm  │ 4851.632097 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.751420  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 56  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 56  │ +│ progress/done_frac  │ 0.982759  │ +│ reward/total  │ 0.781738  │ +│ skyrl.ai/grad_norm  │ 4099.095510 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.074497  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_f.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 57  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 57  │ +│ progress/done_frac  │ 1.000000  │ +│ reward/total  │ 
0.749023  │ +│ skyrl.ai/grad_norm  │ 7324.760474 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.397691  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_e06832f2/weights/final', 'sampler_path': 'tinker://model_e06832f2/final'} +__main__:257 [INFO] Training completed diff --git a/tests/tinker/smoke_logs/rl_loop_f.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_f.log/metrics.jsonl new file mode 100644 index 0000000000..4a1587b4fd --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_f.log/metrics.jsonl @@ -0,0 +1,58 @@ +{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2294.091214402775, "skyrl.ai/learning_rate": 4e-05, "time/total": 95.20128583908081, "reward/total": 0.0166015625} +{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 2692.103174100131, "skyrl.ai/learning_rate": 4e-05, "time/total": 173.87437748908997, "reward/total": 0.0224609375} +{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 2919.905991637402, "skyrl.ai/learning_rate": 4e-05, "time/total": 103.14629554748535, "reward/total": 0.02587890625} +{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 4679.051827026497, "skyrl.ai/learning_rate": 4e-05, "time/total": 138.49598717689514, "reward/total": 0.05078125} +{"step": 4, "progress/batch": 4, "optim/lr": 4e-05, "progress/done_frac": 0.08620689655172414, "skyrl.ai/grad_norm": 7131.992989340357, "skyrl.ai/learning_rate": 4e-05, "time/total": 127.66751670837402, "reward/total": 0.1728515625} +{"step": 5, "progress/batch": 5, "optim/lr": 4e-05, "progress/done_frac": 0.10344827586206896, "skyrl.ai/grad_norm": 10840.603673227797, "skyrl.ai/learning_rate": 4e-05, "time/total": 142.25334334373474, "reward/total": 0.3154296875} +{"step": 6, "progress/batch": 6, "optim/lr": 4e-05, "progress/done_frac": 0.1206896551724138, "skyrl.ai/grad_norm": 8993.776959653826, "skyrl.ai/learning_rate": 4e-05, "time/total": 138.3899528980255, "reward/total": 0.41162109375} +{"step": 7, "progress/batch": 7, "optim/lr": 4e-05, "progress/done_frac": 0.13793103448275862, "skyrl.ai/grad_norm": 6986.811003598136, "skyrl.ai/learning_rate": 4e-05, "time/total": 129.658682346344, "reward/total": 0.56005859375} +{"step": 8, "progress/batch": 8, "optim/lr": 4e-05, "progress/done_frac": 0.15517241379310345, "skyrl.ai/grad_norm": 5574.3074906215925, "skyrl.ai/learning_rate": 4e-05, "time/total": 125.28949284553528, "reward/total": 0.5380859375} +{"step": 9, "progress/batch": 9, "optim/lr": 4e-05, "progress/done_frac": 0.1724137931034483, "skyrl.ai/grad_norm": 5530.089149371825, "skyrl.ai/learning_rate": 4e-05, "time/total": 115.77977848052979, "reward/total": 0.59130859375} +{"step": 10, "progress/batch": 10, "optim/lr": 4e-05, "progress/done_frac": 0.1896551724137931, "skyrl.ai/grad_norm": 4833.589349541395, "skyrl.ai/learning_rate": 4e-05, "time/total": 123.42430639266968, "reward/total": 0.58984375} +{"step": 11, "progress/batch": 11, "optim/lr": 4e-05, "progress/done_frac": 0.20689655172413793, "skyrl.ai/grad_norm": 5552.164082589778, "skyrl.ai/learning_rate": 4e-05, "time/total": 114.48173975944519, "reward/total": 0.63671875} +{"step": 12, "progress/batch": 12, "optim/lr": 4e-05, "progress/done_frac": 0.22413793103448276, 
"skyrl.ai/grad_norm": 5142.6605954505685, "skyrl.ai/learning_rate": 4e-05, "time/total": 109.68828344345093, "reward/total": 0.58740234375} +{"step": 13, "progress/batch": 13, "optim/lr": 4e-05, "progress/done_frac": 0.2413793103448276, "skyrl.ai/grad_norm": 4498.109602933215, "skyrl.ai/learning_rate": 4e-05, "time/total": 107.48220753669739, "reward/total": 0.62890625} +{"step": 14, "progress/batch": 14, "optim/lr": 4e-05, "progress/done_frac": 0.25862068965517243, "skyrl.ai/grad_norm": 5197.242922935198, "skyrl.ai/learning_rate": 4e-05, "time/total": 105.42465329170227, "reward/total": 0.58544921875} +{"step": 15, "progress/batch": 15, "optim/lr": 4e-05, "progress/done_frac": 0.27586206896551724, "skyrl.ai/grad_norm": 5264.65231520563, "skyrl.ai/learning_rate": 4e-05, "time/total": 106.55071878433228, "reward/total": 0.64697265625} +{"step": 16, "progress/batch": 16, "optim/lr": 4e-05, "progress/done_frac": 0.29310344827586204, "skyrl.ai/grad_norm": 5170.707881905532, "skyrl.ai/learning_rate": 4e-05, "time/total": 113.2307915687561, "reward/total": 0.6943359375} +{"step": 17, "progress/batch": 17, "optim/lr": 4e-05, "progress/done_frac": 0.3103448275862069, "skyrl.ai/grad_norm": 5101.805562739529, "skyrl.ai/learning_rate": 4e-05, "time/total": 109.85378813743591, "reward/total": 0.6904296875} +{"step": 18, "progress/batch": 18, "optim/lr": 4e-05, "progress/done_frac": 0.3275862068965517, "skyrl.ai/grad_norm": 5243.874140366071, "skyrl.ai/learning_rate": 4e-05, "time/total": 115.44490599632263, "reward/total": 0.625} +{"step": 19, "progress/batch": 19, "optim/lr": 4e-05, "progress/done_frac": 0.3448275862068966, "skyrl.ai/grad_norm": 4356.659729655278, "skyrl.ai/learning_rate": 4e-05, "time/total": 110.79916596412659, "reward/total": 0.6845703125} +{"step": 20, "progress/batch": 20, "optim/lr": 4e-05, "progress/done_frac": 0.3620689655172414, "skyrl.ai/grad_norm": 5144.779878673139, "skyrl.ai/learning_rate": 4e-05, "time/total": 113.44423246383667, "reward/total": 0.69921875} +{"step": 21, "progress/batch": 21, "optim/lr": 4e-05, "progress/done_frac": 0.3793103448275862, "skyrl.ai/grad_norm": 5902.05286319938, "skyrl.ai/learning_rate": 4e-05, "time/total": 116.01496648788452, "reward/total": 0.69775390625} +{"step": 22, "progress/batch": 22, "optim/lr": 4e-05, "progress/done_frac": 0.39655172413793105, "skyrl.ai/grad_norm": 5800.156894429667, "skyrl.ai/learning_rate": 4e-05, "time/total": 117.47871780395508, "reward/total": 0.72265625} +{"step": 23, "progress/batch": 23, "optim/lr": 4e-05, "progress/done_frac": 0.41379310344827586, "skyrl.ai/grad_norm": 3894.66352847072, "skyrl.ai/learning_rate": 4e-05, "time/total": 119.33753895759583, "reward/total": 0.712890625} +{"step": 24, "progress/batch": 24, "optim/lr": 4e-05, "progress/done_frac": 0.43103448275862066, "skyrl.ai/grad_norm": 5355.44545299455, "skyrl.ai/learning_rate": 4e-05, "time/total": 110.45981216430664, "reward/total": 0.65869140625} +{"step": 25, "progress/batch": 25, "optim/lr": 4e-05, "progress/done_frac": 0.4482758620689655, "skyrl.ai/grad_norm": 4574.888195355161, "skyrl.ai/learning_rate": 4e-05, "time/total": 127.02471208572388, "reward/total": 0.70751953125} +{"step": 26, "progress/batch": 26, "optim/lr": 4e-05, "progress/done_frac": 0.46551724137931033, "skyrl.ai/grad_norm": 5258.03157084474, "skyrl.ai/learning_rate": 4e-05, "time/total": 101.26817059516907, "reward/total": 0.7001953125} +{"step": 27, "progress/batch": 27, "optim/lr": 4e-05, "progress/done_frac": 0.4827586206896552, "skyrl.ai/grad_norm": 
4962.903585603895, "skyrl.ai/learning_rate": 4e-05, "time/total": 108.92797946929932, "reward/total": 0.7294921875} +{"step": 28, "progress/batch": 28, "optim/lr": 4e-05, "progress/done_frac": 0.5, "skyrl.ai/grad_norm": 5472.180187091795, "skyrl.ai/learning_rate": 4e-05, "time/total": 109.06033444404602, "reward/total": 0.77490234375} +{"step": 29, "progress/batch": 29, "optim/lr": 4e-05, "progress/done_frac": 0.5172413793103449, "skyrl.ai/grad_norm": 4148.7774102740195, "skyrl.ai/learning_rate": 4e-05, "time/total": 118.50636839866638, "reward/total": 0.75439453125} +{"step": 30, "progress/batch": 30, "optim/lr": 4e-05, "progress/done_frac": 0.5344827586206896, "skyrl.ai/grad_norm": 5368.396408612166, "skyrl.ai/learning_rate": 4e-05, "time/total": 114.27252721786499, "reward/total": 0.720703125} +{"step": 31, "progress/batch": 31, "optim/lr": 4e-05, "progress/done_frac": 0.5517241379310345, "skyrl.ai/grad_norm": 3687.510949136287, "skyrl.ai/learning_rate": 4e-05, "time/total": 110.1675455570221, "reward/total": 0.7568359375} +{"step": 32, "progress/batch": 32, "optim/lr": 4e-05, "progress/done_frac": 0.5689655172413793, "skyrl.ai/grad_norm": 5193.492659087909, "skyrl.ai/learning_rate": 4e-05, "time/total": 133.5032033920288, "reward/total": 0.66162109375} +{"step": 33, "progress/batch": 33, "optim/lr": 4e-05, "progress/done_frac": 0.5862068965517241, "skyrl.ai/grad_norm": 4541.073661591497, "skyrl.ai/learning_rate": 4e-05, "time/total": 112.26467752456665, "reward/total": 0.779296875} +{"step": 34, "progress/batch": 34, "optim/lr": 4e-05, "progress/done_frac": 0.603448275862069, "skyrl.ai/grad_norm": 5435.408172345477, "skyrl.ai/learning_rate": 4e-05, "time/total": 128.2676362991333, "reward/total": 0.708984375} +{"step": 35, "progress/batch": 35, "optim/lr": 4e-05, "progress/done_frac": 0.6206896551724138, "skyrl.ai/grad_norm": 4911.940960557242, "skyrl.ai/learning_rate": 4e-05, "time/total": 108.49528288841248, "reward/total": 0.70849609375} +{"step": 36, "progress/batch": 36, "optim/lr": 4e-05, "progress/done_frac": 0.6379310344827587, "skyrl.ai/grad_norm": 4770.140878422774, "skyrl.ai/learning_rate": 4e-05, "time/total": 104.19087266921997, "reward/total": 0.69482421875} +{"step": 37, "progress/batch": 37, "optim/lr": 4e-05, "progress/done_frac": 0.6551724137931034, "skyrl.ai/grad_norm": 5225.622642326941, "skyrl.ai/learning_rate": 4e-05, "time/total": 97.96064853668213, "reward/total": 0.72998046875} +{"step": 38, "progress/batch": 38, "optim/lr": 4e-05, "progress/done_frac": 0.6724137931034483, "skyrl.ai/grad_norm": 4925.707664894457, "skyrl.ai/learning_rate": 4e-05, "time/total": 112.22657251358032, "reward/total": 0.71484375} +{"step": 39, "progress/batch": 39, "optim/lr": 4e-05, "progress/done_frac": 0.6896551724137931, "skyrl.ai/grad_norm": 4603.290996667492, "skyrl.ai/learning_rate": 4e-05, "time/total": 114.27474546432495, "reward/total": 0.74951171875} +{"step": 40, "progress/batch": 40, "optim/lr": 4e-05, "progress/done_frac": 0.7068965517241379, "skyrl.ai/grad_norm": 4576.326255851958, "skyrl.ai/learning_rate": 4e-05, "time/total": 160.0284378528595, "reward/total": 0.7421875} +{"step": 41, "progress/batch": 41, "optim/lr": 4e-05, "progress/done_frac": 0.7241379310344828, "skyrl.ai/grad_norm": 5315.205734494198, "skyrl.ai/learning_rate": 4e-05, "time/total": 87.2914080619812, "reward/total": 0.7158203125} +{"step": 42, "progress/batch": 42, "optim/lr": 4e-05, "progress/done_frac": 0.7413793103448276, "skyrl.ai/grad_norm": 4193.670468694459, "skyrl.ai/learning_rate": 4e-05, 
"time/total": 68.90830826759338, "reward/total": 0.73193359375} +{"step": 43, "progress/batch": 43, "optim/lr": 4e-05, "progress/done_frac": 0.7586206896551724, "skyrl.ai/grad_norm": 4106.6261091070855, "skyrl.ai/learning_rate": 4e-05, "time/total": 68.08645534515381, "reward/total": 0.70361328125} +{"step": 44, "progress/batch": 44, "optim/lr": 4e-05, "progress/done_frac": 0.7758620689655172, "skyrl.ai/grad_norm": 4323.928306528683, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.41582298278809, "reward/total": 0.830078125} +{"step": 45, "progress/batch": 45, "optim/lr": 4e-05, "progress/done_frac": 0.7931034482758621, "skyrl.ai/grad_norm": 4000.0912489592033, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.90545701980591, "reward/total": 0.7763671875} +{"step": 46, "progress/batch": 46, "optim/lr": 4e-05, "progress/done_frac": 0.8103448275862069, "skyrl.ai/grad_norm": 3997.601655993253, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.78105521202087, "reward/total": 0.72705078125} +{"step": 47, "progress/batch": 47, "optim/lr": 4e-05, "progress/done_frac": 0.8275862068965517, "skyrl.ai/grad_norm": 3721.394496690723, "skyrl.ai/learning_rate": 4e-05, "time/total": 59.96200513839722, "reward/total": 0.796875} +{"step": 48, "progress/batch": 48, "optim/lr": 4e-05, "progress/done_frac": 0.8448275862068966, "skyrl.ai/grad_norm": 4527.206202505028, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.77208304405212, "reward/total": 0.79150390625} +{"step": 49, "progress/batch": 49, "optim/lr": 4e-05, "progress/done_frac": 0.8620689655172413, "skyrl.ai/grad_norm": 4433.696877324835, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.99545741081238, "reward/total": 0.7099609375} +{"step": 50, "progress/batch": 50, "optim/lr": 4e-05, "progress/done_frac": 0.8793103448275862, "skyrl.ai/grad_norm": 4101.8415376511075, "skyrl.ai/learning_rate": 4e-05, "time/total": 62.50065779685974, "reward/total": 0.73876953125} +{"step": 51, "progress/batch": 51, "optim/lr": 4e-05, "progress/done_frac": 0.896551724137931, "skyrl.ai/grad_norm": 4450.9201295911835, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.5666196346283, "reward/total": 0.734375} +{"step": 52, "progress/batch": 52, "optim/lr": 4e-05, "progress/done_frac": 0.9137931034482759, "skyrl.ai/grad_norm": 17508.627359104998, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.39895486831665, "reward/total": 0.6904296875} +{"step": 53, "progress/batch": 53, "optim/lr": 4e-05, "progress/done_frac": 0.9310344827586207, "skyrl.ai/grad_norm": 4251.743642318996, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.22599792480469, "reward/total": 0.8125} +{"step": 54, "progress/batch": 54, "optim/lr": 4e-05, "progress/done_frac": 0.9482758620689655, "skyrl.ai/grad_norm": 4362.466274941275, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.62541174888611, "reward/total": 0.7216796875} +{"step": 55, "progress/batch": 55, "optim/lr": 4e-05, "progress/done_frac": 0.9655172413793104, "skyrl.ai/grad_norm": 4851.6320965217465, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.75142049789429, "reward/total": 0.7724609375} +{"step": 56, "progress/batch": 56, "optim/lr": 4e-05, "progress/done_frac": 0.9827586206896551, "skyrl.ai/grad_norm": 4099.095509987539, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.07449650764465, "reward/total": 0.78173828125} +{"step": 57, "progress/batch": 57, "optim/lr": 4e-05, "progress/done_frac": 1.0, "skyrl.ai/grad_norm": 7324.7604738994705, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.39769148826599, "reward/total": 0.7490234375} 
diff --git a/tests/tinker/smoke_logs/rl_loop_g.log/checkpoints.jsonl b/tests/tinker/smoke_logs/rl_loop_g.log/checkpoints.jsonl new file mode 100644 index 0000000000..4c7109497f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_g.log/checkpoints.jsonl @@ -0,0 +1,3 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_33022fca/weights/000020"} +{"name": "000040", "batch": 40, "state_path": "tinker://model_33022fca/weights/000040"} +{"name": "final", "batch": 58, "state_path": "tinker://model_33022fca/weights/final", "sampler_path": "tinker://model_33022fca/final"} diff --git a/tests/tinker/smoke_logs/rl_loop_g.log/code.diff b/tests/tinker/smoke_logs/rl_loop_g.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_g.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/rl_loop_g.log/config.json b/tests/tinker/smoke_logs/rl_loop_g.log/config.json new file mode 100644 index 0000000000..d5c9146cd5 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_g.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/rl_loop_g.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "group_size": 16, + "learning_rate": 4e-05, + "lora_rank": 32, + "save_every": 20, + "max_tokens": 256, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/rl_loop_g.log/logs.log b/tests/tinker/smoke_logs/rl_loop_g.log/logs.log new file mode 100644 index 0000000000..37bf57a3e7 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_g.log/logs.log @@ -0,0 +1,825 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_g.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_g.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... 
+tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_43f00dc0 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_g.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_g.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_33022fca +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.019043  │ +│ skyrl.ai/grad_norm  │ 3047.044306 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 81.935303  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.034668  │ +│ skyrl.ai/grad_norm  │ 2863.267714 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 79.756085  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.080078  │ +│ skyrl.ai/grad_norm  │ 3862.287017 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 84.182842  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.257812  │ +│ skyrl.ai/grad_norm  │ 8041.708525 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 91.647291  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 4  │ +│ progress/done_frac  │ 0.086207  │ +│ reward/total  │ 0.547363  │ +│ skyrl.ai/grad_norm  │ 4885.541730 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 76.656733  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 5  │ +│ progress/done_frac  │ 0.103448  │ +│ reward/total  │ 0.603027  │ +│ skyrl.ai/grad_norm  │ 4959.070881 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 70.430493  │ 
+└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 6  │ +│ progress/done_frac  │ 0.120690  │ +│ reward/total  │ 0.568848  │ +│ skyrl.ai/grad_norm  │ 3898.757494 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.931934  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 7  │ +│ progress/done_frac  │ 0.137931  │ +│ reward/total  │ 0.600586  │ +│ skyrl.ai/grad_norm  │ 4328.296894 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 59.117071  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 8  │ +│ progress/done_frac  │ 0.155172  │ +│ reward/total  │ 0.535156  │ +│ skyrl.ai/grad_norm  │ 4568.654288 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 62.654777  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 9  │ +│ progress/done_frac  │ 0.172414  │ +│ reward/total  │ 0.617676  │ +│ skyrl.ai/grad_norm  │ 4046.557673 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.533596  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 10  │ +│ progress/done_frac  │ 0.189655  │ +│ reward/total  │ 0.625977  │ +│ skyrl.ai/grad_norm  │ 3957.613675 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.592402  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 11  │ +│ progress/done_frac  │ 0.206897  │ +│ reward/total  │ 0.677734  │ +│ skyrl.ai/grad_norm  │ 4522.047103 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.164687  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 12  │ +│ progress/done_frac  │ 0.224138  │ +│ reward/total  │ 0.622559  │ +│ 
skyrl.ai/grad_norm  │ 4453.164717 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.606588  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 13  │ +│ progress/done_frac  │ 0.241379  │ +│ reward/total  │ 0.668457  │ +│ skyrl.ai/grad_norm  │ 4064.819307 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.253005  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 14  │ +│ progress/done_frac  │ 0.258621  │ +│ reward/total  │ 0.635742  │ +│ skyrl.ai/grad_norm  │ 4596.063751 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 69.474511  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 15  │ +│ progress/done_frac  │ 0.275862  │ +│ reward/total  │ 0.718750  │ +│ skyrl.ai/grad_norm  │ 4935.121072 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.929768  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 16  │ +│ progress/done_frac  │ 0.293103  │ +│ reward/total  │ 0.740723  │ +│ skyrl.ai/grad_norm  │ 4247.624277 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.258889  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 17  │ +│ progress/done_frac  │ 0.310345  │ +│ reward/total  │ 0.705566  │ +│ skyrl.ai/grad_norm  │ 4071.461163 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.827817  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 18  │ +│ progress/done_frac  │ 0.327586  │ +│ reward/total  │ 0.689941  │ +│ skyrl.ai/grad_norm  │ 3900.221019 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.241114  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  
│ +│ progress/batch  │ 19  │ +│ progress/done_frac  │ 0.344828  │ +│ reward/total  │ 0.720703  │ +│ skyrl.ai/grad_norm  │ 4153.697630 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.975040  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_33022fca/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 20  │ +│ progress/done_frac  │ 0.362069  │ +│ reward/total  │ 0.729980  │ +│ skyrl.ai/grad_norm  │ 3834.269292 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.836580  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 21  │ +│ progress/done_frac  │ 0.379310  │ +│ reward/total  │ 0.731934  │ +│ skyrl.ai/grad_norm  │ 4056.313967 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.094682  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 22  │ +│ progress/done_frac  │ 0.396552  │ +│ reward/total  │ 0.743652  │ +│ skyrl.ai/grad_norm  │ 4249.544681 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.107702  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 23  │ +│ progress/done_frac  │ 0.413793  │ +│ reward/total  │ 0.750977  │ +│ skyrl.ai/grad_norm  │ 3919.666567 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.815610  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 24  │ +│ progress/done_frac  │ 0.431034  │ +│ reward/total  │ 0.668457  │ +│ skyrl.ai/grad_norm  │ 4038.148462 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.941240  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 25  │ +│ progress/done_frac  │ 0.448276  │ +│ reward/total  │ 0.745117  │ +│ skyrl.ai/grad_norm  │ 4261.364570 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.611496  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote 
metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 26  │ +│ progress/done_frac  │ 0.465517  │ +│ reward/total  │ 0.730469  │ +│ skyrl.ai/grad_norm  │ 3908.385472 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.793354  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 27  │ +│ progress/done_frac  │ 0.482759  │ +│ reward/total  │ 0.728516  │ +│ skyrl.ai/grad_norm  │ 4198.391835 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.184751  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 28  │ +│ progress/done_frac  │ 0.500000  │ +│ reward/total  │ 0.759766  │ +│ skyrl.ai/grad_norm  │ 3855.957858 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.562127  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 29  │ +│ progress/done_frac  │ 0.517241  │ +│ reward/total  │ 0.775391  │ +│ skyrl.ai/grad_norm  │ 3629.153483 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.316827  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 30  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 30  │ +│ progress/done_frac  │ 0.534483  │ +│ reward/total  │ 0.742188  │ +│ skyrl.ai/grad_norm  │ 4626.587295 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.431585  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 31  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 31  │ +│ progress/done_frac  │ 0.551724  │ +│ reward/total  │ 0.779785  │ +│ skyrl.ai/grad_norm  │ 3172.986763 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 61.603456  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 32  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 32  │ +│ progress/done_frac  │ 0.568966  │ +│ reward/total  │ 0.664551  │ +│ skyrl.ai/grad_norm  │ 4532.925766 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 
70.215890  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 33  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 33  │ +│ progress/done_frac  │ 0.586207  │ +│ reward/total  │ 0.782227  │ +│ skyrl.ai/grad_norm  │ 4165.589514 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.486906  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 34  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 34  │ +│ progress/done_frac  │ 0.603448  │ +│ reward/total  │ 0.710449  │ +│ skyrl.ai/grad_norm  │ 4447.996853 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 73.745564  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 35  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 35  │ +│ progress/done_frac  │ 0.620690  │ +│ reward/total  │ 0.719238  │ +│ skyrl.ai/grad_norm  │ 4060.280532 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.713121  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 36  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 36  │ +│ progress/done_frac  │ 0.637931  │ +│ reward/total  │ 0.728027  │ +│ skyrl.ai/grad_norm  │ 4412.181547 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.371590  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 37  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 37  │ +│ progress/done_frac  │ 0.655172  │ +│ reward/total  │ 0.740723  │ +│ skyrl.ai/grad_norm  │ 3785.710765 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.436951  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 38  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 38  │ +│ progress/done_frac  │ 0.672414  │ +│ reward/total  │ 0.696289  │ +│ skyrl.ai/grad_norm  │ 4860.556141 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 69.290205  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 39  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 39  │ +│ progress/done_frac  │ 0.689655  │ +│ reward/total  │ 0.774902 
 │ +│ skyrl.ai/grad_norm  │ 4201.451892 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.281941  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_33022fca/weights/000040'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 40  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 40  │ +│ progress/done_frac  │ 0.706897  │ +│ reward/total  │ 0.736816  │ +│ skyrl.ai/grad_norm  │ 4512.850097 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.527777  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 41  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 41  │ +│ progress/done_frac  │ 0.724138  │ +│ reward/total  │ 0.736816  │ +│ skyrl.ai/grad_norm  │ 4640.554924 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.977870  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 42  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 42  │ +│ progress/done_frac  │ 0.741379  │ +│ reward/total  │ 0.731445  │ +│ skyrl.ai/grad_norm  │ 4721.049248 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 68.467704  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 43  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 43  │ +│ progress/done_frac  │ 0.758621  │ +│ reward/total  │ 0.703613  │ +│ skyrl.ai/grad_norm  │ 4301.910738 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.849135  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 44  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 44  │ +│ progress/done_frac  │ 0.775862  │ +│ reward/total  │ 0.808105  │ +│ skyrl.ai/grad_norm  │ 4144.882387 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.650478  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 45  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 45  │ +│ progress/done_frac  │ 0.793103  │ +│ reward/total  │ 0.785645  │ +│ skyrl.ai/grad_norm  │ 4613.070344 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.609451  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 46  
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 46  │ +│ progress/done_frac  │ 0.810345  │ +│ reward/total  │ 0.735840  │ +│ skyrl.ai/grad_norm  │ 4161.312293 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.403885  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 47  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 47  │ +│ progress/done_frac  │ 0.827586  │ +│ reward/total  │ 0.793945  │ +│ skyrl.ai/grad_norm  │ 3778.356653 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 63.203141  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 48  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 48  │ +│ progress/done_frac  │ 0.844828  │ +│ reward/total  │ 0.770508  │ +│ skyrl.ai/grad_norm  │ 4912.503639 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.865016  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 49  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 49  │ +│ progress/done_frac  │ 0.862069  │ +│ reward/total  │ 0.699707  │ +│ skyrl.ai/grad_norm  │ 4550.238895 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.346623  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 50  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 50  │ +│ progress/done_frac  │ 0.879310  │ +│ reward/total  │ 0.737793  │ +│ skyrl.ai/grad_norm  │ 5372.916154 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.003319  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 51  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 51  │ +│ progress/done_frac  │ 0.896552  │ +│ reward/total  │ 0.731934  │ +│ skyrl.ai/grad_norm  │ 4387.628517 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 64.909327  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 52  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 52  │ +│ progress/done_frac  │ 0.913793  │ +│ reward/total  │ 0.691406  │ +│ skyrl.ai/grad_norm  │ 4861.483930 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.043161  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] 
Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 53  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 53  │ +│ progress/done_frac  │ 0.931034  │ +│ reward/total  │ 0.809082  │ +│ skyrl.ai/grad_norm  │ 3259.830210 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 62.288203  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 54  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 54  │ +│ progress/done_frac  │ 0.948276  │ +│ reward/total  │ 0.718750  │ +│ skyrl.ai/grad_norm  │ 4360.203894 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 66.404381  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 55  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 55  │ +│ progress/done_frac  │ 0.965517  │ +│ reward/total  │ 0.782715  │ +│ skyrl.ai/grad_norm  │ 4557.082619 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 65.916927  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 56  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 56  │ +│ progress/done_frac  │ 0.982759  │ +│ reward/total  │ 0.790527  │ +│ skyrl.ai/grad_norm  │ 3941.068891 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.037578  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_g.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 57  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 57  │ +│ progress/done_frac  │ 1.000000  │ +│ reward/total  │ 0.739258  │ +│ skyrl.ai/grad_norm  │ 4977.277569 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 67.706937  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_33022fca/weights/final', 'sampler_path': 'tinker://model_33022fca/final'} +__main__:257 [INFO] Training completed diff --git a/tests/tinker/smoke_logs/rl_loop_g.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_g.log/metrics.jsonl new file mode 100644 index 0000000000..069f5b8f83 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_g.log/metrics.jsonl @@ -0,0 +1,58 @@ +{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 3047.044305552514, "skyrl.ai/learning_rate": 4e-05, "time/total": 81.935302734375, "reward/total": 0.01904296875} +{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 2863.267713644674, "skyrl.ai/learning_rate": 4e-05, "time/total": 79.75608515739441, "reward/total": 0.03466796875} +{"step": 2, "progress/batch": 2, "optim/lr": 
4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 3862.2870167816372, "skyrl.ai/learning_rate": 4e-05, "time/total": 84.18284177780151, "reward/total": 0.080078125} +{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 8041.708524934238, "skyrl.ai/learning_rate": 4e-05, "time/total": 91.6472909450531, "reward/total": 0.2578125} +{"step": 4, "progress/batch": 4, "optim/lr": 4e-05, "progress/done_frac": 0.08620689655172414, "skyrl.ai/grad_norm": 4885.541730453236, "skyrl.ai/learning_rate": 4e-05, "time/total": 76.65673303604126, "reward/total": 0.54736328125} +{"step": 5, "progress/batch": 5, "optim/lr": 4e-05, "progress/done_frac": 0.10344827586206896, "skyrl.ai/grad_norm": 4959.070880719492, "skyrl.ai/learning_rate": 4e-05, "time/total": 70.4304928779602, "reward/total": 0.60302734375} +{"step": 6, "progress/batch": 6, "optim/lr": 4e-05, "progress/done_frac": 0.1206896551724138, "skyrl.ai/grad_norm": 3898.757494382024, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.93193435668945, "reward/total": 0.56884765625} +{"step": 7, "progress/batch": 7, "optim/lr": 4e-05, "progress/done_frac": 0.13793103448275862, "skyrl.ai/grad_norm": 4328.29689369849, "skyrl.ai/learning_rate": 4e-05, "time/total": 59.11707139015198, "reward/total": 0.6005859375} +{"step": 8, "progress/batch": 8, "optim/lr": 4e-05, "progress/done_frac": 0.15517241379310345, "skyrl.ai/grad_norm": 4568.654287643135, "skyrl.ai/learning_rate": 4e-05, "time/total": 62.65477657318115, "reward/total": 0.53515625} +{"step": 9, "progress/batch": 9, "optim/lr": 4e-05, "progress/done_frac": 0.1724137931034483, "skyrl.ai/grad_norm": 4046.557672886919, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.53359603881836, "reward/total": 0.61767578125} +{"step": 10, "progress/batch": 10, "optim/lr": 4e-05, "progress/done_frac": 0.1896551724137931, "skyrl.ai/grad_norm": 3957.613674930892, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.59240198135376, "reward/total": 0.6259765625} +{"step": 11, "progress/batch": 11, "optim/lr": 4e-05, "progress/done_frac": 0.20689655172413793, "skyrl.ai/grad_norm": 4522.047102806427, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.16468691825867, "reward/total": 0.677734375} +{"step": 12, "progress/batch": 12, "optim/lr": 4e-05, "progress/done_frac": 0.22413793103448276, "skyrl.ai/grad_norm": 4453.164717366741, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.6065878868103, "reward/total": 0.62255859375} +{"step": 13, "progress/batch": 13, "optim/lr": 4e-05, "progress/done_frac": 0.2413793103448276, "skyrl.ai/grad_norm": 4064.8193071771348, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.25300526618958, "reward/total": 0.66845703125} +{"step": 14, "progress/batch": 14, "optim/lr": 4e-05, "progress/done_frac": 0.25862068965517243, "skyrl.ai/grad_norm": 4596.063750645763, "skyrl.ai/learning_rate": 4e-05, "time/total": 69.47451114654541, "reward/total": 0.6357421875} +{"step": 15, "progress/batch": 15, "optim/lr": 4e-05, "progress/done_frac": 0.27586206896551724, "skyrl.ai/grad_norm": 4935.121072476338, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.92976808547974, "reward/total": 0.71875} +{"step": 16, "progress/batch": 16, "optim/lr": 4e-05, "progress/done_frac": 0.29310344827586204, "skyrl.ai/grad_norm": 4247.624277169533, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.2588894367218, "reward/total": 0.74072265625} +{"step": 17, "progress/batch": 17, "optim/lr": 4e-05, "progress/done_frac": 0.3103448275862069, 
"skyrl.ai/grad_norm": 4071.461162776823, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.82781744003296, "reward/total": 0.70556640625} +{"step": 18, "progress/batch": 18, "optim/lr": 4e-05, "progress/done_frac": 0.3275862068965517, "skyrl.ai/grad_norm": 3900.2210193782607, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.241114139556885, "reward/total": 0.68994140625} +{"step": 19, "progress/batch": 19, "optim/lr": 4e-05, "progress/done_frac": 0.3448275862068966, "skyrl.ai/grad_norm": 4153.697629823336, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.97503972053528, "reward/total": 0.720703125} +{"step": 20, "progress/batch": 20, "optim/lr": 4e-05, "progress/done_frac": 0.3620689655172414, "skyrl.ai/grad_norm": 3834.269291533916, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.83657956123352, "reward/total": 0.72998046875} +{"step": 21, "progress/batch": 21, "optim/lr": 4e-05, "progress/done_frac": 0.3793103448275862, "skyrl.ai/grad_norm": 4056.3139671381455, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.09468173980713, "reward/total": 0.73193359375} +{"step": 22, "progress/batch": 22, "optim/lr": 4e-05, "progress/done_frac": 0.39655172413793105, "skyrl.ai/grad_norm": 4249.54468149236, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.10770153999329, "reward/total": 0.74365234375} +{"step": 23, "progress/batch": 23, "optim/lr": 4e-05, "progress/done_frac": 0.41379310344827586, "skyrl.ai/grad_norm": 3919.6665674518795, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.81561017036438, "reward/total": 0.7509765625} +{"step": 24, "progress/batch": 24, "optim/lr": 4e-05, "progress/done_frac": 0.43103448275862066, "skyrl.ai/grad_norm": 4038.148461857241, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.94123959541321, "reward/total": 0.66845703125} +{"step": 25, "progress/batch": 25, "optim/lr": 4e-05, "progress/done_frac": 0.4482758620689655, "skyrl.ai/grad_norm": 4261.364570181716, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.61149597167969, "reward/total": 0.7451171875} +{"step": 26, "progress/batch": 26, "optim/lr": 4e-05, "progress/done_frac": 0.46551724137931033, "skyrl.ai/grad_norm": 3908.3854722890374, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.79335403442383, "reward/total": 0.73046875} +{"step": 27, "progress/batch": 27, "optim/lr": 4e-05, "progress/done_frac": 0.4827586206896552, "skyrl.ai/grad_norm": 4198.391834976816, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.1847505569458, "reward/total": 0.728515625} +{"step": 28, "progress/batch": 28, "optim/lr": 4e-05, "progress/done_frac": 0.5, "skyrl.ai/grad_norm": 3855.95785765353, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.56212735176086, "reward/total": 0.759765625} +{"step": 29, "progress/batch": 29, "optim/lr": 4e-05, "progress/done_frac": 0.5172413793103449, "skyrl.ai/grad_norm": 3629.153482563117, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.31682705879211, "reward/total": 0.775390625} +{"step": 30, "progress/batch": 30, "optim/lr": 4e-05, "progress/done_frac": 0.5344827586206896, "skyrl.ai/grad_norm": 4626.5872951885385, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.43158459663391, "reward/total": 0.7421875} +{"step": 31, "progress/batch": 31, "optim/lr": 4e-05, "progress/done_frac": 0.5517241379310345, "skyrl.ai/grad_norm": 3172.9867632878645, "skyrl.ai/learning_rate": 4e-05, "time/total": 61.603456258773804, "reward/total": 0.77978515625} +{"step": 32, "progress/batch": 32, "optim/lr": 4e-05, "progress/done_frac": 0.5689655172413793, "skyrl.ai/grad_norm": 4532.925765992644, 
"skyrl.ai/learning_rate": 4e-05, "time/total": 70.21588969230652, "reward/total": 0.66455078125} +{"step": 33, "progress/batch": 33, "optim/lr": 4e-05, "progress/done_frac": 0.5862068965517241, "skyrl.ai/grad_norm": 4165.589514102416, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.48690557479858, "reward/total": 0.7822265625} +{"step": 34, "progress/batch": 34, "optim/lr": 4e-05, "progress/done_frac": 0.603448275862069, "skyrl.ai/grad_norm": 4447.996852516872, "skyrl.ai/learning_rate": 4e-05, "time/total": 73.74556350708008, "reward/total": 0.71044921875} +{"step": 35, "progress/batch": 35, "optim/lr": 4e-05, "progress/done_frac": 0.6206896551724138, "skyrl.ai/grad_norm": 4060.280532180012, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.71312117576599, "reward/total": 0.71923828125} +{"step": 36, "progress/batch": 36, "optim/lr": 4e-05, "progress/done_frac": 0.6379310344827587, "skyrl.ai/grad_norm": 4412.181546582144, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.37158989906311, "reward/total": 0.72802734375} +{"step": 37, "progress/batch": 37, "optim/lr": 4e-05, "progress/done_frac": 0.6551724137931034, "skyrl.ai/grad_norm": 3785.7107654970155, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.43695116043091, "reward/total": 0.74072265625} +{"step": 38, "progress/batch": 38, "optim/lr": 4e-05, "progress/done_frac": 0.6724137931034483, "skyrl.ai/grad_norm": 4860.556141019256, "skyrl.ai/learning_rate": 4e-05, "time/total": 69.2902045249939, "reward/total": 0.6962890625} +{"step": 39, "progress/batch": 39, "optim/lr": 4e-05, "progress/done_frac": 0.6896551724137931, "skyrl.ai/grad_norm": 4201.45189190594, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.281941175460815, "reward/total": 0.77490234375} +{"step": 40, "progress/batch": 40, "optim/lr": 4e-05, "progress/done_frac": 0.7068965517241379, "skyrl.ai/grad_norm": 4512.850097222375, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.5277771949768, "reward/total": 0.73681640625} +{"step": 41, "progress/batch": 41, "optim/lr": 4e-05, "progress/done_frac": 0.7241379310344828, "skyrl.ai/grad_norm": 4640.554923713326, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.97787046432495, "reward/total": 0.73681640625} +{"step": 42, "progress/batch": 42, "optim/lr": 4e-05, "progress/done_frac": 0.7413793103448276, "skyrl.ai/grad_norm": 4721.049247783802, "skyrl.ai/learning_rate": 4e-05, "time/total": 68.46770405769348, "reward/total": 0.7314453125} +{"step": 43, "progress/batch": 43, "optim/lr": 4e-05, "progress/done_frac": 0.7586206896551724, "skyrl.ai/grad_norm": 4301.910738265033, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.84913516044617, "reward/total": 0.70361328125} +{"step": 44, "progress/batch": 44, "optim/lr": 4e-05, "progress/done_frac": 0.7758620689655172, "skyrl.ai/grad_norm": 4144.882386751161, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.65047812461853, "reward/total": 0.80810546875} +{"step": 45, "progress/batch": 45, "optim/lr": 4e-05, "progress/done_frac": 0.7931034482758621, "skyrl.ai/grad_norm": 4613.070344141741, "skyrl.ai/learning_rate": 4e-05, "time/total": 63.60945105552673, "reward/total": 0.78564453125} +{"step": 46, "progress/batch": 46, "optim/lr": 4e-05, "progress/done_frac": 0.8103448275862069, "skyrl.ai/grad_norm": 4161.3122930152695, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.40388464927673, "reward/total": 0.73583984375} +{"step": 47, "progress/batch": 47, "optim/lr": 4e-05, "progress/done_frac": 0.8275862068965517, "skyrl.ai/grad_norm": 3778.3566533613525, "skyrl.ai/learning_rate": 4e-05, 
"time/total": 63.2031409740448, "reward/total": 0.7939453125} +{"step": 48, "progress/batch": 48, "optim/lr": 4e-05, "progress/done_frac": 0.8448275862068966, "skyrl.ai/grad_norm": 4912.503638675497, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.86501574516296, "reward/total": 0.7705078125} +{"step": 49, "progress/batch": 49, "optim/lr": 4e-05, "progress/done_frac": 0.8620689655172413, "skyrl.ai/grad_norm": 4550.238894827391, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.3466227054596, "reward/total": 0.69970703125} +{"step": 50, "progress/batch": 50, "optim/lr": 4e-05, "progress/done_frac": 0.8793103448275862, "skyrl.ai/grad_norm": 5372.916154194108, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.0033187866211, "reward/total": 0.73779296875} +{"step": 51, "progress/batch": 51, "optim/lr": 4e-05, "progress/done_frac": 0.896551724137931, "skyrl.ai/grad_norm": 4387.628516636293, "skyrl.ai/learning_rate": 4e-05, "time/total": 64.9093267917633, "reward/total": 0.73193359375} +{"step": 52, "progress/batch": 52, "optim/lr": 4e-05, "progress/done_frac": 0.9137931034482759, "skyrl.ai/grad_norm": 4861.48392983048, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.04316091537476, "reward/total": 0.69140625} +{"step": 53, "progress/batch": 53, "optim/lr": 4e-05, "progress/done_frac": 0.9310344827586207, "skyrl.ai/grad_norm": 3259.830210302371, "skyrl.ai/learning_rate": 4e-05, "time/total": 62.28820252418518, "reward/total": 0.80908203125} +{"step": 54, "progress/batch": 54, "optim/lr": 4e-05, "progress/done_frac": 0.9482758620689655, "skyrl.ai/grad_norm": 4360.2038943150355, "skyrl.ai/learning_rate": 4e-05, "time/total": 66.40438055992126, "reward/total": 0.71875} +{"step": 55, "progress/batch": 55, "optim/lr": 4e-05, "progress/done_frac": 0.9655172413793104, "skyrl.ai/grad_norm": 4557.082619395878, "skyrl.ai/learning_rate": 4e-05, "time/total": 65.91692686080933, "reward/total": 0.78271484375} +{"step": 56, "progress/batch": 56, "optim/lr": 4e-05, "progress/done_frac": 0.9827586206896551, "skyrl.ai/grad_norm": 3941.06889054226, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.03757810592651, "reward/total": 0.79052734375} +{"step": 57, "progress/batch": 57, "optim/lr": 4e-05, "progress/done_frac": 1.0, "skyrl.ai/grad_norm": 4977.27756911346, "skyrl.ai/learning_rate": 4e-05, "time/total": 67.70693683624268, "reward/total": 0.7392578125} diff --git a/tests/tinker/smoke_logs/rl_loop_h.log/checkpoints.jsonl b/tests/tinker/smoke_logs/rl_loop_h.log/checkpoints.jsonl new file mode 100644 index 0000000000..03afd02453 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_h.log/checkpoints.jsonl @@ -0,0 +1 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_388eefaf/weights/000020"} diff --git a/tests/tinker/smoke_logs/rl_loop_h.log/code.diff b/tests/tinker/smoke_logs/rl_loop_h.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_h.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/rl_loop_h.log/config.json b/tests/tinker/smoke_logs/rl_loop_h.log/config.json new file mode 100644 index 0000000000..fee60c6b68 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_h.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/rl_loop_h.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "group_size": 16, + "learning_rate": 4e-05, + 
"lora_rank": 32, + "save_every": 20, + "max_tokens": 256, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/rl_loop_h.log/logs.log b/tests/tinker/smoke_logs/rl_loop_h.log/logs.log new file mode 100644 index 0000000000..bf5aebff3f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_h.log/logs.log @@ -0,0 +1,430 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_h.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_h.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_70c19c3c +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_h.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_h.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_388eefaf +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.017578  │ +│ skyrl.ai/grad_norm  │ 2986.160076 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 199.903373  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.027832  │ +│ skyrl.ai/grad_norm  │ 3232.538322 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 141.800122  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.028809  │ +│ skyrl.ai/grad_norm  │ 2605.609526 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 98.730768  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.066895  │ +│ skyrl.ai/grad_norm  │ 2888.340873 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 148.096477  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ 
progress/batch  │ 4  │ +│ progress/done_frac  │ 0.086207  │ +│ reward/total  │ 0.193848  │ +│ skyrl.ai/grad_norm  │ 6988.557505 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 218.452928  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 5  │ +│ progress/done_frac  │ 0.103448  │ +│ reward/total  │ 0.339355  │ +│ skyrl.ai/grad_norm  │ 9294.383680 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 162.523287  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 6  │ +│ progress/done_frac  │ 0.120690  │ +│ reward/total  │ 0.389648  │ +│ skyrl.ai/grad_norm  │ 7166.919282 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 165.164537  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 7  │ +│ progress/done_frac  │ 0.137931  │ +│ reward/total  │ 0.520996  │ +│ skyrl.ai/grad_norm  │ 7366.206351 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 152.435220  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 8  │ +│ progress/done_frac  │ 0.155172  │ +│ reward/total  │ 0.509277  │ +│ skyrl.ai/grad_norm  │ 5313.993414 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 140.352420  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 9  │ +│ progress/done_frac  │ 0.172414  │ +│ reward/total  │ 0.583984  │ +│ skyrl.ai/grad_norm  │ 4483.872210 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 124.565117  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 10  │ +│ progress/done_frac  │ 0.189655  │ +│ reward/total  │ 0.573730  │ +│ skyrl.ai/grad_norm  │ 4581.698157 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 131.392579  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 11  │ +│ progress/done_frac  │ 0.206897  │ +│ reward/total  │ 0.619629  │ +│ skyrl.ai/grad_norm  │ 5430.633481 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 122.801665  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 12  │ +│ progress/done_frac  │ 0.224138  │ +│ reward/total  │ 0.562012  │ +│ skyrl.ai/grad_norm  │ 4113.346083 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 128.322746  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 13  │ +│ progress/done_frac  │ 0.241379  │ +│ reward/total  │ 0.614746  │ +│ skyrl.ai/grad_norm  │ 5131.263392 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 121.569723  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 14  │ +│ progress/done_frac  │ 0.258621  │ +│ reward/total  │ 0.566406  │ +│ skyrl.ai/grad_norm  │ 5241.210738 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 128.441487  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 15  │ +│ progress/done_frac  │ 0.275862  │ +│ reward/total  │ 0.619141  │ +│ skyrl.ai/grad_norm  │ 4036.870446 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 129.937497  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 16  │ +│ progress/done_frac  │ 0.293103  │ +│ reward/total  │ 0.637695  │ +│ skyrl.ai/grad_norm  │ 5042.480342 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 121.023461  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 17  │ +│ progress/done_frac  │ 0.310345  │ +│ reward/total  │ 0.656250  │ +│ skyrl.ai/grad_norm  │ 4292.369276 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 135.823786  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 
[INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 18  │ +│ progress/done_frac  │ 0.327586  │ +│ reward/total  │ 0.619629  │ +│ skyrl.ai/grad_norm  │ 4199.793566 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 112.505669  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 19  │ +│ progress/done_frac  │ 0.344828  │ +│ reward/total  │ 0.663086  │ +│ skyrl.ai/grad_norm  │ 4492.620616 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 133.195554  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_388eefaf/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 20  │ +│ progress/done_frac  │ 0.362069  │ +│ reward/total  │ 0.673340  │ +│ skyrl.ai/grad_norm  │ 5127.105421 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 133.592353  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 21  │ +│ progress/done_frac  │ 0.379310  │ +│ reward/total  │ 0.697266  │ +│ skyrl.ai/grad_norm  │ 4158.499729 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 110.293463  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 22  │ +│ progress/done_frac  │ 0.396552  │ +│ reward/total  │ 0.699707  │ +│ skyrl.ai/grad_norm  │ 5291.736199 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 127.729033  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 23  │ +│ progress/done_frac  │ 0.413793  │ +│ reward/total  │ 0.701172  │ +│ skyrl.ai/grad_norm  │ 5152.130239 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 118.040159  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 24  │ +│ progress/done_frac  │ 
0.431034  │ +│ reward/total  │ 0.630859  │ +│ skyrl.ai/grad_norm  │ 4435.361090 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 136.918935  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 25  │ +│ progress/done_frac  │ 0.448276  │ +│ reward/total  │ 0.686523  │ +│ skyrl.ai/grad_norm  │ 5201.739901 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 125.109281  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 26  │ +│ progress/done_frac  │ 0.465517  │ +│ reward/total  │ 0.705566  │ +│ skyrl.ai/grad_norm  │ 4135.996132 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 124.709949  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 27  │ +│ progress/done_frac  │ 0.482759  │ +│ reward/total  │ 0.729980  │ +│ skyrl.ai/grad_norm  │ 4415.818384 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 131.896928  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 28  │ +│ progress/done_frac  │ 0.500000  │ +│ reward/total  │ 0.760742  │ +│ skyrl.ai/grad_norm  │ 4766.756339 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 133.826097  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_h.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 29  │ +│ progress/done_frac  │ 0.517241  │ +│ reward/total  │ 0.760254  │ +│ skyrl.ai/grad_norm  │ 3994.311831 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 126.998264  │ +└────────────────────────┴─────────────┘ diff --git a/tests/tinker/smoke_logs/rl_loop_h.log/metrics.jsonl b/tests/tinker/smoke_logs/rl_loop_h.log/metrics.jsonl new file mode 100644 index 0000000000..9e72712816 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_h.log/metrics.jsonl @@ -0,0 +1,30 @@ +{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2986.160076084335, "skyrl.ai/learning_rate": 4e-05, "time/total": 199.90337252616882, "reward/total": 0.017578125} +{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 3232.5383215052534, "skyrl.ai/learning_rate": 4e-05, "time/total": 141.80012226104736, "reward/total": 0.02783203125} +{"step": 2, 
"progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 2605.6095256196772, "skyrl.ai/learning_rate": 4e-05, "time/total": 98.73076844215393, "reward/total": 0.02880859375} +{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 2888.3408732350135, "skyrl.ai/learning_rate": 4e-05, "time/total": 148.09647703170776, "reward/total": 0.06689453125} +{"step": 4, "progress/batch": 4, "optim/lr": 4e-05, "progress/done_frac": 0.08620689655172414, "skyrl.ai/grad_norm": 6988.557504950503, "skyrl.ai/learning_rate": 4e-05, "time/total": 218.45292806625366, "reward/total": 0.19384765625} +{"step": 5, "progress/batch": 5, "optim/lr": 4e-05, "progress/done_frac": 0.10344827586206896, "skyrl.ai/grad_norm": 9294.383680481455, "skyrl.ai/learning_rate": 4e-05, "time/total": 162.52328658103943, "reward/total": 0.33935546875} +{"step": 6, "progress/batch": 6, "optim/lr": 4e-05, "progress/done_frac": 0.1206896551724138, "skyrl.ai/grad_norm": 7166.9192823695175, "skyrl.ai/learning_rate": 4e-05, "time/total": 165.1645369529724, "reward/total": 0.3896484375} +{"step": 7, "progress/batch": 7, "optim/lr": 4e-05, "progress/done_frac": 0.13793103448275862, "skyrl.ai/grad_norm": 7366.206350625809, "skyrl.ai/learning_rate": 4e-05, "time/total": 152.43522000312805, "reward/total": 0.52099609375} +{"step": 8, "progress/batch": 8, "optim/lr": 4e-05, "progress/done_frac": 0.15517241379310345, "skyrl.ai/grad_norm": 5313.9934136203065, "skyrl.ai/learning_rate": 4e-05, "time/total": 140.35242009162903, "reward/total": 0.50927734375} +{"step": 9, "progress/batch": 9, "optim/lr": 4e-05, "progress/done_frac": 0.1724137931034483, "skyrl.ai/grad_norm": 4483.8722104895005, "skyrl.ai/learning_rate": 4e-05, "time/total": 124.56511735916138, "reward/total": 0.583984375} +{"step": 10, "progress/batch": 10, "optim/lr": 4e-05, "progress/done_frac": 0.1896551724137931, "skyrl.ai/grad_norm": 4581.698156797324, "skyrl.ai/learning_rate": 4e-05, "time/total": 131.39257907867432, "reward/total": 0.57373046875} +{"step": 11, "progress/batch": 11, "optim/lr": 4e-05, "progress/done_frac": 0.20689655172413793, "skyrl.ai/grad_norm": 5430.633480543499, "skyrl.ai/learning_rate": 4e-05, "time/total": 122.80166530609131, "reward/total": 0.61962890625} +{"step": 12, "progress/batch": 12, "optim/lr": 4e-05, "progress/done_frac": 0.22413793103448276, "skyrl.ai/grad_norm": 4113.346083178511, "skyrl.ai/learning_rate": 4e-05, "time/total": 128.32274556159973, "reward/total": 0.56201171875} +{"step": 13, "progress/batch": 13, "optim/lr": 4e-05, "progress/done_frac": 0.2413793103448276, "skyrl.ai/grad_norm": 5131.263392187152, "skyrl.ai/learning_rate": 4e-05, "time/total": 121.56972336769104, "reward/total": 0.61474609375} +{"step": 14, "progress/batch": 14, "optim/lr": 4e-05, "progress/done_frac": 0.25862068965517243, "skyrl.ai/grad_norm": 5241.210737987932, "skyrl.ai/learning_rate": 4e-05, "time/total": 128.4414873123169, "reward/total": 0.56640625} +{"step": 15, "progress/batch": 15, "optim/lr": 4e-05, "progress/done_frac": 0.27586206896551724, "skyrl.ai/grad_norm": 4036.870446273945, "skyrl.ai/learning_rate": 4e-05, "time/total": 129.9374966621399, "reward/total": 0.619140625} +{"step": 16, "progress/batch": 16, "optim/lr": 4e-05, "progress/done_frac": 0.29310344827586204, "skyrl.ai/grad_norm": 5042.480342053898, "skyrl.ai/learning_rate": 4e-05, "time/total": 121.02346110343933, "reward/total": 0.6376953125} +{"step": 17, "progress/batch": 17, 
"optim/lr": 4e-05, "progress/done_frac": 0.3103448275862069, "skyrl.ai/grad_norm": 4292.369275819591, "skyrl.ai/learning_rate": 4e-05, "time/total": 135.82378602027893, "reward/total": 0.65625} +{"step": 18, "progress/batch": 18, "optim/lr": 4e-05, "progress/done_frac": 0.3275862068965517, "skyrl.ai/grad_norm": 4199.793566355375, "skyrl.ai/learning_rate": 4e-05, "time/total": 112.50566911697388, "reward/total": 0.61962890625} +{"step": 19, "progress/batch": 19, "optim/lr": 4e-05, "progress/done_frac": 0.3448275862068966, "skyrl.ai/grad_norm": 4492.620616076991, "skyrl.ai/learning_rate": 4e-05, "time/total": 133.19555377960205, "reward/total": 0.6630859375} +{"step": 20, "progress/batch": 20, "optim/lr": 4e-05, "progress/done_frac": 0.3620689655172414, "skyrl.ai/grad_norm": 5127.105421190401, "skyrl.ai/learning_rate": 4e-05, "time/total": 133.59235310554504, "reward/total": 0.67333984375} +{"step": 21, "progress/batch": 21, "optim/lr": 4e-05, "progress/done_frac": 0.3793103448275862, "skyrl.ai/grad_norm": 4158.499729469752, "skyrl.ai/learning_rate": 4e-05, "time/total": 110.2934627532959, "reward/total": 0.697265625} +{"step": 22, "progress/batch": 22, "optim/lr": 4e-05, "progress/done_frac": 0.39655172413793105, "skyrl.ai/grad_norm": 5291.736199018239, "skyrl.ai/learning_rate": 4e-05, "time/total": 127.72903275489807, "reward/total": 0.69970703125} +{"step": 23, "progress/batch": 23, "optim/lr": 4e-05, "progress/done_frac": 0.41379310344827586, "skyrl.ai/grad_norm": 5152.130239037053, "skyrl.ai/learning_rate": 4e-05, "time/total": 118.04015898704529, "reward/total": 0.701171875} +{"step": 24, "progress/batch": 24, "optim/lr": 4e-05, "progress/done_frac": 0.43103448275862066, "skyrl.ai/grad_norm": 4435.361090148129, "skyrl.ai/learning_rate": 4e-05, "time/total": 136.91893458366394, "reward/total": 0.630859375} +{"step": 25, "progress/batch": 25, "optim/lr": 4e-05, "progress/done_frac": 0.4482758620689655, "skyrl.ai/grad_norm": 5201.739901225358, "skyrl.ai/learning_rate": 4e-05, "time/total": 125.10928058624268, "reward/total": 0.6865234375} +{"step": 26, "progress/batch": 26, "optim/lr": 4e-05, "progress/done_frac": 0.46551724137931033, "skyrl.ai/grad_norm": 4135.996131526237, "skyrl.ai/learning_rate": 4e-05, "time/total": 124.70994901657104, "reward/total": 0.70556640625} +{"step": 27, "progress/batch": 27, "optim/lr": 4e-05, "progress/done_frac": 0.4827586206896552, "skyrl.ai/grad_norm": 4415.818383946514, "skyrl.ai/learning_rate": 4e-05, "time/total": 131.89692759513855, "reward/total": 0.72998046875} +{"step": 28, "progress/batch": 28, "optim/lr": 4e-05, "progress/done_frac": 0.5, "skyrl.ai/grad_norm": 4766.756339482857, "skyrl.ai/learning_rate": 4e-05, "time/total": 133.826096534729, "reward/total": 0.7607421875} +{"step": 29, "progress/batch": 29, "optim/lr": 4e-05, "progress/done_frac": 0.5172413793103449, "skyrl.ai/grad_norm": 3994.311830591097, "skyrl.ai/learning_rate": 4e-05, "time/total": 126.9982635974884, "reward/total": 0.76025390625} diff --git a/tests/tinker/smoke_logs/rl_loop_i.log/checkpoints.jsonl b/tests/tinker/smoke_logs/rl_loop_i.log/checkpoints.jsonl new file mode 100644 index 0000000000..f33bedb477 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_i.log/checkpoints.jsonl @@ -0,0 +1 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_8d63bf6f/weights/000020"} diff --git a/tests/tinker/smoke_logs/rl_loop_i.log/code.diff b/tests/tinker/smoke_logs/rl_loop_i.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ 
b/tests/tinker/smoke_logs/rl_loop_i.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/rl_loop_i.log/config.json b/tests/tinker/smoke_logs/rl_loop_i.log/config.json new file mode 100644 index 0000000000..ce74455801 --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_i.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/rl_loop_i.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "group_size": 16, + "learning_rate": 4e-05, + "lora_rank": 32, + "save_every": 20, + "max_tokens": 256, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/rl_loop_i.log/logs.log b/tests/tinker/smoke_logs/rl_loop_i.log/logs.log new file mode 100644 index 0000000000..08f955b1db --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_i.log/logs.log @@ -0,0 +1,430 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/rl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B lora_rank=32 log_path=/tmp/rl_loop_i.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/rl_loop_i.log +__main__:73 [INFO] Using renderer: qwen3 +__main__:76 [INFO] Loading dataset... +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_1f83fcde +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/rl_loop_i.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/rl_loop_i.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_8d63bf6f +__main__:121 [INFO] Training for 58 batches +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 0  │ +│ progress/done_frac  │ 0.017241  │ +│ reward/total  │ 0.016602  │ +│ skyrl.ai/grad_norm  │ 2500.282184 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 92.174990  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 1  │ +│ progress/done_frac  │ 0.034483  │ +│ reward/total  │ 0.028809  │ +│ skyrl.ai/grad_norm  │ 3717.477236 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 212.645627  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 2  │ +│ progress/done_frac  │ 0.051724  │ +│ reward/total  │ 0.026855  │ +│ skyrl.ai/grad_norm  │ 3123.280167 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 154.771824  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  
+┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 3  │ +│ progress/done_frac  │ 0.068966  │ +│ reward/total  │ 0.060059  │ +│ skyrl.ai/grad_norm  │ 5379.627311 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 151.328907  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 4  │ +│ progress/done_frac  │ 0.086207  │ +│ reward/total  │ 0.203613  │ +│ skyrl.ai/grad_norm  │ 7820.956975 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 165.573668  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 5  │ +│ progress/done_frac  │ 0.103448  │ +│ reward/total  │ 0.337891  │ +│ skyrl.ai/grad_norm  │ 9302.311541 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 161.446375  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 6  │ +│ progress/done_frac  │ 0.120690  │ +│ reward/total  │ 0.383789  │ +│ skyrl.ai/grad_norm  │ 9008.423613 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 158.094930  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 7  │ +│ progress/done_frac  │ 0.137931  │ +│ reward/total  │ 0.522461  │ +│ skyrl.ai/grad_norm  │ 6418.265498 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 150.219928  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 8  │ +│ progress/done_frac  │ 0.155172  │ +│ reward/total  │ 0.495117  │ +│ skyrl.ai/grad_norm  │ 5129.178102 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 130.450021  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 9  │ +│ progress/done_frac  │ 0.172414  │ +│ reward/total  │ 0.578125  │ +│ skyrl.ai/grad_norm  │ 5509.330994 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 126.062569  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote 
metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 10  │ +│ progress/done_frac  │ 0.189655  │ +│ reward/total  │ 0.578125  │ +│ skyrl.ai/grad_norm  │ 5093.188589 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 128.825426  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 11  │ +│ progress/done_frac  │ 0.206897  │ +│ reward/total  │ 0.601074  │ +│ skyrl.ai/grad_norm  │ 5185.033462 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 121.206484  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 12  │ +│ progress/done_frac  │ 0.224138  │ +│ reward/total  │ 0.561035  │ +│ skyrl.ai/grad_norm  │ 4451.828838 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 123.993158  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 13  │ +│ progress/done_frac  │ 0.241379  │ +│ reward/total  │ 0.602539  │ +│ skyrl.ai/grad_norm  │ 5261.430984 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 129.587321  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 14  │ +│ progress/done_frac  │ 0.258621  │ +│ reward/total  │ 0.562500  │ +│ skyrl.ai/grad_norm  │ 4213.133751 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 130.133244  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 15  │ +│ progress/done_frac  │ 0.275862  │ +│ reward/total  │ 0.609863  │ +│ skyrl.ai/grad_norm  │ 4431.559545 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 119.718005  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 16  │ +│ progress/done_frac  │ 0.293103  │ +│ reward/total  │ 0.634766  │ +│ skyrl.ai/grad_norm  │ 4251.883818 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total 
 │ 131.653447  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 17  │ +│ progress/done_frac  │ 0.310345  │ +│ reward/total  │ 0.654785  │ +│ skyrl.ai/grad_norm  │ 4336.820264 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 127.127751  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 18  │ +│ progress/done_frac  │ 0.327586  │ +│ reward/total  │ 0.601074  │ +│ skyrl.ai/grad_norm  │ 4390.180634 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 119.283090  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 19  │ +│ progress/done_frac  │ 0.344828  │ +│ reward/total  │ 0.656250  │ +│ skyrl.ai/grad_norm  │ 5138.570229 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 87.947657  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_8d63bf6f/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 20  │ +│ progress/done_frac  │ 0.362069  │ +│ reward/total  │ 0.652832  │ +│ skyrl.ai/grad_norm  │ 4787.003656 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 171.219129  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 21  │ +│ progress/done_frac  │ 0.379310  │ +│ reward/total  │ 0.664551  │ +│ skyrl.ai/grad_norm  │ 5361.994405 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 116.019150  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 22  │ +│ progress/done_frac  │ 0.396552  │ +│ reward/total  │ 0.705566  │ +│ skyrl.ai/grad_norm  │ 4536.794683 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 119.559554  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ 
+┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 23  │ +│ progress/done_frac  │ 0.413793  │ +│ reward/total  │ 0.707520  │ +│ skyrl.ai/grad_norm  │ 3972.942738 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 131.468635  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 24  │ +│ progress/done_frac  │ 0.431034  │ +│ reward/total  │ 0.628418  │ +│ skyrl.ai/grad_norm  │ 4835.404637 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 126.411474  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 25  │ +│ progress/done_frac  │ 0.448276  │ +│ reward/total  │ 0.701660  │ +│ skyrl.ai/grad_norm  │ 4455.773782 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 131.987321  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 26  │ +│ progress/done_frac  │ 0.465517  │ +│ reward/total  │ 0.688477  │ +│ skyrl.ai/grad_norm  │ 6312.734431 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 126.180908  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 27  │ +│ progress/done_frac  │ 0.482759  │ +│ reward/total  │ 0.716797  │ +│ skyrl.ai/grad_norm  │ 4511.742679 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 133.081262  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 28  │ +│ progress/done_frac  │ 0.500000  │ +│ reward/total  │ 0.748535  │ +│ skyrl.ai/grad_norm  │ 4176.975700 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 125.895678  │ +└────────────────────────┴─────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/rl_loop_i.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ optim/lr  │ 0.000040  │ +│ progress/batch  │ 29  │ +│ progress/done_frac  │ 0.517241  │ +│ reward/total  │ 0.733887  │ +│ skyrl.ai/grad_norm  │ 4329.369700 │ +│ skyrl.ai/learning_rate │ 0.000040  │ +│ time/total  │ 136.384873  │ +└────────────────────────┴─────────────┘ diff --git a/tests/tinker/smoke_logs/rl_loop_i.log/metrics.jsonl 
b/tests/tinker/smoke_logs/rl_loop_i.log/metrics.jsonl new file mode 100644 index 0000000000..e1ad5a641f --- /dev/null +++ b/tests/tinker/smoke_logs/rl_loop_i.log/metrics.jsonl @@ -0,0 +1,30 @@ +{"step": 0, "progress/batch": 0, "optim/lr": 4e-05, "progress/done_frac": 0.017241379310344827, "skyrl.ai/grad_norm": 2500.2821840744296, "skyrl.ai/learning_rate": 4e-05, "time/total": 92.17498970031738, "reward/total": 0.0166015625} +{"step": 1, "progress/batch": 1, "optim/lr": 4e-05, "progress/done_frac": 0.034482758620689655, "skyrl.ai/grad_norm": 3717.4772359760323, "skyrl.ai/learning_rate": 4e-05, "time/total": 212.64562678337097, "reward/total": 0.02880859375} +{"step": 2, "progress/batch": 2, "optim/lr": 4e-05, "progress/done_frac": 0.05172413793103448, "skyrl.ai/grad_norm": 3123.2801667477734, "skyrl.ai/learning_rate": 4e-05, "time/total": 154.7718243598938, "reward/total": 0.02685546875} +{"step": 3, "progress/batch": 3, "optim/lr": 4e-05, "progress/done_frac": 0.06896551724137931, "skyrl.ai/grad_norm": 5379.627310511389, "skyrl.ai/learning_rate": 4e-05, "time/total": 151.3289074897766, "reward/total": 0.06005859375} +{"step": 4, "progress/batch": 4, "optim/lr": 4e-05, "progress/done_frac": 0.08620689655172414, "skyrl.ai/grad_norm": 7820.956974693058, "skyrl.ai/learning_rate": 4e-05, "time/total": 165.57366752624512, "reward/total": 0.20361328125} +{"step": 5, "progress/batch": 5, "optim/lr": 4e-05, "progress/done_frac": 0.10344827586206896, "skyrl.ai/grad_norm": 9302.311540687078, "skyrl.ai/learning_rate": 4e-05, "time/total": 161.44637537002563, "reward/total": 0.337890625} +{"step": 6, "progress/batch": 6, "optim/lr": 4e-05, "progress/done_frac": 0.1206896551724138, "skyrl.ai/grad_norm": 9008.423613485325, "skyrl.ai/learning_rate": 4e-05, "time/total": 158.09492993354797, "reward/total": 0.3837890625} +{"step": 7, "progress/batch": 7, "optim/lr": 4e-05, "progress/done_frac": 0.13793103448275862, "skyrl.ai/grad_norm": 6418.265497780533, "skyrl.ai/learning_rate": 4e-05, "time/total": 150.21992802619934, "reward/total": 0.5224609375} +{"step": 8, "progress/batch": 8, "optim/lr": 4e-05, "progress/done_frac": 0.15517241379310345, "skyrl.ai/grad_norm": 5129.178101801496, "skyrl.ai/learning_rate": 4e-05, "time/total": 130.45002102851868, "reward/total": 0.4951171875} +{"step": 9, "progress/batch": 9, "optim/lr": 4e-05, "progress/done_frac": 0.1724137931034483, "skyrl.ai/grad_norm": 5509.330993868493, "skyrl.ai/learning_rate": 4e-05, "time/total": 126.06256937980652, "reward/total": 0.578125} +{"step": 10, "progress/batch": 10, "optim/lr": 4e-05, "progress/done_frac": 0.1896551724137931, "skyrl.ai/grad_norm": 5093.188588693727, "skyrl.ai/learning_rate": 4e-05, "time/total": 128.8254256248474, "reward/total": 0.578125} +{"step": 11, "progress/batch": 11, "optim/lr": 4e-05, "progress/done_frac": 0.20689655172413793, "skyrl.ai/grad_norm": 5185.033461801379, "skyrl.ai/learning_rate": 4e-05, "time/total": 121.2064836025238, "reward/total": 0.60107421875} +{"step": 12, "progress/batch": 12, "optim/lr": 4e-05, "progress/done_frac": 0.22413793103448276, "skyrl.ai/grad_norm": 4451.828837680083, "skyrl.ai/learning_rate": 4e-05, "time/total": 123.99315762519836, "reward/total": 0.56103515625} +{"step": 13, "progress/batch": 13, "optim/lr": 4e-05, "progress/done_frac": 0.2413793103448276, "skyrl.ai/grad_norm": 5261.430984057474, "skyrl.ai/learning_rate": 4e-05, "time/total": 129.5873212814331, "reward/total": 0.6025390625} +{"step": 14, "progress/batch": 14, "optim/lr": 4e-05, "progress/done_frac": 
0.25862068965517243, "skyrl.ai/grad_norm": 4213.13375054721, "skyrl.ai/learning_rate": 4e-05, "time/total": 130.1332437992096, "reward/total": 0.5625} +{"step": 15, "progress/batch": 15, "optim/lr": 4e-05, "progress/done_frac": 0.27586206896551724, "skyrl.ai/grad_norm": 4431.559544900643, "skyrl.ai/learning_rate": 4e-05, "time/total": 119.71800494194031, "reward/total": 0.60986328125} +{"step": 16, "progress/batch": 16, "optim/lr": 4e-05, "progress/done_frac": 0.29310344827586204, "skyrl.ai/grad_norm": 4251.883817791827, "skyrl.ai/learning_rate": 4e-05, "time/total": 131.6534469127655, "reward/total": 0.634765625} +{"step": 17, "progress/batch": 17, "optim/lr": 4e-05, "progress/done_frac": 0.3103448275862069, "skyrl.ai/grad_norm": 4336.820263741628, "skyrl.ai/learning_rate": 4e-05, "time/total": 127.1277506351471, "reward/total": 0.65478515625} +{"step": 18, "progress/batch": 18, "optim/lr": 4e-05, "progress/done_frac": 0.3275862068965517, "skyrl.ai/grad_norm": 4390.1806340969615, "skyrl.ai/learning_rate": 4e-05, "time/total": 119.28309035301208, "reward/total": 0.60107421875} +{"step": 19, "progress/batch": 19, "optim/lr": 4e-05, "progress/done_frac": 0.3448275862068966, "skyrl.ai/grad_norm": 5138.57022915908, "skyrl.ai/learning_rate": 4e-05, "time/total": 87.9476568698883, "reward/total": 0.65625} +{"step": 20, "progress/batch": 20, "optim/lr": 4e-05, "progress/done_frac": 0.3620689655172414, "skyrl.ai/grad_norm": 4787.003655732884, "skyrl.ai/learning_rate": 4e-05, "time/total": 171.21912908554077, "reward/total": 0.65283203125} +{"step": 21, "progress/batch": 21, "optim/lr": 4e-05, "progress/done_frac": 0.3793103448275862, "skyrl.ai/grad_norm": 5361.994405069815, "skyrl.ai/learning_rate": 4e-05, "time/total": 116.01915049552917, "reward/total": 0.66455078125} +{"step": 22, "progress/batch": 22, "optim/lr": 4e-05, "progress/done_frac": 0.39655172413793105, "skyrl.ai/grad_norm": 4536.794683474226, "skyrl.ai/learning_rate": 4e-05, "time/total": 119.55955386161804, "reward/total": 0.70556640625} +{"step": 23, "progress/batch": 23, "optim/lr": 4e-05, "progress/done_frac": 0.41379310344827586, "skyrl.ai/grad_norm": 3972.942738072121, "skyrl.ai/learning_rate": 4e-05, "time/total": 131.4686348438263, "reward/total": 0.70751953125} +{"step": 24, "progress/batch": 24, "optim/lr": 4e-05, "progress/done_frac": 0.43103448275862066, "skyrl.ai/grad_norm": 4835.404636635904, "skyrl.ai/learning_rate": 4e-05, "time/total": 126.41147351264954, "reward/total": 0.62841796875} +{"step": 25, "progress/batch": 25, "optim/lr": 4e-05, "progress/done_frac": 0.4482758620689655, "skyrl.ai/grad_norm": 4455.773782408618, "skyrl.ai/learning_rate": 4e-05, "time/total": 131.98732089996338, "reward/total": 0.70166015625} +{"step": 26, "progress/batch": 26, "optim/lr": 4e-05, "progress/done_frac": 0.46551724137931033, "skyrl.ai/grad_norm": 6312.734431290453, "skyrl.ai/learning_rate": 4e-05, "time/total": 126.180908203125, "reward/total": 0.6884765625} +{"step": 27, "progress/batch": 27, "optim/lr": 4e-05, "progress/done_frac": 0.4827586206896552, "skyrl.ai/grad_norm": 4511.742678832648, "skyrl.ai/learning_rate": 4e-05, "time/total": 133.08126187324524, "reward/total": 0.716796875} +{"step": 28, "progress/batch": 28, "optim/lr": 4e-05, "progress/done_frac": 0.5, "skyrl.ai/grad_norm": 4176.975700192665, "skyrl.ai/learning_rate": 4e-05, "time/total": 125.8956778049469, "reward/total": 0.74853515625} +{"step": 29, "progress/batch": 29, "optim/lr": 4e-05, "progress/done_frac": 0.5172413793103449, "skyrl.ai/grad_norm": 
4329.369700083374, "skyrl.ai/learning_rate": 4e-05, "time/total": 136.38487315177917, "reward/total": 0.73388671875} diff --git a/tests/tinker/smoke_logs/sl_loop_a.log/checkpoints.jsonl b/tests/tinker/smoke_logs/sl_loop_a.log/checkpoints.jsonl new file mode 100644 index 0000000000..f33af574cf --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_a.log/checkpoints.jsonl @@ -0,0 +1,2 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_a6e27bdf/weights/000020"} +{"name": "000040", "batch": 40, "state_path": "tinker://model_a6e27bdf/weights/000040"} diff --git a/tests/tinker/smoke_logs/sl_loop_a.log/code.diff b/tests/tinker/smoke_logs/sl_loop_a.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_a.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/sl_loop_a.log/config.json b/tests/tinker/smoke_logs/sl_loop_a.log/config.json new file mode 100644 index 0000000000..a74eec435c --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_a.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/sl_loop_a.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "learning_rate": 0.0001, + "max_length": 32768, + "train_on_what": "last_assistant_message", + "lora_rank": 32, + "save_every": 20, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/sl_loop_a.log/logs.log b/tests/tinker/smoke_logs/sl_loop_a.log/logs.log new file mode 100644 index 0000000000..83759c41ca --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_a.log/logs.log @@ -0,0 +1,729 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/sl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B train_on_what=LAST_ASSISTANT_MESSAGE lora_rank=32 log_path=/tmp/sl_loop_a.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/sl_loop_a.log +__main__:51 [INFO] Using renderer: qwen3 +__main__:54 [INFO] Loading dataset... +__main__:64 [INFO] Dropping last 28 examples to keep batch size uniform at 128 +__main__:67 [INFO] Train batches: 74 +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_b2801142 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/sl_loop_a.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/sl_loop_a.log +tinker.lib.telemetry:204 [INFO] Exception logged for session ID: session_b2801142 +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/sl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B train_on_what=LAST_ASSISTANT_MESSAGE lora_rank=32 log_path=/tmp/sl_loop_a.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/sl_loop_a.log +__main__:51 [INFO] Using renderer: qwen3 +__main__:54 [INFO] Loading dataset... 
+__main__:64 [INFO] Dropping last 28 examples to keep batch size uniform at 128 +__main__:67 [INFO] Train batches: 74 +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_29a17128 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/sl_loop_a.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/sl_loop_a.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_a6e27bdf +__main__:87 [INFO] Training for 74 steps +tinker_cookbook.supervised.common:188 [INFO] Weight reduction: 'mean' (token-mean loss) +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000100  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37834  │ +│ progress  │ 0.000000  │ +│ skyrl.ai/grad_norm  │ 1.419505  │ +│ skyrl.ai/learning_rate │ 0.000100  │ +│ time_total  │ 87.547961 │ +│ train_mean_nll  │ 2.937314  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000099  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35654  │ +│ progress  │ 0.013514  │ +│ skyrl.ai/grad_norm  │ 1.757945  │ +│ skyrl.ai/learning_rate │ 0.000099  │ +│ time_total  │ 40.488416 │ +│ train_mean_nll  │ 3.141781  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000097  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38474  │ +│ progress  │ 0.027027  │ +│ skyrl.ai/grad_norm  │ 1.375920  │ +│ skyrl.ai/learning_rate │ 0.000097  │ +│ time_total  │ 40.696287 │ +│ train_mean_nll  │ 2.757976  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000096  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39781  │ +│ progress  │ 0.040541  │ +│ skyrl.ai/grad_norm  │ 0.992602  │ +│ skyrl.ai/learning_rate │ 0.000096  │ +│ time_total  │ 40.512256 │ +│ train_mean_nll  │ 2.664960  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000095  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37346  │ +│ progress  │ 0.054054  │ +│ skyrl.ai/grad_norm  │ 1.000068  │ +│ skyrl.ai/learning_rate │ 0.000095  │ +│ time_total  │ 76.539217 │ +│ train_mean_nll  │ 2.643194  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ 
Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000093  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38710  │ +│ progress  │ 0.067568  │ +│ skyrl.ai/grad_norm  │ 0.864887  │ +│ skyrl.ai/learning_rate │ 0.000093  │ +│ time_total  │ 77.631970 │ +│ train_mean_nll  │ 2.497554  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000092  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33356  │ +│ progress  │ 0.081081  │ +│ skyrl.ai/grad_norm  │ 0.622200  │ +│ skyrl.ai/learning_rate │ 0.000092  │ +│ time_total  │ 76.481162 │ +│ train_mean_nll  │ 2.663911  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000091  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39115  │ +│ progress  │ 0.094595  │ +│ skyrl.ai/grad_norm  │ 0.546005  │ +│ skyrl.ai/learning_rate │ 0.000091  │ +│ time_total  │ 76.499547 │ +│ train_mean_nll  │ 2.479411  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000089  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38640  │ +│ progress  │ 0.108108  │ +│ skyrl.ai/grad_norm  │ 0.589701  │ +│ skyrl.ai/learning_rate │ 0.000089  │ +│ time_total  │ 76.493416 │ +│ train_mean_nll  │ 2.446202  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000088  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38863  │ +│ progress  │ 0.121622  │ +│ skyrl.ai/grad_norm  │ 0.481519  │ +│ skyrl.ai/learning_rate │ 0.000088  │ +│ time_total  │ 76.621217 │ +│ train_mean_nll  │ 2.381734  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000086  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33155  │ +│ progress  │ 0.135135  │ +│ skyrl.ai/grad_norm  │ 0.576803  │ +│ skyrl.ai/learning_rate │ 0.000086  │ +│ time_total  │ 76.480710 │ +│ train_mean_nll  │ 2.520661  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000085  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38406  │ +│ progress  │ 0.148649  │ +│ skyrl.ai/grad_norm  │ 0.641170  │ +│ skyrl.ai/learning_rate │ 0.000085  │ +│ time_total  │ 76.499414 │ +│ train_mean_nll  │ 2.459423  │ +└────────────────────────┴───────────┘ 
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000084  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38843  │ +│ progress  │ 0.162162  │ +│ skyrl.ai/grad_norm  │ 0.567629  │ +│ skyrl.ai/learning_rate │ 0.000084  │ +│ time_total  │ 76.519642 │ +│ train_mean_nll  │ 2.331488  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000082  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35583  │ +│ progress  │ 0.175676  │ +│ skyrl.ai/grad_norm  │ 0.533097  │ +│ skyrl.ai/learning_rate │ 0.000082  │ +│ time_total  │ 76.485970 │ +│ train_mean_nll  │ 2.370727  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000081  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36717  │ +│ progress  │ 0.189189  │ +│ skyrl.ai/grad_norm  │ 0.442445  │ +│ skyrl.ai/learning_rate │ 0.000081  │ +│ time_total  │ 76.499796 │ +│ train_mean_nll  │ 2.284867  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000080  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34002  │ +│ progress  │ 0.202703  │ +│ skyrl.ai/grad_norm  │ 0.459619  │ +│ skyrl.ai/learning_rate │ 0.000080  │ +│ time_total  │ 76.715802 │ +│ train_mean_nll  │ 2.411974  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000078  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38620  │ +│ progress  │ 0.216216  │ +│ skyrl.ai/grad_norm  │ 0.343792  │ +│ skyrl.ai/learning_rate │ 0.000078  │ +│ time_total  │ 76.496818 │ +│ train_mean_nll  │ 2.385261  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000077  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37639  │ +│ progress  │ 0.229730  │ +│ skyrl.ai/grad_norm  │ 0.328837  │ +│ skyrl.ai/learning_rate │ 0.000077  │ +│ time_total  │ 75.498580 │ +│ train_mean_nll  │ 2.256649  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000076  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34118  │ +│ 
progress  │ 0.243243  │ +│ skyrl.ai/grad_norm  │ 0.384255  │ +│ skyrl.ai/learning_rate │ 0.000076  │ +│ time_total  │ 77.494641 │ +│ train_mean_nll  │ 2.406415  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000074  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37178  │ +│ progress  │ 0.256757  │ +│ skyrl.ai/grad_norm  │ 0.364690  │ +│ skyrl.ai/learning_rate │ 0.000074  │ +│ time_total  │ 75.483256 │ +│ train_mean_nll  │ 2.389909  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_a6e27bdf/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000073  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35309  │ +│ progress  │ 0.270270  │ +│ skyrl.ai/grad_norm  │ 0.341943  │ +│ skyrl.ai/learning_rate │ 0.000073  │ +│ time_total  │ 118.789298 │ +│ train_mean_nll  │ 2.354352  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000072  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37229  │ +│ progress  │ 0.283784  │ +│ skyrl.ai/grad_norm  │ 0.345487  │ +│ skyrl.ai/learning_rate │ 0.000072  │ +│ time_total  │ 77.501071 │ +│ train_mean_nll  │ 2.356468  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000070  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36087  │ +│ progress  │ 0.297297  │ +│ skyrl.ai/grad_norm  │ 0.434072  │ +│ skyrl.ai/learning_rate │ 0.000070  │ +│ time_total  │ 76.563729 │ +│ train_mean_nll  │ 2.349741  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000069  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33348  │ +│ progress  │ 0.310811  │ +│ skyrl.ai/grad_norm  │ 0.666360  │ +│ skyrl.ai/learning_rate │ 0.000069  │ +│ time_total  │ 39.467893 │ +│ train_mean_nll  │ 2.315340  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000068  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35210  │ +│ progress  │ 0.324324  │ +│ skyrl.ai/grad_norm  │ 0.362909  │ +│ skyrl.ai/learning_rate │ 0.000068  │ +│ time_total  │ 76.473245 │ +│ train_mean_nll  │ 2.381596  │ +└────────────────────────┴───────────┘ 
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000066  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34833  │ +│ progress  │ 0.337838  │ +│ skyrl.ai/grad_norm  │ 0.389343  │ +│ skyrl.ai/learning_rate │ 0.000066  │ +│ time_total  │ 77.588004 │ +│ train_mean_nll  │ 2.444775  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000065  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37256  │ +│ progress  │ 0.351351  │ +│ skyrl.ai/grad_norm  │ 0.379360  │ +│ skyrl.ai/learning_rate │ 0.000065  │ +│ time_total  │ 76.494458 │ +│ train_mean_nll  │ 2.408405  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000064  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33947  │ +│ progress  │ 0.364865  │ +│ skyrl.ai/grad_norm  │ 0.327722  │ +│ skyrl.ai/learning_rate │ 0.000064  │ +│ time_total  │ 76.535758 │ +│ train_mean_nll  │ 2.349726  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000062  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 32694  │ +│ progress  │ 0.378378  │ +│ skyrl.ai/grad_norm  │ 0.331114  │ +│ skyrl.ai/learning_rate │ 0.000062  │ +│ time_total  │ 75.470683 │ +│ train_mean_nll  │ 2.433801  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000061  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37440  │ +│ progress  │ 0.391892  │ +│ skyrl.ai/grad_norm  │ 0.354970  │ +│ skyrl.ai/learning_rate │ 0.000061  │ +│ time_total  │ 76.488313 │ +│ train_mean_nll  │ 2.169731  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 30  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000059  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39453  │ +│ progress  │ 0.405405  │ +│ skyrl.ai/grad_norm  │ 0.323725  │ +│ skyrl.ai/learning_rate │ 0.000059  │ +│ time_total  │ 77.668248 │ +│ train_mean_nll  │ 2.282022  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 31  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000058  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36143  │ +│ 
progress  │ 0.418919  │ +│ skyrl.ai/grad_norm  │ 0.326314  │ +│ skyrl.ai/learning_rate │ 0.000058  │ +│ time_total  │ 77.478847 │ +│ train_mean_nll  │ 2.280007  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 32  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000057  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34808  │ +│ progress  │ 0.432432  │ +│ skyrl.ai/grad_norm  │ 0.385605  │ +│ skyrl.ai/learning_rate │ 0.000057  │ +│ time_total  │ 76.566763 │ +│ train_mean_nll  │ 2.355149  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 33  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000055  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 40403  │ +│ progress  │ 0.445946  │ +│ skyrl.ai/grad_norm  │ 0.292981  │ +│ skyrl.ai/learning_rate │ 0.000055  │ +│ time_total  │ 77.524634 │ +│ train_mean_nll  │ 2.233092  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 34  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000054  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33581  │ +│ progress  │ 0.459459  │ +│ skyrl.ai/grad_norm  │ 0.330763  │ +│ skyrl.ai/learning_rate │ 0.000054  │ +│ time_total  │ 76.473376 │ +│ train_mean_nll  │ 2.357910  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 35  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000053  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35478  │ +│ progress  │ 0.472973  │ +│ skyrl.ai/grad_norm  │ 0.270800  │ +│ skyrl.ai/learning_rate │ 0.000053  │ +│ time_total  │ 75.483867 │ +│ train_mean_nll  │ 2.342329  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 36  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000051  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37613  │ +│ progress  │ 0.486486  │ +│ skyrl.ai/grad_norm  │ 0.442881  │ +│ skyrl.ai/learning_rate │ 0.000051  │ +│ time_total  │ 77.512969 │ +│ train_mean_nll  │ 2.252668  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 37  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000050  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37243  │ +│ progress  │ 0.500000  │ +│ skyrl.ai/grad_norm  │ 0.338969  │ +│ skyrl.ai/learning_rate │ 0.000050  │ +│ time_total  │ 77.658975 │ +│ train_mean_nll  │ 2.455121  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + 
Step 38  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000049  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34551  │ +│ progress  │ 0.513514  │ +│ skyrl.ai/grad_norm  │ 0.351700  │ +│ skyrl.ai/learning_rate │ 0.000049  │ +│ time_total  │ 76.483724 │ +│ train_mean_nll  │ 2.280617  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 39  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000047  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34720  │ +│ progress  │ 0.527027  │ +│ skyrl.ai/grad_norm  │ 0.324459  │ +│ skyrl.ai/learning_rate │ 0.000047  │ +│ time_total  │ 76.485910 │ +│ train_mean_nll  │ 2.223614  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_a6e27bdf/weights/000040'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 40  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000046  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37150  │ +│ progress  │ 0.540541  │ +│ skyrl.ai/grad_norm  │ 0.291667  │ +│ skyrl.ai/learning_rate │ 0.000046  │ +│ time_total  │ 116.702335 │ +│ train_mean_nll  │ 2.320501  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 41  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000045  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39832  │ +│ progress  │ 0.554054  │ +│ skyrl.ai/grad_norm  │ 0.330698  │ +│ skyrl.ai/learning_rate │ 0.000045  │ +│ time_total  │ 76.535000 │ +│ train_mean_nll  │ 2.251225  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 42  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000043  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 41346  │ +│ progress  │ 0.567568  │ +│ skyrl.ai/grad_norm  │ 0.344484  │ +│ skyrl.ai/learning_rate │ 0.000043  │ +│ time_total  │ 76.549524 │ +│ train_mean_nll  │ 2.185086  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 43  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000042  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35728  │ +│ progress  │ 0.581081  │ +│ skyrl.ai/grad_norm  │ 0.307271  │ +│ skyrl.ai/learning_rate │ 0.000042  │ +│ time_total  │ 40.502563 │ +│ train_mean_nll  │ 2.388796  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 44  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000041  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37469  │ +│ progress  │ 
0.594595  │ +│ skyrl.ai/grad_norm  │ 0.304323  │ +│ skyrl.ai/learning_rate │ 0.000041  │ +│ time_total  │ 75.640097 │ +│ train_mean_nll  │ 2.405290  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 45  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000039  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36253  │ +│ progress  │ 0.608108  │ +│ skyrl.ai/grad_norm  │ 0.296620  │ +│ skyrl.ai/learning_rate │ 0.000039  │ +│ time_total  │ 76.666610 │ +│ train_mean_nll  │ 2.326724  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_a.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 46  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000038  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39163  │ +│ progress  │ 0.621622  │ +│ skyrl.ai/grad_norm  │ 0.335458  │ +│ skyrl.ai/learning_rate │ 0.000038  │ +│ time_total  │ 77.500199 │ +│ train_mean_nll  │ 2.402814  │ +└────────────────────────┴───────────┘ diff --git a/tests/tinker/smoke_logs/sl_loop_a.log/metrics.jsonl b/tests/tinker/smoke_logs/sl_loop_a.log/metrics.jsonl new file mode 100644 index 0000000000..24345b00bb --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_a.log/metrics.jsonl @@ -0,0 +1,47 @@ +{"step": 0, "skyrl.ai/grad_norm": 1.4195045582052144, "skyrl.ai/learning_rate": 0.0001, "num_sequences": 128, "num_tokens": 37834, "learning_rate": 0.0001, "train_mean_nll": 2.9373137950897217, "progress": 0.0, "time_total": 87.54796123504639} +{"step": 1, "skyrl.ai/grad_norm": 1.7579454159817616, "skyrl.ai/learning_rate": 9.864864864864865e-05, "num_sequences": 128, "num_tokens": 35654, "learning_rate": 9.864864864864865e-05, "train_mean_nll": 3.1417813301086426, "progress": 0.013513513513513514, "time_total": 40.48841595649719} +{"step": 2, "skyrl.ai/grad_norm": 1.3759201612209633, "skyrl.ai/learning_rate": 9.729729729729731e-05, "num_sequences": 128, "num_tokens": 38474, "learning_rate": 9.729729729729731e-05, "train_mean_nll": 2.7579760551452637, "progress": 0.02702702702702703, "time_total": 40.69628691673279} +{"step": 3, "skyrl.ai/grad_norm": 0.9926018037640125, "skyrl.ai/learning_rate": 9.594594594594595e-05, "num_sequences": 128, "num_tokens": 39781, "learning_rate": 9.594594594594595e-05, "train_mean_nll": 2.6649603843688965, "progress": 0.04054054054054054, "time_total": 40.512256145477295} +{"step": 4, "skyrl.ai/grad_norm": 1.0000684237912945, "skyrl.ai/learning_rate": 9.45945945945946e-05, "num_sequences": 128, "num_tokens": 37346, "learning_rate": 9.45945945945946e-05, "train_mean_nll": 2.6431944370269775, "progress": 0.05405405405405406, "time_total": 76.53921723365784} +{"step": 5, "skyrl.ai/grad_norm": 0.8648871075824316, "skyrl.ai/learning_rate": 9.324324324324324e-05, "num_sequences": 128, "num_tokens": 38710, "learning_rate": 9.324324324324324e-05, "train_mean_nll": 2.497553586959839, "progress": 0.06756756756756757, "time_total": 77.63196992874146} +{"step": 6, "skyrl.ai/grad_norm": 0.6221997711718907, "skyrl.ai/learning_rate": 9.189189189189189e-05, "num_sequences": 128, "num_tokens": 33356, "learning_rate": 9.189189189189189e-05, "train_mean_nll": 2.6639108657836914, "progress": 0.08108108108108109, "time_total": 76.48116183280945} +{"step": 7, "skyrl.ai/grad_norm": 
0.546004583585804, "skyrl.ai/learning_rate": 9.054054054054055e-05, "num_sequences": 128, "num_tokens": 39115, "learning_rate": 9.054054054054055e-05, "train_mean_nll": 2.4794111251831055, "progress": 0.0945945945945946, "time_total": 76.49954676628113} +{"step": 8, "skyrl.ai/grad_norm": 0.5897014016684463, "skyrl.ai/learning_rate": 8.918918918918919e-05, "num_sequences": 128, "num_tokens": 38640, "learning_rate": 8.918918918918919e-05, "train_mean_nll": 2.446202278137207, "progress": 0.10810810810810811, "time_total": 76.49341583251953} +{"step": 9, "skyrl.ai/grad_norm": 0.4815193103926899, "skyrl.ai/learning_rate": 8.783783783783784e-05, "num_sequences": 128, "num_tokens": 38863, "learning_rate": 8.783783783783784e-05, "train_mean_nll": 2.3817343711853027, "progress": 0.12162162162162163, "time_total": 76.62121725082397} +{"step": 10, "skyrl.ai/grad_norm": 0.5768029587667796, "skyrl.ai/learning_rate": 8.64864864864865e-05, "num_sequences": 128, "num_tokens": 33155, "learning_rate": 8.64864864864865e-05, "train_mean_nll": 2.5206613540649414, "progress": 0.13513513513513514, "time_total": 76.48070955276489} +{"step": 11, "skyrl.ai/grad_norm": 0.6411696654000828, "skyrl.ai/learning_rate": 8.513513513513514e-05, "num_sequences": 128, "num_tokens": 38406, "learning_rate": 8.513513513513514e-05, "train_mean_nll": 2.459423303604126, "progress": 0.14864864864864866, "time_total": 76.49941420555115} +{"step": 12, "skyrl.ai/grad_norm": 0.5676292107649995, "skyrl.ai/learning_rate": 8.378378378378379e-05, "num_sequences": 128, "num_tokens": 38843, "learning_rate": 8.378378378378379e-05, "train_mean_nll": 2.3314881324768066, "progress": 0.16216216216216217, "time_total": 76.51964163780212} +{"step": 13, "skyrl.ai/grad_norm": 0.5330967779616385, "skyrl.ai/learning_rate": 8.243243243243243e-05, "num_sequences": 128, "num_tokens": 35583, "learning_rate": 8.243243243243243e-05, "train_mean_nll": 2.370727300643921, "progress": 0.17567567567567569, "time_total": 76.48596978187561} +{"step": 14, "skyrl.ai/grad_norm": 0.44244537584796306, "skyrl.ai/learning_rate": 8.108108108108108e-05, "num_sequences": 128, "num_tokens": 36717, "learning_rate": 8.108108108108108e-05, "train_mean_nll": 2.2848665714263916, "progress": 0.1891891891891892, "time_total": 76.49979639053345} +{"step": 15, "skyrl.ai/grad_norm": 0.45961910755591384, "skyrl.ai/learning_rate": 7.972972972972974e-05, "num_sequences": 128, "num_tokens": 34002, "learning_rate": 7.972972972972974e-05, "train_mean_nll": 2.4119741916656494, "progress": 0.20270270270270271, "time_total": 76.71580243110657} +{"step": 16, "skyrl.ai/grad_norm": 0.3437916340323792, "skyrl.ai/learning_rate": 7.837837837837838e-05, "num_sequences": 128, "num_tokens": 38620, "learning_rate": 7.837837837837838e-05, "train_mean_nll": 2.385261297225952, "progress": 0.21621621621621623, "time_total": 76.49681758880615} +{"step": 17, "skyrl.ai/grad_norm": 0.32883653241641353, "skyrl.ai/learning_rate": 7.702702702702703e-05, "num_sequences": 128, "num_tokens": 37639, "learning_rate": 7.702702702702703e-05, "train_mean_nll": 2.256648540496826, "progress": 0.22972972972972974, "time_total": 75.49857974052429} +{"step": 18, "skyrl.ai/grad_norm": 0.3842549687115808, "skyrl.ai/learning_rate": 7.567567567567568e-05, "num_sequences": 128, "num_tokens": 34118, "learning_rate": 7.567567567567568e-05, "train_mean_nll": 2.4064152240753174, "progress": 0.24324324324324326, "time_total": 77.49464082717896} +{"step": 19, "skyrl.ai/grad_norm": 0.36468983411654887, "skyrl.ai/learning_rate": 
7.432432432432433e-05, "num_sequences": 128, "num_tokens": 37178, "learning_rate": 7.432432432432433e-05, "train_mean_nll": 2.389909029006958, "progress": 0.25675675675675674, "time_total": 75.48325562477112} +{"step": 20, "skyrl.ai/grad_norm": 0.3419431427929476, "skyrl.ai/learning_rate": 7.297297297297297e-05, "num_sequences": 128, "num_tokens": 35309, "learning_rate": 7.297297297297297e-05, "train_mean_nll": 2.354351758956909, "progress": 0.2702702702702703, "time_total": 118.78929805755615} +{"step": 21, "skyrl.ai/grad_norm": 0.345487268590772, "skyrl.ai/learning_rate": 7.162162162162162e-05, "num_sequences": 128, "num_tokens": 37229, "learning_rate": 7.162162162162162e-05, "train_mean_nll": 2.3564682006835938, "progress": 0.28378378378378377, "time_total": 77.50107073783875} +{"step": 22, "skyrl.ai/grad_norm": 0.4340724021435782, "skyrl.ai/learning_rate": 7.027027027027026e-05, "num_sequences": 128, "num_tokens": 36087, "learning_rate": 7.027027027027026e-05, "train_mean_nll": 2.349740743637085, "progress": 0.2972972972972973, "time_total": 76.56372880935669} +{"step": 23, "skyrl.ai/grad_norm": 0.6663603128429356, "skyrl.ai/learning_rate": 6.891891891891892e-05, "num_sequences": 128, "num_tokens": 33348, "learning_rate": 6.891891891891892e-05, "train_mean_nll": 2.315340280532837, "progress": 0.3108108108108108, "time_total": 39.46789288520813} +{"step": 24, "skyrl.ai/grad_norm": 0.3629089827149277, "skyrl.ai/learning_rate": 6.756756756756757e-05, "num_sequences": 128, "num_tokens": 35210, "learning_rate": 6.756756756756757e-05, "train_mean_nll": 2.381596088409424, "progress": 0.32432432432432434, "time_total": 76.47324466705322} +{"step": 25, "skyrl.ai/grad_norm": 0.3893427785274499, "skyrl.ai/learning_rate": 6.621621621621621e-05, "num_sequences": 128, "num_tokens": 34833, "learning_rate": 6.621621621621621e-05, "train_mean_nll": 2.444774866104126, "progress": 0.33783783783783783, "time_total": 77.58800411224365} +{"step": 26, "skyrl.ai/grad_norm": 0.3793604576491017, "skyrl.ai/learning_rate": 6.486486486486487e-05, "num_sequences": 128, "num_tokens": 37256, "learning_rate": 6.486486486486487e-05, "train_mean_nll": 2.40840482711792, "progress": 0.35135135135135137, "time_total": 76.49445819854736} +{"step": 27, "skyrl.ai/grad_norm": 0.32772194994537934, "skyrl.ai/learning_rate": 6.351351351351352e-05, "num_sequences": 128, "num_tokens": 33947, "learning_rate": 6.351351351351352e-05, "train_mean_nll": 2.349726438522339, "progress": 0.36486486486486486, "time_total": 76.53575825691223} +{"step": 28, "skyrl.ai/grad_norm": 0.3311138494060635, "skyrl.ai/learning_rate": 6.216216216216216e-05, "num_sequences": 128, "num_tokens": 32694, "learning_rate": 6.216216216216216e-05, "train_mean_nll": 2.4338009357452393, "progress": 0.3783783783783784, "time_total": 75.47068309783936} +{"step": 29, "skyrl.ai/grad_norm": 0.3549697433593456, "skyrl.ai/learning_rate": 6.0810810810810814e-05, "num_sequences": 128, "num_tokens": 37440, "learning_rate": 6.0810810810810814e-05, "train_mean_nll": 2.1697309017181396, "progress": 0.3918918918918919, "time_total": 76.4883131980896} +{"step": 30, "skyrl.ai/grad_norm": 0.3237251062653877, "skyrl.ai/learning_rate": 5.945945945945945e-05, "num_sequences": 128, "num_tokens": 39453, "learning_rate": 5.945945945945945e-05, "train_mean_nll": 2.282021999359131, "progress": 0.40540540540540543, "time_total": 77.6682481765747} +{"step": 31, "skyrl.ai/grad_norm": 0.32631397007477897, "skyrl.ai/learning_rate": 5.810810810810812e-05, "num_sequences": 128, "num_tokens": 
36143, "learning_rate": 5.810810810810812e-05, "train_mean_nll": 2.2800068855285645, "progress": 0.4189189189189189, "time_total": 77.47884702682495} +{"step": 32, "skyrl.ai/grad_norm": 0.3856050658058465, "skyrl.ai/learning_rate": 5.6756756756756757e-05, "num_sequences": 128, "num_tokens": 34808, "learning_rate": 5.6756756756756757e-05, "train_mean_nll": 2.3551485538482666, "progress": 0.43243243243243246, "time_total": 76.5667634010315} +{"step": 33, "skyrl.ai/grad_norm": 0.292981300085196, "skyrl.ai/learning_rate": 5.540540540540541e-05, "num_sequences": 128, "num_tokens": 40403, "learning_rate": 5.540540540540541e-05, "train_mean_nll": 2.2330923080444336, "progress": 0.44594594594594594, "time_total": 77.52463436126709} +{"step": 34, "skyrl.ai/grad_norm": 0.3307628301574217, "skyrl.ai/learning_rate": 5.405405405405406e-05, "num_sequences": 128, "num_tokens": 33581, "learning_rate": 5.405405405405406e-05, "train_mean_nll": 2.357910394668579, "progress": 0.4594594594594595, "time_total": 76.47337627410889} +{"step": 35, "skyrl.ai/grad_norm": 0.270800408171525, "skyrl.ai/learning_rate": 5.27027027027027e-05, "num_sequences": 128, "num_tokens": 35478, "learning_rate": 5.27027027027027e-05, "train_mean_nll": 2.3423290252685547, "progress": 0.47297297297297297, "time_total": 75.48386669158936} +{"step": 36, "skyrl.ai/grad_norm": 0.44288095213023204, "skyrl.ai/learning_rate": 5.135135135135135e-05, "num_sequences": 128, "num_tokens": 37613, "learning_rate": 5.135135135135135e-05, "train_mean_nll": 2.2526679039001465, "progress": 0.4864864864864865, "time_total": 77.51296854019165} +{"step": 37, "skyrl.ai/grad_norm": 0.3389692565050453, "skyrl.ai/learning_rate": 5e-05, "num_sequences": 128, "num_tokens": 37243, "learning_rate": 5e-05, "train_mean_nll": 2.455120801925659, "progress": 0.5, "time_total": 77.65897512435913} +{"step": 38, "skyrl.ai/grad_norm": 0.3517002471639325, "skyrl.ai/learning_rate": 4.8648648648648654e-05, "num_sequences": 128, "num_tokens": 34551, "learning_rate": 4.8648648648648654e-05, "train_mean_nll": 2.2806174755096436, "progress": 0.5135135135135135, "time_total": 76.48372411727905} +{"step": 39, "skyrl.ai/grad_norm": 0.32445883766323885, "skyrl.ai/learning_rate": 4.7297297297297306e-05, "num_sequences": 128, "num_tokens": 34720, "learning_rate": 4.7297297297297306e-05, "train_mean_nll": 2.223614454269409, "progress": 0.527027027027027, "time_total": 76.48590993881226} +{"step": 40, "skyrl.ai/grad_norm": 0.29166733650857213, "skyrl.ai/learning_rate": 4.5945945945945944e-05, "num_sequences": 128, "num_tokens": 37150, "learning_rate": 4.5945945945945944e-05, "train_mean_nll": 2.3205010890960693, "progress": 0.5405405405405406, "time_total": 116.70233464241028} +{"step": 41, "skyrl.ai/grad_norm": 0.33069804062642083, "skyrl.ai/learning_rate": 4.4594594594594596e-05, "num_sequences": 128, "num_tokens": 39832, "learning_rate": 4.4594594594594596e-05, "train_mean_nll": 2.2512245178222656, "progress": 0.5540540540540541, "time_total": 76.53500008583069} +{"step": 42, "skyrl.ai/grad_norm": 0.3444839142267573, "skyrl.ai/learning_rate": 4.324324324324325e-05, "num_sequences": 128, "num_tokens": 41346, "learning_rate": 4.324324324324325e-05, "train_mean_nll": 2.1850860118865967, "progress": 0.5675675675675675, "time_total": 76.54952430725098} +{"step": 43, "skyrl.ai/grad_norm": 0.30727129669347636, "skyrl.ai/learning_rate": 4.18918918918919e-05, "num_sequences": 128, "num_tokens": 35728, "learning_rate": 4.18918918918919e-05, "train_mean_nll": 2.3887956142425537, "progress": 
0.581081081081081, "time_total": 40.50256276130676}
+{"step": 44, "skyrl.ai/grad_norm": 0.3043229538756859, "skyrl.ai/learning_rate": 4.054054054054054e-05, "num_sequences": 128, "num_tokens": 37469, "learning_rate": 4.054054054054054e-05, "train_mean_nll": 2.405290126800537, "progress": 0.5945945945945946, "time_total": 75.64009666442871}
+{"step": 45, "skyrl.ai/grad_norm": 0.29661968194443833, "skyrl.ai/learning_rate": 3.918918918918919e-05, "num_sequences": 128, "num_tokens": 36253, "learning_rate": 3.918918918918919e-05, "train_mean_nll": 2.3267242908477783, "progress": 0.6081081081081081, "time_total": 76.66661047935486}
+{"step": 46, "skyrl.ai/grad_norm": 0.33545840277169836, "skyrl.ai/learning_rate": 3.783783783783784e-05, "num_sequences": 128, "num_tokens": 39163, "learning_rate": 3.783783783783784e-05, "train_mean_nll": 2.4028143882751465, "progress": 0.6216216216216216, "time_total": 77.50019884109497}
diff --git a/tests/tinker/smoke_logs/sl_loop_b.log/checkpoints.jsonl b/tests/tinker/smoke_logs/sl_loop_b.log/checkpoints.jsonl
new file mode 100644
index 0000000000..28c58a12d3
--- /dev/null
+++ b/tests/tinker/smoke_logs/sl_loop_b.log/checkpoints.jsonl
@@ -0,0 +1,2 @@
+{"name": "000020", "batch": 20, "state_path": "tinker://model_973e0c8b/weights/000020"}
+{"name": "000040", "batch": 40, "state_path": "tinker://model_973e0c8b/weights/000040"}
diff --git a/tests/tinker/smoke_logs/sl_loop_b.log/code.diff b/tests/tinker/smoke_logs/sl_loop_b.log/code.diff
new file mode 100644
index 0000000000..d8e559755f
--- /dev/null
+++ b/tests/tinker/smoke_logs/sl_loop_b.log/code.diff
@@ -0,0 +1,3 @@
+### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3
+modules: tinker_cookbook
+(no local changes)
diff --git a/tests/tinker/smoke_logs/sl_loop_b.log/config.json b/tests/tinker/smoke_logs/sl_loop_b.log/config.json
new file mode 100644
index 0000000000..b400cc9b82
--- /dev/null
+++ b/tests/tinker/smoke_logs/sl_loop_b.log/config.json
@@ -0,0 +1,12 @@
+{
+ "base_url": "http://localhost:8000",
+ "log_path": "/tmp/sl_loop_b.log",
+ "model_name": "Qwen/Qwen3-0.6B",
+ "batch_size": 128,
+ "learning_rate": 0.0001,
+ "max_length": 32768,
+ "train_on_what": "last_assistant_message",
+ "lora_rank": 32,
+ "save_every": 20,
+ "ttl_seconds": 604800
+}
\ No newline at end of file
diff --git a/tests/tinker/smoke_logs/sl_loop_b.log/logs.log b/tests/tinker/smoke_logs/sl_loop_b.log/logs.log
new file mode 100644
index 0000000000..5a1ed0335d
--- /dev/null
+++ b/tests/tinker/smoke_logs/sl_loop_b.log/logs.log
@@ -0,0 +1,674 @@
+root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/sl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B train_on_what=LAST_ASSISTANT_MESSAGE lora_rank=32 log_path=/tmp/sl_loop_b.log
+tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/sl_loop_b.log
+__main__:51 [INFO] Using renderer: qwen3
+__main__:54 [INFO] Loading dataset...
+__main__:64 [INFO] Dropping last 28 examples to keep batch size uniform at 128 +__main__:67 [INFO] Train batches: 74 +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_e9ebd7a9 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/sl_loop_b.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/sl_loop_b.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_973e0c8b +__main__:87 [INFO] Training for 74 steps +tinker_cookbook.supervised.common:188 [INFO] Weight reduction: 'mean' (token-mean loss) +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000100  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37834  │ +│ progress  │ 0.000000  │ +│ skyrl.ai/grad_norm  │ 1.419539  │ +│ skyrl.ai/learning_rate │ 0.000100  │ +│ time_total  │ 77.518479 │ +│ train_mean_nll  │ 2.937314  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000099  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35654  │ +│ progress  │ 0.013514  │ +│ skyrl.ai/grad_norm  │ 1.787320  │ +│ skyrl.ai/learning_rate │ 0.000099  │ +│ time_total  │ 77.489699 │ +│ train_mean_nll  │ 3.171597  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000097  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38474  │ +│ progress  │ 0.027027  │ +│ skyrl.ai/grad_norm  │ 1.387239  │ +│ skyrl.ai/learning_rate │ 0.000097  │ +│ time_total  │ 77.495156 │ +│ train_mean_nll  │ 2.826397  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000096  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39781  │ +│ progress  │ 0.040541  │ +│ skyrl.ai/grad_norm  │ 1.347174  │ +│ skyrl.ai/learning_rate │ 0.000096  │ +│ time_total  │ 76.504979 │ +│ train_mean_nll  │ 2.734261  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000095  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37346  │ +│ progress  │ 0.054054  │ +│ skyrl.ai/grad_norm  │ 1.056902  │ +│ skyrl.ai/learning_rate │ 0.000095  │ +│ time_total  │ 76.500813 │ +│ train_mean_nll  │ 2.713544  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ 
Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000093  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38710  │ +│ progress  │ 0.067568  │ +│ skyrl.ai/grad_norm  │ 1.037518  │ +│ skyrl.ai/learning_rate │ 0.000093  │ +│ time_total  │ 76.510272 │ +│ train_mean_nll  │ 2.544317  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000092  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33356  │ +│ progress  │ 0.081081  │ +│ skyrl.ai/grad_norm  │ 0.760348  │ +│ skyrl.ai/learning_rate │ 0.000092  │ +│ time_total  │ 76.468495 │ +│ train_mean_nll  │ 2.698561  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000091  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39115  │ +│ progress  │ 0.094595  │ +│ skyrl.ai/grad_norm  │ 0.621467  │ +│ skyrl.ai/learning_rate │ 0.000091  │ +│ time_total  │ 76.525711 │ +│ train_mean_nll  │ 2.502498  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000089  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38640  │ +│ progress  │ 0.108108  │ +│ skyrl.ai/grad_norm  │ 0.597963  │ +│ skyrl.ai/learning_rate │ 0.000089  │ +│ time_total  │ 76.506974 │ +│ train_mean_nll  │ 2.464289  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000088  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38863  │ +│ progress  │ 0.121622  │ +│ skyrl.ai/grad_norm  │ 0.515755  │ +│ skyrl.ai/learning_rate │ 0.000088  │ +│ time_total  │ 76.510010 │ +│ train_mean_nll  │ 2.393878  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000086  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33155  │ +│ progress  │ 0.135135  │ +│ skyrl.ai/grad_norm  │ 0.678250  │ +│ skyrl.ai/learning_rate │ 0.000086  │ +│ time_total  │ 76.477246 │ +│ train_mean_nll  │ 2.540268  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000085  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38406  │ +│ progress  │ 0.148649  │ +│ skyrl.ai/grad_norm  │ 0.630785  │ +│ skyrl.ai/learning_rate │ 0.000085  │ +│ time_total  │ 76.500259 │ +│ train_mean_nll  │ 2.475491  │ +└────────────────────────┴───────────┘ 
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000084  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38843  │ +│ progress  │ 0.162162  │ +│ skyrl.ai/grad_norm  │ 0.590551  │ +│ skyrl.ai/learning_rate │ 0.000084  │ +│ time_total  │ 76.506439 │ +│ train_mean_nll  │ 2.344424  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000082  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35583  │ +│ progress  │ 0.175676  │ +│ skyrl.ai/grad_norm  │ 0.546812  │ +│ skyrl.ai/learning_rate │ 0.000082  │ +│ time_total  │ 76.591751 │ +│ train_mean_nll  │ 2.381466  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000081  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36717  │ +│ progress  │ 0.189189  │ +│ skyrl.ai/grad_norm  │ 0.475584  │ +│ skyrl.ai/learning_rate │ 0.000081  │ +│ time_total  │ 76.374654 │ +│ train_mean_nll  │ 2.290986  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000080  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34002  │ +│ progress  │ 0.202703  │ +│ skyrl.ai/grad_norm  │ 0.517601  │ +│ skyrl.ai/learning_rate │ 0.000080  │ +│ time_total  │ 76.461699 │ +│ train_mean_nll  │ 2.419916  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000078  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38620  │ +│ progress  │ 0.216216  │ +│ skyrl.ai/grad_norm  │ 0.359259  │ +│ skyrl.ai/learning_rate │ 0.000078  │ +│ time_total  │ 75.604731 │ +│ train_mean_nll  │ 2.389113  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000077  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37639  │ +│ progress  │ 0.229730  │ +│ skyrl.ai/grad_norm  │ 0.338165  │ +│ skyrl.ai/learning_rate │ 0.000077  │ +│ time_total  │ 42.506935 │ +│ train_mean_nll  │ 2.259868  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000076  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34118  │ +│ 
progress  │ 0.243243  │ +│ skyrl.ai/grad_norm  │ 0.416603  │ +│ skyrl.ai/learning_rate │ 0.000076  │ +│ time_total  │ 76.720900 │ +│ train_mean_nll  │ 2.409774  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000074  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37178  │ +│ progress  │ 0.256757  │ +│ skyrl.ai/grad_norm  │ 0.370585  │ +│ skyrl.ai/learning_rate │ 0.000074  │ +│ time_total  │ 76.489900 │ +│ train_mean_nll  │ 2.392253  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_973e0c8b/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000073  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35309  │ +│ progress  │ 0.270270  │ +│ skyrl.ai/grad_norm  │ 0.348055  │ +│ skyrl.ai/learning_rate │ 0.000073  │ +│ time_total  │ 116.662111 │ +│ train_mean_nll  │ 2.357068  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000072  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37229  │ +│ progress  │ 0.283784  │ +│ skyrl.ai/grad_norm  │ 0.349533  │ +│ skyrl.ai/learning_rate │ 0.000072  │ +│ time_total  │ 76.505576 │ +│ train_mean_nll  │ 2.358591  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000070  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36087  │ +│ progress  │ 0.297297  │ +│ skyrl.ai/grad_norm  │ 0.437823  │ +│ skyrl.ai/learning_rate │ 0.000070  │ +│ time_total  │ 77.491332 │ +│ train_mean_nll  │ 2.352475  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000069  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33348  │ +│ progress  │ 0.310811  │ +│ skyrl.ai/grad_norm  │ 0.675538  │ +│ skyrl.ai/learning_rate │ 0.000069  │ +│ time_total  │ 76.467921 │ +│ train_mean_nll  │ 2.315181  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000068  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35210  │ +│ progress  │ 0.324324  │ +│ skyrl.ai/grad_norm  │ 0.366941  │ +│ skyrl.ai/learning_rate │ 0.000068  │ +│ time_total  │ 75.492159 │ +│ train_mean_nll  │ 2.383331  │ +└────────────────────────┴───────────┘ 
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000066  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34833  │ +│ progress  │ 0.337838  │ +│ skyrl.ai/grad_norm  │ 0.393730  │ +│ skyrl.ai/learning_rate │ 0.000066  │ +│ time_total  │ 76.488087 │ +│ train_mean_nll  │ 2.446757  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000065  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37256  │ +│ progress  │ 0.351351  │ +│ skyrl.ai/grad_norm  │ 0.384646  │ +│ skyrl.ai/learning_rate │ 0.000065  │ +│ time_total  │ 76.638779 │ +│ train_mean_nll  │ 2.410129  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000064  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33947  │ +│ progress  │ 0.364865  │ +│ skyrl.ai/grad_norm  │ 0.334691  │ +│ skyrl.ai/learning_rate │ 0.000064  │ +│ time_total  │ 77.462294 │ +│ train_mean_nll  │ 2.350893  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000062  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 32694  │ +│ progress  │ 0.378378  │ +│ skyrl.ai/grad_norm  │ 0.336949  │ +│ skyrl.ai/learning_rate │ 0.000062  │ +│ time_total  │ 77.451585 │ +│ train_mean_nll  │ 2.434802  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000061  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37440  │ +│ progress  │ 0.391892  │ +│ skyrl.ai/grad_norm  │ 0.361362  │ +│ skyrl.ai/learning_rate │ 0.000061  │ +│ time_total  │ 77.477361 │ +│ train_mean_nll  │ 2.170828  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 30  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000059  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39453  │ +│ progress  │ 0.405405  │ +│ skyrl.ai/grad_norm  │ 0.326734  │ +│ skyrl.ai/learning_rate │ 0.000059  │ +│ time_total  │ 76.520573 │ +│ train_mean_nll  │ 2.284935  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 31  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000058  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36143  │ +│ 
progress  │ 0.418919  │ +│ skyrl.ai/grad_norm  │ 0.329552  │ +│ skyrl.ai/learning_rate │ 0.000058  │ +│ time_total  │ 76.565412 │ +│ train_mean_nll  │ 2.280433  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 32  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000057  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34808  │ +│ progress  │ 0.432432  │ +│ skyrl.ai/grad_norm  │ 0.382980  │ +│ skyrl.ai/learning_rate │ 0.000057  │ +│ time_total  │ 76.469333 │ +│ train_mean_nll  │ 2.354665  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 33  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000055  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 40403  │ +│ progress  │ 0.445946  │ +│ skyrl.ai/grad_norm  │ 0.291227  │ +│ skyrl.ai/learning_rate │ 0.000055  │ +│ time_total  │ 77.651549 │ +│ train_mean_nll  │ 2.232926  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 34  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000054  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33581  │ +│ progress  │ 0.459459  │ +│ skyrl.ai/grad_norm  │ 0.329446  │ +│ skyrl.ai/learning_rate │ 0.000054  │ +│ time_total  │ 76.468443 │ +│ train_mean_nll  │ 2.358539  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 35  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000053  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35478  │ +│ progress  │ 0.472973  │ +│ skyrl.ai/grad_norm  │ 0.268784  │ +│ skyrl.ai/learning_rate │ 0.000053  │ +│ time_total  │ 76.480491 │ +│ train_mean_nll  │ 2.342692  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 36  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000051  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37613  │ +│ progress  │ 0.486486  │ +│ skyrl.ai/grad_norm  │ 0.436201  │ +│ skyrl.ai/learning_rate │ 0.000051  │ +│ time_total  │ 76.597426 │ +│ train_mean_nll  │ 2.253095  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 37  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000050  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37243  │ +│ progress  │ 0.500000  │ +│ skyrl.ai/grad_norm  │ 0.328390  │ +│ skyrl.ai/learning_rate │ 0.000050  │ +│ time_total  │ 40.495865 │ +│ train_mean_nll  │ 2.455515  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + 
Step 38  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000049  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34551  │ +│ progress  │ 0.513514  │ +│ skyrl.ai/grad_norm  │ 0.355869  │ +│ skyrl.ai/learning_rate │ 0.000049  │ +│ time_total  │ 76.466278 │ +│ train_mean_nll  │ 2.281497  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 39  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000047  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34720  │ +│ progress  │ 0.527027  │ +│ skyrl.ai/grad_norm  │ 0.324688  │ +│ skyrl.ai/learning_rate │ 0.000047  │ +│ time_total  │ 76.547050 │ +│ train_mean_nll  │ 2.224408  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_973e0c8b/weights/000040'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 40  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000046  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37150  │ +│ progress  │ 0.540541  │ +│ skyrl.ai/grad_norm  │ 0.290177  │ +│ skyrl.ai/learning_rate │ 0.000046  │ +│ time_total  │ 116.764540 │ +│ train_mean_nll  │ 2.320617  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 41  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000045  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39832  │ +│ progress  │ 0.554054  │ +│ skyrl.ai/grad_norm  │ 0.329142  │ +│ skyrl.ai/learning_rate │ 0.000045  │ +│ time_total  │ 75.531128 │ +│ train_mean_nll  │ 2.251125  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 42  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000043  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 41346  │ +│ progress  │ 0.567568  │ +│ skyrl.ai/grad_norm  │ 0.342816  │ +│ skyrl.ai/learning_rate │ 0.000043  │ +│ time_total  │ 77.515983 │ +│ train_mean_nll  │ 2.186094  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_b.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 43  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000042  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35728  │ +│ progress  │ 0.581081  │ +│ skyrl.ai/grad_norm  │ 0.308010  │ +│ skyrl.ai/learning_rate │ 0.000042  │ +│ time_total  │ 76.476230 │ +│ train_mean_nll  │ 2.389423  │ +└────────────────────────┴───────────┘ diff --git a/tests/tinker/smoke_logs/sl_loop_b.log/metrics.jsonl b/tests/tinker/smoke_logs/sl_loop_b.log/metrics.jsonl new file mode 100644 index 0000000000..8e0b9aa553 --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_b.log/metrics.jsonl @@ -0,0 +1,44 @@ +{"step": 0, "skyrl.ai/grad_norm": 1.4195386534746055, "skyrl.ai/learning_rate": 0.0001, 
"num_sequences": 128, "num_tokens": 37834, "learning_rate": 0.0001, "train_mean_nll": 2.9373137950897217, "progress": 0.0, "time_total": 77.51847863197327} +{"step": 1, "skyrl.ai/grad_norm": 1.7873204841370076, "skyrl.ai/learning_rate": 9.864864864864865e-05, "num_sequences": 128, "num_tokens": 35654, "learning_rate": 9.864864864864865e-05, "train_mean_nll": 3.1715967655181885, "progress": 0.013513513513513514, "time_total": 77.48969888687134} +{"step": 2, "skyrl.ai/grad_norm": 1.3872389986086984, "skyrl.ai/learning_rate": 9.729729729729731e-05, "num_sequences": 128, "num_tokens": 38474, "learning_rate": 9.729729729729731e-05, "train_mean_nll": 2.826397180557251, "progress": 0.02702702702702703, "time_total": 77.49515557289124} +{"step": 3, "skyrl.ai/grad_norm": 1.347173808998758, "skyrl.ai/learning_rate": 9.594594594594595e-05, "num_sequences": 128, "num_tokens": 39781, "learning_rate": 9.594594594594595e-05, "train_mean_nll": 2.7342612743377686, "progress": 0.04054054054054054, "time_total": 76.50497913360596} +{"step": 4, "skyrl.ai/grad_norm": 1.0569022766058442, "skyrl.ai/learning_rate": 9.45945945945946e-05, "num_sequences": 128, "num_tokens": 37346, "learning_rate": 9.45945945945946e-05, "train_mean_nll": 2.7135438919067383, "progress": 0.05405405405405406, "time_total": 76.50081324577332} +{"step": 5, "skyrl.ai/grad_norm": 1.0375181495273973, "skyrl.ai/learning_rate": 9.324324324324324e-05, "num_sequences": 128, "num_tokens": 38710, "learning_rate": 9.324324324324324e-05, "train_mean_nll": 2.544316530227661, "progress": 0.06756756756756757, "time_total": 76.51027178764343} +{"step": 6, "skyrl.ai/grad_norm": 0.7603481383776933, "skyrl.ai/learning_rate": 9.189189189189189e-05, "num_sequences": 128, "num_tokens": 33356, "learning_rate": 9.189189189189189e-05, "train_mean_nll": 2.698561191558838, "progress": 0.08108108108108109, "time_total": 76.46849489212036} +{"step": 7, "skyrl.ai/grad_norm": 0.6214665187953508, "skyrl.ai/learning_rate": 9.054054054054055e-05, "num_sequences": 128, "num_tokens": 39115, "learning_rate": 9.054054054054055e-05, "train_mean_nll": 2.502498149871826, "progress": 0.0945945945945946, "time_total": 76.52571058273315} +{"step": 8, "skyrl.ai/grad_norm": 0.5979632168990555, "skyrl.ai/learning_rate": 8.918918918918919e-05, "num_sequences": 128, "num_tokens": 38640, "learning_rate": 8.918918918918919e-05, "train_mean_nll": 2.4642891883850098, "progress": 0.10810810810810811, "time_total": 76.50697350502014} +{"step": 9, "skyrl.ai/grad_norm": 0.5157547700747354, "skyrl.ai/learning_rate": 8.783783783783784e-05, "num_sequences": 128, "num_tokens": 38863, "learning_rate": 8.783783783783784e-05, "train_mean_nll": 2.393878221511841, "progress": 0.12162162162162163, "time_total": 76.510009765625} +{"step": 10, "skyrl.ai/grad_norm": 0.6782504928072451, "skyrl.ai/learning_rate": 8.64864864864865e-05, "num_sequences": 128, "num_tokens": 33155, "learning_rate": 8.64864864864865e-05, "train_mean_nll": 2.5402684211730957, "progress": 0.13513513513513514, "time_total": 76.4772458076477} +{"step": 11, "skyrl.ai/grad_norm": 0.630784871807005, "skyrl.ai/learning_rate": 8.513513513513514e-05, "num_sequences": 128, "num_tokens": 38406, "learning_rate": 8.513513513513514e-05, "train_mean_nll": 2.4754910469055176, "progress": 0.14864864864864866, "time_total": 76.50025868415833} +{"step": 12, "skyrl.ai/grad_norm": 0.5905507883748928, "skyrl.ai/learning_rate": 8.378378378378379e-05, "num_sequences": 128, "num_tokens": 38843, "learning_rate": 8.378378378378379e-05, "train_mean_nll": 
2.34442400932312, "progress": 0.16216216216216217, "time_total": 76.50643873214722} +{"step": 13, "skyrl.ai/grad_norm": 0.546812244629307, "skyrl.ai/learning_rate": 8.243243243243243e-05, "num_sequences": 128, "num_tokens": 35583, "learning_rate": 8.243243243243243e-05, "train_mean_nll": 2.3814659118652344, "progress": 0.17567567567567569, "time_total": 76.59175086021423} +{"step": 14, "skyrl.ai/grad_norm": 0.47558408889567894, "skyrl.ai/learning_rate": 8.108108108108108e-05, "num_sequences": 128, "num_tokens": 36717, "learning_rate": 8.108108108108108e-05, "train_mean_nll": 2.2909858226776123, "progress": 0.1891891891891892, "time_total": 76.3746542930603} +{"step": 15, "skyrl.ai/grad_norm": 0.5176009550997458, "skyrl.ai/learning_rate": 7.972972972972974e-05, "num_sequences": 128, "num_tokens": 34002, "learning_rate": 7.972972972972974e-05, "train_mean_nll": 2.4199159145355225, "progress": 0.20270270270270271, "time_total": 76.46169948577881} +{"step": 16, "skyrl.ai/grad_norm": 0.3592592550557549, "skyrl.ai/learning_rate": 7.837837837837838e-05, "num_sequences": 128, "num_tokens": 38620, "learning_rate": 7.837837837837838e-05, "train_mean_nll": 2.389112949371338, "progress": 0.21621621621621623, "time_total": 75.6047306060791} +{"step": 17, "skyrl.ai/grad_norm": 0.33816451069248143, "skyrl.ai/learning_rate": 7.702702702702703e-05, "num_sequences": 128, "num_tokens": 37639, "learning_rate": 7.702702702702703e-05, "train_mean_nll": 2.2598676681518555, "progress": 0.22972972972972974, "time_total": 42.50693464279175} +{"step": 18, "skyrl.ai/grad_norm": 0.41660331800817935, "skyrl.ai/learning_rate": 7.567567567567568e-05, "num_sequences": 128, "num_tokens": 34118, "learning_rate": 7.567567567567568e-05, "train_mean_nll": 2.409773588180542, "progress": 0.24324324324324326, "time_total": 76.72089982032776} +{"step": 19, "skyrl.ai/grad_norm": 0.37058514324435654, "skyrl.ai/learning_rate": 7.432432432432433e-05, "num_sequences": 128, "num_tokens": 37178, "learning_rate": 7.432432432432433e-05, "train_mean_nll": 2.3922526836395264, "progress": 0.25675675675675674, "time_total": 76.48990035057068} +{"step": 20, "skyrl.ai/grad_norm": 0.3480550321913909, "skyrl.ai/learning_rate": 7.297297297297297e-05, "num_sequences": 128, "num_tokens": 35309, "learning_rate": 7.297297297297297e-05, "train_mean_nll": 2.357067823410034, "progress": 0.2702702702702703, "time_total": 116.66211080551147} +{"step": 21, "skyrl.ai/grad_norm": 0.34953308338610417, "skyrl.ai/learning_rate": 7.162162162162162e-05, "num_sequences": 128, "num_tokens": 37229, "learning_rate": 7.162162162162162e-05, "train_mean_nll": 2.358590841293335, "progress": 0.28378378378378377, "time_total": 76.50557613372803} +{"step": 22, "skyrl.ai/grad_norm": 0.4378227848312563, "skyrl.ai/learning_rate": 7.027027027027026e-05, "num_sequences": 128, "num_tokens": 36087, "learning_rate": 7.027027027027026e-05, "train_mean_nll": 2.352475166320801, "progress": 0.2972972972972973, "time_total": 77.49133205413818} +{"step": 23, "skyrl.ai/grad_norm": 0.6755377711219428, "skyrl.ai/learning_rate": 6.891891891891892e-05, "num_sequences": 128, "num_tokens": 33348, "learning_rate": 6.891891891891892e-05, "train_mean_nll": 2.3151814937591553, "progress": 0.3108108108108108, "time_total": 76.4679205417633} +{"step": 24, "skyrl.ai/grad_norm": 0.36694108527031516, "skyrl.ai/learning_rate": 6.756756756756757e-05, "num_sequences": 128, "num_tokens": 35210, "learning_rate": 6.756756756756757e-05, "train_mean_nll": 2.383330821990967, "progress": 0.32432432432432434, 
"time_total": 75.49215888977051} +{"step": 25, "skyrl.ai/grad_norm": 0.3937296892407543, "skyrl.ai/learning_rate": 6.621621621621621e-05, "num_sequences": 128, "num_tokens": 34833, "learning_rate": 6.621621621621621e-05, "train_mean_nll": 2.4467570781707764, "progress": 0.33783783783783783, "time_total": 76.48808670043945} +{"step": 26, "skyrl.ai/grad_norm": 0.3846464408202079, "skyrl.ai/learning_rate": 6.486486486486487e-05, "num_sequences": 128, "num_tokens": 37256, "learning_rate": 6.486486486486487e-05, "train_mean_nll": 2.4101293087005615, "progress": 0.35135135135135137, "time_total": 76.63877940177917} +{"step": 27, "skyrl.ai/grad_norm": 0.33469089711136596, "skyrl.ai/learning_rate": 6.351351351351352e-05, "num_sequences": 128, "num_tokens": 33947, "learning_rate": 6.351351351351352e-05, "train_mean_nll": 2.350893020629883, "progress": 0.36486486486486486, "time_total": 77.46229386329651} +{"step": 28, "skyrl.ai/grad_norm": 0.336948956880145, "skyrl.ai/learning_rate": 6.216216216216216e-05, "num_sequences": 128, "num_tokens": 32694, "learning_rate": 6.216216216216216e-05, "train_mean_nll": 2.4348015785217285, "progress": 0.3783783783783784, "time_total": 77.451584815979} +{"step": 29, "skyrl.ai/grad_norm": 0.36136163092248136, "skyrl.ai/learning_rate": 6.0810810810810814e-05, "num_sequences": 128, "num_tokens": 37440, "learning_rate": 6.0810810810810814e-05, "train_mean_nll": 2.170828342437744, "progress": 0.3918918918918919, "time_total": 77.47736096382141} +{"step": 30, "skyrl.ai/grad_norm": 0.32673394434664593, "skyrl.ai/learning_rate": 5.945945945945945e-05, "num_sequences": 128, "num_tokens": 39453, "learning_rate": 5.945945945945945e-05, "train_mean_nll": 2.284935235977173, "progress": 0.40540540540540543, "time_total": 76.52057337760925} +{"step": 31, "skyrl.ai/grad_norm": 0.32955249716070606, "skyrl.ai/learning_rate": 5.810810810810812e-05, "num_sequences": 128, "num_tokens": 36143, "learning_rate": 5.810810810810812e-05, "train_mean_nll": 2.280432939529419, "progress": 0.4189189189189189, "time_total": 76.56541204452515} +{"step": 32, "skyrl.ai/grad_norm": 0.38298011535688614, "skyrl.ai/learning_rate": 5.6756756756756757e-05, "num_sequences": 128, "num_tokens": 34808, "learning_rate": 5.6756756756756757e-05, "train_mean_nll": 2.3546645641326904, "progress": 0.43243243243243246, "time_total": 76.4693329334259} +{"step": 33, "skyrl.ai/grad_norm": 0.29122662415652056, "skyrl.ai/learning_rate": 5.540540540540541e-05, "num_sequences": 128, "num_tokens": 40403, "learning_rate": 5.540540540540541e-05, "train_mean_nll": 2.2329256534576416, "progress": 0.44594594594594594, "time_total": 77.65154910087585} +{"step": 34, "skyrl.ai/grad_norm": 0.32944563359840584, "skyrl.ai/learning_rate": 5.405405405405406e-05, "num_sequences": 128, "num_tokens": 33581, "learning_rate": 5.405405405405406e-05, "train_mean_nll": 2.35853910446167, "progress": 0.4594594594594595, "time_total": 76.46844339370728} +{"step": 35, "skyrl.ai/grad_norm": 0.26878365206169, "skyrl.ai/learning_rate": 5.27027027027027e-05, "num_sequences": 128, "num_tokens": 35478, "learning_rate": 5.27027027027027e-05, "train_mean_nll": 2.342691659927368, "progress": 0.47297297297297297, "time_total": 76.48049068450928} +{"step": 36, "skyrl.ai/grad_norm": 0.4362009722094959, "skyrl.ai/learning_rate": 5.135135135135135e-05, "num_sequences": 128, "num_tokens": 37613, "learning_rate": 5.135135135135135e-05, "train_mean_nll": 2.2530946731567383, "progress": 0.4864864864864865, "time_total": 76.59742593765259} +{"step": 37, 
"skyrl.ai/grad_norm": 0.3283900938474904, "skyrl.ai/learning_rate": 5e-05, "num_sequences": 128, "num_tokens": 37243, "learning_rate": 5e-05, "train_mean_nll": 2.455514669418335, "progress": 0.5, "time_total": 40.49586510658264} +{"step": 38, "skyrl.ai/grad_norm": 0.3558688165645349, "skyrl.ai/learning_rate": 4.8648648648648654e-05, "num_sequences": 128, "num_tokens": 34551, "learning_rate": 4.8648648648648654e-05, "train_mean_nll": 2.2814974784851074, "progress": 0.5135135135135135, "time_total": 76.46627807617188} +{"step": 39, "skyrl.ai/grad_norm": 0.32468846777146654, "skyrl.ai/learning_rate": 4.7297297297297306e-05, "num_sequences": 128, "num_tokens": 34720, "learning_rate": 4.7297297297297306e-05, "train_mean_nll": 2.224407911300659, "progress": 0.527027027027027, "time_total": 76.54704976081848} +{"step": 40, "skyrl.ai/grad_norm": 0.2901767827597186, "skyrl.ai/learning_rate": 4.5945945945945944e-05, "num_sequences": 128, "num_tokens": 37150, "learning_rate": 4.5945945945945944e-05, "train_mean_nll": 2.3206169605255127, "progress": 0.5405405405405406, "time_total": 116.76453971862793} +{"step": 41, "skyrl.ai/grad_norm": 0.3291420954072253, "skyrl.ai/learning_rate": 4.4594594594594596e-05, "num_sequences": 128, "num_tokens": 39832, "learning_rate": 4.4594594594594596e-05, "train_mean_nll": 2.251124858856201, "progress": 0.5540540540540541, "time_total": 75.5311279296875} +{"step": 42, "skyrl.ai/grad_norm": 0.34281551613606653, "skyrl.ai/learning_rate": 4.324324324324325e-05, "num_sequences": 128, "num_tokens": 41346, "learning_rate": 4.324324324324325e-05, "train_mean_nll": 2.18609356880188, "progress": 0.5675675675675675, "time_total": 77.51598334312439} +{"step": 43, "skyrl.ai/grad_norm": 0.30801024983275643, "skyrl.ai/learning_rate": 4.18918918918919e-05, "num_sequences": 128, "num_tokens": 35728, "learning_rate": 4.18918918918919e-05, "train_mean_nll": 2.389422655105591, "progress": 0.581081081081081, "time_total": 76.47622966766357} diff --git a/tests/tinker/smoke_logs/sl_loop_c.log/checkpoints.jsonl b/tests/tinker/smoke_logs/sl_loop_c.log/checkpoints.jsonl new file mode 100644 index 0000000000..4192b1f4ce --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_c.log/checkpoints.jsonl @@ -0,0 +1,2 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_3cb4546d/weights/000020"} +{"name": "000040", "batch": 40, "state_path": "tinker://model_3cb4546d/weights/000040"} diff --git a/tests/tinker/smoke_logs/sl_loop_c.log/code.diff b/tests/tinker/smoke_logs/sl_loop_c.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_c.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/sl_loop_c.log/config.json b/tests/tinker/smoke_logs/sl_loop_c.log/config.json new file mode 100644 index 0000000000..495b342f3a --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_c.log/config.json @@ -0,0 +1,12 @@ +{ + "base_url": "http://localhost:8000", + "log_path": "/tmp/sl_loop_c.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "learning_rate": 0.0001, + "max_length": 32768, + "train_on_what": "last_assistant_message", + "lora_rank": 32, + "save_every": 20, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/sl_loop_c.log/logs.log b/tests/tinker/smoke_logs/sl_loop_c.log/logs.log new file mode 100644 index 0000000000..44c742f0a5 --- /dev/null +++ 
b/tests/tinker/smoke_logs/sl_loop_c.log/logs.log @@ -0,0 +1,674 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/sl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B train_on_what=LAST_ASSISTANT_MESSAGE lora_rank=32 log_path=/tmp/sl_loop_c.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/sl_loop_c.log +__main__:51 [INFO] Using renderer: qwen3 +__main__:54 [INFO] Loading dataset... +__main__:64 [INFO] Dropping last 28 examples to keep batch size uniform at 128 +__main__:67 [INFO] Train batches: 74 +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_6fdc40ca +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/sl_loop_c.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/sl_loop_c.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_3cb4546d +__main__:87 [INFO] Training for 74 steps +tinker_cookbook.supervised.common:188 [INFO] Weight reduction: 'mean' (token-mean loss) +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000100  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37834  │ +│ progress  │ 0.000000  │ +│ skyrl.ai/grad_norm  │ 1.419494  │ +│ skyrl.ai/learning_rate │ 0.000100  │ +│ time_total  │ 87.546265 │ +│ train_mean_nll  │ 2.937314  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000099  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35654  │ +│ progress  │ 0.013514  │ +│ skyrl.ai/grad_norm  │ 1.749522  │ +│ skyrl.ai/learning_rate │ 0.000099  │ +│ time_total  │ 40.494420 │ +│ train_mean_nll  │ 3.141417  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000097  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38474  │ +│ progress  │ 0.027027  │ +│ skyrl.ai/grad_norm  │ 1.374225  │ +│ skyrl.ai/learning_rate │ 0.000097  │ +│ time_total  │ 77.749164 │ +│ train_mean_nll  │ 2.757701  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000096  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39781  │ +│ progress  │ 0.040541  │ +│ skyrl.ai/grad_norm  │ 1.003446  │ +│ skyrl.ai/learning_rate │ 0.000096  │ +│ time_total  │ 77.538416 │ +│ train_mean_nll  │ 2.669955  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000095 
 │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37346  │ +│ progress  │ 0.054054  │ +│ skyrl.ai/grad_norm  │ 1.014955  │ +│ skyrl.ai/learning_rate │ 0.000095  │ +│ time_total  │ 77.584343 │ +│ train_mean_nll  │ 2.654062  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000093  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38710  │ +│ progress  │ 0.067568  │ +│ skyrl.ai/grad_norm  │ 0.996195  │ +│ skyrl.ai/learning_rate │ 0.000093  │ +│ time_total  │ 76.493607 │ +│ train_mean_nll  │ 2.506083  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000092  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33356  │ +│ progress  │ 0.081081  │ +│ skyrl.ai/grad_norm  │ 0.641986  │ +│ skyrl.ai/learning_rate │ 0.000092  │ +│ time_total  │ 76.474288 │ +│ train_mean_nll  │ 2.669612  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000091  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39115  │ +│ progress  │ 0.094595  │ +│ skyrl.ai/grad_norm  │ 0.563422  │ +│ skyrl.ai/learning_rate │ 0.000091  │ +│ time_total  │ 76.499918 │ +│ train_mean_nll  │ 2.484150  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000089  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38640  │ +│ progress  │ 0.108108  │ +│ skyrl.ai/grad_norm  │ 0.586878  │ +│ skyrl.ai/learning_rate │ 0.000089  │ +│ time_total  │ 76.500674 │ +│ train_mean_nll  │ 2.451504  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000088  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38863  │ +│ progress  │ 0.121622  │ +│ skyrl.ai/grad_norm  │ 0.488777  │ +│ skyrl.ai/learning_rate │ 0.000088  │ +│ time_total  │ 76.483361 │ +│ train_mean_nll  │ 2.384386  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000086  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33155  │ +│ progress  │ 0.135135  │ +│ skyrl.ai/grad_norm  │ 0.620995  │ +│ skyrl.ai/learning_rate │ 0.000086  │ +│ time_total  │ 76.466245 │ +│ train_mean_nll  │ 2.525290  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to 
/tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000085  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38406  │ +│ progress  │ 0.148649  │ +│ skyrl.ai/grad_norm  │ 0.633048  │ +│ skyrl.ai/learning_rate │ 0.000085  │ +│ time_total  │ 76.584131 │ +│ train_mean_nll  │ 2.464255  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000084  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38843  │ +│ progress  │ 0.162162  │ +│ skyrl.ai/grad_norm  │ 0.579784  │ +│ skyrl.ai/learning_rate │ 0.000084  │ +│ time_total  │ 77.508669 │ +│ train_mean_nll  │ 2.335011  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000082  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35583  │ +│ progress  │ 0.175676  │ +│ skyrl.ai/grad_norm  │ 0.543443  │ +│ skyrl.ai/learning_rate │ 0.000082  │ +│ time_total  │ 77.479943 │ +│ train_mean_nll  │ 2.372522  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000081  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36717  │ +│ progress  │ 0.189189  │ +│ skyrl.ai/grad_norm  │ 0.451880  │ +│ skyrl.ai/learning_rate │ 0.000081  │ +│ time_total  │ 77.489390 │ +│ train_mean_nll  │ 2.286112  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000080  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34002  │ +│ progress  │ 0.202703  │ +│ skyrl.ai/grad_norm  │ 0.472303  │ +│ skyrl.ai/learning_rate │ 0.000080  │ +│ time_total  │ 77.467145 │ +│ train_mean_nll  │ 2.414178  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000078  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38620  │ +│ progress  │ 0.216216  │ +│ skyrl.ai/grad_norm  │ 0.346693  │ +│ skyrl.ai/learning_rate │ 0.000078  │ +│ time_total  │ 77.586419 │ +│ train_mean_nll  │ 2.385986  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000077  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37639  │ +│ progress  │ 0.229730  │ +│ skyrl.ai/grad_norm  │ 0.330259  │ +│ 
skyrl.ai/learning_rate │ 0.000077  │ +│ time_total  │ 75.488438 │ +│ train_mean_nll  │ 2.257043  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000076  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34118  │ +│ progress  │ 0.243243  │ +│ skyrl.ai/grad_norm  │ 0.389382  │ +│ skyrl.ai/learning_rate │ 0.000076  │ +│ time_total  │ 77.459734 │ +│ train_mean_nll  │ 2.406650  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000074  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37178  │ +│ progress  │ 0.256757  │ +│ skyrl.ai/grad_norm  │ 0.367051  │ +│ skyrl.ai/learning_rate │ 0.000074  │ +│ time_total  │ 76.488872 │ +│ train_mean_nll  │ 2.391297  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_3cb4546d/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000073  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35309  │ +│ progress  │ 0.270270  │ +│ skyrl.ai/grad_norm  │ 0.345469  │ +│ skyrl.ai/learning_rate │ 0.000073  │ +│ time_total  │ 118.814218 │ +│ train_mean_nll  │ 2.355332  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000072  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37229  │ +│ progress  │ 0.283784  │ +│ skyrl.ai/grad_norm  │ 0.347097  │ +│ skyrl.ai/learning_rate │ 0.000072  │ +│ time_total  │ 40.484439 │ +│ train_mean_nll  │ 2.357981  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000070  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36087  │ +│ progress  │ 0.297297  │ +│ skyrl.ai/grad_norm  │ 0.436165  │ +│ skyrl.ai/learning_rate │ 0.000070  │ +│ time_total  │ 76.482713 │ +│ train_mean_nll  │ 2.351512  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 23  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000069  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33348  │ +│ progress  │ 0.310811  │ +│ skyrl.ai/grad_norm  │ 0.679087  │ +│ skyrl.ai/learning_rate │ 0.000069  │ +│ time_total  │ 75.449901 │ +│ train_mean_nll  │ 2.315460  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to 
/tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 24  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000068  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35210  │ +│ progress  │ 0.324324  │ +│ skyrl.ai/grad_norm  │ 0.366201  │ +│ skyrl.ai/learning_rate │ 0.000068  │ +│ time_total  │ 76.476222 │ +│ train_mean_nll  │ 2.382999  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 25  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000066  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34833  │ +│ progress  │ 0.337838  │ +│ skyrl.ai/grad_norm  │ 0.390126  │ +│ skyrl.ai/learning_rate │ 0.000066  │ +│ time_total  │ 76.471672 │ +│ train_mean_nll  │ 2.446065  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 26  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000065  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37256  │ +│ progress  │ 0.351351  │ +│ skyrl.ai/grad_norm  │ 0.381447  │ +│ skyrl.ai/learning_rate │ 0.000065  │ +│ time_total  │ 76.560245 │ +│ train_mean_nll  │ 2.409359  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 27  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000064  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33947  │ +│ progress  │ 0.364865  │ +│ skyrl.ai/grad_norm  │ 0.330323  │ +│ skyrl.ai/learning_rate │ 0.000064  │ +│ time_total  │ 76.468834 │ +│ train_mean_nll  │ 2.350033  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 28  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000062  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 32694  │ +│ progress  │ 0.378378  │ +│ skyrl.ai/grad_norm  │ 0.332719  │ +│ skyrl.ai/learning_rate │ 0.000062  │ +│ time_total  │ 76.454501 │ +│ train_mean_nll  │ 2.433611  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 29  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000061  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37440  │ +│ progress  │ 0.391892  │ +│ skyrl.ai/grad_norm  │ 0.358639  │ +│ skyrl.ai/learning_rate │ 0.000061  │ +│ time_total  │ 76.492569 │ +│ train_mean_nll  │ 2.170107  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 30  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000059  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39453  │ +│ progress  │ 0.405405  │ +│ skyrl.ai/grad_norm  │ 0.326137  │ +│ 
skyrl.ai/learning_rate │ 0.000059  │ +│ time_total  │ 76.509510 │ +│ train_mean_nll  │ 2.282948  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 31  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000058  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36143  │ +│ progress  │ 0.418919  │ +│ skyrl.ai/grad_norm  │ 0.325924  │ +│ skyrl.ai/learning_rate │ 0.000058  │ +│ time_total  │ 75.544014 │ +│ train_mean_nll  │ 2.279570  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 32  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000057  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34808  │ +│ progress  │ 0.432432  │ +│ skyrl.ai/grad_norm  │ 0.386430  │ +│ skyrl.ai/learning_rate │ 0.000057  │ +│ time_total  │ 77.480128 │ +│ train_mean_nll  │ 2.355539  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 33  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000055  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 40403  │ +│ progress  │ 0.445946  │ +│ skyrl.ai/grad_norm  │ 0.293365  │ +│ skyrl.ai/learning_rate │ 0.000055  │ +│ time_total  │ 76.508817 │ +│ train_mean_nll  │ 2.232615  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 34  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000054  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33581  │ +│ progress  │ 0.459459  │ +│ skyrl.ai/grad_norm  │ 0.344365  │ +│ skyrl.ai/learning_rate │ 0.000054  │ +│ time_total  │ 76.445399 │ +│ train_mean_nll  │ 2.357985  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 35  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000053  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35478  │ +│ progress  │ 0.472973  │ +│ skyrl.ai/grad_norm  │ 0.271450  │ +│ skyrl.ai/learning_rate │ 0.000053  │ +│ time_total  │ 75.625342 │ +│ train_mean_nll  │ 2.342813  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 36  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000051  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37613  │ +│ progress  │ 0.486486  │ +│ skyrl.ai/grad_norm  │ 0.436700  │ +│ skyrl.ai/learning_rate │ 0.000051  │ +│ time_total  │ 76.592943 │ +│ train_mean_nll  │ 2.252346  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 37  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ 
Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000050  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37243  │ +│ progress  │ 0.500000  │ +│ skyrl.ai/grad_norm  │ 0.323914  │ +│ skyrl.ai/learning_rate │ 0.000050  │ +│ time_total  │ 75.485813 │ +│ train_mean_nll  │ 2.455329  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 38  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000049  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34551  │ +│ progress  │ 0.513514  │ +│ skyrl.ai/grad_norm  │ 0.353730  │ +│ skyrl.ai/learning_rate │ 0.000049  │ +│ time_total  │ 76.462132 │ +│ train_mean_nll  │ 2.281997  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 39  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000047  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34720  │ +│ progress  │ 0.527027  │ +│ skyrl.ai/grad_norm  │ 0.325868  │ +│ skyrl.ai/learning_rate │ 0.000047  │ +│ time_total  │ 76.471973 │ +│ train_mean_nll  │ 2.224411  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_3cb4546d/weights/000040'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 40  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000046  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37150  │ +│ progress  │ 0.540541  │ +│ skyrl.ai/grad_norm  │ 0.291081  │ +│ skyrl.ai/learning_rate │ 0.000046  │ +│ time_total  │ 115.698803 │ +│ train_mean_nll  │ 2.320339  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 41  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000045  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39832  │ +│ progress  │ 0.554054  │ +│ skyrl.ai/grad_norm  │ 0.330219  │ +│ skyrl.ai/learning_rate │ 0.000045  │ +│ time_total  │ 40.640421 │ +│ train_mean_nll  │ 2.251844  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 42  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000043  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 41346  │ +│ progress  │ 0.567568  │ +│ skyrl.ai/grad_norm  │ 0.348969  │ +│ skyrl.ai/learning_rate │ 0.000043  │ +│ time_total  │ 76.516095 │ +│ train_mean_nll  │ 2.186341  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_c.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 43  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000042  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35728  │ +│ progress  │ 0.581081  │ +│ skyrl.ai/grad_norm  │ 0.307291  │ +│ 
skyrl.ai/learning_rate │ 0.000042  │ +│ time_total  │ 76.478968 │ +│ train_mean_nll  │ 2.388552  │ +└────────────────────────┴───────────┘ diff --git a/tests/tinker/smoke_logs/sl_loop_c.log/metrics.jsonl b/tests/tinker/smoke_logs/sl_loop_c.log/metrics.jsonl new file mode 100644 index 0000000000..16149b840f --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_c.log/metrics.jsonl @@ -0,0 +1,44 @@ +{"step": 0, "skyrl.ai/grad_norm": 1.4194938927680651, "skyrl.ai/learning_rate": 0.0001, "num_sequences": 128, "num_tokens": 37834, "learning_rate": 0.0001, "train_mean_nll": 2.9373137950897217, "progress": 0.0, "time_total": 87.54626536369324} +{"step": 1, "skyrl.ai/grad_norm": 1.7495224164784222, "skyrl.ai/learning_rate": 9.864864864864865e-05, "num_sequences": 128, "num_tokens": 35654, "learning_rate": 9.864864864864865e-05, "train_mean_nll": 3.141416549682617, "progress": 0.013513513513513514, "time_total": 40.49442005157471} +{"step": 2, "skyrl.ai/grad_norm": 1.3742247910445407, "skyrl.ai/learning_rate": 9.729729729729731e-05, "num_sequences": 128, "num_tokens": 38474, "learning_rate": 9.729729729729731e-05, "train_mean_nll": 2.7577013969421387, "progress": 0.02702702702702703, "time_total": 77.74916410446167} +{"step": 3, "skyrl.ai/grad_norm": 1.0034464811186297, "skyrl.ai/learning_rate": 9.594594594594595e-05, "num_sequences": 128, "num_tokens": 39781, "learning_rate": 9.594594594594595e-05, "train_mean_nll": 2.669954776763916, "progress": 0.04054054054054054, "time_total": 77.53841614723206} +{"step": 4, "skyrl.ai/grad_norm": 1.014955270467524, "skyrl.ai/learning_rate": 9.45945945945946e-05, "num_sequences": 128, "num_tokens": 37346, "learning_rate": 9.45945945945946e-05, "train_mean_nll": 2.654061794281006, "progress": 0.05405405405405406, "time_total": 77.58434295654297} +{"step": 5, "skyrl.ai/grad_norm": 0.9961948418275205, "skyrl.ai/learning_rate": 9.324324324324324e-05, "num_sequences": 128, "num_tokens": 38710, "learning_rate": 9.324324324324324e-05, "train_mean_nll": 2.5060834884643555, "progress": 0.06756756756756757, "time_total": 76.49360680580139} +{"step": 6, "skyrl.ai/grad_norm": 0.6419861220214568, "skyrl.ai/learning_rate": 9.189189189189189e-05, "num_sequences": 128, "num_tokens": 33356, "learning_rate": 9.189189189189189e-05, "train_mean_nll": 2.669612407684326, "progress": 0.08108108108108109, "time_total": 76.47428822517395} +{"step": 7, "skyrl.ai/grad_norm": 0.5634222153184144, "skyrl.ai/learning_rate": 9.054054054054055e-05, "num_sequences": 128, "num_tokens": 39115, "learning_rate": 9.054054054054055e-05, "train_mean_nll": 2.484149694442749, "progress": 0.0945945945945946, "time_total": 76.49991822242737} +{"step": 8, "skyrl.ai/grad_norm": 0.5868782375274831, "skyrl.ai/learning_rate": 8.918918918918919e-05, "num_sequences": 128, "num_tokens": 38640, "learning_rate": 8.918918918918919e-05, "train_mean_nll": 2.4515039920806885, "progress": 0.10810810810810811, "time_total": 76.50067448616028} +{"step": 9, "skyrl.ai/grad_norm": 0.48877668042056244, "skyrl.ai/learning_rate": 8.783783783783784e-05, "num_sequences": 128, "num_tokens": 38863, "learning_rate": 8.783783783783784e-05, "train_mean_nll": 2.384385585784912, "progress": 0.12162162162162163, "time_total": 76.48336100578308} +{"step": 10, "skyrl.ai/grad_norm": 0.6209951118501068, "skyrl.ai/learning_rate": 8.64864864864865e-05, "num_sequences": 128, "num_tokens": 33155, "learning_rate": 8.64864864864865e-05, "train_mean_nll": 2.525290012359619, "progress": 0.13513513513513514, "time_total": 76.4662446975708} +{"step": 11, 
"skyrl.ai/grad_norm": 0.6330475311859658, "skyrl.ai/learning_rate": 8.513513513513514e-05, "num_sequences": 128, "num_tokens": 38406, "learning_rate": 8.513513513513514e-05, "train_mean_nll": 2.46425461769104, "progress": 0.14864864864864866, "time_total": 76.58413076400757} +{"step": 12, "skyrl.ai/grad_norm": 0.5797838691336037, "skyrl.ai/learning_rate": 8.378378378378379e-05, "num_sequences": 128, "num_tokens": 38843, "learning_rate": 8.378378378378379e-05, "train_mean_nll": 2.3350110054016113, "progress": 0.16216216216216217, "time_total": 77.50866913795471} +{"step": 13, "skyrl.ai/grad_norm": 0.5434433763130382, "skyrl.ai/learning_rate": 8.243243243243243e-05, "num_sequences": 128, "num_tokens": 35583, "learning_rate": 8.243243243243243e-05, "train_mean_nll": 2.3725218772888184, "progress": 0.17567567567567569, "time_total": 77.47994303703308} +{"step": 14, "skyrl.ai/grad_norm": 0.45187986236742045, "skyrl.ai/learning_rate": 8.108108108108108e-05, "num_sequences": 128, "num_tokens": 36717, "learning_rate": 8.108108108108108e-05, "train_mean_nll": 2.286111831665039, "progress": 0.1891891891891892, "time_total": 77.48939037322998} +{"step": 15, "skyrl.ai/grad_norm": 0.4723029116416624, "skyrl.ai/learning_rate": 7.972972972972974e-05, "num_sequences": 128, "num_tokens": 34002, "learning_rate": 7.972972972972974e-05, "train_mean_nll": 2.414177894592285, "progress": 0.20270270270270271, "time_total": 77.46714520454407} +{"step": 16, "skyrl.ai/grad_norm": 0.3466929473490256, "skyrl.ai/learning_rate": 7.837837837837838e-05, "num_sequences": 128, "num_tokens": 38620, "learning_rate": 7.837837837837838e-05, "train_mean_nll": 2.385986089706421, "progress": 0.21621621621621623, "time_total": 77.58641862869263} +{"step": 17, "skyrl.ai/grad_norm": 0.3302586078980289, "skyrl.ai/learning_rate": 7.702702702702703e-05, "num_sequences": 128, "num_tokens": 37639, "learning_rate": 7.702702702702703e-05, "train_mean_nll": 2.2570433616638184, "progress": 0.22972972972972974, "time_total": 75.48843789100647} +{"step": 18, "skyrl.ai/grad_norm": 0.3893822355807422, "skyrl.ai/learning_rate": 7.567567567567568e-05, "num_sequences": 128, "num_tokens": 34118, "learning_rate": 7.567567567567568e-05, "train_mean_nll": 2.4066498279571533, "progress": 0.24324324324324326, "time_total": 77.45973420143127} +{"step": 19, "skyrl.ai/grad_norm": 0.3670506729696611, "skyrl.ai/learning_rate": 7.432432432432433e-05, "num_sequences": 128, "num_tokens": 37178, "learning_rate": 7.432432432432433e-05, "train_mean_nll": 2.3912971019744873, "progress": 0.25675675675675674, "time_total": 76.48887205123901} +{"step": 20, "skyrl.ai/grad_norm": 0.3454687541752905, "skyrl.ai/learning_rate": 7.297297297297297e-05, "num_sequences": 128, "num_tokens": 35309, "learning_rate": 7.297297297297297e-05, "train_mean_nll": 2.355332136154175, "progress": 0.2702702702702703, "time_total": 118.81421756744385} +{"step": 21, "skyrl.ai/grad_norm": 0.3470970650120757, "skyrl.ai/learning_rate": 7.162162162162162e-05, "num_sequences": 128, "num_tokens": 37229, "learning_rate": 7.162162162162162e-05, "train_mean_nll": 2.3579814434051514, "progress": 0.28378378378378377, "time_total": 40.48443937301636} +{"step": 22, "skyrl.ai/grad_norm": 0.4361649818702482, "skyrl.ai/learning_rate": 7.027027027027026e-05, "num_sequences": 128, "num_tokens": 36087, "learning_rate": 7.027027027027026e-05, "train_mean_nll": 2.3515121936798096, "progress": 0.2972972972972973, "time_total": 76.4827127456665} +{"step": 23, "skyrl.ai/grad_norm": 0.6790872312870737, 
"skyrl.ai/learning_rate": 6.891891891891892e-05, "num_sequences": 128, "num_tokens": 33348, "learning_rate": 6.891891891891892e-05, "train_mean_nll": 2.315459728240967, "progress": 0.3108108108108108, "time_total": 75.44990110397339} +{"step": 24, "skyrl.ai/grad_norm": 0.3662012124359752, "skyrl.ai/learning_rate": 6.756756756756757e-05, "num_sequences": 128, "num_tokens": 35210, "learning_rate": 6.756756756756757e-05, "train_mean_nll": 2.3829991817474365, "progress": 0.32432432432432434, "time_total": 76.47622203826904} +{"step": 25, "skyrl.ai/grad_norm": 0.39012587129983894, "skyrl.ai/learning_rate": 6.621621621621621e-05, "num_sequences": 128, "num_tokens": 34833, "learning_rate": 6.621621621621621e-05, "train_mean_nll": 2.4460654258728027, "progress": 0.33783783783783783, "time_total": 76.47167229652405} +{"step": 26, "skyrl.ai/grad_norm": 0.3814466008615295, "skyrl.ai/learning_rate": 6.486486486486487e-05, "num_sequences": 128, "num_tokens": 37256, "learning_rate": 6.486486486486487e-05, "train_mean_nll": 2.4093592166900635, "progress": 0.35135135135135137, "time_total": 76.56024527549744} +{"step": 27, "skyrl.ai/grad_norm": 0.33032280695631916, "skyrl.ai/learning_rate": 6.351351351351352e-05, "num_sequences": 128, "num_tokens": 33947, "learning_rate": 6.351351351351352e-05, "train_mean_nll": 2.3500328063964844, "progress": 0.36486486486486486, "time_total": 76.468834400177} +{"step": 28, "skyrl.ai/grad_norm": 0.3327186649193775, "skyrl.ai/learning_rate": 6.216216216216216e-05, "num_sequences": 128, "num_tokens": 32694, "learning_rate": 6.216216216216216e-05, "train_mean_nll": 2.433610677719116, "progress": 0.3783783783783784, "time_total": 76.45450067520142} +{"step": 29, "skyrl.ai/grad_norm": 0.35863921265811594, "skyrl.ai/learning_rate": 6.0810810810810814e-05, "num_sequences": 128, "num_tokens": 37440, "learning_rate": 6.0810810810810814e-05, "train_mean_nll": 2.170107126235962, "progress": 0.3918918918918919, "time_total": 76.49256873130798} +{"step": 30, "skyrl.ai/grad_norm": 0.3261373695807218, "skyrl.ai/learning_rate": 5.945945945945945e-05, "num_sequences": 128, "num_tokens": 39453, "learning_rate": 5.945945945945945e-05, "train_mean_nll": 2.2829484939575195, "progress": 0.40540540540540543, "time_total": 76.50950980186462} +{"step": 31, "skyrl.ai/grad_norm": 0.3259241339666833, "skyrl.ai/learning_rate": 5.810810810810812e-05, "num_sequences": 128, "num_tokens": 36143, "learning_rate": 5.810810810810812e-05, "train_mean_nll": 2.2795701026916504, "progress": 0.4189189189189189, "time_total": 75.5440137386322} +{"step": 32, "skyrl.ai/grad_norm": 0.3864299967711568, "skyrl.ai/learning_rate": 5.6756756756756757e-05, "num_sequences": 128, "num_tokens": 34808, "learning_rate": 5.6756756756756757e-05, "train_mean_nll": 2.355539083480835, "progress": 0.43243243243243246, "time_total": 77.48012828826904} +{"step": 33, "skyrl.ai/grad_norm": 0.29336461343164494, "skyrl.ai/learning_rate": 5.540540540540541e-05, "num_sequences": 128, "num_tokens": 40403, "learning_rate": 5.540540540540541e-05, "train_mean_nll": 2.232614517211914, "progress": 0.44594594594594594, "time_total": 76.50881671905518} +{"step": 34, "skyrl.ai/grad_norm": 0.34436466791746617, "skyrl.ai/learning_rate": 5.405405405405406e-05, "num_sequences": 128, "num_tokens": 33581, "learning_rate": 5.405405405405406e-05, "train_mean_nll": 2.357985496520996, "progress": 0.4594594594594595, "time_total": 76.44539904594421} +{"step": 35, "skyrl.ai/grad_norm": 0.2714500680008173, "skyrl.ai/learning_rate": 5.27027027027027e-05, 
"num_sequences": 128, "num_tokens": 35478, "learning_rate": 5.27027027027027e-05, "train_mean_nll": 2.3428127765655518, "progress": 0.47297297297297297, "time_total": 75.62534189224243} +{"step": 36, "skyrl.ai/grad_norm": 0.43669954370291814, "skyrl.ai/learning_rate": 5.135135135135135e-05, "num_sequences": 128, "num_tokens": 37613, "learning_rate": 5.135135135135135e-05, "train_mean_nll": 2.2523462772369385, "progress": 0.4864864864864865, "time_total": 76.59294319152832} +{"step": 37, "skyrl.ai/grad_norm": 0.3239142930227886, "skyrl.ai/learning_rate": 5e-05, "num_sequences": 128, "num_tokens": 37243, "learning_rate": 5e-05, "train_mean_nll": 2.455328941345215, "progress": 0.5, "time_total": 75.48581290245056} +{"step": 38, "skyrl.ai/grad_norm": 0.35372994180702944, "skyrl.ai/learning_rate": 4.8648648648648654e-05, "num_sequences": 128, "num_tokens": 34551, "learning_rate": 4.8648648648648654e-05, "train_mean_nll": 2.281996726989746, "progress": 0.5135135135135135, "time_total": 76.46213150024414} +{"step": 39, "skyrl.ai/grad_norm": 0.32586765378478905, "skyrl.ai/learning_rate": 4.7297297297297306e-05, "num_sequences": 128, "num_tokens": 34720, "learning_rate": 4.7297297297297306e-05, "train_mean_nll": 2.2244105339050293, "progress": 0.527027027027027, "time_total": 76.47197270393372} +{"step": 40, "skyrl.ai/grad_norm": 0.29108088996807113, "skyrl.ai/learning_rate": 4.5945945945945944e-05, "num_sequences": 128, "num_tokens": 37150, "learning_rate": 4.5945945945945944e-05, "train_mean_nll": 2.3203389644622803, "progress": 0.5405405405405406, "time_total": 115.69880318641663} +{"step": 41, "skyrl.ai/grad_norm": 0.3302190919782092, "skyrl.ai/learning_rate": 4.4594594594594596e-05, "num_sequences": 128, "num_tokens": 39832, "learning_rate": 4.4594594594594596e-05, "train_mean_nll": 2.2518436908721924, "progress": 0.5540540540540541, "time_total": 40.64042091369629} +{"step": 42, "skyrl.ai/grad_norm": 0.34896948663718, "skyrl.ai/learning_rate": 4.324324324324325e-05, "num_sequences": 128, "num_tokens": 41346, "learning_rate": 4.324324324324325e-05, "train_mean_nll": 2.1863410472869873, "progress": 0.5675675675675675, "time_total": 76.51609539985657} +{"step": 43, "skyrl.ai/grad_norm": 0.3072908759780453, "skyrl.ai/learning_rate": 4.18918918918919e-05, "num_sequences": 128, "num_tokens": 35728, "learning_rate": 4.18918918918919e-05, "train_mean_nll": 2.388551950454712, "progress": 0.581081081081081, "time_total": 76.47896790504456} diff --git a/tests/tinker/smoke_logs/sl_loop_d.log/checkpoints.jsonl b/tests/tinker/smoke_logs/sl_loop_d.log/checkpoints.jsonl new file mode 100644 index 0000000000..cbdbaddb1f --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_d.log/checkpoints.jsonl @@ -0,0 +1,2 @@ +{"name": "000020", "batch": 20, "state_path": "tinker://model_a46bd29e/weights/000020"} +{"name": "000040", "batch": 40, "state_path": "tinker://model_a46bd29e/weights/000040"} diff --git a/tests/tinker/smoke_logs/sl_loop_d.log/code.diff b/tests/tinker/smoke_logs/sl_loop_d.log/code.diff new file mode 100644 index 0000000000..d8e559755f --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_d.log/code.diff @@ -0,0 +1,3 @@ +### repo: /home/etang/tinker-cookbook @ dacb835b31fb3f7a012851a1bc64c950d0495de3 +modules: tinker_cookbook +(no local changes) diff --git a/tests/tinker/smoke_logs/sl_loop_d.log/config.json b/tests/tinker/smoke_logs/sl_loop_d.log/config.json new file mode 100644 index 0000000000..6da5f67bb4 --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_d.log/config.json @@ -0,0 +1,12 @@ +{ + 
"base_url": "http://localhost:8000", + "log_path": "/tmp/sl_loop_d.log", + "model_name": "Qwen/Qwen3-0.6B", + "batch_size": 128, + "learning_rate": 0.0001, + "max_length": 32768, + "train_on_what": "last_assistant_message", + "lora_rank": 32, + "save_every": 20, + "ttl_seconds": 604800 +} \ No newline at end of file diff --git a/tests/tinker/smoke_logs/sl_loop_d.log/logs.log b/tests/tinker/smoke_logs/sl_loop_d.log/logs.log new file mode 100644 index 0000000000..1f11432f6a --- /dev/null +++ b/tests/tinker/smoke_logs/sl_loop_d.log/logs.log @@ -0,0 +1,644 @@ +root:680 [INFO] Command line invocation: /home/etang/tinker-cookbook/tinker_cookbook/recipes/sl_loop.py base_url=http://localhost:8000 model_name=Qwen/Qwen3-0.6B train_on_what=LAST_ASSISTANT_MESSAGE lora_rank=32 log_path=/tmp/sl_loop_d.log +tinker_cookbook.utils.ml_log:618 [INFO] Logging to: /tmp/sl_loop_d.log +__main__:51 [INFO] Using renderer: qwen3 +__main__:54 [INFO] Loading dataset... +__main__:64 [INFO] Dropping last 28 examples to keep batch size uniform at 128 +__main__:67 [INFO] Train batches: 74 +tinker.lib.public_interfaces.service_client:75 [INFO] ServiceClient initialized for session session_ead9aee6 +tinker_cookbook.checkpoint_utils:395 [INFO] No checkpoints found at /tmp/sl_loop_d.log/checkpoints.jsonl +tinker_cookbook.checkpoint_utils:426 [INFO] No checkpoints found with key state_path in /tmp/sl_loop_d.log +tinker.lib.public_interfaces.service_client:159 [INFO] TrainingClient initialized for model model_a46bd29e +__main__:87 [INFO] Training for 74 steps +tinker_cookbook.supervised.common:188 [INFO] Weight reduction: 'mean' (token-mean loss) +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 0  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000100  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37834  │ +│ progress  │ 0.000000  │ +│ skyrl.ai/grad_norm  │ 1.419466  │ +│ skyrl.ai/learning_rate │ 0.000100  │ +│ time_total  │ 77.674056 │ +│ train_mean_nll  │ 2.937314  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 1  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000099  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35654  │ +│ progress  │ 0.013514  │ +│ skyrl.ai/grad_norm  │ 1.766389  │ +│ skyrl.ai/learning_rate │ 0.000099  │ +│ time_total  │ 77.489237 │ +│ train_mean_nll  │ 3.162877  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 2  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000097  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38474  │ +│ progress  │ 0.027027  │ +│ skyrl.ai/grad_norm  │ 1.380754  │ +│ skyrl.ai/learning_rate │ 0.000097  │ +│ time_total  │ 38.622305 │ +│ train_mean_nll  │ 2.814990  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 3  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000096  │ +│ num_sequences  │ 128  │ +│ 
num_tokens  │ 39781  │ +│ progress  │ 0.040541  │ +│ skyrl.ai/grad_norm  │ 1.256220  │ +│ skyrl.ai/learning_rate │ 0.000096  │ +│ time_total  │ 76.494820 │ +│ train_mean_nll  │ 2.717247  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 4  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000095  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37346  │ +│ progress  │ 0.054054  │ +│ skyrl.ai/grad_norm  │ 1.049826  │ +│ skyrl.ai/learning_rate │ 0.000095  │ +│ time_total  │ 76.496100 │ +│ train_mean_nll  │ 2.699676  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 5  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000093  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38710  │ +│ progress  │ 0.067568  │ +│ skyrl.ai/grad_norm  │ 1.067086  │ +│ skyrl.ai/learning_rate │ 0.000093  │ +│ time_total  │ 76.584077 │ +│ train_mean_nll  │ 2.537158  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 6  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000092  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33356  │ +│ progress  │ 0.081081  │ +│ skyrl.ai/grad_norm  │ 0.736274  │ +│ skyrl.ai/learning_rate │ 0.000092  │ +│ time_total  │ 76.462485 │ +│ train_mean_nll  │ 2.691379  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 7  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000091  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 39115  │ +│ progress  │ 0.094595  │ +│ skyrl.ai/grad_norm  │ 0.610632  │ +│ skyrl.ai/learning_rate │ 0.000091  │ +│ time_total  │ 76.602791 │ +│ train_mean_nll  │ 2.499062  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 8  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000089  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38640  │ +│ progress  │ 0.108108  │ +│ skyrl.ai/grad_norm  │ 0.598359  │ +│ skyrl.ai/learning_rate │ 0.000089  │ +│ time_total  │ 76.482659 │ +│ train_mean_nll  │ 2.461901  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 9  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000088  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38863  │ +│ progress  │ 0.121622  │ +│ skyrl.ai/grad_norm  │ 0.513877  │ +│ skyrl.ai/learning_rate │ 0.000088  │ +│ time_total  │ 76.514921 │ +│ train_mean_nll  │ 2.393402  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl 
+tinker_cookbook.utils.ml_log:279 [INFO] + Step 10  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000086  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 33155  │ +│ progress  │ 0.135135  │ +│ skyrl.ai/grad_norm  │ 0.675169  │ +│ skyrl.ai/learning_rate │ 0.000086  │ +│ time_total  │ 77.464031 │ +│ train_mean_nll  │ 2.537810  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 11  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000085  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38406  │ +│ progress  │ 0.148649  │ +│ skyrl.ai/grad_norm  │ 0.626751  │ +│ skyrl.ai/learning_rate │ 0.000085  │ +│ time_total  │ 77.508308 │ +│ train_mean_nll  │ 2.474285  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 12  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000084  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38843  │ +│ progress  │ 0.162162  │ +│ skyrl.ai/grad_norm  │ 0.587304  │ +│ skyrl.ai/learning_rate │ 0.000084  │ +│ time_total  │ 77.612279 │ +│ train_mean_nll  │ 2.344043  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 13  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000082  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35583  │ +│ progress  │ 0.175676  │ +│ skyrl.ai/grad_norm  │ 0.545789  │ +│ skyrl.ai/learning_rate │ 0.000082  │ +│ time_total  │ 77.486650 │ +│ train_mean_nll  │ 2.380935  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 14  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000081  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36717  │ +│ progress  │ 0.189189  │ +│ skyrl.ai/grad_norm  │ 0.474083  │ +│ skyrl.ai/learning_rate │ 0.000081  │ +│ time_total  │ 77.482110 │ +│ train_mean_nll  │ 2.289858  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 15  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000080  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34002  │ +│ progress  │ 0.202703  │ +│ skyrl.ai/grad_norm  │ 0.511856  │ +│ skyrl.ai/learning_rate │ 0.000080  │ +│ time_total  │ 76.445762 │ +│ train_mean_nll  │ 2.420629  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 16  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000078  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 38620  │ +│ progress  │ 0.216216  │ +│ skyrl.ai/grad_norm  │ 0.358070  │ +│ skyrl.ai/learning_rate │ 
0.000078  │ +│ time_total  │ 76.652211 │ +│ train_mean_nll  │ 2.389158  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 17  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000077  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37639  │ +│ progress  │ 0.229730  │ +│ skyrl.ai/grad_norm  │ 0.336696  │ +│ skyrl.ai/learning_rate │ 0.000077  │ +│ time_total  │ 76.554973 │ +│ train_mean_nll  │ 2.258745  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 18  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000076  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 34118  │ +│ progress  │ 0.243243  │ +│ skyrl.ai/grad_norm  │ 0.420464  │ +│ skyrl.ai/learning_rate │ 0.000076  │ +│ time_total  │ 76.455621 │ +│ train_mean_nll  │ 2.409728  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 19  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000074  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37178  │ +│ progress  │ 0.256757  │ +│ skyrl.ai/grad_norm  │ 0.369806  │ +│ skyrl.ai/learning_rate │ 0.000074  │ +│ time_total  │ 42.479602 │ +│ train_mean_nll  │ 2.392418  │ +└────────────────────────┴───────────┘ +tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_a46bd29e/weights/000020'} +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 20  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ learning_rate  │ 0.000073  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 35309  │ +│ progress  │ 0.270270  │ +│ skyrl.ai/grad_norm  │ 0.347546  │ +│ skyrl.ai/learning_rate │ 0.000073  │ +│ time_total  │ 116.676181 │ +│ train_mean_nll  │ 2.357243  │ +└────────────────────────┴────────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 21  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000072  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 37229  │ +│ progress  │ 0.283784  │ +│ skyrl.ai/grad_norm  │ 0.352529  │ +│ skyrl.ai/learning_rate │ 0.000072  │ +│ time_total  │ 76.480985 │ +│ train_mean_nll  │ 2.359369  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl +tinker_cookbook.utils.ml_log:279 [INFO] + Step 22  +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ Metric  ┃ Value  ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ +│ learning_rate  │ 0.000070  │ +│ num_sequences  │ 128  │ +│ num_tokens  │ 36087  │ +│ progress  │ 0.297297  │ +│ skyrl.ai/grad_norm  │ 0.439494  │ +│ skyrl.ai/learning_rate │ 0.000070  │ +│ time_total  │ 75.470494 │ +│ train_mean_nll  │ 2.353091  │ +└────────────────────────┴───────────┘ +tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl 
+tinker_cookbook.utils.ml_log:279 [INFO] Step 23: learning_rate=0.000069, num_sequences=128, num_tokens=33348, progress=0.310811, skyrl.ai/grad_norm=0.682835, skyrl.ai/learning_rate=0.000069, time_total=76.461006, train_mean_nll=2.316307
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 24: learning_rate=0.000068, num_sequences=128, num_tokens=35210, progress=0.324324, skyrl.ai/grad_norm=0.366110, skyrl.ai/learning_rate=0.000068, time_total=76.467583, train_mean_nll=2.383547
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 25: learning_rate=0.000066, num_sequences=128, num_tokens=34833, progress=0.337838, skyrl.ai/grad_norm=0.395728, skyrl.ai/learning_rate=0.000066, time_total=77.473791, train_mean_nll=2.448057
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 26: learning_rate=0.000065, num_sequences=128, num_tokens=37256, progress=0.351351, skyrl.ai/grad_norm=0.383532, skyrl.ai/learning_rate=0.000065, time_total=76.483187, train_mean_nll=2.410551
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 27: learning_rate=0.000064, num_sequences=128, num_tokens=33947, progress=0.364865, skyrl.ai/grad_norm=0.334303, skyrl.ai/learning_rate=0.000064, time_total=76.542615, train_mean_nll=2.351529
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 28: learning_rate=0.000062, num_sequences=128, num_tokens=32694, progress=0.378378, skyrl.ai/grad_norm=0.336119, skyrl.ai/learning_rate=0.000062, time_total=75.442138, train_mean_nll=2.435189
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 29: learning_rate=0.000061, num_sequences=128, num_tokens=37440, progress=0.391892, skyrl.ai/grad_norm=0.363025, skyrl.ai/learning_rate=0.000061, time_total=76.492469, train_mean_nll=2.170725
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 30: learning_rate=0.000059, num_sequences=128, num_tokens=39453, progress=0.405405, skyrl.ai/grad_norm=0.329587, skyrl.ai/learning_rate=0.000059, time_total=76.527954, train_mean_nll=2.284290
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 31: learning_rate=0.000058, num_sequences=128, num_tokens=36143, progress=0.418919, skyrl.ai/grad_norm=0.324875, skyrl.ai/learning_rate=0.000058, time_total=76.648657, train_mean_nll=2.280195
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 32: learning_rate=0.000057, num_sequences=128, num_tokens=34808, progress=0.432432, skyrl.ai/grad_norm=0.386744, skyrl.ai/learning_rate=0.000057, time_total=76.552472, train_mean_nll=2.355594
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 33: learning_rate=0.000055, num_sequences=128, num_tokens=40403, progress=0.445946, skyrl.ai/grad_norm=0.291313, skyrl.ai/learning_rate=0.000055, time_total=76.512269, train_mean_nll=2.234097
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 34: learning_rate=0.000054, num_sequences=128, num_tokens=33581, progress=0.459459, skyrl.ai/grad_norm=0.323469, skyrl.ai/learning_rate=0.000054, time_total=75.450547, train_mean_nll=2.358581
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 35: learning_rate=0.000053, num_sequences=128, num_tokens=35478, progress=0.472973, skyrl.ai/grad_norm=0.270337, skyrl.ai/learning_rate=0.000053, time_total=76.495394, train_mean_nll=2.343688
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 36: learning_rate=0.000051, num_sequences=128, num_tokens=37613, progress=0.486486, skyrl.ai/grad_norm=0.438996, skyrl.ai/learning_rate=0.000051, time_total=76.483823, train_mean_nll=2.253183
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 37: learning_rate=0.000050, num_sequences=128, num_tokens=37243, progress=0.500000, skyrl.ai/grad_norm=0.328100, skyrl.ai/learning_rate=0.000050, time_total=76.559658, train_mean_nll=2.456638
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 38: learning_rate=0.000049, num_sequences=128, num_tokens=34551, progress=0.513514, skyrl.ai/grad_norm=0.351431, skyrl.ai/learning_rate=0.000049, time_total=75.463238, train_mean_nll=2.281233
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 39: learning_rate=0.000047, num_sequences=128, num_tokens=34720, progress=0.527027, skyrl.ai/grad_norm=0.326205, skyrl.ai/learning_rate=0.000047, time_total=40.466141, train_mean_nll=2.225522
+tinker_cookbook.checkpoint_utils:466 [INFO] Saved checkpoints: {'state_path': 'tinker://model_a46bd29e/weights/000040'}
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 40: learning_rate=0.000046, num_sequences=128, num_tokens=37150, progress=0.540541, skyrl.ai/grad_norm=0.291013, skyrl.ai/learning_rate=0.000046, time_total=115.610721, train_mean_nll=2.320696
+tinker_cookbook.utils.ml_log:206 [INFO] Wrote metrics to /tmp/sl_loop_d.log/metrics.jsonl
+tinker_cookbook.utils.ml_log:279 [INFO] Step 41: learning_rate=0.000045, num_sequences=128, num_tokens=39832, progress=0.554054, skyrl.ai/grad_norm=0.331071, skyrl.ai/learning_rate=0.000045, time_total=77.513973, train_mean_nll=2.251503
diff --git a/tests/tinker/smoke_logs/sl_loop_d.log/metrics.jsonl b/tests/tinker/smoke_logs/sl_loop_d.log/metrics.jsonl
new file mode 100644
index 0000000000..13e7652554
--- /dev/null
+++ b/tests/tinker/smoke_logs/sl_loop_d.log/metrics.jsonl
@@ -0,0 +1,42 @@
+{"step": 0, "skyrl.ai/grad_norm": 1.4194660110892312, "skyrl.ai/learning_rate": 0.0001, "num_sequences": 128, "num_tokens": 37834, "learning_rate": 0.0001,
"train_mean_nll": 2.9373137950897217, "progress": 0.0, "time_total": 77.67405557632446} +{"step": 1, "skyrl.ai/grad_norm": 1.7663889894650249, "skyrl.ai/learning_rate": 9.864864864864865e-05, "num_sequences": 128, "num_tokens": 35654, "learning_rate": 9.864864864864865e-05, "train_mean_nll": 3.162876844406128, "progress": 0.013513513513513514, "time_total": 77.48923659324646} +{"step": 2, "skyrl.ai/grad_norm": 1.3807542213211028, "skyrl.ai/learning_rate": 9.729729729729731e-05, "num_sequences": 128, "num_tokens": 38474, "learning_rate": 9.729729729729731e-05, "train_mean_nll": 2.8149900436401367, "progress": 0.02702702702702703, "time_total": 38.622305393218994} +{"step": 3, "skyrl.ai/grad_norm": 1.2562199812002224, "skyrl.ai/learning_rate": 9.594594594594595e-05, "num_sequences": 128, "num_tokens": 39781, "learning_rate": 9.594594594594595e-05, "train_mean_nll": 2.717247486114502, "progress": 0.04054054054054054, "time_total": 76.49482035636902} +{"step": 4, "skyrl.ai/grad_norm": 1.049826376047764, "skyrl.ai/learning_rate": 9.45945945945946e-05, "num_sequences": 128, "num_tokens": 37346, "learning_rate": 9.45945945945946e-05, "train_mean_nll": 2.699676275253296, "progress": 0.05405405405405406, "time_total": 76.49610042572021} +{"step": 5, "skyrl.ai/grad_norm": 1.067086083437052, "skyrl.ai/learning_rate": 9.324324324324324e-05, "num_sequences": 128, "num_tokens": 38710, "learning_rate": 9.324324324324324e-05, "train_mean_nll": 2.537158250808716, "progress": 0.06756756756756757, "time_total": 76.58407664299011} +{"step": 6, "skyrl.ai/grad_norm": 0.7362744137224916, "skyrl.ai/learning_rate": 9.189189189189189e-05, "num_sequences": 128, "num_tokens": 33356, "learning_rate": 9.189189189189189e-05, "train_mean_nll": 2.6913793087005615, "progress": 0.08108108108108109, "time_total": 76.46248507499695} +{"step": 7, "skyrl.ai/grad_norm": 0.610632113256186, "skyrl.ai/learning_rate": 9.054054054054055e-05, "num_sequences": 128, "num_tokens": 39115, "learning_rate": 9.054054054054055e-05, "train_mean_nll": 2.4990618228912354, "progress": 0.0945945945945946, "time_total": 76.60279083251953} +{"step": 8, "skyrl.ai/grad_norm": 0.5983594111397604, "skyrl.ai/learning_rate": 8.918918918918919e-05, "num_sequences": 128, "num_tokens": 38640, "learning_rate": 8.918918918918919e-05, "train_mean_nll": 2.4619009494781494, "progress": 0.10810810810810811, "time_total": 76.48265886306763} +{"step": 9, "skyrl.ai/grad_norm": 0.513877102427449, "skyrl.ai/learning_rate": 8.783783783783784e-05, "num_sequences": 128, "num_tokens": 38863, "learning_rate": 8.783783783783784e-05, "train_mean_nll": 2.393401622772217, "progress": 0.12162162162162163, "time_total": 76.51492071151733} +{"step": 10, "skyrl.ai/grad_norm": 0.6751687024483758, "skyrl.ai/learning_rate": 8.64864864864865e-05, "num_sequences": 128, "num_tokens": 33155, "learning_rate": 8.64864864864865e-05, "train_mean_nll": 2.5378100872039795, "progress": 0.13513513513513514, "time_total": 77.46403121948242} +{"step": 11, "skyrl.ai/grad_norm": 0.6267508302218245, "skyrl.ai/learning_rate": 8.513513513513514e-05, "num_sequences": 128, "num_tokens": 38406, "learning_rate": 8.513513513513514e-05, "train_mean_nll": 2.4742848873138428, "progress": 0.14864864864864866, "time_total": 77.50830841064453} +{"step": 12, "skyrl.ai/grad_norm": 0.5873035274858249, "skyrl.ai/learning_rate": 8.378378378378379e-05, "num_sequences": 128, "num_tokens": 38843, "learning_rate": 8.378378378378379e-05, "train_mean_nll": 2.344043254852295, "progress": 0.16216216216216217, "time_total": 
77.61227917671204} +{"step": 13, "skyrl.ai/grad_norm": 0.5457892948565732, "skyrl.ai/learning_rate": 8.243243243243243e-05, "num_sequences": 128, "num_tokens": 35583, "learning_rate": 8.243243243243243e-05, "train_mean_nll": 2.380934953689575, "progress": 0.17567567567567569, "time_total": 77.48664999008179} +{"step": 14, "skyrl.ai/grad_norm": 0.4740833931419579, "skyrl.ai/learning_rate": 8.108108108108108e-05, "num_sequences": 128, "num_tokens": 36717, "learning_rate": 8.108108108108108e-05, "train_mean_nll": 2.289858102798462, "progress": 0.1891891891891892, "time_total": 77.48211026191711} +{"step": 15, "skyrl.ai/grad_norm": 0.5118555948866679, "skyrl.ai/learning_rate": 7.972972972972974e-05, "num_sequences": 128, "num_tokens": 34002, "learning_rate": 7.972972972972974e-05, "train_mean_nll": 2.4206290245056152, "progress": 0.20270270270270271, "time_total": 76.44576215744019} +{"step": 16, "skyrl.ai/grad_norm": 0.358069682251165, "skyrl.ai/learning_rate": 7.837837837837838e-05, "num_sequences": 128, "num_tokens": 38620, "learning_rate": 7.837837837837838e-05, "train_mean_nll": 2.38915753364563, "progress": 0.21621621621621623, "time_total": 76.6522114276886} +{"step": 17, "skyrl.ai/grad_norm": 0.33669552556994264, "skyrl.ai/learning_rate": 7.702702702702703e-05, "num_sequences": 128, "num_tokens": 37639, "learning_rate": 7.702702702702703e-05, "train_mean_nll": 2.258744955062866, "progress": 0.22972972972972974, "time_total": 76.55497312545776} +{"step": 18, "skyrl.ai/grad_norm": 0.4204640990772262, "skyrl.ai/learning_rate": 7.567567567567568e-05, "num_sequences": 128, "num_tokens": 34118, "learning_rate": 7.567567567567568e-05, "train_mean_nll": 2.4097280502319336, "progress": 0.24324324324324326, "time_total": 76.45562148094177} +{"step": 19, "skyrl.ai/grad_norm": 0.3698055193101831, "skyrl.ai/learning_rate": 7.432432432432433e-05, "num_sequences": 128, "num_tokens": 37178, "learning_rate": 7.432432432432433e-05, "train_mean_nll": 2.392418146133423, "progress": 0.25675675675675674, "time_total": 42.479602098464966} +{"step": 20, "skyrl.ai/grad_norm": 0.34754613139614876, "skyrl.ai/learning_rate": 7.297297297297297e-05, "num_sequences": 128, "num_tokens": 35309, "learning_rate": 7.297297297297297e-05, "train_mean_nll": 2.3572428226470947, "progress": 0.2702702702702703, "time_total": 116.67618131637573} +{"step": 21, "skyrl.ai/grad_norm": 0.35252946750760694, "skyrl.ai/learning_rate": 7.162162162162162e-05, "num_sequences": 128, "num_tokens": 37229, "learning_rate": 7.162162162162162e-05, "train_mean_nll": 2.3593690395355225, "progress": 0.28378378378378377, "time_total": 76.48098468780518} +{"step": 22, "skyrl.ai/grad_norm": 0.4394941353205905, "skyrl.ai/learning_rate": 7.027027027027026e-05, "num_sequences": 128, "num_tokens": 36087, "learning_rate": 7.027027027027026e-05, "train_mean_nll": 2.353090763092041, "progress": 0.2972972972972973, "time_total": 75.4704942703247} +{"step": 23, "skyrl.ai/grad_norm": 0.6828352725772945, "skyrl.ai/learning_rate": 6.891891891891892e-05, "num_sequences": 128, "num_tokens": 33348, "learning_rate": 6.891891891891892e-05, "train_mean_nll": 2.3163068294525146, "progress": 0.3108108108108108, "time_total": 76.46100568771362} +{"step": 24, "skyrl.ai/grad_norm": 0.36611048019937215, "skyrl.ai/learning_rate": 6.756756756756757e-05, "num_sequences": 128, "num_tokens": 35210, "learning_rate": 6.756756756756757e-05, "train_mean_nll": 2.383546829223633, "progress": 0.32432432432432434, "time_total": 76.46758341789246} +{"step": 25, "skyrl.ai/grad_norm": 
0.39572772281935936, "skyrl.ai/learning_rate": 6.621621621621621e-05, "num_sequences": 128, "num_tokens": 34833, "learning_rate": 6.621621621621621e-05, "train_mean_nll": 2.4480574131011963, "progress": 0.33783783783783783, "time_total": 77.47379088401794} +{"step": 26, "skyrl.ai/grad_norm": 0.3835315956991299, "skyrl.ai/learning_rate": 6.486486486486487e-05, "num_sequences": 128, "num_tokens": 37256, "learning_rate": 6.486486486486487e-05, "train_mean_nll": 2.410550832748413, "progress": 0.35135135135135137, "time_total": 76.4831874370575} +{"step": 27, "skyrl.ai/grad_norm": 0.33430281733441997, "skyrl.ai/learning_rate": 6.351351351351352e-05, "num_sequences": 128, "num_tokens": 33947, "learning_rate": 6.351351351351352e-05, "train_mean_nll": 2.351529359817505, "progress": 0.36486486486486486, "time_total": 76.54261541366577} +{"step": 28, "skyrl.ai/grad_norm": 0.3361192034416266, "skyrl.ai/learning_rate": 6.216216216216216e-05, "num_sequences": 128, "num_tokens": 32694, "learning_rate": 6.216216216216216e-05, "train_mean_nll": 2.4351885318756104, "progress": 0.3783783783783784, "time_total": 75.44213843345642} +{"step": 29, "skyrl.ai/grad_norm": 0.36302524693495586, "skyrl.ai/learning_rate": 6.0810810810810814e-05, "num_sequences": 128, "num_tokens": 37440, "learning_rate": 6.0810810810810814e-05, "train_mean_nll": 2.170724630355835, "progress": 0.3918918918918919, "time_total": 76.49246883392334} +{"step": 30, "skyrl.ai/grad_norm": 0.32958736842603464, "skyrl.ai/learning_rate": 5.945945945945945e-05, "num_sequences": 128, "num_tokens": 39453, "learning_rate": 5.945945945945945e-05, "train_mean_nll": 2.284289598464966, "progress": 0.40540540540540543, "time_total": 76.5279541015625} +{"step": 31, "skyrl.ai/grad_norm": 0.3248749033717518, "skyrl.ai/learning_rate": 5.810810810810812e-05, "num_sequences": 128, "num_tokens": 36143, "learning_rate": 5.810810810810812e-05, "train_mean_nll": 2.2801947593688965, "progress": 0.4189189189189189, "time_total": 76.64865732192993} +{"step": 32, "skyrl.ai/grad_norm": 0.386744315333098, "skyrl.ai/learning_rate": 5.6756756756756757e-05, "num_sequences": 128, "num_tokens": 34808, "learning_rate": 5.6756756756756757e-05, "train_mean_nll": 2.3555939197540283, "progress": 0.43243243243243246, "time_total": 76.55247235298157} +{"step": 33, "skyrl.ai/grad_norm": 0.29131346700737404, "skyrl.ai/learning_rate": 5.540540540540541e-05, "num_sequences": 128, "num_tokens": 40403, "learning_rate": 5.540540540540541e-05, "train_mean_nll": 2.2340970039367676, "progress": 0.44594594594594594, "time_total": 76.51226878166199} +{"step": 34, "skyrl.ai/grad_norm": 0.32346850076171424, "skyrl.ai/learning_rate": 5.405405405405406e-05, "num_sequences": 128, "num_tokens": 33581, "learning_rate": 5.405405405405406e-05, "train_mean_nll": 2.3585805892944336, "progress": 0.4594594594594595, "time_total": 75.45054650306702} +{"step": 35, "skyrl.ai/grad_norm": 0.27033689587266685, "skyrl.ai/learning_rate": 5.27027027027027e-05, "num_sequences": 128, "num_tokens": 35478, "learning_rate": 5.27027027027027e-05, "train_mean_nll": 2.3436880111694336, "progress": 0.47297297297297297, "time_total": 76.4953944683075} +{"step": 36, "skyrl.ai/grad_norm": 0.4389957845644059, "skyrl.ai/learning_rate": 5.135135135135135e-05, "num_sequences": 128, "num_tokens": 37613, "learning_rate": 5.135135135135135e-05, "train_mean_nll": 2.253182888031006, "progress": 0.4864864864864865, "time_total": 76.48382329940796} +{"step": 37, "skyrl.ai/grad_norm": 0.32809987426108383, "skyrl.ai/learning_rate": 5e-05, 
"num_sequences": 128, "num_tokens": 37243, "learning_rate": 5e-05, "train_mean_nll": 2.4566383361816406, "progress": 0.5, "time_total": 76.55965781211853} +{"step": 38, "skyrl.ai/grad_norm": 0.3514309849054616, "skyrl.ai/learning_rate": 4.8648648648648654e-05, "num_sequences": 128, "num_tokens": 34551, "learning_rate": 4.8648648648648654e-05, "train_mean_nll": 2.2812325954437256, "progress": 0.5135135135135135, "time_total": 75.46323847770691} +{"step": 39, "skyrl.ai/grad_norm": 0.32620477795966657, "skyrl.ai/learning_rate": 4.7297297297297306e-05, "num_sequences": 128, "num_tokens": 34720, "learning_rate": 4.7297297297297306e-05, "train_mean_nll": 2.2255215644836426, "progress": 0.527027027027027, "time_total": 40.46614122390747} +{"step": 40, "skyrl.ai/grad_norm": 0.29101311599366725, "skyrl.ai/learning_rate": 4.5945945945945944e-05, "num_sequences": 128, "num_tokens": 37150, "learning_rate": 4.5945945945945944e-05, "train_mean_nll": 2.3206958770751953, "progress": 0.5405405405405406, "time_total": 115.61072111129761} +{"step": 41, "skyrl.ai/grad_norm": 0.3310711724332053, "skyrl.ai/learning_rate": 4.4594594594594596e-05, "num_sequences": 128, "num_tokens": 39832, "learning_rate": 4.4594594594594596e-05, "train_mean_nll": 2.2515032291412354, "progress": 0.5540540540540541, "time_total": 77.5139729976654} From d178ca0fe26d2a611625e4eacbbd6464b3739d50 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Thu, 7 May 2026 01:52:20 +0000 Subject: [PATCH 21/21] [multi-lora-rl] Reset PR #1579 test files to upstream main version PR #1579's test files evolved between when our branch cherry-picked them and when the PR was merged to main (model field is now optional via _resolve_model). Reset them to main's version to keep the multi_lora_rl diff strictly to RL functionality. 
--- skyrl/benchmarks/load_test_concurrency.py | 2 +- .../test_new_inference_generation.py | 4 +--- .../test_remote_inference_client_chat_template.py | 2 +- .../test_vlm_inference_generation.py | 3 +-- .../gpu/gpu_ci/megatron/test_router_replay.py | 1 - .../gpu/gpu_ci/test_engine_generation.py | 8 ++++---- .../gpu_ci/test_pause_and_continue_generation.py | 2 +- .../gpu/gpu_ci/test_skyrl_gym_generator.py | 1 - .../gpu/gpu_ci/test_skyrl_vlm_gym_generator.py | 1 - tests/train/generators/test_skyrl_gym_generator.py | 14 -------------- .../test_skyrl_gym_generator_chat_templating.py | 1 - tests/train/generators/test_skyrl_vlm_generator.py | 1 - 12 files changed, 9 insertions(+), 31 deletions(-) diff --git a/skyrl/benchmarks/load_test_concurrency.py b/skyrl/benchmarks/load_test_concurrency.py index 88b36fbb8f..036011f89d 100644 --- a/skyrl/benchmarks/load_test_concurrency.py +++ b/skyrl/benchmarks/load_test_concurrency.py @@ -186,7 +186,7 @@ async def fire_client_generate( async def _call(idx: int): try: - return await client.generate(input_batch, model=client.model_name) + return await client.generate(input_batch) except Exception as e: raise RuntimeError(f"request {idx}: {type(e).__name__}: {e}") from e diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py index 95338f7c0f..e622f132fc 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_new_inference_generation.py @@ -563,7 +563,7 @@ async def test_client_generate(vllm_server: InferenceEngineState): sampling_params=sampling_params, ) - output = await client.generate(engine_input, model=client.model_name) + output = await client.generate(engine_input) assert len(output["responses"]) == 1 assert len(output["response_ids"]) == 1 @@ -595,7 +595,6 @@ async def test_client_tokenize_detokenize_roundtrip(vllm_server: InferenceEngine def _build_sample_payload( token_ids: List[int], - model: str = MODEL_QWEN2_5, num_samples: int = 1, sampling_params: Dict[str, Any] | None = None, session_id: str | None = None, @@ -604,7 +603,6 @@ def _build_sample_payload( ) -> Dict[str, Any]: """Build a Tinker-format sample request payload.""" body: Dict[str, Any] = { - "model": model, "prompt": {"chunks": [{"tokens": token_ids}]}, "num_samples": num_samples, "sampling_params": sampling_params or {"temperature": 0.7, "max_tokens": 64}, diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py index 1ef6e10bc9..300fac372a 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py @@ -90,7 +90,7 @@ async def test_custom_chat_template(ray_init_fixture, use_custom_template: bool) prompt_token_ids=[prompt_token_ids], sampling_params={"max_tokens": 10}, ) - output = await client.generate(engine_input, model=client.model_name) + output = await client.generate(engine_input) assert len(output["responses"]) == 1 assert isinstance(output["responses"][0], str) diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py 
b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py index 04f2448c24..c432fa6dcd 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_vlm_inference_generation.py @@ -203,7 +203,6 @@ async def test_sample_with_multimodal_image(module_scoped_ray_init_fixture): request_payload = { "json": { - "model": MODEL_QWEN3_VL, "prompt": prompt, "num_samples": 1, "sampling_params": {"temperature": 0.0, "max_tokens": 20}, @@ -410,7 +409,7 @@ async def test_generate_with_multimodal_features_red_square(module_scoped_ray_in "sampling_params": {"max_tokens": 64, "temperature": 0.0}, "mm_features": [features], } - gen_result = await client.generate(input_batch, model=client.model_name) + gen_result = await client.generate(input_batch) # Structural assertions assert len(gen_result["responses"]) == 1 diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_router_replay.py b/tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_router_replay.py index f543ebbb55..70be711746 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_router_replay.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_router_replay.py @@ -150,7 +150,6 @@ async def test_logprobs(ray_init_fixture, tp, pp, cp, ep, etp, extra_tf_kwargs): skyrl_gym_cfg=cfg.environment.skyrl_gym, inference_engine_client=client, tokenizer=tokenizer, - policy_model_name=client.model_name, ) input_batch: GeneratorInput = get_test_generator_input( diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py index fba1dd6dad..82a65a198c 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_engine_generation.py @@ -41,7 +41,7 @@ def get_test_actor_config(model: str = MODEL) -> SkyRLTrainConfig: async def run_batch_generation(client, prompts, sampling_params): engine_input = InferenceEngineInput(prompts=prompts, sampling_params=sampling_params) - engine_output = await client.generate(engine_input, model=client.model_name) + engine_output = await client.generate(engine_input) return engine_output["responses"], engine_output["stop_reasons"] @@ -49,7 +49,7 @@ async def run_single_generation(client, prompts, sampling_params): tasks = [] for prompt in prompts: engine_input = InferenceEngineInput(prompts=[prompt], sampling_params=sampling_params) - task = client.generate(engine_input, model=client.model_name) + task = client.generate(engine_input) tasks.append(task) results = await asyncio.gather(*tasks) @@ -65,7 +65,7 @@ async def run_single_generation(client, prompts, sampling_params): async def run_batch_generation_with_tokens(client, prompt_token_ids, sampling_params): engine_input = InferenceEngineInput(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params) - engine_output = await client.generate(engine_input, model=client.model_name) + engine_output = await client.generate(engine_input) return engine_output["responses"], engine_output["stop_reasons"] @@ -73,7 +73,7 @@ async def run_single_generation_with_tokens(client, prompt_token_ids, sampling_p tasks = [] for tokens in prompt_token_ids: engine_input = InferenceEngineInput(prompt_token_ids=[tokens], sampling_params=sampling_params) - task = client.generate(engine_input, model=client.model_name) + task = client.generate(engine_input) tasks.append(task) results = await 
asyncio.gather(*tasks) diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py index 55ec3f77d7..d3084482ba 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_pause_and_continue_generation.py @@ -234,7 +234,7 @@ async def one_req(i: int): "sampling_params": dict(sampling_params), "session_ids": [i], } - return await client.generate(engine_input, model=client.model_name) + return await client.generate(engine_input) tasks = [asyncio.create_task(one_req(i)) for i in range(num_requests)] # Let requests start and enqueue; with max_num_seqs=2, 2 run and 1 wait per engine diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py index c66461b1b8..8a3a258a5e 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_gym_generator.py @@ -171,7 +171,6 @@ async def run_generator_end_to_end( skyrl_gym_cfg=env_cfg, inference_engine_client=inference_engine_client, tokenizer=tokenizer, - policy_model_name=inference_engine_client.model_name, ) input_batch: GeneratorInput = get_test_generator_input( diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py index 03bfe9a3fe..a671e8d0f8 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_skyrl_vlm_gym_generator.py @@ -191,7 +191,6 @@ async def test_vlm_generator_color_classification(ray_init_fixture): skyrl_gym_cfg=env_cfg, inference_engine_client=inference_client, tokenizer=tokenizer, - policy_model_name=inference_client.model_name, ) num_prompts = 2 diff --git a/tests/train/generators/test_skyrl_gym_generator.py b/tests/train/generators/test_skyrl_gym_generator.py index 21da71dbbd..d8bee68a1a 100644 --- a/tests/train/generators/test_skyrl_gym_generator.py +++ b/tests/train/generators/test_skyrl_gym_generator.py @@ -283,7 +283,6 @@ def mock_generate(_, model=None): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -335,7 +334,6 @@ async def test_generate_batched(mock_make, mock_tokenizer, mock_llm, mock_env, g skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -378,7 +376,6 @@ async def test_generate_interface_compliance( skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -501,7 +498,6 @@ def mock_generate(input_batch, model=None): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -591,7 +587,6 @@ def mock_encode(text, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) 
generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -673,7 +668,6 @@ def mock_encode(text, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -737,7 +731,6 @@ def mock_apply_chat_template(messages, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -859,7 +852,6 @@ def mock_encode_or_tokenize(text): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] # to make sure observation_ids are encoded correctly @@ -959,7 +951,6 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) # Run agent loop @@ -1050,7 +1041,6 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) # Ensure base_conversation_token_ids doesn't shift observation slicing in test generator.base_conversation_token_ids = [] @@ -1141,7 +1131,6 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) # Force retokenize path regardless of model resolution logic if needed generator.custom_chat_template = "" @@ -1230,7 +1219,6 @@ def mock_make_func(*args, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) prompt = [{"role": "user", "content": "Q?"}] @@ -1322,7 +1310,6 @@ def mock_make_func(*args, **kwargs): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] @@ -1440,7 +1427,6 @@ def step(self, action): skyrl_gym_cfg=mock_env_cfg, inference_engine_client=mock_llm, tokenizer=mock_tokenizer, - policy_model_name="mock-model", ) generator.base_conversation_token_ids = [] diff --git a/tests/train/generators/test_skyrl_gym_generator_chat_templating.py b/tests/train/generators/test_skyrl_gym_generator_chat_templating.py index 5e7f48a1d7..2d06bb3983 100644 --- a/tests/train/generators/test_skyrl_gym_generator_chat_templating.py +++ b/tests/train/generators/test_skyrl_gym_generator_chat_templating.py @@ -101,7 +101,6 @@ def _build_generator(tokenizer, model_name: str, chat_template_config, extra_ove skyrl_gym_cfg=env_cfg, inference_engine_client=None, # to be replaced per-test tokenizer=tokenizer, - policy_model_name="mock-model", ) diff --git a/tests/train/generators/test_skyrl_vlm_generator.py b/tests/train/generators/test_skyrl_vlm_generator.py index 4b505f8595..675c212a99 100644 --- a/tests/train/generators/test_skyrl_vlm_generator.py +++ b/tests/train/generators/test_skyrl_vlm_generator.py @@ -90,7 +90,6 @@ def _build_vlm_generator(tokenizer): skyrl_gym_cfg=env_cfg, inference_engine_client=mock_client, tokenizer=tokenizer, - policy_model_name="mock-model", ) return generator