+import argparse
+
+from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
+import llama_cpp.server.model as server_model
+from llama_cpp.server.settings import ModelSettings
+
+
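+# Verifies that --chat_template_kwargs is accepted on the CLI as a JSON string
+# and parsed into a dict on ModelSettings.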
+def test_cli_parses_chat_template_kwargs_json():
+    parser = argparse.ArgumentParser()
+    add_args_from_model(parser, ModelSettings)
+
+    args = parser.parse_args(
+        [
+            "--model",
+            "test.gguf",
+            "--chat_template_kwargs",
+            '{"enable_thinking": true, "template_mode": "extended"}',
+        ]
+    )
+    settings = parse_model_from_args(ModelSettings, args)
+
+    assert settings.chat_template_kwargs == {
+        "enable_thinking": True,
+        "template_mode": "extended",
+    }
+
+
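+# Verifies that chat_template_kwargs from the settings are merged into every
+# chat-handler call, with per-call kwargs overriding the configured defaults.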
+def test_load_llama_from_model_settings_merges_chat_template_kwargs(monkeypatch):
+    captured = {}
+
+    def base_handler(*args, **kwargs):
+        captured["args"] = args
+        captured["kwargs"] = kwargs
+        return "ok"
+
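+    # Minimal stand-in for llama_cpp.Llama: records the chat handler and chat
+    # format it was constructed with, and fails loudly if the server tries to
+    # set up a cache (the settings below leave caching disabled).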
+    class FakeLlama:
+        def __init__(self, **kwargs):
+            self.chat_handler = kwargs["chat_handler"]
+            self.chat_format = kwargs["chat_format"]
+            self._chat_handlers = {}
+
+        def set_cache(self, cache):
+            raise AssertionError("cache should not be set in this test")
+
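+    # Route Llama construction and chat-format resolution to the stubs so the
+    # test exercises only the kwargs-merging wrapper around the handler.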
+    monkeypatch.setattr(server_model.llama_cpp, "Llama", FakeLlama)
+    monkeypatch.setattr(
+        server_model.llama_cpp.llama_chat_format,
+        "get_chat_completion_handler",
+        lambda chat_format: base_handler,
+    )
+
+    model = server_model.LlamaProxy.load_llama_from_model_settings(
+        ModelSettings(
+            model="test.gguf",
+            chat_format="chatml",
+            chat_template_kwargs={
+                "enable_thinking": True,
+                "template_mode": "default",
+            },
+        )
+    )
+
+    result = model.chat_handler(template_mode="override", extra_flag="x")
+
+    assert result == "ok"
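+    # Call-time kwargs win over configured defaults: template_mode is
+    # overridden, enable_thinking is inherited, and extra_flag passes through.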
+    assert captured["kwargs"] == {
+        "enable_thinking": True,
+        "template_mode": "override",
+        "extra_flag": "x",
+    }