Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2eabff0
Check
atobiszei Jan 16, 2025
829257b
Add config for queue
atobiszei Feb 17, 2026
5f96c66
Checkpoint - switchable queue
atobiszei Feb 18, 2026
3fb09d9
Checkpoint
atobiszei Feb 19, 2026
0ae35e1
Streaming with queue
atobiszei Feb 20, 2026
e61a174
All gtest tests pass
atobiszei Feb 23, 2026
662a8f1
Style fixes
atobiszei Feb 23, 2026
002dc9b
Fix windows
atobiszei Feb 23, 2026
468abd4
Fix windows test
atobiszei Feb 24, 2026
ecd8f52
Pool default env variable switch
atobiszei Mar 5, 2026
aaa69ef
Merge main
atobiszei Mar 5, 2026
f895c20
Pool configure
atobiszei Mar 9, 2026
b51a8d4
Rename variable
atobiszei Mar 9, 2026
d83d0c0
Cleanup debug logs in tests, remove GTEST_SKIP from InferWithParams, …
atobiszei Mar 20, 2026
f34ddda
Review fixes: ObserverHolder const-map pattern, unique_ptr for graph,…
atobiszei Mar 25, 2026
3d1a6b7
Fix clang-format: double space before inline comments
atobiszei Mar 26, 2026
11d4631
Merge remote-tracking branch 'origin/main' into atobisze_check_graph_…
atobiszei Apr 13, 2026
82ea59c
Apply clang-format to side_packet_builder
atobiszei Apr 27, 2026
836b97f
Rename OVMS_GRAPH_QUEUE_SIZE to OVMS_GRAPH_QUEUE_MAX_SIZE, 0 disables…
atobiszei May 12, 2026
2513a4b
Merge remote-tracking branch 'origin/main' into atobisze_check_graph_…
atobiszei May 12, 2026
c1dddbc
Fix missing graph_export dependency in mediapipegraphconfig BUILD rule
atobiszei May 12, 2026
3060a7f
Self-review: remove unused constants, privatize sidePacketMaps, decou…
atobiszei May 13, 2026
9ba2b72
Address Copilot review comments (round 1 & 2)
atobiszei May 13, 2026
608ee4a
Fix broken promise crash in retire(): drain in-flight requests before…
atobiszei May 13, 2026
121c53d
Reject PythonExecutorCalculator with LOOPBACK when graph queue is ena…
atobiszei May 19, 2026
1735942
PR #3999 review fixes: image gen AUTO, OVMS_GRAPH_QUEUE_OFF env var k…
atobiszei May 20, 2026
105434e
Merge remote-tracking branch 'origin/main' into atobisze_check_graph_…
atobiszei May 20, 2026
e60bff6
Fix GraphCreationTest: image gen queue directive is now AUTO
atobiszei May 21, 2026
e7d032f
Add OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO to export_model.py templates
atobiszei May 21, 2026
61bb6d1
Update docs
atobiszei May 21, 2026
a7155f7
Merge main into atobisze_check_graph_pool_2026
atobiszei May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions common_settings.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ COMMON_STATIC_TEST_COPTS = select({
"-Wall",
"-Wno-unknown-pragmas",
"-Werror",
# ov::Tensor::data method call results in deprecated warning and we use it in multiple places
"-Wno-deprecated-declarations",
Comment thread
dkalinowski marked this conversation as resolved.
"-Isrc",
"-fconcepts", # for gmock related utils
"-fvisibility=hidden",# Needed for pybind targets
Expand Down
4 changes: 4 additions & 0 deletions demos/benchmark/v3/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,4 +438,8 @@ async def limited_request_func(request_func_input, pbar):
print(f"Throughput - Tokens per second: {num_tokens / benchmark_results['duration']:^,.1f}")
print(f"Mean latency: {np.mean(benchmark_results['latencies'])*1000:.2f} ms")
print(f"Median latency: {np.median(benchmark_results['latencies'])*1000:.2f} ms")
# print selected latency percentiles to better understand the latency distribution
percentiles = [10, 25, 50, 75, 90, 95, 99]
for p in percentiles:
print(f"{p}th percentile latency: {np.percentile(benchmark_results['latencies'], p)*1000:.2f} ms")
print(f"Average document length: {num_tokens / len(docs)} tokens")
14 changes: 8 additions & 6 deletions demos/common/export_models/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def add_common_arguments(parser):
parser_speech2text.add_argument('--enable_word_timestamps', default=False, action='store_true', help='Load model with word timestamps support.', dest='enable_word_timestamps')
args = vars(parser.parse_args())

t2s_graph_template = """
t2s_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"
node {
Expand All @@ -129,7 +129,7 @@ def add_common_arguments(parser):
}
"""

s2t_graph_template = """
s2t_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"
node {
Expand Down Expand Up @@ -165,7 +165,7 @@ def add_common_arguments(parser):
}
"""

embedding_graph_ov_template = """
embedding_graph_ov_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
node {
Expand All @@ -189,7 +189,7 @@ def add_common_arguments(parser):
}
"""

rerank_graph_ov_template = """
rerank_graph_ov_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
node {
Expand All @@ -208,7 +208,8 @@ def add_common_arguments(parser):
}
"""

text_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
text_generation_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"

node: {
Expand Down Expand Up @@ -262,7 +263,8 @@ def add_common_arguments(parser):
}
}"""

image_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
image_generation_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"

node: {
Expand Down
47 changes: 47 additions & 0 deletions docs/mediapipe.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,53 @@ Nodes in the MediaPipe graphs can reference both the models configured in model_

Subconfig file may only contain *model_config_list* section - in the same format as in [models config file](starting_server.md).

### Graph Pool (Pre-initialized Graph Queue)

OpenVINO Model Server can pre-initialize a pool of MediaPipe `CalculatorGraph` instances for a graph definition. Graphs in the pool are started once during server initialization and reused across inference requests, eliminating per-request graph initialization and teardown overhead. This is especially beneficial for graphs that involve expensive setup performed in a calculator's `Open()` method.

#### How it works

Without graph pool, each incoming request creates a new `CalculatorGraph`, calls `StartRun()` with side packets, processes the request, then tears down the graph via `CloseAllPacketSources()` and `WaitUntilDone()`.

With graph pool enabled, a fixed number of graphs are pre-initialized and kept in a queue. When a request arrives, an idle graph is acquired from the queue. After processing, the graph is returned to the queue for the next request. The graph is never torn down — instead, `WaitUntilIdle()` is called between requests and the internal timestamp is incremented.

#### Configuration

The graph pool size is controlled via a comment directive in the graph `.pbtxt` file:

```
# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO
```

| Value | Behavior |
|:------|:---------|
| `AUTO` | Pool size is set to the number of hardware threads (`std::thread::hardware_concurrency()`), or 16 if detection fails |
| Positive integer (e.g. `4`) | Pool size set to the given value (must not exceed hardware thread count) |
| `0` | Graph pool disabled — falls back to per-request graph creation |
| *(directive absent)* | Default: graph pool is disabled |

**Default behavior:** graph pool stays disabled unless `OVMS_GRAPH_QUEUE_MAX_SIZE` is explicitly present in `graph.pbtxt`. Since the OVMS CLI graph exporter (`--pull --task`) always emits this directive, **graphs created via the CLI exporter have the pool enabled by default**.

**Generated graphs from exporters:**
- OVMS `--task ...` graph export emits `# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO` for all graph types.
- `demos/common/export_models/export_model.py` also emits `# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO` for all graph types.

**Runtime kill-switch:**
Setting the environment variable `OVMS_GRAPH_QUEUE_OFF=1` globally disables graph pools at runtime, regardless of the directive in `graph.pbtxt`.

#### Important considerations for graph developers

**Stateful calculators:**
Since graphs in the pool are reused across requests, any state held by a calculator between `Process()` calls will persist across requests. If your calculator accumulates state (e.g. counters, buffers, history), that state will carry over to the next request that reuses the same graph instance. Design your calculators to either:
- Be stateless (reset any per-request state at the beginning of each `Process()` call), or
- Explicitly handle the fact that the graph may have already processed prior requests.

**Input side packets from requests are not supported:**
When graph pool is enabled, side packets are set once at pool construction time and cannot be overridden per request. If a client sends request parameters that would normally become input side packets (e.g. KServe request parameters other than `OVMS_MP_TIMESTAMP`), the request will be rejected with an error. If your graph relies on per-request side packets to configure calculator behavior, either disable the graph pool (`# OVMS_GRAPH_QUEUE_MAX_SIZE: 0`) or redesign the graph to accept such parameters as regular input stream packets instead of side packets.

**Python generative nodes (LOOPBACK) are not compatible with graph pool:**
Python nodes using generative mode (`execute` that `yield`s) rely on per-calculator state (`pyIteratorPtr`) that persists across `Process()` calls within a single request. With graph pool enabled, if a generator does not fully complete (e.g. client disconnects mid-stream), the stale iterator remains on the reused graph instance and subsequent requests will fail. Only Python nodes using regular mode (stateless `execute` that `return`s a list) are safe to use with graph pool.


## Deployment testing
### Debug logs
Expand Down
2 changes: 2 additions & 0 deletions docs/python_support/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,8 @@ node {

When using generative mode, the `execute` method in [`OvmsPythonModel`](#ovmspythonmodel-class) class must `yield` value.

> **Note:** Generative mode is not compatible with the [graph pool](../mediapipe.md#graph-pool-pre-initialized-graph-queue). Do not add the `# OVMS_GRAPH_QUEUE_MAX_SIZE` directive to graphs that use generative Python nodes with LOOPBACK.

```python
from pyovms import Tensor
...
Expand Down
8 changes: 8 additions & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2432,8 +2432,10 @@ cc_test(
"test/configs/config_dummy_dynamic_shape.json",
"test/configs/emptyConfigWithMetrics.json",
"test/llm/config.json",
"test/llm/config_queue.json",
"test/llm/assisted_decoding_config.json",
"test/llm/lm_cb_regular.pbtxt",
"test/llm/lm_cb_regular_queue.pbtxt",
"test/llm/lm_cb_with_tool_parser.pbtxt",
"test/llm/lm_legacy_regular.pbtxt",
"test/llm/lm_cb_speculative.pbtxt",
Expand All @@ -2454,6 +2456,7 @@ cc_test(
"test/increment_1x3x4x5/1/increment_1x3x4x5.xml",
"test/increment_1x3x4x5/1/increment_1x3x4x5.bin",
"test/mediapipe/config_mediapipe_openai_chat_completions_mock.json",
"test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json",
"test/mediapipe/config_mediapipe_add_adapter_full.json",
"test/mediapipe/config_mediapipe_all_graphs_adapter_full.json",
"test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json",
Expand Down Expand Up @@ -2483,6 +2486,10 @@ cc_test(
"test/mediapipe/graphscalar.pbtxt",
"test/mediapipe/graphWithParams.pbtxt",
"test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt",
"test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt",
"test/mediapipe/graph_gpt_with_queue.pbtxt",
"test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt",
"test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt",
"test/mediapipe/graphadapterfull_two_outputs_dag.pbtxt",
"test/mediapipe/graphdummyadapterfull_two_outputs.pbtxt",
"test/mediapipe/graph_multipart.pbtxt",
Expand Down Expand Up @@ -2905,6 +2912,7 @@ cc_library(
":test_test_with_temp_dir",
"//src/graph_export:graph_export",
"//src:libovms_server_settings",
"//src:libovms_systeminfo",
"@com_google_googletest//:gtest",
],
local_defines = COMMON_LOCAL_DEFINES,
Expand Down
1 change: 1 addition & 0 deletions src/capi_frontend/server_settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ struct ExportSettings {
std::string modelName = "";
std::string modelPath = "./";
std::string targetDevice = "CPU";
std::optional<uint32_t> restWorkers;
std::optional<std::string> extraQuantizationParams;
std::optional<std::string> vocoder;
std::string precision = "int8";
Expand Down
1 change: 1 addition & 0 deletions src/cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl&
hfSettings.exportSettings.extraQuantizationParams = result->operator[]("extra_quantization_params").as<std::string>();
if (result->count("vocoder"))
hfSettings.exportSettings.vocoder = result->operator[]("vocoder").as<std::string>();
hfSettings.exportSettings.restWorkers = serverSettings.restWorkers;
hfSettings.downloadPath = result->operator[]("model_repository_path").as<std::string>();
// When --task is used with --model_path but without --pull/--source_model,
// use model_path as the model location (no HF download needed)
Expand Down
1 change: 1 addition & 0 deletions src/graph_export/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ ovms_cc_library(
"@ovms//src:libovms_module",
"@ovms//src/filesystem:libovmsfilesystem",
"@ovms//src/filesystem:libovmslocalfilesystem",
"@ovms//src:libovms_systeminfo",
"@com_github_tencent_rapidjson//:rapidjson",
"@ovms//src:libovmsschema",
"@ovms//src:libovms_version",
Expand Down
21 changes: 15 additions & 6 deletions src/graph_export/graph_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ void GraphExport::clearInMemoryGraphContent() {
}

static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n");
static const std::string OVMS_GRAPH_QUEUE_MAX_SIZE_LINE_PREFIX = "# OVMS_GRAPH_QUEUE_MAX_SIZE: ";
Comment thread
atobiszei marked this conversation as resolved.
static const std::string OVMS_GRAPH_QUEUE_SIZE_AUTO = "AUTO";

// Builds the comment header shared by the exported graph templates:
// the "File created with" version line followed by the
// "# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO" directive line.
static std::string buildGraphHeader() {
    std::string header = OVMS_VERSION_GRAPH_LINE;  // already ends with '\n'
    header += OVMS_GRAPH_QUEUE_MAX_SIZE_LINE_PREFIX;
    header += OVMS_GRAPH_QUEUE_SIZE_AUTO;
    header += "\n";
    return header;
}

static std::string constructModelsPath(const std::string& modelPath, const std::optional<std::string>& ggufFilenameOpt) {
std::string modelsPath;
Expand Down Expand Up @@ -134,7 +143,7 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath
auto& exportSettings = hfSettings.exportSettings;

std::ostringstream oss;
oss << OVMS_VERSION_GRAPH_LINE;
oss << buildGraphHeader();
std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename);
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt"));
GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings);
Expand Down Expand Up @@ -229,7 +238,7 @@ static Status createRerankGraphTemplate(const std::string& directoryPath, const
auto& exportSettings = hfSettings.exportSettings;

std::ostringstream oss;
oss << OVMS_VERSION_GRAPH_LINE;
oss << buildGraphHeader();
// Windows path creation - graph parser needs forward slashes in paths
std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename);
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt"));
Expand Down Expand Up @@ -273,7 +282,7 @@ static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, co
auto& exportSettings = hfSettings.exportSettings;

std::ostringstream oss;
oss << OVMS_VERSION_GRAPH_LINE;
oss << buildGraphHeader();
std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename);
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt"));
GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings);
Expand Down Expand Up @@ -319,7 +328,7 @@ static Status createTextToSpeechGraphTemplate(const std::string& directoryPath,
auto& exportSettings = hfSettings.exportSettings;

std::ostringstream oss;
oss << OVMS_VERSION_GRAPH_LINE;
oss << buildGraphHeader();
std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename);
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt"));
GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings);
Expand Down Expand Up @@ -374,7 +383,7 @@ static Status createSpeechToTextGraphTemplate(const std::string& directoryPath,
auto& exportSettings = hfSettings.exportSettings;

std::ostringstream oss;
oss << OVMS_VERSION_GRAPH_LINE;
oss << buildGraphHeader();
std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename);
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt"));
GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings);
Expand Down Expand Up @@ -448,7 +457,7 @@ static Status createImageGenerationGraphTemplate(const std::string& directoryPat
GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings);

std::ostringstream oss;
oss << OVMS_VERSION_GRAPH_LINE;
oss << buildGraphHeader();
// clang-format off
oss << R"(
input_stream: "HTTP_REQUEST_PAYLOAD:input"
Expand Down
4 changes: 4 additions & 0 deletions src/http_frontend/http_graph_executor_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ namespace ovms {

static const std::string UNUSED_REQUEST_ID = "";

// HTTP overload of the input-side-packet check.
// This implementation never inspects the payload and always reports that
// the request carries no input side packets.
bool requestHasInputSidePackets(const HttpPayload& request) {
    constexpr bool hasSidePackets = false;
    return hasSidePackets;
}

Status deserializeInputSidePacketsFromFirstRequestImpl(
std::map<std::string, mediapipe::Packet>& inputSidePackets, // out
const HttpPayload& request) { // in
Expand Down
2 changes: 2 additions & 0 deletions src/http_frontend/http_graph_executor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class PythonBackend;

using HttpReaderWriter = HttpAsyncWriter;

bool requestHasInputSidePackets(const HttpPayload& request);

// Deserialization of parameters inside KServe gRPC request
// into mediapipe Packets.
// To be used by both - infer & inferStream.
Expand Down
13 changes: 12 additions & 1 deletion src/kfs_frontend/kfs_graph_executor_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include "../kfs_frontend/kfs_utils.hpp"
#include "../logging.hpp"
#include "../mediapipe_internal/graph_executor_constants.hpp"
#include "../mediapipe_internal/mediapipe_utils.hpp"
#include "../mediapipe_internal/mediapipegraphdefinition.hpp"
#include "../predict_request_validation_utils.hpp"
Expand Down Expand Up @@ -925,6 +926,7 @@ static Status createPacketAndPushIntoGraph(const std::string& name, std::shared_
}
std::unique_ptr<T> inputTensor;
OVMS_RETURN_ON_FAIL(deserializeTensor(name, *request, inputTensor, pythonBackend));
SPDLOG_TRACE("Current Timestamp before actual pushing:{}", timestamp.Value());
MP_RETURN_ON_FAIL(graph.AddPacketToInputStream(
name,
::mediapipe::packet_internal::Create(
Expand Down Expand Up @@ -1152,10 +1154,19 @@ Status createAndPushPacketsImpl(
return StatusCode::OK;
}

// Reports whether the request carries at least one parameter whose name is
// not the reserved "OVMS_MP_TIMESTAMP" key. Parameter values are not inspected.
// NOTE(review): the deserializer below compares against TIMESTAMP_PARAMETER_NAME;
// presumably both refer to the same key - consider sharing one constant.
bool requestHasInputSidePackets(const KFSRequest& request) {
    static const std::string reservedTimestampKey{"OVMS_MP_TIMESTAMP"};
    const auto& requestParameters = request.parameters();
    for (auto it = requestParameters.begin(); it != requestParameters.end(); ++it) {
        if (it->first == reservedTimestampKey) {
            continue;  // the timestamp parameter is not counted
        }
        return true;
    }
    return false;
}

Status deserializeInputSidePacketsFromFirstRequestImpl(
std::map<std::string, mediapipe::Packet>& inputSidePackets,
const KFSRequest& request) {
static const std::string PYTHON_SESSION_SIDE_PACKET_TAG{"py"};
for (const auto& [name, valueChoice] : request.parameters()) {
SPDLOG_DEBUG("Found: {}; parameter in request for: {};", name, request.model_name());
if (name == TIMESTAMP_PARAMETER_NAME) {
Expand Down
4 changes: 4 additions & 0 deletions src/kfs_frontend/kfs_graph_executor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ namespace ovms {
class PythonBackend;
class Status;

// Checks whether the request contains user-provided input side packets
// (parameters other than the reserved OVMS_MP_TIMESTAMP).
bool requestHasInputSidePackets(const KFSRequest& request);

// Deserialization of parameters inside KServe gRPC request
// into mediapipe Packets.
// To be used by both - infer & inferStream.
Expand Down
14 changes: 14 additions & 0 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ ovms_cc_library(
"//third_party:openvino",
"@mediapipe//mediapipe/framework:calculator_framework",
"@com_github_tencent_rapidjson//:rapidjson",
"//src/mediapipe_internal:graph_side_packets",
"//src/kfserving_api:kfserving_api_cpp",
"//src:libovmsprofiler",
":genai_servables",
Expand Down Expand Up @@ -350,6 +351,19 @@ ovms_cc_library(
additional_copts = COPTS_PYTHON
)

# Library target built from execution_context_utils.{hpp,cpp}; visible to all packages.
ovms_cc_library(
    name = "execution_context_utils",
    srcs = ["execution_context_utils.cpp"],
    hdrs = ["execution_context_utils.hpp"],
    visibility = ["//visibility:public"],
    deps = [
        ":genai_servables",
        "//src/mediapipe_internal:graph_side_packets",
        "//src:libovmslogging",
        "//src:libovmsstatus",
    ],
)

ovms_cc_library(
name = "py_jinja_template_processor",
hdrs = ["py_jinja_template_processor.hpp"],
Expand Down
Loading