diff --git a/common_settings.bzl b/common_settings.bzl index 3faad26bb5..4773a175aa 100644 --- a/common_settings.bzl +++ b/common_settings.bzl @@ -209,8 +209,6 @@ COMMON_STATIC_TEST_COPTS = select({ "-Wall", "-Wno-unknown-pragmas", "-Werror", - # ov::Tensor::data method call results in deprecated warning and we use it in multiple places - "-Wno-deprecated-declarations", "-Isrc", "-fconcepts", # for gmock related utils "-fvisibility=hidden",# Needed for pybind targets diff --git a/demos/benchmark/v3/benchmark.py b/demos/benchmark/v3/benchmark.py index eec806da09..88c9aa8f18 100644 --- a/demos/benchmark/v3/benchmark.py +++ b/demos/benchmark/v3/benchmark.py @@ -438,4 +438,8 @@ async def limited_request_func(request_func_input, pbar): print(f"Throughput - Tokens per second: {num_tokens / benchmark_results['duration']:^,.1f}") print(f"Mean latency: {np.mean(benchmark_results['latencies'])*1000:.2f} ms") print(f"Median latency: {np.median(benchmark_results['latencies'])*1000:.2f} ms") +# Print selected latency percentiles to better understand the latency distribution +percentiles = [10, 25, 50, 75, 90, 95, 99] +for p in percentiles: + print(f"{p}th percentile latency: {np.percentile(benchmark_results['latencies'], p)*1000:.2f} ms") print(f"Average document length: {num_tokens / len(docs)} tokens") diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index a1e109ab61..75e4b1164c 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -103,7 +103,7 @@ def add_common_arguments(parser): parser_speech2text.add_argument('--enable_word_timestamps', default=False, action='store_true', help='Load model with word timestamps support.', dest='enable_word_timestamps') args = vars(parser.parse_args()) -t2s_graph_template = """ +t2s_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO input_stream: "HTTP_REQUEST_PAYLOAD:input" output_stream: "HTTP_RESPONSE_PAYLOAD:output" node { @@ 
-129,7 +129,7 @@ def add_common_arguments(parser): } """ -s2t_graph_template = """ +s2t_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO input_stream: "HTTP_REQUEST_PAYLOAD:input" output_stream: "HTTP_RESPONSE_PAYLOAD:output" node { @@ -165,7 +165,7 @@ def add_common_arguments(parser): } """ -embedding_graph_ov_template = """ +embedding_graph_ov_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO input_stream: "REQUEST_PAYLOAD:input" output_stream: "RESPONSE_PAYLOAD:output" node { @@ -189,7 +189,7 @@ def add_common_arguments(parser): } """ -rerank_graph_ov_template = """ +rerank_graph_ov_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO input_stream: "REQUEST_PAYLOAD:input" output_stream: "RESPONSE_PAYLOAD:output" node { @@ -208,7 +208,8 @@ def add_common_arguments(parser): } """ -text_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input" +text_generation_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO +input_stream: "HTTP_REQUEST_PAYLOAD:input" output_stream: "HTTP_RESPONSE_PAYLOAD:output" node: { @@ -262,7 +263,8 @@ def add_common_arguments(parser): } }""" -image_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input" +image_generation_graph_template = """# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO +input_stream: "HTTP_REQUEST_PAYLOAD:input" output_stream: "HTTP_RESPONSE_PAYLOAD:output" node: { diff --git a/docs/mediapipe.md b/docs/mediapipe.md index 73f0eb1f15..94cd0fac7c 100644 --- a/docs/mediapipe.md +++ b/docs/mediapipe.md @@ -215,6 +215,53 @@ Nodes in the MediaPipe graphs can reference both the models configured in model_ Subconfig file may only contain *model_config_list* section - in the same format as in [models config file](starting_server.md). +### Graph Pool (Pre-initialized Graph Queue) + +OpenVINO Model Server can pre-initialize a pool of MediaPipe `CalculatorGraph` instances for a graph definition. 
Graphs in the pool are started once during server initialization and reused across inference requests, eliminating per-request graph initialization and teardown overhead. This is especially beneficial for graphs whose calculators perform expensive setup in their `Open()` method. + +#### How it works + +Without graph pool, each incoming request creates a new `CalculatorGraph`, calls `StartRun()` with side packets, processes the request, then tears down the graph via `CloseAllPacketSources()` and `WaitUntilDone()`. + +With graph pool enabled, a fixed number of graphs are pre-initialized and kept in a queue. When a request arrives, an idle graph is acquired from the queue. After processing, the graph is returned to the queue for the next request. The graph is never torn down — instead, `WaitUntilIdle()` is called between requests and the internal timestamp is incremented. + +#### Configuration + +The graph pool size is controlled via a comment directive in the graph `.pbtxt` file: + +``` +# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO +``` + +| Value | Behavior | +|:------|:---------| +| `AUTO` | Pool size is set to the number of hardware threads (`std::thread::hardware_concurrency()`), or 16 if detection fails | +| Positive integer (e.g. `4`) | Pool size is set to the given value (must not exceed hardware thread count) | +| `0` | Graph pool disabled — falls back to per-request graph creation | +| *(directive absent)* | Default: graph pool is disabled | + +**Default behavior:** graph pool stays disabled unless `OVMS_GRAPH_QUEUE_MAX_SIZE` is explicitly present in `graph.pbtxt`. Since the OVMS CLI graph exporter (`--pull --task`) always emits this directive, **graphs created via the CLI exporter have the pool enabled by default**. + +**Generated graphs from exporters:** +- OVMS `--task ...` graph export emits `# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO` for all graph types. +- `demos/common/export_models/export_model.py` also emits `# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO` for all graph types. 
+ +**Runtime kill-switch:** +Setting the environment variable `OVMS_GRAPH_QUEUE_OFF=1` globally disables graph pools at runtime, regardless of the directive in `graph.pbtxt`. + +#### Important considerations for graph developers + +**Stateful calculators:** +Since graphs in the pool are reused across requests, any state held by a calculator between `Process()` calls will persist across requests. If your calculator accumulates state (e.g. counters, buffers, history), that state will carry over to the next request that reuses the same graph instance. Design your calculators to either: +- Be stateless (reset any per-request state at the beginning of each `Process()` call), or +- Explicitly handle the fact that the graph may have already processed prior requests. + +**Input side packets from requests are not supported:** +When graph pool is enabled, side packets are set once at pool construction time and cannot be overridden per request. If a client sends request parameters that would normally become input side packets (e.g. KServe request parameters other than `OVMS_MP_TIMESTAMP`), the request will be rejected with an error. If your graph relies on per-request side packets to configure calculator behavior, either disable the graph pool (`# OVMS_GRAPH_QUEUE_MAX_SIZE: 0`) or redesign the graph to accept such parameters as regular input stream packets instead of side packets. + +**Python generative nodes (LOOPBACK) are not compatible with graph pool:** +Python nodes using generative mode (`execute` that `yield`s) rely on per-calculator state (`pyIteratorPtr`) that persists across `Process()` calls within a single request. With graph pool enabled, if a generator does not fully complete (e.g. client disconnects mid-stream), the stale iterator remains on the reused graph instance and subsequent requests will fail. Only Python nodes using regular mode (stateless `execute` that `return`s a list) are safe to use with graph pool. 
+ ## Deployment testing ### Debug logs diff --git a/docs/python_support/reference.md b/docs/python_support/reference.md index 7bb5e82a2d..9c5ae94000 100644 --- a/docs/python_support/reference.md +++ b/docs/python_support/reference.md @@ -749,6 +749,8 @@ node { When using generative mode, the `execute` method in [`OvmsPythonModel`](#ovmspythonmodel-class) class must `yield` value. +> **Note:** Generative mode is not compatible with the [graph pool](../mediapipe.md#graph-pool-pre-initialized-graph-queue). Do not add the `# OVMS_GRAPH_QUEUE_MAX_SIZE` directive to graphs that use generative Python nodes with LOOPBACK. + ```python from pyovms import Tensor ... diff --git a/src/BUILD b/src/BUILD index 0bac2f29bd..2979598959 100644 --- a/src/BUILD +++ b/src/BUILD @@ -2432,8 +2432,10 @@ cc_test( "test/configs/config_dummy_dynamic_shape.json", "test/configs/emptyConfigWithMetrics.json", "test/llm/config.json", + "test/llm/config_queue.json", "test/llm/assisted_decoding_config.json", "test/llm/lm_cb_regular.pbtxt", + "test/llm/lm_cb_regular_queue.pbtxt", "test/llm/lm_cb_with_tool_parser.pbtxt", "test/llm/lm_legacy_regular.pbtxt", "test/llm/lm_cb_speculative.pbtxt", @@ -2454,6 +2456,7 @@ cc_test( "test/increment_1x3x4x5/1/increment_1x3x4x5.xml", "test/increment_1x3x4x5/1/increment_1x3x4x5.bin", "test/mediapipe/config_mediapipe_openai_chat_completions_mock.json", + "test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json", "test/mediapipe/config_mediapipe_add_adapter_full.json", "test/mediapipe/config_mediapipe_all_graphs_adapter_full.json", "test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json", @@ -2483,6 +2486,10 @@ cc_test( "test/mediapipe/graphscalar.pbtxt", "test/mediapipe/graphWithParams.pbtxt", "test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt", + "test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt", + "test/mediapipe/graph_gpt_with_queue.pbtxt", + "test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt", + 
"test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt", "test/mediapipe/graphadapterfull_two_outputs_dag.pbtxt", "test/mediapipe/graphdummyadapterfull_two_outputs.pbtxt", "test/mediapipe/graph_multipart.pbtxt", @@ -2905,6 +2912,7 @@ cc_library( ":test_test_with_temp_dir", "//src/graph_export:graph_export", "//src:libovms_server_settings", + "//src:libovms_systeminfo", "@com_google_googletest//:gtest", ], local_defines = COMMON_LOCAL_DEFINES, diff --git a/src/capi_frontend/server_settings.hpp b/src/capi_frontend/server_settings.hpp index 9d906f1e38..ca6421beff 100644 --- a/src/capi_frontend/server_settings.hpp +++ b/src/capi_frontend/server_settings.hpp @@ -191,6 +191,7 @@ struct ExportSettings { std::string modelName = ""; std::string modelPath = "./"; std::string targetDevice = "CPU"; + std::optional restWorkers; std::optional extraQuantizationParams; std::optional vocoder; std::string precision = "int8"; diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp index 5bc0a6c9df..e6f6365c26 100644 --- a/src/cli_parser.cpp +++ b/src/cli_parser.cpp @@ -757,6 +757,7 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& hfSettings.exportSettings.extraQuantizationParams = result->operator[]("extra_quantization_params").as(); if (result->count("vocoder")) hfSettings.exportSettings.vocoder = result->operator[]("vocoder").as(); + hfSettings.exportSettings.restWorkers = serverSettings.restWorkers; hfSettings.downloadPath = result->operator[]("model_repository_path").as(); // When --task is used with --model_path but without --pull/--source_model, // use model_path as the model location (no HF download needed) diff --git a/src/graph_export/BUILD b/src/graph_export/BUILD index 58f286f8cf..0630805c53 100644 --- a/src/graph_export/BUILD +++ b/src/graph_export/BUILD @@ -29,6 +29,7 @@ ovms_cc_library( "@ovms//src:libovms_module", "@ovms//src/filesystem:libovmsfilesystem", "@ovms//src/filesystem:libovmslocalfilesystem", + 
"@ovms//src:libovms_systeminfo", "@com_github_tencent_rapidjson//:rapidjson", "@ovms//src:libovmsschema", "@ovms//src:libovms_version", diff --git a/src/graph_export/graph_export.cpp b/src/graph_export/graph_export.cpp index d1cd30b7b8..cb77a5e8f9 100644 --- a/src/graph_export/graph_export.cpp +++ b/src/graph_export/graph_export.cpp @@ -67,6 +67,15 @@ void GraphExport::clearInMemoryGraphContent() { } static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n"); +static const std::string OVMS_GRAPH_QUEUE_MAX_SIZE_LINE_PREFIX = "# OVMS_GRAPH_QUEUE_MAX_SIZE: "; +static const std::string OVMS_GRAPH_QUEUE_SIZE_AUTO = "AUTO"; + +static std::string buildGraphHeader() { + std::ostringstream oss; + oss << OVMS_VERSION_GRAPH_LINE; + oss << OVMS_GRAPH_QUEUE_MAX_SIZE_LINE_PREFIX << OVMS_GRAPH_QUEUE_SIZE_AUTO << "\n"; + return oss.str(); +} static std::string constructModelsPath(const std::string& modelPath, const std::optional& ggufFilenameOpt) { std::string modelsPath; @@ -134,7 +143,7 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -229,7 +238,7 @@ static Status createRerankGraphTemplate(const std::string& directoryPath, const auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(); // Windows path creation - graph parser needs forward slashes in paths std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); 
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); @@ -273,7 +282,7 @@ static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, co auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -319,7 +328,7 @@ static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -374,7 +383,7 @@ static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -448,7 +457,7 @@ static Status createImageGenerationGraphTemplate(const std::string& directoryPat GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(); // clang-format off oss << R"( input_stream: 
"HTTP_REQUEST_PAYLOAD:input" diff --git a/src/http_frontend/http_graph_executor_impl.cpp b/src/http_frontend/http_graph_executor_impl.cpp index b970f62594..4848f3760a 100644 --- a/src/http_frontend/http_graph_executor_impl.cpp +++ b/src/http_frontend/http_graph_executor_impl.cpp @@ -38,6 +38,10 @@ namespace ovms { static const std::string UNUSED_REQUEST_ID = ""; +bool requestHasInputSidePackets(const HttpPayload& request) { + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, // out const HttpPayload& request) { // in diff --git a/src/http_frontend/http_graph_executor_impl.hpp b/src/http_frontend/http_graph_executor_impl.hpp index 9846b10158..620a6736ab 100644 --- a/src/http_frontend/http_graph_executor_impl.hpp +++ b/src/http_frontend/http_graph_executor_impl.hpp @@ -48,6 +48,8 @@ class PythonBackend; using HttpReaderWriter = HttpAsyncWriter; +bool requestHasInputSidePackets(const HttpPayload& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. 
diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 034f6f0907..b5033501d9 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -24,6 +24,7 @@ #include "../kfs_frontend/kfs_utils.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_executor_constants.hpp" #include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" #include "../predict_request_validation_utils.hpp" @@ -925,6 +926,7 @@ static Status createPacketAndPushIntoGraph(const std::string& name, std::shared_ } std::unique_ptr inputTensor; OVMS_RETURN_ON_FAIL(deserializeTensor(name, *request, inputTensor, pythonBackend)); + SPDLOG_TRACE("Current Timestamp before actual pushing:{}", timestamp.Value()); MP_RETURN_ON_FAIL(graph.AddPacketToInputStream( name, ::mediapipe::packet_internal::Create( @@ -1152,10 +1154,19 @@ Status createAndPushPacketsImpl( return StatusCode::OK; } +bool requestHasInputSidePackets(const KFSRequest& request) { + static const std::string TIMESTAMP_PARAM{"OVMS_MP_TIMESTAMP"}; + for (const auto& [name, valueChoice] : request.parameters()) { + if (name != TIMESTAMP_PARAM) { + return true; + } + } + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, const KFSRequest& request) { - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG{"py"}; for (const auto& [name, valueChoice] : request.parameters()) { SPDLOG_DEBUG("Found: {}; parameter in request for: {};", name, request.model_name()); if (name == TIMESTAMP_PARAMETER_NAME) { diff --git a/src/kfs_frontend/kfs_graph_executor_impl.hpp b/src/kfs_frontend/kfs_graph_executor_impl.hpp index cfa65b6a57..1c6e697455 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.hpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.hpp @@ -36,6 +36,10 @@ namespace ovms { class PythonBackend; class Status; +// Checks whether 
the request contains user-provided input side packets +// (parameters other than the reserved OVMS_MP_TIMESTAMP). +bool requestHasInputSidePackets(const KFSRequest& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. diff --git a/src/llm/BUILD b/src/llm/BUILD index 9cb85d657e..d7e13abd55 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -25,6 +25,7 @@ ovms_cc_library( "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", "@com_github_tencent_rapidjson//:rapidjson", + "//src/mediapipe_internal:graph_side_packets", "//src/kfserving_api:kfserving_api_cpp", "//src:libovmsprofiler", ":genai_servables", @@ -350,6 +351,19 @@ ovms_cc_library( additional_copts = COPTS_PYTHON ) +ovms_cc_library( + name = "execution_context_utils", + hdrs = ["execution_context_utils.hpp"], + srcs = ["execution_context_utils.cpp"], + deps = [ + ":genai_servables", + "//src/mediapipe_internal:graph_side_packets", + "//src:libovmslogging", + "//src:libovmsstatus", + ], + visibility = ["//visibility:public"], +) + ovms_cc_library( name = "py_jinja_template_processor", hdrs = ["py_jinja_template_processor.hpp"], diff --git a/src/llm/execution_context_utils.cpp b/src/llm/execution_context_utils.cpp new file mode 100644 index 0000000000..96ecbbdb6b --- /dev/null +++ b/src/llm/execution_context_utils.cpp @@ -0,0 +1,51 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "execution_context_utils.hpp" + +#include + +#include "servable.hpp" + +namespace ovms { + +Status initializeLlmExecutionContexts(const GenAiServableMap& servableMap, GenAiExecutionContextMap& executionContextMap) { + for (const auto& [nodeName, servable] : servableMap) { + auto it = executionContextMap.find(nodeName); + if (it == executionContextMap.end() || !it->second) { + SPDLOG_DEBUG("Missing LLM execution context holder for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + auto& holder = it->second; + auto ctx = servable->createExecutionContext(); + if (!ctx) { + SPDLOG_DEBUG("Failed to create LLM execution context for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + holder->set(std::move(ctx)); + } + return StatusCode::OK; +} + +void resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (auto& [_, holder] : executionContextMap) { + if (!holder) { + continue; + } + holder->reset(); + } +} + +} // namespace ovms diff --git a/src/llm/execution_context_utils.hpp b/src/llm/execution_context_utils.hpp new file mode 100644 index 0000000000..198359c659 --- /dev/null +++ b/src/llm/execution_context_utils.hpp @@ -0,0 +1,26 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include "src/mediapipe_internal/graph_side_packets.hpp" +#include "src/status.hpp" + +namespace ovms { + +Status initializeLlmExecutionContexts(const GenAiServableMap& servableMap, GenAiExecutionContextMap& executionContextMap); +void resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + +} // namespace ovms diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index cadce01dd0..8cf63c781f 100644 --- a/src/llm/http_llm_calculator.cc +++ b/src/llm/http_llm_calculator.cc @@ -14,6 +14,7 @@ // limitations under the License. 
//***************************************************************************** #include +#include #include #pragma warning(push) @@ -27,6 +28,7 @@ #include "../http_payload.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_side_packets.hpp" #include "../profiler.hpp" #include "apis/openai_completions.hpp" #include "servable.hpp" @@ -36,9 +38,11 @@ using namespace ovms; namespace mediapipe { const std::string LLM_SESSION_SIDE_PACKET_TAG = "LLM_NODE_RESOURCES"; +const std::string LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG = "LLM_NODE_EXECUTION_CONTEXTS"; class HttpLLMCalculator : public CalculatorBase { std::shared_ptr servable; + std::shared_ptr executionContextHolder; std::shared_ptr executionContext; static const std::string INPUT_TAG_NAME; @@ -54,6 +58,9 @@ class HttpLLMCalculator : public CalculatorBase { cc->Inputs().Tag(INPUT_TAG_NAME).Set(); cc->Inputs().Tag(LOOPBACK_TAG_NAME).Set(); cc->InputSidePackets().Tag(LLM_SESSION_SIDE_PACKET_TAG).Set(); + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG)) { + cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Set(); + } cc->Outputs().Tag(OUTPUT_TAG_NAME).Set(); cc->Outputs().Tag(LOOPBACK_TAG_NAME).Set(); return absl::OkStatus(); @@ -72,7 +79,17 @@ class HttpLLMCalculator : public CalculatorBase { auto it = servableMap.find(cc->NodeName()); RET_CHECK(it != servableMap.end()) << "Could not find initialized LLM node named: " << cc->NodeName(); this->servable = it->second; - this->executionContext = servable->createExecutionContext(); + + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG) && !cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).IsEmpty()) { + ovms::GenAiExecutionContextMap executionContextMap = cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Get(); + auto contextIt = executionContextMap.find(cc->NodeName()); + RET_CHECK(contextIt != executionContextMap.end()) << "Could not find LLM execution context 
holder for node named: " << cc->NodeName(); + this->executionContextHolder = contextIt->second; + } + + if (!this->executionContextHolder) { + this->executionContext = servable->createExecutionContext(); + } SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "LLMCalculator [Node: {}] Open end", cc->NodeName()); return absl::OkStatus(); } @@ -94,6 +111,11 @@ class HttpLLMCalculator : public CalculatorBase { OVMS_PROFILE_FUNCTION(); RET_CHECK(this->servable != nullptr); + if (this->executionContextHolder) { + this->executionContext = this->executionContextHolder->get(); + } + RET_CHECK(this->executionContext != nullptr) << "LLM execution context not initialized for node: " << cc->NodeName(); + // For cases where MediaPipe decides to trigger Process() when there are no inputs if (cc->Inputs().Tag(INPUT_TAG_NAME).IsEmpty() && cc->Inputs().Tag(LOOPBACK_TAG_NAME).IsEmpty()) { return absl::OkStatus(); diff --git a/src/llm/llm_node_initializer.cpp b/src/llm/llm_node_initializer.cpp index b8ab217300..04ddca78d3 100644 --- a/src/llm/llm_node_initializer.cpp +++ b/src/llm/llm_node_initializer.cpp @@ -61,6 +61,8 @@ class LlmNodeInitializer : public NodeInitializer { return status; } genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); + sidePackets.genAiExecutionContextMap.emplace( + nodeName, std::make_shared()); return StatusCode::OK; } }; diff --git a/src/mediapipe_internal/BUILD b/src/mediapipe_internal/BUILD index 35456ec281..247b2c70b9 100644 --- a/src/mediapipe_internal/BUILD +++ b/src/mediapipe_internal/BUILD @@ -22,12 +22,79 @@ ovms_cc_library( ], hdrs = [ "node_initializer.hpp", + ], + deps = [ + ":graph_side_packets", + "//src:libovmsstatus", + ], + visibility = ["//visibility:public"], +) + +ovms_cc_library( + name = "graph_side_packets", + hdrs = [ "graph_side_packets.hpp", ], + visibility = ["//visibility:public"], +) + +ovms_cc_library( + name = "graph_executor_constants", + hdrs = [ + "graph_executor_constants.hpp", + ], + visibility = 
["//visibility:public"], +) + +ovms_cc_library( + name = "outputstreamobserver", + hdrs = [ + "outputstreamobserver.hpp", + ], deps = [ + ":mediapipe_utils", + "//src:libovms_execution_context", + "//src:model_metric_reporter", + "//src:libovmsprofiler", "//src:libovmsstatus", + "//src:libovmstimer", + "@mediapipe//mediapipe/framework:calculator_graph", + ], + visibility = ["//visibility:public"], + additional_copts = COPTS_MEDIAPIPE, +) + +ovms_cc_library( + name = "graphqueue", + hdrs = [ + "graphqueue.hpp", + ], + srcs = [ + "graphqueue.cpp", + ], + deps = select({ + "//:not_disable_python": [ + "//src/python:libovmspythonmodule", + ], + "//:disable_python": [] + }) + [ + ":graph_executor_constants", + ":graph_side_packets", + ":mediapipe_utils", + ":outputstreamobserver", + ":side_packet_builder", + "//src:libovms_queue", + "//src:libovmslogging", + "//src:libovms_execution_context", + "//src:libovmstimer", + "//src/metrics:libovmsmetrics", + "//src:model_metric_reporter", + "//src:libovmsprofiler", + "//third_party:openvino", + "@mediapipe//mediapipe/framework:calculator_graph", ], visibility = ["//visibility:public"], + additional_copts = COPTS_PYTHON + COPTS_MEDIAPIPE, ) ovms_cc_library( @@ -47,13 +114,47 @@ ovms_cc_library( visibility = ["//visibility:public"], ) +ovms_cc_library( + name = "side_packet_builder", + srcs = [ + "side_packet_builder.cpp", + ], + hdrs = [ + "side_packet_builder.hpp", + ], + deps = [ + ":graph_executor_constants", + ":graph_side_packets", + "@mediapipe//mediapipe/framework:calculator_graph", + ], + visibility = ["//visibility:public"], + additional_copts = COPTS_PYTHON + COPTS_MEDIAPIPE, +) + +ovms_cc_library( + name = "mediapipegraphconfig", + srcs = [ + "mediapipegraphconfig.cpp", + ], + hdrs = [ + "mediapipegraphconfig.hpp", + ], + deps = [ + "//src/filesystem:libovmsfilesystem", + "//src:libovmsstatus", + "//src/graph_export:graph_export", + "@com_github_tencent_rapidjson//:rapidjson", + "//src/port:rapidjson_stringbuffer", + 
"//src/port:rapidjson_writer", + ], + visibility = ["//visibility:public"], +) + ovms_cc_library( name = "libovms_mediapipe", srcs = [ "mediapipefactory.cpp", "mediapipefactory.hpp", - "mediapipegraphconfig.hpp", - "mediapipegraphconfig.cpp", "mediapipegraphdefinition.cpp", "mediapipegraphdefinition.hpp", "mediapipegraphexecutor.cpp", @@ -65,8 +166,11 @@ ovms_cc_library( ], "//:disable_python": [] }) + [ + ":graphqueue", + ":mediapipegraphconfig", ":node_initializer", ":mediapipe_utils", + ":side_packet_builder", "//src/dags:pipelinedefinitionstatus", "//src:libovms_single_version_servable_definition", "//src:libovms_tensorinfo", @@ -84,9 +188,8 @@ ovms_cc_library( "//src:libovms_execution_context", "//src:libovmstimer", "//src:libovmsprofiler", - "@com_github_tencent_rapidjson//:rapidjson", - "//src/port:rapidjson_stringbuffer", - "//src/port:rapidjson_writer", + "//src/llm:execution_context_utils", + "//src/utils:env_guard", "@mediapipe//mediapipe/framework:calculator_graph", "@mediapipe//mediapipe/framework/port:parse_text_proto", ], diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp new file mode 100644 index 0000000000..cf47a2f7f1 --- /dev/null +++ b/src/mediapipe_internal/graph_executor_constants.hpp @@ -0,0 +1,33 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once + +#include +#include + +namespace ovms { + +inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py"; +inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm"; +inline const std::string LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG = "llm_ctx"; +inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; +inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; +inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; +inline const std::string STT_SESSION_SIDE_PACKET_TAG = "s2t_servable"; +inline const std::string TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; +inline constexpr int64_t STARTING_TIMESTAMP_VALUE = 0; + +} // namespace ovms diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp index 6804974c81..5d0287154d 100644 --- a/src/mediapipe_internal/graph_side_packets.hpp +++ b/src/mediapipe_internal/graph_side_packets.hpp @@ -14,14 +14,19 @@ // limitations under the License. //***************************************************************************** #pragma once + #include +#include #include #include +#include #include namespace ovms { + class PythonNodeResources; class GenAiServable; +struct GenAiServableExecutionContext; struct ImageGenerationPipelines; struct EmbeddingsServable; struct RerankServable; @@ -36,9 +41,36 @@ using TtsServableMap = std::unordered_map>; using ImageGenerationPipelinesMap = std::unordered_map>; +// Holds a per-graph LLM execution context that is swapped between requests. +// The mutex synchronizes the handoff between the executor thread (which creates +// a fresh context before each request via set()) and the MediaPipe scheduler +// thread (which reads it in the calculator's Process() via get()). +// In the queue path these run concurrently because the graph stays running. 
+class GenAiExecutionContextHolder { +public: + std::shared_ptr get() { + std::lock_guard lock(executionContextMtx); + return executionContext; + } + void set(std::shared_ptr ctx) { + std::lock_guard lock(executionContextMtx); + executionContext = std::move(ctx); + } + void reset() { + std::lock_guard lock(executionContextMtx); + executionContext.reset(); + } + +private: + std::mutex executionContextMtx; + std::shared_ptr executionContext; +}; +using GenAiExecutionContextMap = std::unordered_map>; + struct GraphSidePackets { PythonNodeResourcesMap pythonNodeResourcesMap; GenAiServableMap genAiServableMap; + GenAiExecutionContextMap genAiExecutionContextMap; ImageGenerationPipelinesMap imageGenPipelinesMap; EmbeddingsServableMap embeddingsServableMap; RerankServableMap rerankServableMap; @@ -49,6 +81,7 @@ struct GraphSidePackets { void clear() { pythonNodeResourcesMap.clear(); genAiServableMap.clear(); + genAiExecutionContextMap.clear(); imageGenPipelinesMap.clear(); embeddingsServableMap.clear(); rerankServableMap.clear(); @@ -60,6 +93,7 @@ struct GraphSidePackets { bool empty() { return (pythonNodeResourcesMap.empty() && genAiServableMap.empty() && + genAiExecutionContextMap.empty() && imageGenPipelinesMap.empty() && embeddingsServableMap.empty() && rerankServableMap.empty() && diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp new file mode 100644 index 0000000000..acbb4e9870 --- /dev/null +++ b/src/mediapipe_internal/graphqueue.cpp @@ -0,0 +1,107 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "graphqueue.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../queue.hpp" +#include "src/python/pythonnoderesources.hpp" + +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#pragma warning(pop) + +#include "graph_executor_constants.hpp" +#include "outputstreamobserver.hpp" +#include "side_packet_builder.hpp" +namespace ovms { +GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr sidePacketMaps, int streamsLength) : + Queue(streamsLength), + sidePacketMaps(sidePacketMaps) { + inferRequests.reserve(streamsLength); + for (auto i = 0; i < streamsLength; ++i) { + // Build observer map locally before constructing GraphHelper (const map) + std::unordered_map> observers; + for (auto& name : config.output_stream()) { + std::string streamName = getStreamName(name); + auto holder = std::make_shared(); + holder->current = std::make_shared(); + observers[streamName] = holder; + } + + auto graphHelper = std::make_shared(std::move(observers)); + graphHelper->graph = std::make_unique<::mediapipe::CalculatorGraph>(); + graphHelper->currentTimestamp = ::mediapipe::Timestamp(0); + + auto absStatus = graphHelper->graph->Initialize(config); + if (!absStatus.ok()) { + SPDLOG_ERROR("Graph queue initialization failed: 
{}", absStatus.ToString()); + throw std::runtime_error(absStatus.ToString()); + } + for (const auto& [streamName, holder] : graphHelper->outStreamObservers) { + // Lambda captures holder (shared_ptr) by value — safe regardless of map layout + absStatus = graphHelper->graph->ObserveOutputStream(streamName, [holder](const ::mediapipe::Packet& packet) -> absl::Status { return holder->current->handlePacket(packet); }); + if (!absStatus.ok()) { + SPDLOG_ERROR("Graph queue ObserveOutputStream failed: {}", absStatus.ToString()); + throw std::runtime_error(absStatus.ToString()); + } + } + for (const auto& [nodeName, _] : sidePacketMaps->genAiServableMap) { + graphHelper->genAiExecutionContextMap[nodeName] = std::make_shared(); + } + std::map inputSidePackets; + buildInputSidePackets(inputSidePackets, *sidePacketMaps); + // Override execution context with per-graph instance + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(graphHelper->genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + absStatus = graphHelper->graph->StartRun(inputSidePackets); + if (!absStatus.ok()) { + SPDLOG_ERROR("Graph queue StartRun failed: {}", absStatus.ToString()); + throw std::runtime_error(absStatus.ToString()); + } + inferRequests.emplace_back(std::move(graphHelper)); + } +} +GraphQueue::~GraphQueue() { + for (auto& graphHelper : inferRequests) { + auto absStatus = graphHelper->graph->WaitUntilIdle(); + if (!absStatus.ok()) { + SPDLOG_DEBUG("Graph queue WaitUntilIdle error: {}", absStatus.ToString()); + } + absStatus = graphHelper->graph->CloseAllPacketSources(); + if (!absStatus.ok()) { + SPDLOG_DEBUG("Graph queue CloseAllPacketSources error: {}", absStatus.ToString()); + } + absStatus = graphHelper->graph->WaitUntilDone(); + if (!absStatus.ok()) { + SPDLOG_DEBUG("Graph queue WaitUntilDone error: {}", absStatus.ToString()); + } + graphHelper->graph->Cancel(); + graphHelper->graph.reset(); + } +} +} // namespace ovms diff 
--git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp new file mode 100644 index 0000000000..d97ee5d18f --- /dev/null +++ b/src/mediapipe_internal/graphqueue.hpp @@ -0,0 +1,101 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/queue.hpp" + +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#pragma GCC diagnostic pop +#pragma warning(pop) + +#include "graph_executor_constants.hpp" +#include "graph_side_packets.hpp" +#include "outputstreamobserver.hpp" +namespace ovms { +class OutputStreamObserverI; +class NullOutputStreamObserver; +struct ObserverHolder; +struct GraphHelper { + std::unique_ptr<::mediapipe::CalculatorGraph> graph; + // const after construction: keys are fixed, but observer implementations + // can be swapped via the mutable ObserverHolder inside each shared_ptr. 
+ const std::unordered_map> outStreamObservers; + GenAiExecutionContextMap genAiExecutionContextMap; + ::mediapipe::Timestamp currentTimestamp; + GraphHelper() = default; + // Constructor that takes the pre-built observer map + GraphHelper(std::unordered_map>&& observers) : + outStreamObservers(std::move(observers)) {} + GraphHelper(const GraphHelper&) = delete; + GraphHelper& operator=(const GraphHelper&) = delete; + GraphHelper(GraphHelper&& gh) : + graph(std::move(gh.graph)), + outStreamObservers(std::move(const_cast>&>(gh.outStreamObservers))), + genAiExecutionContextMap(std::move(gh.genAiExecutionContextMap)), + currentTimestamp(gh.currentTimestamp) {} + GraphHelper& operator=(GraphHelper&&) = delete; +}; +// we need to keep Graph alive during MP reload hence shared_ptr +class GraphQueue : public Queue> { + std::shared_ptr sidePacketMaps; + +public: + GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr sidePacketMaps, int streamsLength); + ~GraphQueue(); +}; + +struct GraphIdGuard { + std::weak_ptr weakQueue; + const int id; + // shared_ptr because GraphIdGuard (and the executor holding it) must keep + // the GraphHelper alive even after the GraphQueue is destroyed during + // mediapipe graph reload/retire — the in-flight request continues using + // the old graph until completion. 
+ std::shared_ptr graphHelper; + ::mediapipe::CalculatorGraph& graph; + GraphIdGuard(std::shared_ptr& queue) : + weakQueue(queue), + id(queue->getIdleStream().get()), + graphHelper((queue->getInferRequest(id))), + graph(*graphHelper->graph) { + } + GraphIdGuard(GraphIdGuard&&) = default; + GraphIdGuard(const GraphIdGuard&) = delete; + ~GraphIdGuard() { + auto existingQueue = weakQueue.lock(); + if (existingQueue) + existingQueue->returnStream(this->id); + } +}; +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp index 2e4f3d428e..a8237b1e0f 100644 --- a/src/mediapipe_internal/mediapipegraphconfig.hpp +++ b/src/mediapipe_internal/mediapipegraphconfig.hpp @@ -15,7 +15,12 @@ //***************************************************************************** #pragma once +#include #include +#include +#include + +#include #pragma warning(push) #pragma warning(disable : 6313) #include @@ -27,26 +32,18 @@ extern const std::string DEFAULT_GRAPH_FILENAME; extern const std::string DEFAULT_SUBCONFIG_FILENAME; extern const std::string DEFAULT_MODELMESH_SUBCONFIG_FILENAME; +struct GraphQueueAutoTag { + bool operator==(const GraphQueueAutoTag&) const { return true; } +}; + +using GraphQueueSizeValue = std::optional>; + class Status; -/** - * @brief This class represents Mediapie Graph configuration - */ class MediapipeGraphConfig { private: - /** - * @brief Mediapipe Graph Name - */ std::string graphName; - - /** - * @brief Mediapipe Base Path - */ std::string basePath; - - /** - * @brief Mediapipe Graph Path - */ std::string graphPath; /** @@ -69,16 +66,16 @@ class MediapipeGraphConfig { */ std::string currentGraphPbTxtMD5; -public: /** - * @brief Construct a new Mediapie Graph configuration object - * - * @param graphName - * @param basePath - * @param graphPath - * @param subconfigPath - * @param currentGraphPbTxtMD5 - */ + * @brief Graph queue size configuration. 
+ * + * - std::nullopt => user did not set this field + * - int => user explicitly set a numeric size + * - GraphQueueAutoTag => user explicitly set "AUTO" + */ + GraphQueueSizeValue graphQueueSize; + +public: MediapipeGraphConfig(const std::string& graphName = "", const std::string& basePath = "", const std::string& graphPath = "", @@ -95,47 +92,22 @@ class MediapipeGraphConfig { graphPath.clear(); } - /** - * @brief Get the Graph name - * - * @return const std::string& - */ const std::string& getGraphName() const { return this->graphName; } - /** - * @brief Set the Graph name - * - * @param name - */ void setGraphName(const std::string& graphName) { this->graphName = graphName; } - /** - * @brief Get the Graph Path - * - * @return const std::string& - */ const std::string& getGraphPath() const { return this->graphPath; } - /** - * @brief Get the Base Path - * - * @return const std::string& - */ const std::string& getBasePath() const { return this->basePath; } - /** - * @brief Set the Graph Path - * - * @param graphPath - */ void setGraphPath(const std::string& graphPath); /** @@ -145,11 +117,6 @@ class MediapipeGraphConfig { */ void setBasePathWithRootPath(); - /** - * @brief Set the Base Path - * - * @param basePath - */ void setBasePath(const std::string& basePath); /** @@ -168,36 +135,15 @@ class MediapipeGraphConfig { */ void setSubconfigPath(const std::string& subconfigPath); - /** - * @brief Get the ModelMesh ModelsConfig Path - * - * @return const std::string& - */ const std::string& getModelMeshSubconfigPath() const { return this->modelMeshSubconfigPath; } - - /** - * @brief Set the Model Mesh Models Config Path - * - * @param subconfigPath - */ void setModelMeshSubconfigPath(const std::string& subconfigPath); - /** - * @brief Set root directory path - * - * @param rootDirectoryPath - */ void setRootDirectoryPath(const std::string& rootDirectoryPath) { this->rootDirectoryPath = rootDirectoryPath; } - /** - * @brief Get the root directory path - * - * 
@return const std::string& - */ const std::string& getRootDirectoryPath() const { return this->rootDirectoryPath; } @@ -206,6 +152,49 @@ class MediapipeGraphConfig { this->currentGraphPbTxtMD5 = currentGraphPbTxtMD5; } + /** + * @brief Get the graph queue size setting. + * + * @return const GraphQueueSizeValue& - nullopt if not set, int or GraphQueueAutoTag + */ + const GraphQueueSizeValue& getGraphQueueSize() const { + return this->graphQueueSize; + } + + void setGraphQueueSize(int size) { + this->graphQueueSize = size; + } + + void setGraphQueueSizeAuto() { + this->graphQueueSize = GraphQueueAutoTag{}; + } + + /** + * @brief Resolve the graph queue size setting to a concrete integer. + * + * Returns: + * 0 => queue creation disabled (user set 0 or not set) + * >0 => explicit size or resolved AUTO + * + * Negative values are rejected at parse time (resolveGraphQueueSize). + * When not set (nullopt): returns 0 (queue disabled). + * When AUTO: returns hardware_concurrency() or 16 as fallback. + */ + int getInitialQueueSize() const { + if (!this->graphQueueSize.has_value()) { + return 0; // not set - queue disabled by default + } + if (std::holds_alternative(*this->graphQueueSize)) { + unsigned int hwThreads = std::thread::hardware_concurrency(); + if (hwThreads == 0) { + SPDLOG_WARN("std::thread::hardware_concurrency() returned 0 (unknown). 
Falling back to graph queue size 16."); + return 16; + } + return static_cast(hwThreads); + } + return std::get(*this->graphQueueSize); + } + bool isReloadRequired(const MediapipeGraphConfig& rhs) const; /** @@ -215,9 +204,6 @@ class MediapipeGraphConfig { */ Status parseNode(const rapidjson::Value& v); - /** - * @brief Logs the content of the graph configuration - */ void logGraphConfigContent() const; }; } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 2bc062d8e0..945dbf1d70 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -18,14 +18,17 @@ #include #include #include +#include #include #include +#include #include #include #include #include "../execution_context.hpp" #include "../config.hpp" +#include "src/utils/env_guard.hpp" #include "src/filesystem/filesystem.hpp" #include "src/graph_export/graph_export.hpp" #include "src/metrics/metric.hpp" @@ -45,6 +48,7 @@ #include "node_initializer.hpp" namespace ovms { + MediapipeGraphConfig MediapipeGraphDefinition::MGC; const std::string MediapipeGraphDefinition::SCHEDULER_CLASS_NAME{"Mediapipe"}; @@ -85,13 +89,83 @@ Status MediapipeGraphDefinition::validateForConfigFileExistence() { return StatusCode::OK; } +// Precondition: chosenConfig must be populated (call after validateForConfigFileExistence). +// Uses this->chosenConfig to parse the directive and this->config (parsed protobuf) for node inspection. +Status MediapipeGraphDefinition::resolveGraphQueueSize() { + if (this->chosenConfig.empty()) { + SPDLOG_ERROR("Internal error: resolveGraphQueueSize called with empty chosenConfig for mediapipe: {}", getName()); + return StatusCode::INTERNAL_ERROR; + } + // 0. Runtime kill-switch: OVMS_GRAPH_QUEUE_OFF disables all graph pools. 
+ if (GetEnvVar("OVMS_GRAPH_QUEUE_OFF") == "1") { + SPDLOG_INFO("Graph queue globally disabled via OVMS_GRAPH_QUEUE_OFF=1 for mediapipe: {}", getName()); + return StatusCode::OK; + } + // 1. Explicit pbtxt directive: # OVMS_GRAPH_QUEUE_MAX_SIZE: + // Always honored regardless of calculator checks. + // Value 0 disables the queue, AUTO or positive integer enables it. + // Negative values are rejected as invalid. + static const std::regex directiveRegex( + R"((?:^|\n)\s*#\s*OVMS_GRAPH_QUEUE_MAX_SIZE\s*:\s*(\S+)\s*(?:\r?\n|$))"); + std::smatch match; + if (std::regex_search(this->chosenConfig, match, directiveRegex)) { + std::string value = match[1].str(); + if (value == "AUTO") { + this->mgconfig.setGraphQueueSizeAuto(); + } else { + auto parsed = stoi32(value); + if (!parsed.has_value()) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_MAX_SIZE value: '{}'. Expected integer or 'AUTO'.", value); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + int queueSize = parsed.value(); + if (queueSize < 0) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_MAX_SIZE value: {}. Must be 0 (disabled) or a positive integer.", queueSize); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + if (queueSize == 0) { + SPDLOG_DEBUG("Graph queue explicitly disabled (OVMS_GRAPH_QUEUE_MAX_SIZE=0) for mediapipe: {}", getName()); + return StatusCode::OK; + } + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads > 0 && queueSize > static_cast(maxThreads)) { + SPDLOG_WARN("OVMS_GRAPH_QUEUE_MAX_SIZE value: {} exceeds available hardware threads: {}. Clamping to {}.", queueSize, maxThreads, maxThreads); + queueSize = static_cast(maxThreads); + } + this->mgconfig.setGraphQueueSize(queueSize); + } + // 2. Reject PythonExecutorCalculator nodes using LOOPBACK with graph queue enabled. + // Generative Python nodes hold per-request iterator state that cannot be shared + // across pooled graph instances. 
+ for (int i = 0; i < this->config.node_size(); ++i) { + const auto& node = this->config.node(i); + if (node.calculator() != "PythonExecutorCalculator") { + continue; + } + for (const auto& inputStream : node.input_stream()) { + if (inputStream.find("LOOPBACK") == 0) { + SPDLOG_ERROR("PythonExecutorCalculator with LOOPBACK stream is incompatible with graph queue " + "(OVMS_GRAPH_QUEUE_MAX_SIZE) in mediapipe: {}. " + "Generative Python nodes hold per-request state that cannot be shared across pooled graphs. " + "Set OVMS_GRAPH_QUEUE_MAX_SIZE to 0 or remove the LOOPBACK stream.", + getName()); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + } + } + return StatusCode::OK; + } + SPDLOG_DEBUG("Graph queue disabled by default for mediapipe: {}. Add '# OVMS_GRAPH_QUEUE_MAX_SIZE: ' directive in graph.pbtxt to enable.", getName()); + return StatusCode::OK; +} + Status MediapipeGraphDefinition::validateForConfigLoadableness() { - if (chosenConfig.empty()) { + if (this->chosenConfig.empty()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse empty mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; } SPDLOG_TRACE("Will try to load pbtxt config: {}", this->chosenConfig); - bool success = ::google::protobuf::TextFormat::ParseFromString(chosenConfig, &this->config); + bool success = ::google::protobuf::TextFormat::ParseFromString(this->chosenConfig, &this->config); if (!success) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; @@ -119,7 +193,7 @@ Status MediapipeGraphDefinition::dryInitializeTest() { } Status MediapipeGraphDefinition::validate(const ServableNameChecker& checker) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName()); - if (!this->sidePacketMaps.empty()) { + if 
(!this->sidePacketMaps->empty()) { SPDLOG_ERROR("Internal Error: MediaPipe definition is in unexpected state."); return StatusCode::INTERNAL_ERROR; } @@ -136,6 +210,10 @@ Status MediapipeGraphDefinition::validate(const ServableNameChecker& checker) { if (!validationResult.ok()) { return validationResult; } + validationResult = resolveGraphQueueSize(); + if (!validationResult.ok()) { + return validationResult; + } std::unique_lock lock(metadataMtx); auto status = createInputsInfo(); if (!status.ok()) { @@ -167,6 +245,10 @@ Status MediapipeGraphDefinition::validate(const ServableNameChecker& checker) { if (!status.ok()) { return status; } + status = this->initializeQueueIfRequired(); + if (!status.ok()) { + return status; + } if (!this->loraAliases.empty() && checker.aliasesConflict(this->loraAliases, getName())) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LoRA alias in graph '{}' conflicts with an existing servable", getName()); @@ -182,12 +264,32 @@ Status MediapipeGraphDefinition::validate(const ServableNameChecker& checker) { return StatusCode::OK; } +Status MediapipeGraphDefinition::initializeQueueIfRequired() { + int initialQueueSize = this->mgconfig.getInitialQueueSize(); + if (initialQueueSize <= 0) { + SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize); + return StatusCode::OK; + } + try { + this->queue = std::make_shared(this->config, this->sidePacketMaps, initialQueueSize); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} error: {}", getName(), e.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) 
{ + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} unknown error", getName()); + return StatusCode::INTERNAL_ERROR; + } + SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName()); + return StatusCode::OK; +} + MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, const MediapipeGraphConfig& config, MetricRegistry* registry, const MetricConfig* metricConfig, PythonBackend* pythonBackend) : SingleVersionServableDefinition(name), + sidePacketMaps(std::make_shared()), status(SCHEDULER_CLASS_NAME, getName()), pythonBackend(pythonBackend), reporter(std::make_unique(metricConfig, registry, name)) { @@ -256,11 +358,19 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& return status; } SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); - - pipeline = std::make_unique(getName(), std::to_string(getVersion()), - this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, - this->sidePacketMaps, - this->pythonBackend, this->reporter.get()); + if (this->queue) { + GraphIdGuard graphIdGuard(this->queue); + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get(), std::move(graphIdGuard)); + } else { + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get()); + } + SPDLOG_DEBUG("Created Mediapipe graph executor: {}", getName()); return status; } @@ -334,13 +444,19 @@ Status MediapipeGraphDefinition::reload(const ServableNameChecker& checker, cons std::this_thread::sleep_for(std::chrono::microseconds(1)); } this->mgconfig = config; - this->sidePacketMaps.clear(); + this->queue.reset(); + 
this->sidePacketMaps = std::make_shared(); return validate(checker); } void MediapipeGraphDefinition::retire() { - this->sidePacketMaps.clear(); + // Block creating new unloadGuards this->status.handle(RetireEvent()); + while (requestsHandlesCounter > 0) { + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + this->queue.reset(); + this->sidePacketMaps.reset(); } bool MediapipeGraphDefinition::isReloadRequired(const MediapipeGraphConfig& config) const { @@ -369,13 +485,13 @@ Status MediapipeGraphDefinition::initializeNodes() { if (!success) maps.clear(); } - } guard{sidePacketMaps, success}; + } guard{*sidePacketMaps, success}; auto& registry = NodeInitializerRegistry::instance(); for (int i = 0; i < config.node().size(); i++) { for (const auto& initializer : registry.all()) { if (initializer->matches(config.node(i).calculator())) { - Status status = initializer->initialize(config.node(i), getName(), mgconfig.getBasePath(), sidePacketMaps, pythonBackend); + Status status = initializer->initialize(config.node(i), getName(), mgconfig.getBasePath(), *sidePacketMaps, pythonBackend); if (!status.ok()) { return status; } @@ -383,8 +499,8 @@ Status MediapipeGraphDefinition::initializeNodes() { } } // Register LoRA aliases for routing from initialized image gen pipelines - this->loraAliases = sidePacketMaps.loraAliases; - this->hideBaseModelInRouting = sidePacketMaps.hideBaseModelInRouting; + this->loraAliases = sidePacketMaps->loraAliases; + this->hideBaseModelInRouting = sidePacketMaps->hideBaseModelInRouting; success = true; return StatusCode::OK; } diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 013ebcb782..b49ed7e456 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -39,6 +39,7 @@ #include "mediapipegraphconfig.hpp" #include "graph_side_packets.hpp" #include "packettypes.hpp" +#include "graphqueue.hpp" 
namespace ovms { class MetricConfig; @@ -80,7 +81,7 @@ class MediapipeGraphDefinition : public SingleVersionServableDefinition { static const std::string SCHEDULER_CLASS_NAME; protected: - GraphSidePackets sidePacketMaps; + std::shared_ptr sidePacketMaps; struct ValidationResultNotifier { ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) : @@ -103,10 +104,13 @@ class MediapipeGraphDefinition : public SingleVersionServableDefinition { }; virtual Status validateForConfigFileExistence(); + Status resolveGraphQueueSize(); Status validateForConfigLoadableness(); Status setStreamTypes(); Status dryInitializeTest(); + Status initializeQueueIfRequired(); + std::string chosenConfig; static MediapipeGraphConfig MGC; @@ -143,5 +147,6 @@ class MediapipeGraphDefinition : public SingleVersionServableDefinition { PythonBackend* pythonBackend; std::unique_ptr reporter; + std::shared_ptr queue; }; } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 3c036a845a..26757de401 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -19,6 +19,8 @@ #include #include +#include "graph_executor_constants.hpp" + #pragma warning(push) #pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) #pragma GCC diagnostic push @@ -27,10 +29,11 @@ #pragma GCC diagnostic pop #pragma warning(pop) +#include "src/llm/execution_context_utils.hpp" + #if (PYTHON_DISABLE == 0) -#include "../python/python_backend.hpp" +#include "src/python/python_backend.hpp" #endif - namespace ovms { MediapipeGraphExecutor::MediapipeGraphExecutor( @@ -41,14 +44,10 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const 
EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, + const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : + MediapipeServableMetricReporter* mediapipeServableMetricReporter, + GraphIdGuard&& guard) : name(name), version(version), config(config), @@ -56,10 +55,11 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputTypes(std::move(outputTypes)), inputNames(std::move(inputNames)), outputNames(std::move(outputNames)), - sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}), + sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), - mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), + mediapipeServableMetricReporter(mediapipeServableMetricReporter), + guard(std::move(guard)) {} MediapipeGraphExecutor::MediapipeGraphExecutor( const std::string& name, const std::string& version, @@ -80,16 +80,7 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputNames(std::move(outputNames)), sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} -const std::string MediapipeGraphExecutor::PYTHON_SESSION_SIDE_PACKET_TAG = "py"; -const std::string MediapipeGraphExecutor::LLM_SESSION_SIDE_PACKET_TAG = "llm"; -const std::string MediapipeGraphExecutor::IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; -const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; -const std::string 
MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; -const std::string MediapipeGraphExecutor::STT_SESSION_SIDE_PACKET_TAG = "s2t_servable"; -const std::string MediapipeGraphExecutor::TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; -const ::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0); - } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index 0d87c86088..f0f0428740 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -16,6 +16,7 @@ #pragma once #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include "../profiler.hpp" #include "../status.hpp" #include "../timer.hpp" +#include "src/llm/execution_context_utils.hpp" #pragma warning(push) #pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) #pragma GCC diagnostic push @@ -36,13 +38,17 @@ #include "mediapipe/framework/port/status.h" #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_executor_constants.hpp" #include "mediapipe_utils.hpp" #include "graph_side_packets.hpp" +#include "side_packet_builder.hpp" #include "packettypes.hpp" +#include "graphqueue.hpp" namespace ovms { class PythonBackend; class ServableMetricReporter; +class MediapipeGraphExecutor; inline StatusCode mediapipeAbslToOvmsStatus(absl::StatusCode code) { if (code == absl::StatusCode::kFailedPrecondition) { // ovms session calculator returns this status code when loading model fails @@ -71,9 +77,56 @@ inline StatusCode mediapipeAbslToOvmsStatus(absl::StatusCode code) { } \ _Pragma("warning(pop)") +template +struct MyFunctor : public OutputStreamObserverI { + const std::string& requestId; + MediapipeGraphExecutor& exec; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ResponseType& response; + MyFunctor(const std::string& outputStreamName, 
mediapipe_packet_type_enum packetType, MediapipeGraphExecutor& exec, const RequestType& request, ResponseType& response) : + requestId(getRequestId(request)), + exec(exec), + outputStreamName(outputStreamName), + packetType(packetType), + response(response) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~MyFunctor() = default; +}; + +template +struct StreamingFunctor : public OutputStreamObserverI { + ReaderWriterType& serverReaderWriter; + std::mutex& sendMutex; + const std::string& executorName; + const std::string& executorVersion; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ExecutionContext executionContext; + MediapipeServableMetricReporter* metricReporter; + StreamingFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, + const std::string& executorName, const std::string& executorVersion, + ReaderWriterType& serverReaderWriter, std::mutex& sendMutex, + ExecutionContext executionContext, MediapipeServableMetricReporter* metricReporter) : + serverReaderWriter(serverReaderWriter), + sendMutex(sendMutex), + executorName(executorName), + executorVersion(executorVersion), + outputStreamName(outputStreamName), + packetType(packetType), + executionContext(executionContext), + metricReporter(metricReporter) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~StreamingFunctor() = default; +}; class MediapipeGraphExecutor { +public: const std::string name; const std::string version; + +private: const ::mediapipe::CalculatorGraphConfig config; stream_types_mapping_t inputTypes; stream_types_mapping_t outputTypes; @@ -86,30 +139,22 @@ class MediapipeGraphExecutor { ::mediapipe::Timestamp currentStreamTimestamp; MediapipeServableMetricReporter* mediapipeServableMetricReporter; + std::optional guard; public: - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG; - static const std::string LLM_SESSION_SIDE_PACKET_TAG; - static const 
std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG; - static const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG; - static const std::string RERANK_SESSION_SIDE_PACKET_TAG; - static const std::string STT_SESSION_SIDE_PACKET_TAG; - static const std::string TTS_SESSION_SIDE_PACKET_TAG; - static const ::mediapipe::Timestamp STARTING_TIMESTAMP; - - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, + const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter); - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard); + // Constructor without graph queue (old path - graph created per-request) + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, @@ -123,12 +168,81 @@ class MediapipeGraphExecutor { SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name); MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false)); MetricGaugeGuard 
currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get()); + if (this->guard.has_value()) { + return inferWithQueue(request, response, executionContext, failedRequestsGuard); + } else { + return inferWithoutQueue(request, response, executionContext, failedRequestsGuard); + } + } + + template + Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { + // Graph queue does not support user-provided input side packets. + // Side packets are set at queue construction time. + if (requestHasInputSidePackets(*request)) { + SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. " + "Side packets are set at graph queue construction time. Graph: {}", + this->name); + return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, + "Input side packets are not supported for graphs with queue enabled"); + } + ::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiServableMap, this->guard->graphHelper->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + for (auto& name : this->outputNames) { + if (name.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->graphHelper->outStreamObservers.at(name)->current = std::make_shared>(name, this->outputTypes.at(name), *this, *request, *response); + } + + size_t numberOfPacketsCreated = 0; + auto ovms_status = createAndPushPacketsImpl( + std::shared_ptr(request, [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->graphHelper->currentTimestamp, + numberOfPacketsCreated); + if (!ovms_status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + return ovms_status; + } + + if 
(this->inputNames.size() > numberOfPacketsCreated) { + SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", + this->inputNames.size(), numberOfPacketsCreated, this->name); + return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); + } + + failedRequestsGuard.disable(); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); + + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + resetLlmExecutionContexts(this->guard->graphHelper->genAiExecutionContextMap); + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->graphHelper->currentTimestamp = ::mediapipe::Timestamp(this->guard->graphHelper->currentTimestamp.Value() + 1); + SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); + return StatusCode::OK; + } + + template + Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { ::mediapipe::CalculatorGraph graph; MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); - enum : unsigned int { - PROCESS, - TIMER_END2 - }; + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiServableMap, this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + enum : unsigned int { PROCESS, + TIMER_END2 }; Timer timer; timer.start(PROCESS); std::unordered_map outputPollers; @@ -147,16 +261,7 @@ class MediapipeGraphExecutor { } std::map inputSidePackets; 
OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request)); -#if (PYTHON_DISABLE == 0) - inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(STARTING_TIMESTAMP); -#endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); - - inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(STARTING_TIMESTAMP); + buildInputSidePackets(inputSidePackets, this->sidePacketMaps); MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR); @@ -165,11 +270,7 @@ class MediapipeGraphExecutor { size_t numberOfPacketsCreated = 0; auto ovms_status = createAndPushPacketsImpl( - std::shared_ptr(request, - // Custom deleter to avoid deallocation by custom holder - // Conversion to shared_ptr is required for unified deserialization method - // for first and subsequent requests - [](const RequestType*) {}), + std::shared_ptr(request, [](const RequestType*) {}), this->inputTypes, this->pythonBackend, graph, @@ -180,25 +281,20 @@ class MediapipeGraphExecutor { return ovms_status; } - // This differs from inferStream - we require user to feed all streams if (this->inputNames.size() > numberOfPacketsCreated) { 
SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", - this->inputNames.size(), - numberOfPacketsCreated, - this->name); + this->inputNames.size(), numberOfPacketsCreated, this->name); return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); } failedRequestsGuard.disable(); INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); - // we wait idle since some calculators could hold ownership on packet content while nodes further down the graph - // can be still processing those. Closing packet sources triggers Calculator::Close() on nodes that do not expect - // new packets auto status = graph.WaitUntilIdle(); - if (!status.ok()) { // Collect error metric after Open() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); @@ -226,7 +322,7 @@ class MediapipeGraphExecutor { SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName); } status = graph.WaitUntilDone(); - if (!status.ok()) { // Collect error metric after Process() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); @@ -245,6 +341,131 @@ class MediapipeGraphExecutor { template Status inferStream(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); + if (this->guard.has_value()) { + return inferStreamWithQueue(req, serverReaderWriter, 
executionContext); + } else { + return inferStreamWithoutQueue(req, serverReaderWriter, executionContext); + } + } + + template + Status inferStreamWithQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { + SPDLOG_DEBUG("Start streaming mediapipe graph: {} execution (queue path)", this->name); + std::mutex sendMutex; + try { + // Graph queue does not support user-provided input side packets. + // Side packets are set at queue construction time. + if (requestHasInputSidePackets(req)) { + SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. " + "Side packets are set at graph queue construction time. Graph: {}", + this->name); + return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, + "Input side packets are not supported for graphs with queue enabled"); + } + MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get()); + ::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiServableMap, this->guard->graphHelper->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + + enum : unsigned int { + PROCESS, + TIMER_END2 + }; + Timer timer; + timer.start(PROCESS); + + // Swap output stream observers to streaming functors. + // Observers are already installed on the graph at queue construction time; + // we only replace the functor implementation to serialize+send to the client. + // Lifetime: sendMutex and serverReaderWriter are stack-local in this method + // and outlive all callbacks because we WaitUntilIdle() before returning. 
+ for (const auto& outputName : this->outputNames) { + if (outputName.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", outputName); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->graphHelper->outStreamObservers.at(outputName)->current = std::make_shared>( + outputName, this->outputTypes.at(outputName), + this->name, this->version, + serverReaderWriter, sendMutex, + executionContext, this->mediapipeServableMetricReporter); + } + + size_t numberOfPacketsCreated = 0; + { + OVMS_PROFILE_SCOPE("Mediapipe graph deserializing first request"); + bool isSuccess = true; + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + std::shared_ptr(&req, + [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->graphHelper->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of first request", isSuccess); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + } + + // Read loop + auto newReq = std::make_shared(); + while (waitForNewRequest(serverReaderWriter, *newReq)) { + auto pstatus = validateSubsequentRequestImpl( + *newReq, + this->name, + this->version, + this->inputTypes); + bool isSuccess = true; + if (pstatus.ok()) { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + newReq, + this->inputTypes, + this->pythonBackend, + graph, + this->guard->graphHelper->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of subsequent requests", isSuccess); + } else { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(std::move(pstatus), "validate subsequent requests", isSuccess); + } + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + + if (graph.HasError()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + SPDLOG_DEBUG("Graph {}: encountered an error, 
stopping the execution", this->name); + break; + } + + newReq = std::make_shared(); + } + + // Do NOT CloseAllPacketSources or WaitUntilDone - graph stays alive for reuse + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + resetLlmExecutionContexts(this->guard->graphHelper->genAiExecutionContextMap); + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->graphHelper->currentTimestamp = ::mediapipe::Timestamp(this->guard->graphHelper->currentTimestamp.Value() + 1); + SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name); + + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); + return StatusCode::OK; + } catch (...) 
{ + SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph (queue path)", this->name); + return Status(StatusCode::UNKNOWN_ERROR, "Exception while processing MediaPipe graph"); + } + } + + template + Status inferStreamWithoutQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { SPDLOG_DEBUG("Start MediapipeGraphExecutor::inferEx mediapipe graph: {} execution", this->name); std::mutex sendMutex; try { @@ -255,6 +476,10 @@ class MediapipeGraphExecutor { // Init MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); } + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiServableMap, this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } enum : unsigned int { PROCESS, TIMER_END2 @@ -297,13 +522,7 @@ class MediapipeGraphExecutor { { OVMS_PROFILE_SCOPE("Mediapipe graph creating input side packets"); OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, req)); -#if (PYTHON_DISABLE == 0) - inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap) - .At(STARTING_TIMESTAMP); -#endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP); + buildInputSidePackets(inputSidePackets, this->sidePacketMaps); } { @@ -380,6 +599,7 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + 
resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); SPDLOG_DEBUG("Graph {}: Done execution", this->name); } @@ -394,4 +614,44 @@ class MediapipeGraphExecutor { } }; +template +absl::Status MyFunctor::handlePacket(const ::mediapipe::Packet& packet) { + auto status = onPacketReadySerializeImpl( + this->requestId, + this->exec.name, + this->exec.version, + this->outputStreamName, + this->packetType, + packet, + response); + return status.ok() ? absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, status.string()); +} + +template +absl::Status StreamingFunctor::handlePacket(const ::mediapipe::Packet& packet) { + OVMS_PROFILE_SCOPE("Mediapipe Packet Ready Callback"); + try { + std::lock_guard lock(sendMutex); + auto status = onPacketReadySerializeAndSendImpl( + "" /*no ids for streaming*/, + executorName, + executorVersion, + outputStreamName, + packetType, + packet, + serverReaderWriter); + if (!status.ok()) { + SPDLOG_DEBUG("error in send packet routine {}", status.string()); + return absl::Status(absl::StatusCode::kInternal, "error in send packet routine"); + } + auto now = std::chrono::system_clock::now(); + auto currentTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast(now.time_since_epoch()).count()); + OBSERVE_IF_ENABLED(metricReporter->getRequestLatencyMetric(executionContext), (currentTimestamp - packet.Timestamp()).Microseconds()); + INCREMENT_IF_ENABLED(metricReporter->getResponsesMetric(executionContext)); + return absl::OkStatus(); + } catch (...) 
{ + SPDLOG_DEBUG("Error occurred during packet serialization in mediapipe graph: {}", executorName); + return absl::Status(absl::StatusCode::kCancelled, "error in serialization"); + } +} } // namespace ovms diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp new file mode 100644 index 0000000000..32a2da38d1 --- /dev/null +++ b/src/mediapipe_internal/outputstreamobserver.hpp @@ -0,0 +1,63 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../execution_context.hpp" +#include "../model_metric_reporter.hpp" +#include "../profiler.hpp" +#include "../status.hpp" +#include "../timer.hpp" +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#pragma GCC diagnostic pop +#pragma warning(pop) +#include "mediapipe_utils.hpp" +#include "packettypes.hpp" + +namespace ovms { +class PythonBackend; +class ServableMetricReporter; +class OutputStreamObserverI { +public: + virtual absl::Status handlePacket(const ::mediapipe::Packet& packet) = 0; + virtual ~OutputStreamObserverI() = default; +}; +class NullOutputStreamObserver : public OutputStreamObserverI { +public: + NullOutputStreamObserver() = default; + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("NullOutputStreamObserver::handlePacket called - graph observer was not replaced before execution"); + return absl::InternalError("NullOutputStreamObserver should have been replaced before graph execution"); + } +}; +// Mutable holder for an observer, allowing the observer implementation to be +// swapped while the map that owns this holder remains const. 
+struct ObserverHolder { + std::shared_ptr current; +}; +} // namespace ovms diff --git a/src/mediapipe_internal/side_packet_builder.cpp b/src/mediapipe_internal/side_packet_builder.cpp new file mode 100644 index 0000000000..366b25d227 --- /dev/null +++ b/src/mediapipe_internal/side_packet_builder.cpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include "side_packet_builder.hpp" + +#include "graph_executor_constants.hpp" +#include "graph_side_packets.hpp" + +namespace ovms { + +void buildInputSidePackets(std::map& inputSidePackets, + const GraphSidePackets& sidePackets) { + const auto ts = ::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE); +#if (PYTHON_DISABLE == 0) + inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.pythonNodeResourcesMap).At(ts); +#endif + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.genAiServableMap).At(ts); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.genAiExecutionContextMap).At(ts); + inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.imageGenPipelinesMap).At(ts); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.embeddingsServableMap).At(ts); + inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.rerankServableMap).At(ts); + inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.sttServableMap).At(ts); + inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePackets.ttsServableMap).At(ts); +} + +} // namespace ovms diff --git a/src/mediapipe_internal/side_packet_builder.hpp b/src/mediapipe_internal/side_packet_builder.hpp new file mode 100644 index 0000000000..8eacaacb89 --- /dev/null +++ b/src/mediapipe_internal/side_packet_builder.hpp @@ -0,0 +1,37 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include + +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#pragma GCC diagnostic pop +#pragma warning(pop) + +namespace ovms { +struct GraphSidePackets; + +// Builds standard input side packets from all servable maps in GraphSidePackets. +// Adds entries to the provided map without clearing existing entries. 
+void buildInputSidePackets(std::map& inputSidePackets, + const GraphSidePackets& sidePackets); + +} // namespace ovms diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index 7722d7808e..29dbc3b874 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -16,6 +16,7 @@ #include "systeminfo.hpp" #include +#include #include #include #include diff --git a/src/test/azurefilesystem_test.cpp b/src/test/azurefilesystem_test.cpp index f46de1ef6d..ff38edc09f 100644 --- a/src/test/azurefilesystem_test.cpp +++ b/src/test/azurefilesystem_test.cpp @@ -18,7 +18,10 @@ #include "spdlog/spdlog.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "src/filesystem/azurefilesystem.hpp" +#pragma GCC diagnostic pop #include "gtest/gtest.h" using namespace ovms; diff --git a/src/test/ensemble_config_change_stress.cpp b/src/test/ensemble_config_change_stress.cpp index 225a2ab703..ff86bcc3b5 100644 --- a/src/test/ensemble_config_change_stress.cpp +++ b/src/test/ensemble_config_change_stress.cpp @@ -813,7 +813,8 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) { SetUpConfig(basicMediapipeConfig); bool performWholeConfigReload = true; std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation - std::set allowedLoadResults = {}; + // Graph path change triggers real reload, briefly entering NOT_LOADED_YET state + std::set allowedLoadResults = {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET}; performStressTest( &ConfigChangeStressTest::triggerKFSGetPipelineMetadataInALoop, &ConfigChangeStressTest::reloadMediapipeGraph, @@ -821,4 +822,90 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) { requiredLoadResults, allowedLoadResults); } + +class StressMediapipeQueueChanges : public StressPipelineConfigChanges { + const std::string modelName = PIPELINE_1_DUMMY_NAME; + const std::string modelInputName = "b"; + const std::string modelOutputName = "a"; + +public: + 
std::string getServableName() override { + return modelName; + } + void SetUp() override { + SetUpCAPIServerInstance(createStressTestPipelineOneDummyConfig()); + } +}; +TEST_F(StressMediapipeQueueChanges, AddGraphDuringPredictLoad) { + // we add another graph definition during load (queue-enabled graph) + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::addNewMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK, + StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + // With queue path, pre-initialized graphs may keep working with cached sessions + // even after model removal, so MEDIAPIPE_PRECONDITION_FAILED may not occur + std::set requiredLoadResults = { + StatusCode::OK, + }; + std::set allowedLoadResults = { + StatusCode::MEDIAPIPE_EXECUTION_ERROR, + StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, + StatusCode::MEDIAPIPE_PRECONDITION_FAILED, + }; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} 
+TEST_F(StressMediapipeQueueChanges, ReloadModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, ReloadMediapipeGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +// Status and metadata tests are not duplicated for queue fixture because +// neither status nor metadata operations exercise the graph queue path. 
#endif diff --git a/src/test/graph_export_test.cpp b/src/test/graph_export_test.cpp index ed79061a63..f5eed290be 100644 --- a/src/test/graph_export_test.cpp +++ b/src/test/graph_export_test.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -538,6 +539,45 @@ class GraphCreationTest : public TestWithTempDir { TestWithTempDir::TearDown(); } + std::string getExpectedGraphQueueSizeDirective(const ovms::HFSettingsImpl& hfSettings) const { + (void)hfSettings; + return "AUTO"; + } + + std::string createGraphAndReadContents(const ovms::HFSettingsImpl& hfSettings) { + std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; + std::unique_ptr graphExporter = std::make_unique(); + auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); + if (status != ovms::StatusCode::OK) { + ADD_FAILURE() << status.string(); + return ""; + } + return GetFileContents(graphPath); + } + + void assertGraphQueueHeader(const std::string& graphContents, const ovms::HFSettingsImpl& hfSettings) { + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_MAX_SIZE: "; + auto firstLineEnd = graphContents.find("\n"); + ASSERT_NE(firstLineEnd, std::string::npos) << graphContents; + auto queueLineStart = firstLineEnd + 1; + auto queueLineEnd = graphContents.find("\n", queueLineStart); + ASSERT_NE(queueLineEnd, std::string::npos) << graphContents; + + std::string actualQueueLine = graphContents.substr(queueLineStart, queueLineEnd - queueLineStart); + ASSERT_EQ(0, actualQueueLine.rfind(queueLinePrefix, 0)) << graphContents; + std::string expectedQueueLine = queueLinePrefix + getExpectedGraphQueueSizeDirective(hfSettings); + ASSERT_EQ(expectedQueueLine, actualQueueLine) << graphContents; + } + + void assertCreatedGraphEquals(const ovms::HFSettingsImpl& hfSettings, const std::string& expectedGraphContents, bool assertVersion = false) { + std::string graphContents = createGraphAndReadContents(hfSettings); + if 
(assertVersion) { + ASSERT_EQ(0, graphContents.find(getVersionString())) << graphContents; + } + assertGraphQueueHeader(graphContents, hfSettings); + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; + } + std::string getVersionString() { std::stringstream expected; expected << "# File created with: " << PROJECT_NAME << " " << PROJECT_VERSION << std::endl; @@ -548,14 +588,7 @@ class GraphCreationTest : public TestWithTempDir { TEST_F(GraphCreationTest, positiveDefaultWithVersionString) { ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::TEXT_GENERATION_GRAPH; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedDefaultGraphContents; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDefaultGraphContents, true); } TEST_F(GraphCreationTest, positiveRerankWithVersionString) { @@ -564,14 +597,7 @@ TEST_F(GraphCreationTest, positiveRerankWithVersionString) { hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedRerankGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, 
expectedRerankGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { @@ -580,14 +606,7 @@ TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedEmbeddingsGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveTextToSpeechWithVersionString) { @@ -595,14 +614,7 @@ TEST_F(GraphCreationTest, positiveTextToSpeechWithVersionString) { hfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; ovms::TextToSpeechGraphSettingsImpl textToSpeechGraphSettings; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedTextToSpeechGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveSTTWithVersionString) { @@ -610,14 +622,7 @@ TEST_F(GraphCreationTest, positiveSTTWithVersionString) { hfSettings.task = 
ovms::SPEECH_TO_TEXT_GRAPH; ovms::SpeechToTextGraphSettingsImpl speechToTextGraphSettings; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedSpeechToTextGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { @@ -625,26 +630,13 @@ TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { hfSettings.task = ovms::IMAGE_GENERATION_GRAPH; ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings; hfSettings.graphSettings = std::move(imageGenerationGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedImageGenerationGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveDefault) { ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::TEXT_GENERATION_GRAPH; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - 
ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedDefaultGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDefaultGraphContents); } TEST_F(GraphCreationTest, positiveDraftAndFuse) { @@ -655,13 +647,7 @@ TEST_F(GraphCreationTest, positiveDraftAndFuse) { graphSettings.dynamicSplitFuse = "false"; hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedDraftAndFuseGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDraftAndFuseGraphContents); } TEST_F(GraphCreationTest, positiveGGUF) { @@ -669,33 +655,19 @@ TEST_F(GraphCreationTest, positiveGGUF) { ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::TEXT_GENERATION_GRAPH; hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents); } TEST_F(GraphCreationTest, WillOverwriteExistingGraphPbtxtGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::TEXT_GENERATION_GRAPH; - std::string graphPath =
ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents, removeVersionString(graphContents)) << graphContents; + hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; // first export targets the initial GGUF file + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents); hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL_Q8-00001-of-20000.gguf"; - status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents2, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents2); } TEST_F(GraphCreationTest, rerankPositiveNonDefault) { @@ -710,13 +681,7 @@ TEST_F(GraphCreationTest, rerankPositiveNonDefault) { rerankGraphSettings.maxAllowedChunks = 18; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedRerankGraphContentsNonDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsNonDefault); } TEST_F(GraphCreationTest, rerankPositiveDefault) { @@ -726,13 +691,7 @@ TEST_F(GraphCreationTest, rerankPositiveDefault) { ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath =
ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedRerankGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsDefault); } TEST_F(GraphCreationTest, rerankCreatedPbtxtInvalid) { @@ -766,13 +725,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveNonDefault) { embeddingsGraphSettings.truncate = "true"; embeddingsGraphSettings.pooling = "LAST"; hfSettings.graphSettings = std::move(embeddingsGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedEmbeddingsGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContents); } TEST_F(GraphCreationTest, embeddingsPositiveDefault) { @@ -781,13 +734,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveDefault) { ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); hfSettings.exportSettings.pluginConfig.numStreams = 1; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedEmbeddingsGraphContentsDefault, 
removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContentsDefault); } TEST_F(GraphCreationTest, embeddingsCreatedPbtxtInvalid) { @@ -841,13 +788,7 @@ TEST_F(GraphCreationTest, textToSpeechPositiveNonDefault) { hfSettings.exportSettings.modelPath = "/model1/path"; hfSettings.exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedTextToSpeechGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContents); } TEST_F(GraphCreationTest, textToSpeechPositiveDefault) { @@ -855,13 +796,7 @@ TEST_F(GraphCreationTest, textToSpeechPositiveDefault) { hfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; ovms::TextToSpeechGraphSettingsImpl textToSpeechGraphSettings; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedTextToSpeechGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContentsDefault); } TEST_F(GraphCreationTest, textToSpeechCreatedPbtxtInvalid) { @@ -890,13 +825,7 @@ TEST_F(GraphCreationTest, speechToTextPositiveNonDefault) { hfSettings.exportSettings.modelPath = "/model1/path"; 
hfSettings.exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedSpeechToTextGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContents); } TEST_F(GraphCreationTest, speechToTextPositiveDefault) { @@ -904,13 +833,7 @@ TEST_F(GraphCreationTest, speechToTextPositiveDefault) { hfSettings.task = ovms::SPEECH_TO_TEXT_GRAPH; ovms::SpeechToTextGraphSettingsImpl speechToTextGraphSettings; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedSpeechToTextGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContentsDefault); } TEST_F(GraphCreationTest, speechToTextCreatedPbtxtInvalid) { @@ -940,13 +863,7 @@ TEST_F(GraphCreationTest, positivePluginConfigAll) { hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - 
ASSERT_EQ(expectedFullPluginGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedFullPluginGraphContents); } TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { @@ -959,13 +876,7 @@ TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContentsWithResponseParser, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGraphContentsWithResponseParser); } TEST_F(GraphCreationTest, positivePluginConfigOne) { @@ -975,13 +886,7 @@ TEST_F(GraphCreationTest, positivePluginConfigOne) { hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedOneSettingPluginGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedOneSettingPluginGraphContents); } TEST_F(GraphCreationTest, negativeCreateFileWrongDirectoryPaths) { @@ -1052,11 +957,8 @@ TEST_F(GraphCreationTest, positiveTextGeneration) { hfSettings.graphSettings = std::move(graphSettings); hfSettings.exportSettings.targetDevice = "NPU"; hfSettings.exportSettings.pluginConfig.useNpuPrefixCaching = true; - 
std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::string subconfigPath = ovms::FileSystem::appendSlash(this->directoryPath) + "subconfig.json"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); + std::string graphContents = createGraphAndReadContents(hfSettings); + assertGraphQueueHeader(graphContents, hfSettings); } TEST_F(GraphCreationTest, imageGenerationPositiveDefault) { @@ -1064,13 +966,7 @@ TEST_F(GraphCreationTest, imageGenerationPositiveDefault) { hfSettings.task = ovms::IMAGE_GENERATION_GRAPH; ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings; hfSettings.graphSettings = std::move(imageGenerationGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenerationGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContentsDefault); } TEST_F(GraphCreationTest, imageGenerationPositiveFull) { @@ -1086,13 +982,7 @@ TEST_F(GraphCreationTest, imageGenerationPositiveFull) { imageGenerationGraphSettings.defaultNumInferenceSteps = 2; imageGenerationGraphSettings.maxNumInferenceSteps = 3; hfSettings.graphSettings = std::move(imageGenerationGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = 
GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenerationGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContents); } #ifdef _WIN32 diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 4390a9eac9..3105acd2c6 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -218,7 +218,7 @@ Key: content-type; Value: application/json } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}0)"; +{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only ASSERT_EQ(response, expectedResponse); } @@ -250,7 +250,7 @@ Key: test2; Value: header } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}0)"; +{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only ASSERT_EQ(response, expectedResponse); } @@ -4025,6 +4025,98 @@ TEST_F(HttpOpenAIHandlerParsingTest, responseFormatNullValue) { EXPECT_FALSE(apiHandler->getResponseFormat().has_value()); } +// ==================== HttpOpenAIHandlerWithQueueTest ==================== +// Same as HttpOpenAIHandlerTest but uses config with graph_queue_size=1 +// to verify the graph pool (GraphQueue) path works correctly. 
+class HttpOpenAIHandlerWithQueueTest : public ::testing::Test { +protected: + ovms::Server& server = ovms::Server::instance(); + std::unique_ptr handler; + + std::unique_ptr t; + std::string port = "9173"; + + std::unordered_map headers{{"content-type", "application/json"}}; + ovms::HttpRequestComponents comp; + std::string endpoint = "/v3/chat/completions"; + std::shared_ptr writer; + std::shared_ptr multiPartParser; + std::string response; + ovms::HttpResponseComponents responseComponents; + + void SetUpServer(const char* configPath) { + ::SetUpServer(this->t, this->server, this->port, configPath); + EnsureServerStartedWithTimeout(this->server, 5); + handler = std::make_unique(server, 5); + } + + void SetUp() override { + writer = std::make_shared(); + multiPartParser = std::make_shared(); + SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json").c_str()); + ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpoint, headers), ovms::StatusCode::OK); + } + + void TearDown() override { + handler.reset(); + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } +}; + +TEST_F(HttpOpenAIHandlerWithQueueTest, UnaryWithQueue) { + std::string requestBody = R"( + { + "model": "gpt", + "stream": false, + "messages": [] + } + )"; + + const std::string URI = "/v3/something"; + ASSERT_EQ( + handler->dispatchToProcessor(URI, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + + std::string expectedResponse = R"(URI: /v3/something +Key: content-type; Value: application/json +Body: + + { + "model": "gpt", + "stream": false, + "messages": [] + } + +JSON Parser: +{"model":"gpt","stream":false,"messages":[]}012345678)"; + ASSERT_EQ(response, expectedResponse); +} + +TEST_F(HttpOpenAIHandlerWithQueueTest, StreamWithQueue) { + std::string requestBody = R"( + { + "model": "gpt", + "stream": true, + "messages": [] + } + )"; + + EXPECT_CALL(*writer,
PartialReplyBegin(::testing::_)).WillOnce(testing::Invoke([](std::function fn) { fn(); })); + EXPECT_CALL(*writer, PartialReplyEnd()).Times(1); + // The calculator produces 9 packets (timestamps 0-8) via loopback, + // each containing the accumulated body + timestamp. The '8' in the body stops the loop. + EXPECT_CALL(*writer, PartialReply(::testing::_)).Times(9); + EXPECT_CALL(*writer, IsDisconnected()).Times(9); + + ASSERT_EQ( + handler->dispatchToProcessor("/v3/completions", requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); + + // For streaming, the response body stays empty (content goes through PartialReply callbacks) + ASSERT_EQ(response, ""); +} TEST_F(HttpOpenAIHandlerParsingTest, parseChatTemplateKwargsWithBooleanValue) { std::string json = R"({ "model": "llama", diff --git a/src/test/light_test_utils.hpp b/src/test/light_test_utils.hpp index 64084f6ab1..ab28455185 100644 --- a/src/test/light_test_utils.hpp +++ b/src/test/light_test_utils.hpp @@ -19,7 +19,22 @@ std::string GetFileContents(const std::string& filePath); bool createConfigFileWithContent(const std::string& content, std::string filename = "/tmp/ovms_config_file.json"); -// Removes the version comment line from the beginning of graph.pbtxt content -inline std::string removeVersionString(std::string input) { - return input.erase(0, input.find("\n") + 1); +// Removes generated graph header lines (version and optional queue size directive) +// which differ across build/runtime setup. 
+inline std::string removeGeneratedGraphHeaders(std::string input) { + auto firstLineEnd = input.find("\n"); + if (firstLineEnd == std::string::npos) { + return ""; + } + input.erase(0, firstLineEnd + 1); + + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_MAX_SIZE:"; + if (input.rfind(queueLinePrefix, 0) == 0) { + auto secondLineEnd = input.find("\n"); + if (secondLineEnd == std::string::npos) { + return ""; + } + input.erase(0, secondLineEnd + 1); + } + return input; } diff --git a/src/test/llm/config_queue.json b/src/test/llm/config_queue.json new file mode 100644 index 0000000000..1e16802ed9 --- /dev/null +++ b/src/test/llm/config_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"lm_cb_regular_queue", + "graph_path":"/ovms/src/test/llm/lm_cb_regular_queue.pbtxt" + } + ] +} diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index 0e52c1bd1f..a889803e1a 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -180,6 +180,51 @@ std::shared_ptr LLMFlowHttpTest::cbPipe; std::shared_ptr LLMFlowHttpTest::llmExecutorWrapper; std::unique_ptr LLMFlowHttpTest::t; +class LLMFlowHttpQueueGraphTest : public ::testing::Test { +protected: + static std::unique_ptr t; + +public: + std::unique_ptr handler; + std::unordered_map headers{{"content-type", "application/json"}}; + ovms::HttpRequestComponents comp; + const std::string endpointChatCompletions = "/v3/chat/completions"; + const std::string endpointCompletions = "/v3/completions"; + std::shared_ptr writer; + std::shared_ptr multiPartParser; + std::string response; + rapidjson::Document parsedResponse; + ovms::HttpResponseComponents responseComponents; + + static void SetUpTestSuite() { + std::string port = "9173"; + ovms::Server& server = ovms::Server::instance(); + ::SetUpServer(t, server, port, getGenericFullPathForSrcTest("/ovms/src/test/llm/config_queue.json").c_str(), 60); + } + + static void TearDownTestSuite() { 
+ ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } + + void SetUp() override { + writer = std::make_shared(); + multiPartParser = std::make_shared(); + ON_CALL(*writer, PartialReplyBegin(::testing::_)).WillByDefault(testing::Invoke([](std::function fn) { fn(); })); + ovms::Server& server = ovms::Server::instance(); + handler = std::make_unique(server, 5); + ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpointCompletions, headers), ovms::StatusCode::OK); + } + + void TearDown() override { + handler.reset(); + } +}; + +std::unique_ptr LLMFlowHttpQueueGraphTest::t; + // --------------------------------------- OVMS LLM nodes tests /* @@ -281,6 +326,158 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJson) { } } +TEST_F(LLMFlowHttpQueueGraphTest, unaryCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "best_of": 16, + "max_tokens": 5, + "prompt": "What is OpenVINO?"
+ } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_FALSE(choice["logprobs"].IsObject()); + ASSERT_TRUE(choice["text"].IsString()); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "text_completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, unaryChatCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" 
+ } + ] + } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_TRUE(choice["message"].IsObject()); + ASSERT_TRUE(choice["message"]["content"].IsString()); + EXPECT_STREQ(choice["message"]["role"].GetString(), "assistant"); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "chat.completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, streamChatCompletionsQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": true, + "seed" : 1, + "max_tokens": 5, + "ignore_eos": true, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" 
+ } + ] + } + )"; + ON_CALL(*writer, PartialReply).WillByDefault([this](std::string response) { + rapidjson::Document d; + std::string dataPrefix = "data:"; + ASSERT_STREQ(response.substr(0, dataPrefix.size()).c_str(), dataPrefix.c_str()); + size_t pos = response.find("\n"); + ASSERT_NE(pos, response.npos); + rapidjson::ParseResult parsingSucceeded = d.Parse(response.substr(dataPrefix.size(), (pos - dataPrefix.size())).c_str()); + ASSERT_EQ(parsingSucceeded.Code(), 0); + ASSERT_TRUE(d["choices"].IsArray()); + ASSERT_EQ(d["choices"].Capacity(), 1); + int i = 0; + for (auto& choice : d["choices"].GetArray()) { + if (choice["finish_reason"].IsString()) { + EXPECT_STREQ(choice["finish_reason"].GetString(), "length"); + } else { + ASSERT_TRUE(choice["finish_reason"].IsNull()); + } + ASSERT_EQ(choice["index"], i++); + ASSERT_TRUE(choice["delta"].IsObject()); + // First chunk may have null content (role announcement) + ASSERT_TRUE(choice["delta"]["content"].IsString() || choice["delta"]["content"].IsNull()); + } + EXPECT_STREQ(d["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(d["object"].GetString(), "chat.completion.chunk"); + }); + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); +} + +// Test that verifies graph reuse works correctly with queue size 1 +// Sends 2 sequential requests to ensure the same graph instance is reused +TEST_F(LLMFlowHttpQueueGraphTest, queueGraphReuseTwoRequests) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "prompt": "What is OpenVINO?" 
+ } + )"; + + // First request + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + + // Second request - reuses the same graph from the queue + // This validates that timestamp increment works for graph reuse + response.clear(); + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + // Note: Responses may differ due to KV cache state despite same seed +} + TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonEchoWithCompletion) { auto params = GetParam(); // TODO: In the next step we should break this suite into smaller ones, use proper configuration instead of skipping diff --git a/src/test/llm/lm_cb_regular_queue.pbtxt b/src/test/llm/lm_cb_regular_queue.pbtxt new file mode 100644 index 0000000000..e22deed0e1 --- /dev/null +++ b/src/test/llm/lm_cb_regular_queue.pbtxt @@ -0,0 +1,47 @@ +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_MAX_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node { + name: "llmNode1" + calculator: "HttpLLMCalculator" + input_side_packet: "LLM_NODE_RESOURCES:llm" + input_side_packet: "LLM_NODE_EXECUTION_CONTEXTS:llm_ctx" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + node_options: { + [type.googleapis.com/mediapipe.LLMCalculatorOptions]: { + models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct" + cache_size: 1 + } + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/lora_graph_export_test.cpp b/src/test/lora_graph_export_test.cpp index 023209ae58..1ac03102a3 100644 --- a/src/test/lora_graph_export_test.cpp +++ b/src/test/lora_graph_export_test.cpp @@ -89,7 +89,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationWithOneLora) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenWithOneLora, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenWithOneLora, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(LoraGraphCreationTest, imageGenerationWithTwoLoras) { @@ -107,7 +107,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationWithTwoLoras) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenWithTwoLoras, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenWithTwoLoras, 
removeGeneratedGraphHeaders(graphContents)) << graphContents; } const std::string expectedImageGenerationGraphContentsDefault = R"( @@ -141,7 +141,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationNoLorasRemainsUnchanged) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenerationGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenerationGraphContentsDefault, removeGeneratedGraphHeaders(graphContents)) << graphContents; } // ===================== LoRA CLI-to-Settings Tests ===================== @@ -394,7 +394,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationWithUrlLora) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenWithUrlLora, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenWithUrlLora, removeGeneratedGraphHeaders(graphContents)) << graphContents; } const std::string expectedImageGenWithLocalLora = R"( @@ -430,7 +430,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationWithLocalLora) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenWithLocalLora, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenWithLocalLora, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(LoraGraphCreationTest, imageGenerationHfRepoLoraWithoutFilenameReturnsError) { @@ -650,7 +650,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationWithCompositeLora) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenWithCompositeLora, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenWithCompositeLora, removeGeneratedGraphHeaders(graphContents)) << graphContents; } // ===================== LoRA Alias Validation Tests ===================== @@ 
-847,7 +847,7 @@ TEST_F(LoraGraphCreationTest, imageGenerationFullCompositeWithAlphas) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenFullComposite, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenFullComposite, removeGeneratedGraphHeaders(graphContents)) << graphContents; } const std::string expectedImageGenNpuStatic = R"( @@ -884,5 +884,5 @@ TEST_F(LoraGraphCreationTest, imageGenerationNpuAutoStaticMode) { ASSERT_EQ(status, ovms::StatusCode::OK); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedImageGenNpuStatic, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedImageGenNpuStatic, removeGeneratedGraphHeaders(graphContents)) << graphContents; } diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json index 5137dbea92..d2803b795f 100644 --- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json @@ -6,4 +6,4 @@ "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt" } ] -} \ No newline at end of file +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json new file mode 100644 index 0000000000..ea25079556 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name": "gpt", + "graph_path": "/ovms/src/test/mediapipe/graph_gpt_with_queue.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/graph_gpt_with_queue.pbtxt b/src/test/mediapipe/graph_gpt_with_queue.pbtxt new file mode 100644 index 0000000000..0fa894e12e --- /dev/null +++ 
b/src/test/mediapipe/graph_gpt_with_queue.pbtxt @@ -0,0 +1,40 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_MAX_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" + +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt new file mode 100644 index 0000000000..eb5b7f0aa9 --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_MAX_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..eb5b7f0aa9 --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_MAX_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..5aa4a747b9 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,45 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp index e66d965c59..8b05f194d3 100644 --- a/src/test/mediapipe_framework_test.cpp +++ b/src/test/mediapipe_framework_test.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -30,9 +31,13 @@ #include "../grpcservermodule.hpp" #include "../http_rest_api_handler.hpp" #include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../mediapipe_internal/outputstreamobserver.hpp" #include "../mediapipe_internal/mediapipefactory.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" +#include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/node_initializer.hpp" +#include "mediapipe/framework/thread_pool_executor.h" +#include "mediapipe/framework/port/parse_text_proto.h" #include "src/metrics/metric_config.hpp" #include "src/metrics/metric_module.hpp" #include "../model_service.hpp" @@ -43,6 +48,7 @@ #include "../stringutils.hpp" #include "../tfs_frontend/tfs_utils.hpp" #include "c_api_test_utils.hpp" +#include "environment.hpp" #include "test_utils.hpp" #include "platform_utils.hpp" #include 
"test_with_temp_dir.hpp" @@ -81,9 +87,386 @@ class MediapipeFrameworkTest : public TestWithTempDir { class MediapipeNegativeFrameworkTest : public MediapipeFrameworkTest { }; -// purpose of this test is to ensure there is no hang in case of one of the graph nodes -// not producing output packet +using mediapipe::Adopt; +using mediapipe::CalculatorGraphConfig; +using mediapipe::Packet; +using mediapipe::ParseTextProtoOrDie; +using mediapipe::Timestamp; + +#define MP_ERROR_STOP(A) \ + { \ + absStatus = A; \ + if (!absStatus.ok()) { \ + const std::string absMessage = absStatus.ToString(); \ + SPDLOG_DEBUG("{}", absMessage); \ + ASSERT_TRUE(false); \ + } \ + } +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_benchmark.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + SPDLOG_TRACE("MyFunctor observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_TRACE("my functor:{}", (void*)this); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, 
Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_TRACE("Now swap Functor, we don't have to call ObserverOutputStream"); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + // MP_ERROR_STOP(graph.AddPacketToInputStream( + // inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + // MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_benchmark.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + expVal = 42; + data[0] = expVal - 1; + 
perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { + SKIP_AND_EXIT_IF_NOT_RUNNING_UNSTABLE(); + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_standard_dummy.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + float expVal = 13.5; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + ovms::Timer<3> timer; + const std::string outputName{"output"}; + int N = 1000; + + absl::Status absStatus; + // here starts new case of ovms + { // new case of ovms + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
+ // get graphIdGuard from queue + // create FrontendAppropriateObserver + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + absStatus = graph.StartRun({}); + { + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + timer.start(0); + for (auto i = 0; i < N; ++i) { // iter begin + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + } // iter end + timer.stop(0); + } // end of new case ovms + { // current ovms case + timer.start(1); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet 
packet; + absStatusOrPoller.value().Next(&packet); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(1); + } + { // thread pool case + // auto sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); + auto sharedThreadPool = std::make_shared(24); + timer.start(2); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + MP_ERROR_STOP(graph.SetExecutor("", sharedThreadPool)); + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(2); + } // end of thread pool case + double ms = timer.elapsed(0) / 1000; + SPDLOG_DEBUG("{} iterations of new flow took:{} ms. 
FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(1) / 1000; + SPDLOG_DEBUG("{} iterations of old flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(2) / 1000; + SPDLOG_DEBUG("{} iterations of thread pool flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + SPDLOG_DEBUG("Threads: {}", std::thread::hardware_concurrency()); +} + TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { + // purpose of this test is to ensure there is no hang in case of one of the graph nodes + // not producing output packet SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_no_calc_output_stream.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -101,7 +484,7 @@ TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringProcess) { - GTEST_SKIP() << "Terminate called otherwise"; + GTEST_SKIP() << "Terminate called otherwise"; // TODO FIXME check SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_exception_during_process.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -118,12 +501,12 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringProcess) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } -TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { +TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { // TODO FIXME add checks to exception handling? 
SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_exception_during_getcontract.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -140,9 +523,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::UNAVAILABLE) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetOpen) { @@ -163,9 +546,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetOpen) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringClose) { @@ -186,9 +569,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringClose) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) 
{ - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index 4486b539a0..6cdfef2e24 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -65,6 +65,7 @@ #include "mediapipe/framework/formats/tensor.h" #include "opencv2/opencv.hpp" #include "platform_utils.hpp" +#include "src/utils/env_guard.hpp" #include "test_utils.hpp" #include "light_test_utils.hpp" #include "test_with_temp_dir.hpp" @@ -236,9 +237,11 @@ class MediapipeFlowTest : public ::testing::TestWithParam { void SetUp() override { } void TearDown() { - server.setShutdownRequest(1); - t->join(); - server.setShutdownRequest(0); + if (t) { + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } } }; @@ -1724,7 +1727,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { ASSERT_EQ(it->shape_size(), 1); ASSERT_EQ(it->shape(0), stringParamValue.size()); const std::string& content = response.raw_output_contents(outputId); - SPDLOG_ERROR("Received output size:{} content:{}", content.size(), content); + SPDLOG_DEBUG("Received output size:{} content:{}", content.size(), content); EXPECT_EQ(content, stringParamValue); break; } @@ -1743,7 +1746,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { const std::string& content = response.raw_output_contents(outputId); ASSERT_EQ(content.size(), sizeof(bool)); const bool castContent = *((bool*)content.data()); - SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + SPDLOG_DEBUG("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); EXPECT_EQ(castContent, boolParamValue); break; } @@ -1762,7 +1765,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { const std::string& content = response.raw_output_contents(outputId); ASSERT_EQ(content.size(), sizeof(int64_t)); const int64_t castContent = *((int64_t*)content.data()); - SPDLOG_ERROR("Received output 
size:{} content:{}; castContent:{}", content.size(), content, castContent); + SPDLOG_DEBUG("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); EXPECT_EQ(castContent, int64ParamValue); break; } @@ -1991,6 +1994,28 @@ TEST(Mediapipe, MetadataDummyInputTypes) { } } } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out2" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out3" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } )"; ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; @@ -2681,13 +2706,17 @@ class MediapipeSerialization : public ::testing::Test { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter) {} + const GraphSidePackets& sidePackets, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + nullptr, mediapipeServableMetricReporter, std::move(guard)) {} }; protected: std::unique_ptr reporter; + std::shared_ptr sidePackets; + std::shared_ptr queue; std::unique_ptr executor; ::inference::ModelInferResponse mp_response; void SetUp() { @@ -2700,9 +2729,11 @@ class MediapipeSerialization : public ::testing::Test { const std::vector inputNames; const std::vector outputNames; const 
::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; this->reporter = std::make_unique(nullptr, nullptr, ""); // disabled reporter - executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, this->reporter.get()); + sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, this->reporter.get(), std::move(guard)); } }; @@ -3099,7 +3130,7 @@ class MediapipeFlowStartTest : public TestWithTempDir { auto start = std::chrono::high_resolution_clock::now(); while (!isMpReady(waitForServable) && (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS)) { - std::this_thread::sleep_for(std::chrono::microseconds(100)); + std::this_thread::sleep_for(std::chrono::microseconds(1000)); } const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); if (!grpcModule) { @@ -4035,3 +4066,232 @@ TEST(WhitelistRegistered, MediapipeSubgraphList) { ASSERT_THAT(mediapipe::SubgraphRegistry::GetRegisteredNames(), UnorderedElementsAreArray(expected)) << readableSetError(mediapipe::SubgraphRegistry::GetRegisteredNames(), expected); } + +// --- OVMS_GRAPH_QUEUE_MAX_SIZE pbtxt directive tests --- + +// Minimal valid pbtxt that MediaPipe can parse (uses a registered test calculator) +static const char* MINIMAL_PBTXT_TEMPLATE = R"( +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + 
input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} +)"; + +static std::string makePbtxtWithDirective(const std::string& directive) { + return directive + "\n" + MINIMAL_PBTXT_TEMPLATE; +} + +TEST(MediapipeGraphQueueSizeDirective, NoDirectiveMeansDisabled) { + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, MINIMAL_PBTXT_TEMPLATE); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_FALSE(mgc.getGraphQueueSize().has_value()); + // getInitialQueueSize on default mgc returns 0 + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, ExplicitPositiveValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: 4"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 4); +} + +TEST(MediapipeGraphQueueSizeDirective, AutoValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_GT(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, ZeroDisablesQueue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: 0"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + 
EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, NegativeValueRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: -1"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, ExceedsHardwareThreads) { + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads == 0) { + GTEST_SKIP() << "hardware_concurrency() returned 0, cannot test thread limit"; + } + int oversized = static_cast(maxThreads) + 1; + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: " + std::to_string(oversized)); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + // Queue size is clamped to hardware_concurrency with a warning, not rejected + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TEST(MediapipeGraphQueueSizeDirective, InvalidStringRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: INVALID"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, PythonLoopbackWithQueueRejected) { + static const char* PYTHON_LOOPBACK_PBTXT = R"( +# OVMS_GRAPH_QUEUE_MAX_SIZE: 4 +input_stream: "OVMS_PY_TENSOR:input" +output_stream: "OVMS_PY_TENSOR:output" +node: { + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "LOOPBACK:loopback" + input_stream: "OVMS_PY_TENSOR:input" + output_stream: "LOOPBACK:loopback" + output_stream: 
"OVMS_PY_TENSOR:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } +} +)"; + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, PYTHON_LOOPBACK_PBTXT); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, PythonLoopbackWithAutoQueueRejected) { + static const char* PYTHON_LOOPBACK_AUTO_PBTXT = R"( +# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO +input_stream: "OVMS_PY_TENSOR:input" +output_stream: "OVMS_PY_TENSOR:output" +node: { + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "LOOPBACK:loopback" + input_stream: "OVMS_PY_TENSOR:input" + output_stream: "LOOPBACK:loopback" + output_stream: "OVMS_PY_TENSOR:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } +} +)"; + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, PYTHON_LOOPBACK_AUTO_PBTXT); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, PythonWithoutLoopbackAllowsQueue) { + // PythonExecutorCalculator in regular (non-generative) mode is compatible with graph queue + static const char* PYTHON_NO_LOOPBACK_PBTXT = R"( +# OVMS_GRAPH_QUEUE_MAX_SIZE: 4 +input_stream: "OVMS_PY_TENSOR:input" +output_stream: "OVMS_PY_TENSOR:output" +node: { + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:input" + output_stream: "OVMS_PY_TENSOR:output" +} +)"; + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, PYTHON_NO_LOOPBACK_PBTXT); + ovms::ModelManager manager; + auto status = def.validate(manager); + // resolveGraphQueueSize should pass; later stages may fail (calculator not registered) + // but it 
should NOT be MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID from our check + EXPECT_NE(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, PythonLoopbackWithQueueDisabledAllowed) { + // Queue explicitly disabled (0) — LOOPBACK Python node is fine + static const char* PYTHON_LOOPBACK_DISABLED_PBTXT = R"( +# OVMS_GRAPH_QUEUE_MAX_SIZE: 0 +input_stream: "OVMS_PY_TENSOR:input" +output_stream: "OVMS_PY_TENSOR:output" +node: { + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "LOOPBACK:loopback" + input_stream: "OVMS_PY_TENSOR:input" + output_stream: "LOOPBACK:loopback" + output_stream: "OVMS_PY_TENSOR:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } +} +)"; + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, PYTHON_LOOPBACK_DISABLED_PBTXT); + ovms::ModelManager manager; + auto status = def.validate(manager); + // Queue is disabled so the LOOPBACK check should not trigger + EXPECT_NE(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, EnvVarOVMS_GRAPH_QUEUE_OFF_DisablesPool) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO"); + ovms::MediapipeGraphConfig mgc; + SetEnvironmentVar("OVMS_GRAPH_QUEUE_OFF", "1"); + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + UnSetEnvironmentVar("OVMS_GRAPH_QUEUE_OFF"); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, EnvVarOVMS_GRAPH_QUEUE_OFF_NotSetDoesNotDisable) { + UnSetEnvironmentVar("OVMS_GRAPH_QUEUE_OFF"); + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_MAX_SIZE: AUTO"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager 
manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_GT(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index 6314f0b0ac..204afa4df6 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -240,7 +240,7 @@ TEST_F(HfPull, Download) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } // Truncate the file to half its size, keeping the first half. @@ -278,6 +278,8 @@ bool createGitLfsPointerFile(const std::string& path) { } // Returns lowercase hex SHA-256 string on success, empty string on failure. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" std::string sha256File(std::string_view path, std::error_code& ec) { ec.clear(); @@ -324,6 +326,7 @@ std::string sha256File(std::string_view path, std::error_code& ec) { } return oss.str(); } +#pragma GCC diagnostic pop class TestHfDownloader : public ovms::HfDownloader { public: @@ -372,7 +375,7 @@ TEST_F(HfPullCache, Resume) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; EXPECT_EXIT({ auto guardOrError = ovms::createLibGitGuard(); @@ -408,7 +411,7 @@ TEST_F(HfPullCache, Resume) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, 
removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string resumedDigest = sha256File(modelPath, ec); ASSERT_EQ(ec, std::errc()); @@ -1098,7 +1101,8 @@ TEST_F(HfPull, Start) { ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfPull, OutOfOvOrg) { @@ -1124,7 +1128,7 @@ TEST_F(HfPull, OutOfOvOrg) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string changePath = ovms::FileSystem::joinPath({this->directoryPath, "OpenVINO"}); std::string newPath = ovms::FileSystem::joinPath({this->directoryPath, "META"}); @@ -1159,7 +1163,8 @@ TEST_F(HfPull, StartOutsideOvOrg) { ASSERT_EQ(std::filesystem::exists(modelPath), true) << modelPath; ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfPull, DraftModel) { @@ -1180,7 +1185,7 @@ TEST_F(HfPull, DraftModel) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContentsDraft, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContentsDraft, removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string basePath2 = 
ovms::FileSystem::joinPath({basePath, "OpenVINO-distil-small.en-int4-ov"}); std::string modelPath2 = ovms::FileSystem::appendSlash(basePath2) + "openvino_tokenizer.bin"; diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp index 5b31146d19..3f7495650c 100644 --- a/src/test/pythonnode_test.cpp +++ b/src/test/pythonnode_test.cpp @@ -1002,10 +1002,12 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, + const GraphSidePackets& sidePackets, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter) {} + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {} }; TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { @@ -1014,8 +1016,10 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { const std::vector inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; - auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, getPythonBackend(), this->reporter.get()); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, 
getPythonBackend(), this->reporter.get(), std::move(guard)); std::string datatype = "FP32"; std::string name = "python_result"; diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 5af193b6c3..edadeb15de 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -71,6 +71,35 @@ class StreamingTest : public Test { } }; +class StreamingQueueTest : public StreamingTest { +protected: + std::shared_ptr queue; + + MediapipeGraphExecutor createQueueExecutor( + const ::mediapipe::CalculatorGraphConfig& config, + stream_types_mapping_t inputTypes, + stream_types_mapping_t outputTypes, + std::vector inputNames, + std::vector outputNames, + int queueSize = 1) { + auto sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, queueSize); + GraphIdGuard graphIdGuard(queue); + return MediapipeGraphExecutor{ + this->name, + this->version, + config, + std::move(inputTypes), + std::move(outputTypes), + std::move(inputNames), + std::move(outputNames), + *sidePackets, + nullptr, + this->reporter.get(), + std::move(graphIdGuard)}; + } +}; + #if (PYTHON_DISABLE == 0) class PythonStreamingTest : public StreamingTest { protected: @@ -360,7 +389,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::KFS_REQUEST}}, {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -417,7 +446,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); // no timestamp 
specified, server will assign one @@ -560,7 +589,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3); // first request with timestamp 3 @@ -605,7 +634,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock only 1 request and disconnect immediately prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -622,6 +651,184 @@ node { ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); } +TEST_F(StreamingQueueTest, SingleStreamSend3Receive3AutomaticTimestamp) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 1); + + prepareRequest(this->firstRequest, {{"in", 3.5f}}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(Receive({{"in", 7.2f}})) + .WillOnce(Receive({{"in", 102.4f}})) + .WillOnce(Disconnect()); + + auto timestamp = std::make_shared(-1); + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(SendWithAutomaticTimestamp({{"out", 4.5f}}, timestamp)) + 
.WillOnce(SendWithAutomaticTimestamp({{"out", 8.2f}}, timestamp)) + .WillOnce(SendWithAutomaticTimestamp({{"out", 103.4f}}, timestamp)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + +TEST_F(StreamingQueueTest, SingleStreamSend1Receive3) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOne3CycleIterationsTestCalculator" + input_stream: "in" + input_stream: "signal" + input_stream_info: { + tag_index: ':1', + back_edge: true + } + input_stream_handler { + input_stream_handler: 'ImmediateInputStreamHandler' + } + output_stream: "out" + output_stream: "signal" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 1); + + prepareRequest(this->firstRequest, {{"in", 3.5f}}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(Disconnect()); + + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(SendWithTimestamp({{"out", 4.5f}}, 1)) + .WillOnce(SendWithTimestamp({{"out", 5.5f}}, 2)) + .WillOnce(SendWithTimestamp({{"out", 6.5f}}, 3)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + +TEST_F(StreamingQueueTest, ExitOnDisconnectionDuringRead) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 1); + + 
prepareRequest(this->firstRequest, {}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(Disconnect()); + + EXPECT_CALL(this->stream, Write(_, _)).Times(0); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + +TEST_F(StreamingQueueTest, ErrorOnDisconnectionDuringWrite) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 1); + + std::promise signalPromise; + std::future signalFuture = signalPromise.get_future(); + + prepareRequest(this->firstRequest, {{"in", 3.5f}}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(DisconnectWhenNotified(signalFuture)); + + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(DisconnectOnWriteAndNotifyEnd(signalPromise)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_EXECUTION_ERROR); +} + +TEST_F(StreamingQueueTest, ErrorDuringFirstRequestDeserialization) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 1); + + prepareInvalidRequest(this->firstRequest, {"in"}); + + std::promise signalPromise; + std::future signalFuture = signalPromise.get_future(); + 
+ EXPECT_CALL(this->stream, Read(_)) + .WillOnce(DisconnectWhenNotified(signalFuture)); + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(SendErrorAndNotifyEnd( + Status(StatusCode::INVALID_CONTENT_SIZE).string() + std::string{" - Expected: 4 bytes; Actual: 0 bytes; input name: in; partial deserialization of first request"}, + signalPromise)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + // PYTHON CALCULATOR CASES #if (PYTHON_DISABLE == 0) @@ -1231,7 +1438,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, - {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1283,7 +1490,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, - {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1318,7 +1525,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1352,7 +1559,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; // cannot install observer due to wrong output name (should never happen due to validation) + {"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()}; // cannot install observer due to wrong output name (should never happen due to validation) 
EXPECT_CALL(this->stream, Read(_)).Times(0); EXPECT_CALL(this->stream, Write(_, _)).Times(0); @@ -1377,7 +1584,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {}); EXPECT_CALL(this->stream, Read(_)) @@ -1405,7 +1612,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1441,7 +1648,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); @@ -1464,7 +1671,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Invalid request - missing data in buffer prepareInvalidRequest(this->firstRequest, {"in"}); // no timestamp specified, server will assign one @@ -1499,7 +1706,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, 
{"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise[3]; std::future signalFuture[3] = { @@ -1546,7 +1753,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0); EXPECT_CALL(this->stream, Read(_)) @@ -1574,7 +1781,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); setRequestTimestamp(this->firstRequest, std::string("not an int")); @@ -1609,7 +1816,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Timestamps not allowed in stream // Expect continuity of operation and response with error message @@ -1651,7 +1858,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Allowed in stream for (auto timestamp : std::vector<::mediapipe::Timestamp>{ @@ -1687,7 +1894,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and 
disconnection prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65}); // request with parameter val @@ -1724,7 +1931,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving the invalid request and disconnection // Request with invalid param py (special pythons session side packet) @@ -1753,7 +1960,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); // missing required request param EXPECT_CALL(this->stream, Read(_)).Times(0); @@ -1779,7 +1986,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 2 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version); // no timestamp specified, server will assign one @@ -1813,7 +2020,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); diff --git a/src/test/stress_test_utils.hpp b/src/test/stress_test_utils.hpp index 172056101e..a5a7caffdd 100644 --- a/src/test/stress_test_utils.hpp +++ b/src/test/stress_test_utils.hpp 
@@ -51,6 +51,7 @@ #include "../server.hpp" #include "../status.hpp" #include "../stringutils.hpp" +#include "src/timer.hpp" #include "../tfs_frontend/tfs_utils.hpp" #include "c_api_test_utils.hpp" #include "test_utils.hpp" @@ -1068,7 +1069,99 @@ static const std::string basicMediapipeConfigWithNewGraphPath = R"({ "mediapipe_config_list": [ { "name":"pipeline1Dummy", - "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt" + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt" + } + ] +})"; + +const std::string basicMediapipeQueueConfig = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithAddedGraph = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + }, + { + "name":"pipeline2Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithRemovedGraph = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + ] +})"; + +static const std::string basicMediapipeQueueConfigWithRemovedModel = R"({ + "model_config_list": [ + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithReloadedModel = R"({ + "model_config_list": [ + {"config": { + 
"name": "dummy", + "base_path": "/ovms/src/test/dummy", + "nireq": 47 + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithNewGraphPath = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt" } ] })"; @@ -1095,6 +1188,13 @@ static void mediacreate(std::unique_ptr& executorPtr, ov sc = static_cast(code); \ } +enum StressTimerSlot : unsigned int { + STRESS_LOOP, + CREATE, + EXECUTE, + TIMER_END +}; + class ConfigChangeStressTest : public TestWithTempDir { protected: const uint32_t loadThreadCount = 20; @@ -1292,6 +1392,12 @@ class ConfigChangeStressTest : public TestWithTempDir { createConfigFileWithContent(ovmsConfig, configFilePath); SPDLOG_INFO("{} end", __FUNCTION__); } + void addNewMediapipeQueueGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithAddedGraph); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } void removeMediapipeGraph() { SPDLOG_INFO("{} start", __FUNCTION__); SetUpConfig(basicMediapipeConfigWithRemovedGraph); @@ -1316,6 +1422,30 @@ class ConfigChangeStressTest : public TestWithTempDir { createConfigFileWithContent(ovmsConfig, configFilePath); SPDLOG_INFO("{} end", __FUNCTION__); } + void removeMediapipeQueueGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithRemovedGraph); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void removeMediapipeQueueGraphUsedModel() { + SPDLOG_INFO("{} start", __FUNCTION__); + 
SetUpConfig(basicMediapipeQueueConfigWithRemovedModel); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void reloadMediapipeQueueGraphUsedModel() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithReloadedModel); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void reloadMediapipeQueueGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithNewGraphPath); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } void checkMetricGreaterThan(const std::string& metricName, double value, std::string& metricOutput, bool& result) { ASSERT_THAT(metricOutput, ::testing::HasSubstr(metricName + std::string{"{name=\"dummy\",version=\"1\"} "})) << "cannot find dummys " << metricName << " metric\n" << metricOutput; @@ -1707,6 +1837,8 @@ class ConfigChangeStressTest : public TestWithTempDir { auto stressIterationsCounter = stressIterationsLimit; bool breakLoop = false; while (stressIterationsCounter-- > 0) { + ovms::Timer timer; + timer.start(STRESS_LOOP); auto futureWaitResult = stopSignal.wait_for(std::chrono::milliseconds(0)); if (true == breakLoop) { SPDLOG_INFO("Ending Load"); @@ -1726,6 +1858,7 @@ class ConfigChangeStressTest : public TestWithTempDir { RequestType request2; RequestType request = preparePipelinePredictRequest(request2); ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR; + timer.start(CREATE); if (typeid(ServableType) == typeid(ovms::Pipeline)) { createPipelineStatus = this->manager->getPipelineFactory().create(pipelinePtr, pipelineName, &request, &response, *(this->manager)); #if (MEDIAPIPE_DISABLE == 0) @@ -1733,6 +1866,8 @@ class ConfigChangeStressTest : public TestWithTempDir { mediacreate(executorPtr, *(this->manager), request, response, createPipelineStatus); #endif } + timer.stop(CREATE); + SPDLOG_TRACE("Executor 
creation time: {} us", timer.elapsed(CREATE)); // we need to make sure that expected status happened and still accept // some that could happen but we may not hit them EXPECT_TRUE((requiredLoadResults.find(createPipelineStatus.getCode()) != requiredLoadResults.end()) || @@ -1744,6 +1879,7 @@ class ConfigChangeStressTest : public TestWithTempDir { } ovms::Status executePipelineStatus = StatusCode::UNKNOWN_ERROR; + timer.start(EXECUTE); if (typeid(ServableType) == typeid(ovms::Pipeline)) { executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext( ovms::ExecutionContext::Interface::GRPC, @@ -1753,6 +1889,7 @@ class ConfigChangeStressTest : public TestWithTempDir { mediaexec(executorPtr, *(this->manager), request, response, executePipelineStatus); #endif } + timer.stop(EXECUTE); createPipelineRetCodesCounters[executePipelineStatus.getCode()]++; EXPECT_TRUE((requiredLoadResults.find(executePipelineStatus.getCode()) != requiredLoadResults.end()) || (allowedLoadResults.find(executePipelineStatus.getCode()) != allowedLoadResults.end())) @@ -1764,6 +1901,8 @@ class ConfigChangeStressTest : public TestWithTempDir { SPDLOG_INFO("Earlier fail detected. 
Stopping execution"); break; } + timer.stop(STRESS_LOOP); + SPDLOG_TRACE("Execution time: {} us", timer.elapsed(STRESS_LOOP)); } for (auto& [retCode, counter] : createPipelineRetCodesCounters) { if (counter > 0) { diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp index a1c07adc9c..ac6b4a93db 100644 --- a/src/test/test_utils.hpp +++ b/src/test/test_utils.hpp @@ -826,8 +826,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { std::string inputConfig; #if (PYTHON_DISABLE == 0) ovms::PythonNodeResources* getPythonNodeResources(const std::string& nodeName) { - auto it = this->sidePacketMaps.pythonNodeResourcesMap.find(nodeName); - if (it == std::end(this->sidePacketMaps.pythonNodeResourcesMap)) { + auto it = this->sidePacketMaps->pythonNodeResourcesMap.find(nodeName); + if (it == std::end(this->sidePacketMaps->pythonNodeResourcesMap)) { return nullptr; } else { return it->second.get(); @@ -836,8 +836,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { #endif ovms::GenAiServable* getGenAiServable(const std::string& nodeName) { - auto it = this->sidePacketMaps.genAiServableMap.find(nodeName); - if (it == std::end(this->sidePacketMaps.genAiServableMap)) { + auto it = this->sidePacketMaps->genAiServableMap.find(nodeName); + if (it == std::end(this->sidePacketMaps->genAiServableMap)) { return nullptr; } else { return it->second.get(); @@ -848,13 +848,15 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { return this->validateForConfigLoadableness(); } - ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps.genAiServableMap; } + ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps->genAiServableMap; } DummyMediapipeGraphDefinition(const std::string name, const ovms::MediapipeGraphConfig& config, std::string inputConfig, ovms::PythonBackend* pythonBackend = nullptr) : - ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, 
pythonBackend) { this->inputConfig = inputConfig; } + ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { + this->inputConfig = inputConfig; + } // Do not read from path - use predefined config contents ovms::Status validateForConfigFileExistence() override {