Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions demos/common/export_models/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def add_common_arguments(parser):
parser.add_argument('--target_device', default="CPU", help='CPU, GPU, NPU or HETERO, default is CPU', dest='target_device')
parser.add_argument('--ov_cache_dir', default=None, help='Folder path for compilation cache to speedup initialization time', dest='ov_cache_dir')
parser.add_argument('--extra_quantization_params', required=False, help='Add advanced quantization parameters. Check optimum-intel documentation. Example: "--sym --group-size -1 --ratio 1.0 --awq --scale-estimation --dataset wikitext2"', dest='extra_quantization_params')
parser.add_argument('--graph_initial_queue_size', required=False, default=None, help='Initial graph pool size. Use AUTO for hardware_concurrency or a positive integer. If not set, defaults per task are used.', dest='graph_initial_queue_size')
parser.add_argument('--graph_queue_max_size', required=False, default=None, help='Maximum graph pool size (expansion ceiling). Use AUTO for hardware_concurrency or a positive integer. If not set, defaults per task are used.', dest='graph_queue_max_size')

parser = argparse.ArgumentParser(description='Export Hugging face models to OVMS models repository including all configuration for deployments')

Expand Down Expand Up @@ -148,6 +150,8 @@ def add_common_arguments(parser):
"""

embedding_graph_ov_template = """
# OVMS_GRAPH_INITIAL_QUEUE_SIZE: {{graph_initial_queue_size|default("1", true)}}
# OVMS_GRAPH_QUEUE_MAX_SIZE: {{graph_queue_max_size|default("AUTO", true)}}
input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
node {
Expand All @@ -172,6 +176,8 @@ def add_common_arguments(parser):
"""

rerank_graph_ov_template = """
# OVMS_GRAPH_INITIAL_QUEUE_SIZE: {{graph_initial_queue_size|default("1", true)}}
# OVMS_GRAPH_QUEUE_MAX_SIZE: {{graph_queue_max_size|default("AUTO", true)}}
input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
node {
Expand All @@ -190,7 +196,9 @@ def add_common_arguments(parser):
}
"""

rerank_graph_template = """input_stream: "REQUEST_PAYLOAD:input"
rerank_graph_template = """# OVMS_GRAPH_INITIAL_QUEUE_SIZE: {{graph_initial_queue_size|default("1", true)}}
# OVMS_GRAPH_QUEUE_MAX_SIZE: {{graph_queue_max_size|default("AUTO", true)}}
input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
node {
calculator: "OpenVINOModelServerSessionCalculator"
Expand Down Expand Up @@ -219,7 +227,9 @@ def add_common_arguments(parser):
}
"""

text_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
text_generation_graph_template = """# OVMS_GRAPH_INITIAL_QUEUE_SIZE: {{graph_initial_queue_size|default("1", true)}}
# OVMS_GRAPH_QUEUE_MAX_SIZE: {{graph_queue_max_size|default("AUTO", true)}}
input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"

node: {
Expand Down Expand Up @@ -292,7 +302,9 @@ def add_common_arguments(parser):
]
}"""

image_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
image_generation_graph_template = """# OVMS_GRAPH_INITIAL_QUEUE_SIZE: {{graph_initial_queue_size|default("1", true)}}
# OVMS_GRAPH_QUEUE_MAX_SIZE: {{graph_queue_max_size|default("1", true)}}
input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"

node: {
Expand Down
1 change: 1 addition & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2221,6 +2221,7 @@ cc_test(
+ select({
"//:not_disable_mediapipe": [
"test/embeddingsnode_test.cpp",
"test/graphqueue_test.cpp",
"test/listmodelsendpoint_test.cpp",
"test/mediapipeflow_test.cpp",
"test/mediapipe/inputsidepacketusertestcalc.cc",
Expand Down
2 changes: 2 additions & 0 deletions src/capi_frontend/server_settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ struct ExportSettings {
std::optional<std::string> extraQuantizationParams;
std::optional<std::string> vocoder;
std::string precision = "int8";
std::optional<std::string> graphInitialQueueSize;
std::optional<std::string> graphQueueMaxSize;
PluginConfigSettingsImpl pluginConfig;
};

Expand Down
5 changes: 4 additions & 1 deletion src/graph_export/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ ovms_cc_library(
ovms_cc_library(
name = "graph_cli_parser",
srcs = ["graph_cli_parser.cpp"],
hdrs = ["graph_cli_parser.hpp"],
hdrs = [
"graph_cli_parser.hpp",
"graph_queue_cli_options.hpp",
],
deps = [
"@ovms//src:cpp_headers",
"@ovms//src:libovms_server_settings",
Expand Down
3 changes: 3 additions & 0 deletions src/graph_export/embeddings_graph_cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "../capi_frontend/server_settings.hpp"
#include "../ovms_exit_codes.hpp"
#include "../status.hpp"
#include "graph_queue_cli_options.hpp"

namespace ovms {

Expand Down Expand Up @@ -56,6 +57,7 @@ void EmbeddingsGraphCLIParser::createOptions() {
"Pooling option. One of: CLS, LAST, MEAN.",
cxxopts::value<std::string>()->default_value("CLS"),
"POOLING");
addGraphQueueOptions(*options, "embeddings");
}

void EmbeddingsGraphCLIParser::printHelp() {
Expand Down Expand Up @@ -97,6 +99,7 @@ void EmbeddingsGraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl
embeddingsGraphSettings.normalize = result->operator[]("normalize").as<std::string>();
embeddingsGraphSettings.truncate = result->operator[]("truncate").as<std::string>();
embeddingsGraphSettings.pooling = result->operator[]("pooling").as<std::string>();
extractGraphQueueOptions(*result, hfSettings);
}
if (!(embeddingsGraphSettings.pooling == "CLS" || embeddingsGraphSettings.pooling == "LAST" || embeddingsGraphSettings.pooling == "MEAN")){
throw std::invalid_argument("Only CLS and LAST pooling modes are supported");
Expand Down
4 changes: 3 additions & 1 deletion src/graph_export/graph_cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "../capi_frontend/server_settings.hpp"
#include "../ovms_exit_codes.hpp"
#include "../status.hpp"
#include "graph_queue_cli_options.hpp"

namespace ovms {

Expand Down Expand Up @@ -80,7 +81,7 @@ void GraphCLIParser::createOptions() {
"Enables enforcing tool schema during generation. Requires setting tool parser. Default: false.",
cxxopts::value<std::string>()->default_value("false"),
"ENABLE_TOOL_GUIDED_GENERATION");

addGraphQueueOptions(*options, "plugin config");
options->add_options("plugin config")
("max_prompt_len",
"Sets NPU specific property for maximum number of tokens in the prompt.",
Expand Down Expand Up @@ -168,6 +169,7 @@ void GraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl& hfSettin
if (result->count("kv_cache_precision")) {
hfSettings.exportSettings.pluginConfig.kvCachePrecision = result->operator[]("kv_cache_precision").as<std::string>();
}
extractGraphQueueOptions(*result, hfSettings);
}

hfSettings.graphSettings = std::move(graphSettings);
Expand Down
39 changes: 35 additions & 4 deletions src/graph_export/graph_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,51 @@
namespace ovms {

// Provenance comment line ("# File created with: <project> <version>") that buildGraphHeader() writes first.
static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n");
// Prefixes of the "# OVMS_GRAPH_*" comment directives; each is followed by a value and a newline when written.
static const std::string OVMS_GRAPH_QUEUE_SIZE_LINE_PREFIX = "# OVMS_GRAPH_QUEUE_SIZE: ";
static const std::string OVMS_GRAPH_INITIAL_QUEUE_SIZE_LINE_PREFIX = "# OVMS_GRAPH_INITIAL_QUEUE_SIZE: ";
static const std::string OVMS_GRAPH_QUEUE_MAX_SIZE_LINE_PREFIX = "# OVMS_GRAPH_QUEUE_MAX_SIZE: ";
// Symbolic directive value used instead of a number.
// NOTE(review): the exporter CLI help describes AUTO as "hardware_concurrency" - confirm against the runtime parser.
static const std::string OVMS_GRAPH_QUEUE_SIZE_AUTO = "AUTO";

static std::string getDefaultGraphQueueSizeDirective(const HFSettingsImpl& hfSettings) {
static std::optional<std::string> getDefaultInitialGraphQueueSizeDirective(const HFSettingsImpl& hfSettings) {
if (hfSettings.exportSettings.graphInitialQueueSize.has_value()) {
return hfSettings.exportSettings.graphInitialQueueSize.value();
}
if (hfSettings.task == IMAGE_GENERATION_GRAPH) {
return "1";
}
if (hfSettings.task == TEXT_GENERATION_GRAPH ||
hfSettings.task == EMBEDDINGS_GRAPH ||
hfSettings.task == RERANK_GRAPH) {
return "1";
}
return std::nullopt;
}

// Resolves the value for the "# OVMS_GRAPH_QUEUE_MAX_SIZE:" header directive.
//
// Precedence:
//   1. exportSettings.graphQueueMaxSize when the user supplied one (returned verbatim,
//      no validation is performed here);
//   2. "1" for the image generation graph task;
//   3. OVMS_GRAPH_QUEUE_SIZE_AUTO for the text generation, embeddings and rerank graph tasks;
//   4. std::nullopt for every other task, meaning there is no default to emit.
static std::optional<std::string> getDefaultGraphQueueMaxSizeDirective(const HFSettingsImpl& hfSettings) {
    const auto& userValue = hfSettings.exportSettings.graphQueueMaxSize;
    if (userValue.has_value()) {
        return userValue;
    }
    if (hfSettings.task == IMAGE_GENERATION_GRAPH) {
        return "1";
    }
    // No unconditional AUTO fallback here: only the tasks listed below get AUTO, so that
    // unknown tasks fall through to std::nullopt instead of silently receiving a directive.
    if (hfSettings.task == TEXT_GENERATION_GRAPH ||
        hfSettings.task == EMBEDDINGS_GRAPH ||
        hfSettings.task == RERANK_GRAPH) {
        return OVMS_GRAPH_QUEUE_SIZE_AUTO;
    }
    return std::nullopt;
}

// Builds the comment header placed in a generated graph file.
//
// The header always starts with OVMS_VERSION_GRAPH_LINE. It is followed by the
// "# OVMS_GRAPH_INITIAL_QUEUE_SIZE:" and "# OVMS_GRAPH_QUEUE_MAX_SIZE:" directive lines,
// each written only when the corresponding resolver returns a value. The legacy single
// "# OVMS_GRAPH_QUEUE_SIZE:" line is intentionally not written; the two directives above
// replace it.
//
// Returns the header text; every line it contains ends with '\n'.
static std::string buildGraphHeader(const HFSettingsImpl& hfSettings) {
    std::ostringstream header;
    header << OVMS_VERSION_GRAPH_LINE;
    const auto initialSize = getDefaultInitialGraphQueueSizeDirective(hfSettings);
    if (initialSize.has_value()) {
        header << OVMS_GRAPH_INITIAL_QUEUE_SIZE_LINE_PREFIX << initialSize.value() << "\n";
    }
    const auto maxSize = getDefaultGraphQueueMaxSizeDirective(hfSettings);
    if (maxSize.has_value()) {
        header << OVMS_GRAPH_QUEUE_MAX_SIZE_LINE_PREFIX << maxSize.value() << "\n";
    }
    return header.str();
}

Expand Down
46 changes: 46 additions & 0 deletions src/graph_export/graph_queue_cli_options.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//*****************************************************************************
// Copyright 2025 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <cxxopts.hpp>

#include "../capi_frontend/server_settings.hpp"

namespace ovms {

// Common graph queue CLI options shared across all mediapipe graph task parsers.
// Call addGraphQueueOptions() in createOptions() and extractGraphQueueOptions() in prepare().

// Registers the two graph pool sizing options on a task parser.
//
// options - cxxopts option set of the calling parser; modified in place.
// group   - help group under which both options are listed (defaults to "graph pool").
//
// Both options are string-valued and are registered without a cxxopts default value.
// The graph_queue_max_size help text deliberately does not promise a fixed fallback:
// the value written when the option is omitted is chosen per task by the graph exporter.
inline void addGraphQueueOptions(cxxopts::Options& options, const std::string& group = "graph pool") {
    options.add_options(group)
        ("graph_initial_queue_size",
            "Initial graph pool size at startup. Positive integer or AUTO. Default: 1.",
            cxxopts::value<std::string>(),
            "GRAPH_INITIAL_QUEUE_SIZE")
        ("graph_queue_max_size",
            "Maximum graph pool size (expansion ceiling). Positive integer or AUTO. If not set, a task-specific default is used.",
            cxxopts::value<std::string>(),
            "GRAPH_QUEUE_MAX_SIZE");
}

// Copies the graph pool sizing options from a parse result into the export settings.
//
// result     - parsed command line of a parser that registered the options via addGraphQueueOptions().
// hfSettings - receives the values in exportSettings.graphInitialQueueSize / graphQueueMaxSize.
//
// A setting is assigned only when its option was actually present on the command line;
// otherwise the existing value in hfSettings is left as it was. Values are stored as the
// raw strings given by the user.
inline void extractGraphQueueOptions(const cxxopts::ParseResult& result, HFSettingsImpl& hfSettings) {
    const auto storeIfGiven = [&result](const char* optionName, auto& destination) {
        if (result.count(optionName) == 0) {
            return;
        }
        destination = result[optionName].as<std::string>();
    };
    storeIfGiven("graph_initial_queue_size", hfSettings.exportSettings.graphInitialQueueSize);
    storeIfGiven("graph_queue_max_size", hfSettings.exportSettings.graphQueueMaxSize);
}

} // namespace ovms
3 changes: 3 additions & 0 deletions src/graph_export/image_generation_graph_cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "../capi_frontend/server_settings.hpp"
#include "../ovms_exit_codes.hpp"
#include "../status.hpp"
#include "graph_queue_cli_options.hpp"

namespace ovms {

Expand Down Expand Up @@ -82,6 +83,7 @@ void ImageGenerationGraphCLIParser::createOptions() {
"The number of parallel execution streams to use for the image generation models. Use at least 2 on 2 socket CPU systems.",
cxxopts::value<uint32_t>(),
"NUM_STREAMS");
addGraphQueueOptions(*options, "image_generation");
}

void ImageGenerationGraphCLIParser::printHelp() {
Expand Down Expand Up @@ -162,6 +164,7 @@ void ImageGenerationGraphCLIParser::prepare(ServerSettingsImpl& serverSettings,
hfSettings.exportSettings.pluginConfig.cacheDir = serverSettings.cacheDir;
}
}
extractGraphQueueOptions(*result, hfSettings);
}

hfSettings.graphSettings = std::move(imageGenerationGraphSettings);
Expand Down
3 changes: 3 additions & 0 deletions src/graph_export/rerank_graph_cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "../capi_frontend/server_settings.hpp"
#include "../ovms_exit_codes.hpp"
#include "../status.hpp"
#include "graph_queue_cli_options.hpp"

namespace ovms {

Expand All @@ -48,6 +49,7 @@ void RerankGraphCLIParser::createOptions() {
"Maximum allowed chunks.",
cxxopts::value<uint64_t>()->default_value("10000"),
"MAX_ALLOWED_CHUNKS");
addGraphQueueOptions(*options, "rerank");
}

void RerankGraphCLIParser::printHelp() {
Expand Down Expand Up @@ -88,6 +90,7 @@ void RerankGraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl& hf
} else {
hfSettings.exportSettings.pluginConfig.numStreams = result->operator[]("num_streams").as<uint32_t>();
rerankGraphSettings.maxAllowedChunks = result->operator[]("max_allowed_chunks").as<uint64_t>();
extractGraphQueueOptions(*result, hfSettings);
}

hfSettings.graphSettings = std::move(rerankGraphSettings);
Expand Down
3 changes: 3 additions & 0 deletions src/graph_export/s2t_graph_cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "../capi_frontend/server_settings.hpp"
#include "../ovms_exit_codes.hpp"
#include "../status.hpp"
#include "graph_queue_cli_options.hpp"

namespace ovms {

Expand All @@ -44,6 +45,7 @@ void SpeechToTextGraphCLIParser::createOptions() {
"The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.",
cxxopts::value<uint32_t>()->default_value("1"),
"NUM_STREAMS");
addGraphQueueOptions(*options, "SpeechToText");
}

void SpeechToTextGraphCLIParser::printHelp() {
Expand Down Expand Up @@ -82,6 +84,7 @@ void SpeechToTextGraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsIm
}
} else {
hfSettings.exportSettings.pluginConfig.numStreams = result->operator[]("num_streams").as<uint32_t>();
extractGraphQueueOptions(*result, hfSettings);
}
hfSettings.graphSettings = std::move(speechToTextGraphSettings);
}
Expand Down
3 changes: 3 additions & 0 deletions src/graph_export/t2s_graph_cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "../capi_frontend/server_settings.hpp"
#include "../ovms_exit_codes.hpp"
#include "../status.hpp"
#include "graph_queue_cli_options.hpp"

namespace ovms {

Expand All @@ -44,6 +45,7 @@ void TextToSpeechGraphCLIParser::createOptions() {
"The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.",
cxxopts::value<uint32_t>()->default_value("1"),
"NUM_STREAMS");
addGraphQueueOptions(*options, "TextToSpeech");
}

void TextToSpeechGraphCLIParser::printHelp() {
Expand Down Expand Up @@ -82,6 +84,7 @@ void TextToSpeechGraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsIm
}
} else {
hfSettings.exportSettings.pluginConfig.numStreams = result->operator[]("num_streams").as<uint32_t>();
extractGraphQueueOptions(*result, hfSettings);
}
hfSettings.graphSettings = std::move(textToSpeechGraphSettings);
}
Expand Down
1 change: 0 additions & 1 deletion src/mediapipe_internal/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ ovms_cc_library(
":mediapipe_utils",
":outputstreamobserver",
":side_packet_builder",
"//src:libovms_queue",
"//src:libovmslogging",
"//src:libovms_execution_context",
"//src:libovmstimer",
Expand Down
Loading