From 58ceadbd0efa5e2241400007ae2b5198dba223cb Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 2 Apr 2026 01:08:03 +0200 Subject: [PATCH 01/21] restrict rest workers based on open files limit --- src/config.cpp | 26 ++++++++++++++++++++++++-- src/test/ovmsconfig_test.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 3222e775b1..f8fa8749e5 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -43,12 +43,28 @@ const uint32_t AVAILABLE_CORES = getCoreCount(); const uint32_t WIN_MAX_GRPC_WORKERS = 1; const uint32_t MAX_PORT_NUMBER = std::numeric_limits::max(); -// For drogon, we need to minimize the number of default workers since this value is set for both: unary and streaming (making it always double) -const uint64_t DEFAULT_REST_WORKERS = AVAILABLE_CORES; const uint32_t DEFAULT_GRPC_MAX_THREADS = AVAILABLE_CORES * 8.0; const size_t DEFAULT_GRPC_MEMORY_QUOTA = (size_t)2 * 1024 * 1024 * 1024; // 2GB const uint64_t MAX_REST_WORKERS = 10'000; +// We need to minimize the number of default drogon workers since this value is set for both: unary and streaming (making it always double) +// on linux, restrict also based on the max allowed number of open files +#ifdef __linux__ +#include +const uint64_t MAX_OPEN_FILES = []() { + struct rlimit limit; + if (getrlimit(RLIMIT_NOFILE, &limit) == 0) { + return limit.rlim_cur; + } + return std::numeric_limits::max(); +}(); +const uint64_t RESERVED_OPEN_FILES = 10; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers +const uint64_t DEFAULT_REST_WORKERS = (MAX_OPEN_FILES <= RESERVED_OPEN_FILES) ? 
AVAILABLE_CORES + : std::min(static_cast(AVAILABLE_CORES), (MAX_OPEN_FILES - RESERVED_OPEN_FILES) / 5); +#else +const uint64_t DEFAULT_REST_WORKERS = AVAILABLE_CORES; +#endif + Config& Config::parse(int argc, char** argv) { ovms::CLIParser parser; ovms::ServerSettingsImpl serverSettings; @@ -306,6 +322,12 @@ bool Config::validate() { std::cerr << "rest_workers is set but rest_port is not set. rest_port is required to start rest servers" << std::endl; return false; } +#ifdef __linux__ + if (restWorkers() > (MAX_OPEN_FILES - RESERVED_OPEN_FILES) / 5) { + std::cerr << "rest_workers count cannot be larger than " << (MAX_OPEN_FILES - RESERVED_OPEN_FILES) / 5 << " due to open files limit. Current open files limit: " << MAX_OPEN_FILES << std::endl; + return false; + } +#endif #ifdef _WIN32 if (grpcWorkers() > WIN_MAX_GRPC_WORKERS) { diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 5e6f694f56..7f5b43b420 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,36 @@ TEST_F(OvmsConfigDeathTest, restWorkersTooLarge) { EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count should be from 2 to "); } +TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { + // limit allowed number of open files to 1024 to make sure that rest_workers count is too large for the limit based on number of cpu cores alone + int cpu_cores = ovms::getCoreCount(); + struct rlimit limit; + ASSERT_EQ(getrlimit(RLIMIT_NOFILE, &limit), 0); + struct rlimit newLimit = {static_cast(cpu_cores * 5), limit.rlim_max}; + std::cout << "Setting open files limit to " << newLimit.rlim_cur << " to test that default rest_workers count is reduced based on open files limit" << std::endl; + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); + + char* n_argv[] = {"ovms", "--config_path", "/path1", 
"--rest_port", "8080", "--port", "8081"}; + int arg_count = 7; + ovms::Config::instance().parse(arg_count, n_argv); + EXPECT_TRUE(ovms::Config::instance().validate()); + + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); +} + +TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { + // limit allowed number of open files to 1024 to make sure that rest_workers count is too large. + struct rlimit limit; + ASSERT_EQ(getrlimit(RLIMIT_NOFILE, &limit), 0); + struct rlimit newLimit = {1024, limit.rlim_max}; + std::cout << "Setting open files limit to " << newLimit.rlim_cur << " to test that rest_workers count is too large for the limit based on number of cpu cores alone" << std::endl; + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); + char* n_argv[] = {"ovms", "--config_path", "/path1", "--rest_port", "8080", "--port", "8081", "--rest_workers", "1000"}; + int arg_count = 9; + EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 202 due to open files limit. Current open files limit: 1024"); + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); +} + TEST_F(OvmsConfigDeathTest, restWorkersDefinedRestPortUndefined) { char* n_argv[] = {"ovms", "--config_path", "/path1", "--port", "8080", "--rest_workers", "60"}; int arg_count = 7; From d55e2b3719a43c6abf03b26f173cad10e1a573f9 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 2 Apr 2026 09:18:52 +0200 Subject: [PATCH 02/21] style --- src/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.cpp b/src/config.cpp index f8fa8749e5..3782f8ea1a 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -14,7 +14,7 @@ // limitations under the License. 
//***************************************************************************** #include "config.hpp" - +#include #include #include #include From 5821c8a48080d38b77b54200c813083724428d8c Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 2 Apr 2026 10:34:46 +0200 Subject: [PATCH 03/21] win build --- src/config.cpp | 5 ++++- src/test/ovmsconfig_test.cpp | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 3782f8ea1a..be9d8d5327 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -37,6 +37,10 @@ #include "stringutils.hpp" #include "systeminfo.hpp" +#ifdef __linux__ +#include +#endif + namespace ovms { const uint32_t AVAILABLE_CORES = getCoreCount(); @@ -50,7 +54,6 @@ const uint64_t MAX_REST_WORKERS = 10'000; // We need to minimize the number of default drogon workers since this value is set for both: unary and streaming (making it always double) // on linux, restrict also based on the max allowed number of open files #ifdef __linux__ -#include const uint64_t MAX_OPEN_FILES = []() { struct rlimit limit; if (getrlimit(RLIMIT_NOFILE, &limit) == 0) { diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 7f5b43b420..7b719e351a 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -32,6 +31,10 @@ #include "../systeminfo.hpp" #include "test_utils.hpp" +#ifdef __linux__ +#include +#endif + using testing::_; using testing::ContainerEq; using testing::Return; From 56c9b8d580abfdad26a522ec292fcf7baeef3bf2 Mon Sep 17 00:00:00 2001 From: "Trawinski, Dariusz" Date: Thu, 2 Apr 2026 13:30:16 +0200 Subject: [PATCH 04/21] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/config.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index be9d8d5327..2184f34c99 100644 --- 
a/src/config.cpp +++ b/src/config.cpp @@ -54,16 +54,25 @@ const uint64_t MAX_REST_WORKERS = 10'000; // We need to minimize the number of default drogon workers since this value is set for both: unary and streaming (making it always double) // on linux, restrict also based on the max allowed number of open files #ifdef __linux__ -const uint64_t MAX_OPEN_FILES = []() { + +namespace { +uint64_t getMaxOpenFilesLimit() { struct rlimit limit; if (getrlimit(RLIMIT_NOFILE, &limit) == 0) { return limit.rlim_cur; } return std::numeric_limits::max(); -}(); +} +} // namespace + const uint64_t RESERVED_OPEN_FILES = 10; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers -const uint64_t DEFAULT_REST_WORKERS = (MAX_OPEN_FILES <= RESERVED_OPEN_FILES) ? AVAILABLE_CORES - : std::min(static_cast(AVAILABLE_CORES), (MAX_OPEN_FILES - RESERVED_OPEN_FILES) / 5); +const uint64_t DEFAULT_REST_WORKERS = []() { + const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); + if (maxOpenFiles <= RESERVED_OPEN_FILES) { + return static_cast(AVAILABLE_CORES); + } + return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 5); +}(); #else const uint64_t DEFAULT_REST_WORKERS = AVAILABLE_CORES; #endif From c452b51140ddc42eb3d9832703588bd17e9251a2 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 2 Apr 2026 16:38:07 +0200 Subject: [PATCH 05/21] fix tests --- src/config.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 2184f34c99..608f6e143c 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -55,26 +55,26 @@ const uint64_t MAX_REST_WORKERS = 10'000; // on linux, restrict also based on the max allowed number of open files #ifdef __linux__ -namespace { -uint64_t getMaxOpenFilesLimit() { +static uint64_t getMaxOpenFilesLimit() { struct rlimit limit; if (getrlimit(RLIMIT_NOFILE, &limit) == 0) { return limit.rlim_cur; } return 
std::numeric_limits::max(); } -} // namespace const uint64_t RESERVED_OPEN_FILES = 10; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers -const uint64_t DEFAULT_REST_WORKERS = []() { +uint64_t getDefaultRestWorkers() { const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); if (maxOpenFiles <= RESERVED_OPEN_FILES) { - return static_cast(AVAILABLE_CORES); + return static_cast(2); // minimum functional number } return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 5); -}(); +} #else -const uint64_t DEFAULT_REST_WORKERS = AVAILABLE_CORES; +uint64_t getDefaultRestWorkers() { + return AVAILABLE_CORES; +} #endif Config& Config::parse(int argc, char** argv) { @@ -335,8 +335,8 @@ bool Config::validate() { return false; } #ifdef __linux__ - if (restWorkers() > (MAX_OPEN_FILES - RESERVED_OPEN_FILES) / 5) { - std::cerr << "rest_workers count cannot be larger than " << (MAX_OPEN_FILES - RESERVED_OPEN_FILES) / 5 << " due to open files limit. Current open files limit: " << MAX_OPEN_FILES << std::endl; + if (restWorkers() > (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 5) { + std::cerr << "rest_workers count cannot be larger than " << (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 5 << " due to open files limit. Current open files limit: " << getMaxOpenFilesLimit() << std::endl; return false; } #endif @@ -402,7 +402,7 @@ const std::string Config::restBindAddress() const { return this->serverSettings. 
uint32_t Config::grpcWorkers() const { return this->serverSettings.grpcWorkers; } uint32_t Config::grpcMaxThreads() const { return this->serverSettings.grpcMaxThreads.value_or(DEFAULT_GRPC_MAX_THREADS); } size_t Config::grpcMemoryQuota() const { return this->serverSettings.grpcMemoryQuota.value_or(DEFAULT_GRPC_MEMORY_QUOTA); } -uint32_t Config::restWorkers() const { return this->serverSettings.restWorkers.value_or(DEFAULT_REST_WORKERS); } +uint32_t Config::restWorkers() const { return this->serverSettings.restWorkers.value_or(getDefaultRestWorkers()); } const std::string& Config::modelName() const { return this->modelsSettings.modelName; } const std::string& Config::modelPath() const { return this->modelsSettings.modelPath; } const std::string& Config::batchSize() const { From 502a8538cd43c40415d6d7194ef79dc428255acf Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 2 Apr 2026 16:58:13 +0200 Subject: [PATCH 06/21] fix win tests --- src/test/ovmsconfig_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 7b719e351a..c8fd57a0e4 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -206,6 +206,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersTooLarge) { EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count should be from 2 to "); } +#ifdef __linux__ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { // limit allowed number of open files to 1024 to make sure that rest_workers count is too large for the limit based on number of cpu cores alone int cpu_cores = ovms::getCoreCount(); @@ -222,6 +223,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); } +#endif TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { // limit allowed number of open files to 1024 to make sure that rest_workers 
count is too large. From 9d534d8e8cfd52500000ed4014a95d94e4652a0f Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 2 Apr 2026 17:15:38 +0200 Subject: [PATCH 07/21] fix win tests2 --- src/test/ovmsconfig_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index c8fd57a0e4..7d472ef5ce 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -223,7 +223,6 @@ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); } -#endif TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { // limit allowed number of open files to 1024 to make sure that rest_workers count is too large. @@ -237,6 +236,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 202 due to open files limit. 
Current open files limit: 1024"); ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); } +#endif TEST_F(OvmsConfigDeathTest, restWorkersDefinedRestPortUndefined) { char* n_argv[] = {"ovms", "--config_path", "/path1", "--port", "8080", "--rest_workers", "60"}; From eab2fea045757afe582fa3b26e7caf71ed7030f5 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Fri, 3 Apr 2026 00:05:57 +0200 Subject: [PATCH 08/21] improve limits --- src/config.cpp | 8 ++++---- src/test/ovmsconfig_test.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 608f6e143c..67b9ccde8a 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -63,13 +63,13 @@ static uint64_t getMaxOpenFilesLimit() { return std::numeric_limits::max(); } -const uint64_t RESERVED_OPEN_FILES = 10; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers +const uint64_t RESERVED_OPEN_FILES = 15; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers uint64_t getDefaultRestWorkers() { const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); if (maxOpenFiles <= RESERVED_OPEN_FILES) { return static_cast(2); // minimum functional number } - return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 5); + return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 7); // 5x rest_workers to initialize ovms and 2x rest_workers for new connections } #else uint64_t getDefaultRestWorkers() { @@ -335,8 +335,8 @@ bool Config::validate() { return false; } #ifdef __linux__ - if (restWorkers() > (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 5) { - std::cerr << "rest_workers count cannot be larger than " << (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 5 << " due to open files limit. 
Current open files limit: " << getMaxOpenFilesLimit() << std::endl; + if (restWorkers() > (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 6) { + std::cerr << "rest_workers count cannot be larger than " << (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 6 << " due to open files limit. Current open files limit: " << getMaxOpenFilesLimit() << std::endl; return false; } #endif diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 7d472ef5ce..959277a2ed 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -208,11 +208,11 @@ TEST_F(OvmsConfigDeathTest, restWorkersTooLarge) { #ifdef __linux__ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { - // limit allowed number of open files to 1024 to make sure that rest_workers count is too large for the limit based on number of cpu cores alone + // limit allowed number of open files to value that enforce default rest_workers to be determined based on open files limit instead of number of cpu cores alone. This is to test that default rest_workers count is reduced when open files limit is low. 
int cpu_cores = ovms::getCoreCount(); struct rlimit limit; ASSERT_EQ(getrlimit(RLIMIT_NOFILE, &limit), 0); - struct rlimit newLimit = {static_cast(cpu_cores * 5), limit.rlim_max}; + struct rlimit newLimit = {std::min(static_cast(cpu_cores * 5), limit.rlim_max), limit.rlim_max}; std::cout << "Setting open files limit to " << newLimit.rlim_cur << " to test that default rest_workers count is reduced based on open files limit" << std::endl; ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); @@ -221,7 +221,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { ovms::Config::instance().parse(arg_count, n_argv); EXPECT_TRUE(ovms::Config::instance().validate()); - ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); // revert ulimit to original value } TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { @@ -233,7 +233,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); char* n_argv[] = {"ovms", "--config_path", "/path1", "--rest_port", "8080", "--port", "8081", "--rest_workers", "1000"}; int arg_count = 9; - EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 202 due to open files limit. Current open files limit: 1024"); + EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 169 due to open files limit. 
Current open files limit: 1024"); ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); } #endif From f3af31696ada0abb550d17cab7153a526d327217 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Fri, 3 Apr 2026 14:51:46 +0200 Subject: [PATCH 09/21] style --- src/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.cpp b/src/config.cpp index 67b9ccde8a..3206777de5 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -69,7 +69,7 @@ uint64_t getDefaultRestWorkers() { if (maxOpenFiles <= RESERVED_OPEN_FILES) { return static_cast(2); // minimum functional number } - return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 7); // 5x rest_workers to initialize ovms and 2x rest_workers for new connections + return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 7); // 5x rest_workers to initialize ovms and 2x rest_workers for new connections } #else uint64_t getDefaultRestWorkers() { From d83678c092dda66a4b12413c95ca717fc3cf426f Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Sat, 4 Apr 2026 01:36:12 +0200 Subject: [PATCH 10/21] unit test --- src/test/ovmsconfig_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 959277a2ed..e3f2211ab4 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -233,7 +233,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); char* n_argv[] = {"ovms", "--config_path", "/path1", "--rest_port", "8080", "--port", "8081", "--rest_workers", "1000"}; int arg_count = 9; - EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 169 due to open files limit. 
Current open files limit: 1024"); + EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 168 due to open files limit. Current open files limit: 1024"); ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); } #endif From 08dfab28542e4d0d2b1debff94e25cbc8ad9560a Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Fri, 8 May 2026 01:45:18 +0200 Subject: [PATCH 11/21] include detection of CPU cores in docker containers --- src/config.cpp | 2 +- src/systeminfo.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 19e7e71f88..6879e9891e 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -64,7 +64,7 @@ uint64_t getDefaultRestWorkers() { if (maxOpenFiles <= RESERVED_OPEN_FILES) { return static_cast(2); // minimum functional number } - + return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 7); // 5x rest_workers to initialize ovms and 2x rest_workers for new connections } #else diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index be953d7482..a463e817ca 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -86,12 +86,12 @@ bool isRunningInDocker() { uint16_t getCpuAffinityCount() { cpu_set_t mask; CPU_ZERO(&mask); - + if (sched_getaffinity(0, sizeof(mask), &mask) == -1) { SPDLOG_DEBUG("sched_getaffinity failed, returning hardware concurrency"); return std::thread::hardware_concurrency(); } - + int cpu_count = CPU_COUNT(&mask); SPDLOG_DEBUG("CPU affinity count: {}", cpu_count); return static_cast(cpu_count); @@ -124,7 +124,7 @@ uint16_t getDockerCpuQuota() { // Try cgroup v1 cpu.cfs_quota_us and cpu.cfs_period_us std::ifstream quota_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us"); std::ifstream period_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us"); - + if (quota_file.is_open() && period_file.is_open()) { std::string quota_str, period_str; if (std::getline(quota_file, quota_str) && 
std::getline(period_file, period_str)) { From c7a3e8ff552195661acf7fd27a443dd7428e38e7 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Fri, 8 May 2026 01:53:06 +0200 Subject: [PATCH 12/21] style --- src/config.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 6879e9891e..8c4b9eb2b5 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -40,10 +40,6 @@ #include #endif -#ifdef __linux__ -#include -#endif - namespace ovms { const uint32_t AVAILABLE_CORES = getCoreCount(); From c3457b0be034b70dcfde1973815af221c416ea47 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Fri, 8 May 2026 18:00:37 +0200 Subject: [PATCH 13/21] review changes --- src/config.cpp | 35 +++++++++-------------------------- src/modelmanager.cpp | 7 +++++++ src/systeminfo.cpp | 8 +------- src/test/ovmsconfig_test.cpp | 22 +++++++++++++++++----- src/test/systeminfo_test.cpp | 4 +--- 5 files changed, 35 insertions(+), 41 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 8c4b9eb2b5..0da232ad9c 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "config.hpp" #include +#include #include #include #include @@ -36,9 +37,6 @@ #include "modelconfig.hpp" #include "stringutils.hpp" #include "systeminfo.hpp" -#ifdef __linux__ -#include -#endif namespace ovms { @@ -55,13 +53,15 @@ const uint64_t MAX_REST_WORKERS = 10'000; #ifdef __linux__ const uint64_t RESERVED_OPEN_FILES = 15; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers +const uint64_t OPEN_FILES_PER_REST_WORKER = 7; // 5x rest_workers to initialize ovms and 2x rest_workers for new connections uint64_t getDefaultRestWorkers() { const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); if (maxOpenFiles <= RESERVED_OPEN_FILES) { return static_cast(2); // minimum functional number } - - return 
std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / 7); // 5x rest_workers to initialize ovms and 2x rest_workers for new connections + // 2 is a minimal number of default rest workers + const uint64_t MIN_DEFAULT_REST_WORKERS = 2; + return std::max(std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / OPEN_FILES_PER_REST_WORKER), MIN_DEFAULT_REST_WORKERS); } #else uint64_t getDefaultRestWorkers() { @@ -317,7 +317,8 @@ bool Config::validate() { } // check rest_workers value - if (((restWorkers() > MAX_REST_WORKERS) || (restWorkers() < 2))) { + const uint32_t restWorkersValue = restWorkers(); // Cache to avoid multiple calls + if (((restWorkersValue > MAX_REST_WORKERS) || (restWorkersValue < 2))) { std::cerr << "rest_workers count should be from 2 to " << MAX_REST_WORKERS << std::endl; return false; } @@ -327,7 +328,7 @@ bool Config::validate() { return false; } #ifdef __linux__ - if (restWorkers() > (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 6) { + if (restWorkersValue > (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 6) { std::cerr << "rest_workers count cannot be larger than " << (getMaxOpenFilesLimit() - RESERVED_OPEN_FILES) / 6 << " due to open files limit. Current open files limit: " << getMaxOpenFilesLimit() << std::endl; return false; } @@ -394,25 +395,7 @@ const std::string Config::restBindAddress() const { return this->serverSettings. 
uint32_t Config::grpcWorkers() const { return this->serverSettings.grpcWorkers; } uint32_t Config::grpcMaxThreads() const { return this->serverSettings.grpcMaxThreads.value_or(DEFAULT_GRPC_MAX_THREADS); } size_t Config::grpcMemoryQuota() const { return this->serverSettings.grpcMemoryQuota.value_or(DEFAULT_GRPC_MEMORY_QUOTA); } -uint32_t Config::restWorkers() const { - if (this->serverSettings.restWorkers.has_value()) { - return this->serverSettings.restWorkers.value(); - } - - const uint64_t defaultRestWorkers = getDefaultRestWorkers(); -#ifdef __linux__ - const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); - const uint16_t detectedCores = getCoreCount(); - SPDLOG_DEBUG("Detected cores: {} (may be constrained by Docker limits), max open files: {}, calculated default rest workers: {}", detectedCores, maxOpenFiles, defaultRestWorkers); - if (isRunningInDocker()) { - SPDLOG_DEBUG("Docker CPU quota detected: {}, CPU affinity count: {}", getDockerCpuQuota(), getCpuAffinityCount()); - } -#else - const uint16_t detectedCores = getCoreCount(); - SPDLOG_DEBUG("Detected cores: {}, calculated default rest workers: {}", detectedCores, defaultRestWorkers); -#endif - return static_cast(defaultRestWorkers); -} +uint32_t Config::restWorkers() const { return this->serverSettings.restWorkers.value_or(getDefaultRestWorkers()); } const std::string& Config::modelName() const { return this->modelsSettings.modelName; } const std::string& Config::modelPath() const { return this->modelsSettings.modelPath; } const std::string& Config::batchSize() const { diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index 67a0a38399..8b643ff78a 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -69,6 +69,7 @@ #include "schema.hpp" #include "servable_definition.hpp" #include "stringutils.hpp" +#include "systeminfo.hpp" namespace ovms { @@ -151,6 +152,12 @@ ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistr throw; } this->logPluginConfiguration(); 
+#ifdef __linux__ + if (isRunningInDocker()) { + SPDLOG_INFO("Running inside Docker container"); + SPDLOG_INFO("cpu quota: {}, cpu affinity: {}, max_open_files: {}", getDockerCpuQuota(), getCpuAffinityCount(), getMaxOpenFilesLimit()); + } +#endif } void ModelManager::logPluginConfiguration() { diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index a463e817ca..b09f29a8c3 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -45,7 +45,7 @@ uint16_t getCoreCount() { } } #endif - return std::max(static_cast(2), detectedCoreCount); + return detectedCoreCount; } uint64_t getMaxOpenFilesLimit() { @@ -64,7 +64,6 @@ bool isRunningInDocker() { // Check for /.dockerenv file std::ifstream dockerenv("/.dockerenv"); if (dockerenv.good()) { - SPDLOG_DEBUG("Running inside Docker container (/.dockerenv detected)"); return true; } @@ -74,7 +73,6 @@ bool isRunningInDocker() { std::string line; while (std::getline(cgroup, line)) { if (line.find("docker") != std::string::npos) { - SPDLOG_DEBUG("Running inside Docker container (docker reference in /proc/self/cgroup detected)"); return true; } } @@ -88,12 +86,10 @@ uint16_t getCpuAffinityCount() { CPU_ZERO(&mask); if (sched_getaffinity(0, sizeof(mask), &mask) == -1) { - SPDLOG_DEBUG("sched_getaffinity failed, returning hardware concurrency"); return std::thread::hardware_concurrency(); } int cpu_count = CPU_COUNT(&mask); - SPDLOG_DEBUG("CPU affinity count: {}", cpu_count); return static_cast(cpu_count); } @@ -111,7 +107,6 @@ uint16_t getDockerCpuQuota() { uint64_t period = std::stoull(period_str); if (quota > 0 && period > 0 && quota != ULLONG_MAX) { uint16_t cpu_count = static_cast((quota + period - 1) / period); - SPDLOG_DEBUG("Docker CPU quota (v2): {} / {} = {} CPUs", quota, period, cpu_count); return cpu_count; } } catch (const std::exception&) { @@ -136,7 +131,6 @@ uint16_t getDockerCpuQuota() { uint64_t period = std::stoull(period_str); if (quota > 0 && period > 0) { uint16_t cpu_count = static_cast((quota + period - 1) 
/ period); - SPDLOG_DEBUG("Docker CPU quota (v1): {} / {} = {} CPUs", quota, period, cpu_count); return cpu_count; } } catch (const std::exception&) { diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index caef79ab01..8be3cb7dbd 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -33,6 +33,19 @@ #ifdef __linux__ #include + +namespace { +class ScopedNoFileRlimitRestore { +public: + explicit ScopedNoFileRlimitRestore(const struct rlimit& originalLimit) : originalLimit(originalLimit) {} + ~ScopedNoFileRlimitRestore() { + setrlimit(RLIMIT_NOFILE, &originalLimit); + } + +private: + struct rlimit originalLimit; +}; +} // namespace #endif using testing::_; @@ -212,6 +225,7 @@ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { int cpu_cores = ovms::getCoreCount(); struct rlimit limit; ASSERT_EQ(getrlimit(RLIMIT_NOFILE, &limit), 0); + ScopedNoFileRlimitRestore restoreOriginalLimit(limit); struct rlimit newLimit = {std::min(static_cast(cpu_cores * 5), limit.rlim_max), limit.rlim_max}; std::cout << "Setting open files limit to " << newLimit.rlim_cur << " to test that default rest_workers count is reduced based on open files limit" << std::endl; ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); @@ -220,21 +234,19 @@ TEST_F(OvmsConfigDeathTest, restWorkersDefaultReducedForOpenFilesLimit) { int arg_count = 7; ovms::Config::instance().parse(arg_count, n_argv); EXPECT_TRUE(ovms::Config::instance().validate()); - - ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); // revert ulimit to original value } TEST_F(OvmsConfigDeathTest, restWorkersTooLargeForOpenFilesLimit) { // limit allowed number of open files to 1024 to make sure that rest_workers count is too large. 
struct rlimit limit; ASSERT_EQ(getrlimit(RLIMIT_NOFILE, &limit), 0); - struct rlimit newLimit = {1024, limit.rlim_max}; + ScopedNoFileRlimitRestore restoreOriginalLimit(limit); + struct rlimit newLimit = {std::min(static_cast(1024), limit.rlim_max), limit.rlim_max}; std::cout << "Setting open files limit to " << newLimit.rlim_cur << " to test that rest_workers count is too large for the limit based on number of cpu cores alone" << std::endl; ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &newLimit), 0); char* n_argv[] = {"ovms", "--config_path", "/path1", "--rest_port", "8080", "--port", "8081", "--rest_workers", "1000"}; int arg_count = 9; - EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than 168 due to open files limit. Current open files limit: 1024"); - ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &limit), 0); + EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "rest_workers count cannot be larger than .* due to open files limit. Current open files limit: .*1024"); } #endif diff --git a/src/test/systeminfo_test.cpp b/src/test/systeminfo_test.cpp index 3440912250..73424823d6 100644 --- a/src/test/systeminfo_test.cpp +++ b/src/test/systeminfo_test.cpp @@ -14,7 +14,6 @@ // limitations under the License. 
//***************************************************************************** -#include #include #include @@ -32,6 +31,5 @@ using ovms::StatusCode; TEST(SystemInfo, getCoreCount) { uint16_t cpuCount = getCoreCount(); - EXPECT_GE(cpuCount, 2); - EXPECT_LE(cpuCount, std::max(static_cast(2), static_cast(std::thread::hardware_concurrency()))); + EXPECT_GE(cpuCount, 1); } From 255a95e11e8d928457c25767f05b9a3fd7d8d885 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Fri, 8 May 2026 18:03:05 +0200 Subject: [PATCH 14/21] style --- src/config.cpp | 2 +- src/test/ovmsconfig_test.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index 0da232ad9c..f44a460523 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -52,7 +52,7 @@ const uint64_t MAX_REST_WORKERS = 10'000; // on linux, restrict also based on the max allowed number of open files #ifdef __linux__ -const uint64_t RESERVED_OPEN_FILES = 15; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers +const uint64_t RESERVED_OPEN_FILES = 15; // we need to reserve some file descriptors for other operations, so we don't want to use all of them for drogon workers const uint64_t OPEN_FILES_PER_REST_WORKER = 7; // 5x rest_workers to initialize ovms and 2x rest_workers for new connections uint64_t getDefaultRestWorkers() { const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 8be3cb7dbd..d308b3db76 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -37,7 +37,8 @@ namespace { class ScopedNoFileRlimitRestore { public: - explicit ScopedNoFileRlimitRestore(const struct rlimit& originalLimit) : originalLimit(originalLimit) {} + explicit ScopedNoFileRlimitRestore(const struct rlimit& originalLimit) : + originalLimit(originalLimit) {} ~ScopedNoFileRlimitRestore() { setrlimit(RLIMIT_NOFILE, &originalLimit); } 
From 6f6954d7db39592bd5e42d4e22a0fcd187cb27f8 Mon Sep 17 00:00:00 2001 From: "Trawinski, Dariusz" Date: Fri, 8 May 2026 20:05:21 +0200 Subject: [PATCH 15/21] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Co-authored-by: Trawinski, Dariusz --- src/systeminfo.cpp | 6 ++++-- src/test/systeminfo_test.cpp | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index b09f29a8c3..d7abe5335b 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -102,10 +101,13 @@ uint16_t getDockerCpuQuota() { std::istringstream iss(line); std::string quota_str, period_str; if (iss >> quota_str >> period_str) { + if (quota_str == "max") { + return 0; // No quota set + } try { uint64_t quota = std::stoull(quota_str); uint64_t period = std::stoull(period_str); - if (quota > 0 && period > 0 && quota != ULLONG_MAX) { + if (quota > 0 && period > 0) { uint16_t cpu_count = static_cast((quota + period - 1) / period); return cpu_count; } diff --git a/src/test/systeminfo_test.cpp b/src/test/systeminfo_test.cpp index 73424823d6..c83aba9c77 100644 --- a/src/test/systeminfo_test.cpp +++ b/src/test/systeminfo_test.cpp @@ -32,4 +32,5 @@ using ovms::StatusCode; TEST(SystemInfo, getCoreCount) { uint16_t cpuCount = getCoreCount(); EXPECT_GE(cpuCount, 1); + EXPECT_LE(cpuCount, std::thread::hardware_concurrency()); } From 687ccd3fdbf228aa176cc5292ae231183e331893 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Sat, 9 May 2026 01:20:31 +0200 Subject: [PATCH 16/21] apply OV properties based on HW resources --- src/BUILD | 1 + src/config.cpp | 17 ++++++++++++----- src/llm/BUILD | 2 ++ .../servable_initializer.cpp | 7 ++++++- src/modelmanager.cpp | 18 +++++++++++++++++- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/BUILD b/src/BUILD index 41f7ac8116..5c976c57e0 100644 ---
a/src/BUILD +++ b/src/BUILD @@ -348,6 +348,7 @@ ovms_cc_library( "libovms_cliparser", "libovms_systeminfo", "ovms_exit_codes", + "//src/utils:env_guard", ], visibility = ["//visibility:public",], additional_copts = COPTS_DROGON, diff --git a/src/config.cpp b/src/config.cpp index f44a460523..a00efb2246 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -37,6 +37,7 @@ #include "modelconfig.hpp" #include "stringutils.hpp" #include "systeminfo.hpp" +#include "utils/env_guard.hpp" namespace ovms { @@ -57,11 +58,9 @@ const uint64_t OPEN_FILES_PER_REST_WORKER = 7; // 5x rest_workers to initialize uint64_t getDefaultRestWorkers() { const uint64_t maxOpenFiles = getMaxOpenFilesLimit(); if (maxOpenFiles <= RESERVED_OPEN_FILES) { - return static_cast(2); // minimum functional number + return static_cast(0); } - // 2 is a minimal number of default rest workers - const uint64_t MIN_DEFAULT_REST_WORKERS = 2; - return std::max(std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / OPEN_FILES_PER_REST_WORKER), MIN_DEFAULT_REST_WORKERS); + return std::min(static_cast(AVAILABLE_CORES), (maxOpenFiles - RESERVED_OPEN_FILES) / OPEN_FILES_PER_REST_WORKER); } #else uint64_t getDefaultRestWorkers() { @@ -93,6 +92,14 @@ Config& Config::parse(int argc, char** argv) { bool Config::parse(ServerSettingsImpl* serverSettings, ModelsSettingsImpl* modelsSettings) { this->serverSettings = *serverSettings; this->modelsSettings = *modelsSettings; + + static EnvGuard envGuard; +#if defined(__linux__) || defined(_WIN32) + if (this->serverSettings.logLevel == "DEBUG") { + envGuard.set("OPENVINO_LOG_LEVEL", "4"); + } +#endif + return validate(); } @@ -395,7 +402,7 @@ const std::string Config::restBindAddress() const { return this->serverSettings. 
uint32_t Config::grpcWorkers() const { return this->serverSettings.grpcWorkers; } uint32_t Config::grpcMaxThreads() const { return this->serverSettings.grpcMaxThreads.value_or(DEFAULT_GRPC_MAX_THREADS); } size_t Config::grpcMemoryQuota() const { return this->serverSettings.grpcMemoryQuota.value_or(DEFAULT_GRPC_MEMORY_QUOTA); } -uint32_t Config::restWorkers() const { return this->serverSettings.restWorkers.value_or(getDefaultRestWorkers()); } +uint32_t Config::restWorkers() const { return static_cast(std::max(static_cast(2), static_cast(this->serverSettings.restWorkers.value_or(getDefaultRestWorkers())))); } const std::string& Config::modelName() const { return this->modelsSettings.modelName; } const std::string& Config::modelPath() const { return this->modelsSettings.modelPath; } const std::string& Config::batchSize() const { diff --git a/src/llm/BUILD b/src/llm/BUILD index 8fe6059d71..9832dcb5a3 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -283,6 +283,8 @@ ovms_cc_library( "//src:httppayload", "//src:libhttpclientconnection", "//src:sse_utils", + "//src:libovms_systeminfo", + "//src:libovms_config", "//third_party:genai",] + select({ "//:disable_python": [], "//:not_disable_python" : [":py_jinja_template_processor"], diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index 27f4f51aee..779e3344ce 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -32,10 +32,12 @@ #pragma GCC diagnostic pop #pragma warning(pop) +#include "../../../config.hpp" #include "../../../json_parser.hpp" #include "../../../logging.hpp" #include "../../../mediapipe_internal/mediapipe_utils.hpp" #include "../../../status.hpp" +#include "../../../systeminfo.hpp" #include "llm_executor.hpp" #include "servable.hpp" #include "servable_initializer.hpp" @@ -204,7 +206,10 @@ Status 
ContinuousBatchingServableInitializer::initialize(std::shared_ptrtokenizerPluginConfig = {{"PERFORMANCE_HINT", "THROUGHPUT"}}; + const uint32_t numStreams = std::min(static_cast(Config::instance().restWorkers()), static_cast(getCoreCount())); + SPDLOG_DEBUG("Setting tokenizer/detokenizer NUM_STREAMS to: {}", numStreams); + properties->tokenizerPluginConfig = {{"NUM_STREAMS", static_cast(numStreams)}, {"PERFORMANCE_HINT", "THROUGHPUT"}}; + try { properties->pipeline = std::make_shared(parsedModelsPath, properties->schedulerConfig, properties->device, diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index 8b643ff78a..d22d009367 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -80,7 +80,6 @@ const std::string DEFAULT_MODEL_CACHE_DIRECTORY = "c:\\Intel\\openvino_cache"; const std::string DEFAULT_MODEL_CACHE_DIRECTORY = "/opt/cache"; #endif ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistry* registry, PythonBackend* pythonBackend) : - ieCore(std::make_unique()), pipelineFactory(std::make_unique()), #if (MEDIAPIPE_DISABLE == 0) mediapipeFactory(std::make_unique(pythonBackend)), @@ -90,6 +89,23 @@ ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistr modelCacheDirectory(modelCacheDirectory), metricRegistry(registry), pythonBackend(pythonBackend) { + try { + this->ieCore = std::make_unique(); + const uint16_t detectedCoreCount = getCoreCount(); + SPDLOG_DEBUG("Setting CPU inference_num_threads to: {}", detectedCoreCount); + this->ieCore->set_property("CPU", ov::inference_num_threads(static_cast(detectedCoreCount))); + +#ifdef __linux__ + if (isRunningInDocker()) { + const bool cpuQuotaDefined = getDockerCpuQuota() > 0; + this->ieCore->set_property("CPU", ov::hint::enable_cpu_pinning(!cpuQuotaDefined)); + } +#endif + } catch (const std::exception& ex) { + SPDLOG_CRITICAL("Failed to initialize OpenVINO Core with CPU properties set from detected core count and Docker constraints. 
Reason: {}", ex.what()); + throw; + } + OV_LOGGER("ov::Core(): {}", reinterpret_cast(this->ieCore.get())); // Take --cache_dir from CLI if (this->modelCacheDirectory.empty()) { From f29aedb574b23a3d08f5c675c47b33a1956bc5b1 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Sat, 9 May 2026 01:40:56 +0200 Subject: [PATCH 17/21] style fix --- .../language_model/continuous_batching/servable_initializer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index 779e3344ce..28d474562b 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** +#include #include #include #include From 128e1f3f5095860fa4d8c8ff275c239dccab1b50 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Sun, 10 May 2026 02:11:10 +0200 Subject: [PATCH 18/21] set default properties in genai pipelines --- src/BUILD | 4 +- src/llm/BUILD | 1 + .../servable_initializer.cpp | 20 ++++++-- src/modelmanager.cpp | 19 ++++---- src/ov_utils.cpp | 46 +++++++++++++++++++ src/ov_utils.hpp | 9 ++++ src/sidepacket_servable.cpp | 8 ++++ 7 files changed, 93 insertions(+), 14 deletions(-) diff --git a/src/BUILD b/src/BUILD index 5c976c57e0..151d1efe2f 100644 --- a/src/BUILD +++ b/src/BUILD @@ -2077,6 +2077,7 @@ ovms_cc_library( "libovmsshape", "libovmsprofiler", "libovms_tensorinfo", + "libovms_systeminfo", ], visibility = ["//visibility:public"], ) @@ -3110,7 +3111,8 @@ ovms_cc_library( "@mediapipe//mediapipe/framework:calculator_framework", "//third_party:openvino", "@com_github_tencent_rapidjson//:rapidjson", - "//third_party:genai",], + "//third_party:genai", + 
"//src:libovms_ov_utils",], visibility = ["//visibility:public"], alwayslink = 1, ) diff --git a/src/llm/BUILD b/src/llm/BUILD index 9832dcb5a3..0939d3376c 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -285,6 +285,7 @@ ovms_cc_library( "//src:sse_utils", "//src:libovms_systeminfo", "//src:libovms_config", + "//src:libovms_ov_utils", "//third_party:genai",] + select({ "//:disable_python": [], "//:not_disable_python" : [":py_jinja_template_processor"], diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index 28d474562b..4aed056ca0 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -37,6 +37,7 @@ #include "../../../json_parser.hpp" #include "../../../logging.hpp" #include "../../../mediapipe_internal/mediapipe_utils.hpp" +#include "../../../ov_utils.hpp" #include "../../../status.hpp" #include "../../../systeminfo.hpp" #include "llm_executor.hpp" @@ -207,9 +208,22 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr(Config::instance().restWorkers()), static_cast(getCoreCount())); - SPDLOG_DEBUG("Setting tokenizer/detokenizer NUM_STREAMS to: {}", numStreams); - properties->tokenizerPluginConfig = {{"NUM_STREAMS", static_cast(numStreams)}, {"PERFORMANCE_HINT", "THROUGHPUT"}}; + if (properties->device == "CPU") { + status = applyDefaultCpuProperties(properties->pluginConfig); + if (!status.ok()) { + SPDLOG_ERROR("Failed to apply default CPU properties for LLM model: {}", status.string()); + return status; + } + } + + ov::AnyMap tokenProperties; + tokenProperties[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT; + status = applyDefaultCpuProperties(tokenProperties); + if (!status.ok()) { + SPDLOG_ERROR("Failed to apply default CPU properties for tokenizer: {}", status.string()); + return status; + } + 
properties->tokenizerPluginConfig = tokenProperties; try { properties->pipeline = std::make_shared(parsedModelsPath, diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index d22d009367..fa7f849094 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -91,18 +91,15 @@ ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistr pythonBackend(pythonBackend) { try { this->ieCore = std::make_unique(); - const uint16_t detectedCoreCount = getCoreCount(); - SPDLOG_DEBUG("Setting CPU inference_num_threads to: {}", detectedCoreCount); - this->ieCore->set_property("CPU", ov::inference_num_threads(static_cast(detectedCoreCount))); - -#ifdef __linux__ - if (isRunningInDocker()) { - const bool cpuQuotaDefined = getDockerCpuQuota() > 0; - this->ieCore->set_property("CPU", ov::hint::enable_cpu_pinning(!cpuQuotaDefined)); + ov::AnyMap cpuProperties; + Status status = applyDefaultCpuProperties(cpuProperties); + if (!status.ok()) { + SPDLOG_CRITICAL("Failed to apply default CPU properties. Reason: {}", status.string()); + throw std::runtime_error("Failed to apply default CPU properties"); } -#endif + this->ieCore->set_property("CPU", cpuProperties); } catch (const std::exception& ex) { - SPDLOG_CRITICAL("Failed to initialize OpenVINO Core with CPU properties set from detected core count and Docker constraints. Reason: {}", ex.what()); + SPDLOG_CRITICAL("Failed to initialize OpenVINO Core with CPU properties. 
Reason: {}", ex.what()); throw; } @@ -174,6 +171,8 @@ ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistr SPDLOG_INFO("cpu quota: {}, cpu affinity: {}, max_open_files: {}", getDockerCpuQuota(), getCpuAffinityCount(), getMaxOpenFilesLimit()); } #endif + + } void ModelManager::logPluginConfiguration() { diff --git a/src/ov_utils.cpp b/src/ov_utils.cpp index 4b976d4916..e3adf23dc8 100644 --- a/src/ov_utils.cpp +++ b/src/ov_utils.cpp @@ -26,6 +26,7 @@ #include "logging.hpp" #include "profiler.hpp" #include "status.hpp" +#include "systeminfo.hpp" #include "tensorinfo.hpp" namespace ovms { @@ -148,4 +149,49 @@ Status validatePluginConfiguration(const plugin_config_t& pluginConfig, const st return StatusCode::OK; } + +Status applyDefaultCpuProperties(ov::AnyMap& properties) { + try { + const uint16_t coreCount = getCoreCount(); + + if (properties.find(ov::inference_num_threads.name()) == properties.end()) { + properties[ov::inference_num_threads.name()] = static_cast(coreCount); + SPDLOG_DEBUG("applyDefaultCpuProperties: setting inference_num_threads to {}", coreCount); + } + +#ifdef __linux__ + if (properties.find(ov::hint::enable_cpu_pinning.name()) == properties.end()) { + if (isRunningInDocker()) { + const bool cpuPinning = getDockerCpuQuota() <= 0; + properties[ov::hint::enable_cpu_pinning.name()] = cpuPinning; + SPDLOG_DEBUG("applyDefaultCpuProperties: setting enable_cpu_pinning to {}", cpuPinning); + } + } +#endif + + const auto perfIt = properties.find(ov::hint::performance_mode.name()); + if (perfIt != properties.end()) { + bool isThroughput = false; + try { + isThroughput = (perfIt->second.as() == ov::hint::PerformanceMode::THROUGHPUT); + } catch (...) { + try { + isThroughput = (perfIt->second.as() == "THROUGHPUT"); + } catch (...) 
{} + } + if (isThroughput && properties.find(ov::num_streams.name()) == properties.end()) { + properties[ov::num_streams.name()] = static_cast(coreCount); + SPDLOG_DEBUG("applyDefaultCpuProperties: setting num_streams to {} (THROUGHPUT hint active)", coreCount); + } + } + } catch (const std::exception& ex) { + SPDLOG_ERROR("Exception while applying default CPU properties: {}", ex.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) { + SPDLOG_ERROR("Unknown exception while applying default CPU properties"); + return StatusCode::INTERNAL_ERROR; + } + return StatusCode::OK; +} + } // namespace ovms diff --git a/src/ov_utils.hpp b/src/ov_utils.hpp index 011f342a0d..b13ef31c95 100644 --- a/src/ov_utils.hpp +++ b/src/ov_utils.hpp @@ -52,6 +52,13 @@ std::optional getLayoutFromRTMap(const ov::RTMap& rtMap); Status validatePluginConfiguration(const plugin_config_t& pluginConfig, const std::string& targetDevice, const ov::Core& ieCore); +// Applies resource-aware CPU defaults to an OpenVINO property map. +// Sets inference_num_threads and (on Linux) enable_cpu_pinning only when not +// already present in the map. When PERFORMANCE_HINT=THROUGHPUT is set, +// num_streams is also capped to the detected core count if not already set. +// Returns StatusCode::INTERNAL_ERROR on any OpenVINO exception. 
+Status applyDefaultCpuProperties(ov::AnyMap& properties); + // Logging // #1 model/global plugin CompiledMode:DUMMY / Global OpenVINO plugin:CPU // #2 version/_ @@ -96,4 +103,6 @@ static void logOVPluginConfig(PropertyExtractor&& propertyExtractor, const std:: std::string pluginConfigNameValuesString = joins(pluginConfigNameValues, ", "); SPDLOG_LOGGER_DEBUG(modelmanager_logger, "{}; {}plugin configuration: {{ {} }}", loggingAuthor, loggingDetails, pluginConfigNameValuesString); } + + } // namespace ovms diff --git a/src/sidepacket_servable.cpp b/src/sidepacket_servable.cpp index c8978f2f2c..f8e7228bbf 100644 --- a/src/sidepacket_servable.cpp +++ b/src/sidepacket_servable.cpp @@ -20,6 +20,7 @@ #include "openvino/runtime/core.hpp" #include "sidepacket_servable.hpp" #include "logging.hpp" +#include "ov_utils.hpp" #include #include #include @@ -134,6 +135,13 @@ void SidepacketServable::initialize(const std::string& modelDir, const std::stri ov::Core core; std::shared_ptr m_model = core.read_model(parsedModelsPath / std::filesystem::path("openvino_model.xml"), {}, properties); m_model = this->applyPrePostProcessing(core, m_model, properties); + if (targetDevice == "CPU") { + auto status = applyDefaultCpuProperties(properties); + if (!status.ok()) { + SPDLOG_ERROR("Failed to apply default CPU properties for embeddings model: {}", status.string()); + return; + } + } compiledModel = core.compile_model(m_model, targetDevice, properties); SPDLOG_DEBUG("Model compiled {} for {}", parsedModelsPath.string(), targetDevice); From e701fe00636d2a263638789233fe0f60b88b1c77 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Sun, 10 May 2026 02:13:44 +0200 Subject: [PATCH 19/21] style --- src/modelmanager.cpp | 2 -- src/ov_utils.cpp | 3 ++- src/ov_utils.hpp | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index fa7f849094..1b65eb4e59 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -171,8 +171,6 @@ 
ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistr SPDLOG_INFO("cpu quota: {}, cpu affinity: {}, max_open_files: {}", getDockerCpuQuota(), getCpuAffinityCount(), getMaxOpenFilesLimit()); } #endif - - } void ModelManager::logPluginConfiguration() { diff --git a/src/ov_utils.cpp b/src/ov_utils.cpp index e3adf23dc8..96d156b041 100644 --- a/src/ov_utils.cpp +++ b/src/ov_utils.cpp @@ -177,7 +177,8 @@ Status applyDefaultCpuProperties(ov::AnyMap& properties) { } catch (...) { try { isThroughput = (perfIt->second.as() == "THROUGHPUT"); - } catch (...) {} + } catch (...) { + } } if (isThroughput && properties.find(ov::num_streams.name()) == properties.end()) { properties[ov::num_streams.name()] = static_cast(coreCount); diff --git a/src/ov_utils.hpp b/src/ov_utils.hpp index b13ef31c95..c1ca92ecb4 100644 --- a/src/ov_utils.hpp +++ b/src/ov_utils.hpp @@ -104,5 +104,4 @@ static void logOVPluginConfig(PropertyExtractor&& propertyExtractor, const std:: SPDLOG_LOGGER_DEBUG(modelmanager_logger, "{}; {}plugin configuration: {{ {} }}", loggingAuthor, loggingDetails, pluginConfigNameValuesString); } - } // namespace ovms From 773c941135dcd39b73ad0b5435838f57816eb3f2 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Mon, 11 May 2026 12:49:27 +0200 Subject: [PATCH 20/21] extra condition for containerd and k8s --- .../continuous_batching/servable_initializer.cpp | 3 +++ src/systeminfo.cpp | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index 4aed056ca0..d5cc5f4cff 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -217,7 +217,10 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr(Config::instance().restWorkers()), static_cast(getCoreCount())); +
tokenProperties[ov::num_streams.name()] = static_cast(tokenizerNumStreams); tokenProperties[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT; + SPDLOG_DEBUG("Setting tokenizer/detokenizer NUM_STREAMS to: {}", tokenizerNumStreams); status = applyDefaultCpuProperties(tokenProperties); if (!status.ok()) { SPDLOG_ERROR("Failed to apply default CPU properties for tokenizer: {}", status.string()); diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index d7abe5335b..b1501c428f 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -65,6 +65,11 @@ bool isRunningInDocker() { if (dockerenv.good()) { return true; } + // Check for /run/.containerenv file + std::ifstream containerenv("/run/.containerenv"); + if (containerenv.good()) { + return true; + } // Check /proc/self/cgroup for docker references std::ifstream cgroup("/proc/self/cgroup"); @@ -74,6 +79,9 @@ bool isRunningInDocker() { if (line.find("docker") != std::string::npos) { return true; } + if (line.find("kubepods") != std::string::npos) { + return true; + } } } From 605206bfb2cc3f5327238c9fa7f04a3ab9e5df16 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Mon, 11 May 2026 15:32:09 +0200 Subject: [PATCH 21/21] fix windows compilation --- src/sidepacket_servable.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sidepacket_servable.cpp b/src/sidepacket_servable.cpp index f8e7228bbf..18778f77d1 100644 --- a/src/sidepacket_servable.cpp +++ b/src/sidepacket_servable.cpp @@ -136,9 +136,9 @@ void SidepacketServable::initialize(const std::string& modelDir, const std::stri std::shared_ptr m_model = core.read_model(parsedModelsPath / std::filesystem::path("openvino_model.xml"), {}, properties); m_model = this->applyPrePostProcessing(core, m_model, properties); if (targetDevice == "CPU") { - auto status = applyDefaultCpuProperties(properties); - if (!status.ok()) { - SPDLOG_ERROR("Failed to apply default CPU properties for embeddings model: {}", 
status.string()); + auto cpuPropertiesStatus = applyDefaultCpuProperties(properties); + if (!cpuPropertiesStatus.ok()) { + SPDLOG_ERROR("Failed to apply default CPU properties for embeddings model: {}", cpuPropertiesStatus.string()); return; } }