diff --git a/.gitignore b/.gitignore index 6e91b6c47f..c404eb8cb8 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,4 @@ tmp/ *.tar.gz models genhtml -.github/skills/ \ No newline at end of file +.github/skills/ diff --git a/src/modelinstance.cpp b/src/modelinstance.cpp index 285230b193..8df4922dcc 100644 --- a/src/modelinstance.cpp +++ b/src/modelinstance.cpp @@ -16,6 +16,7 @@ #include "modelinstance.hpp" #include +#include #include #include #include @@ -82,6 +83,109 @@ enum : unsigned int { POSTPROCESS, TIMER_END }; + +enum class PluginConfigValueType { + INT64, + BOOL, +}; + +struct PluginConfigNormalizationRule { + const char* key; + PluginConfigValueType valueType; +}; + +const std::array TYPED_PLUGIN_CONFIG_NORMALIZATION_RULES{{ + {"NUM_STREAMS", PluginConfigValueType::INT64}, + {"INFERENCE_NUM_THREADS", PluginConfigValueType::INT64}, + {"AUTO_BATCH_TIMEOUT", PluginConfigValueType::INT64}, + {"ENABLE_CPU_PINNING", PluginConfigValueType::BOOL}, +}}; + +std::string pluginConfigValueToString(const ov::Any& value) { + if (value.is()) { + return value.as(); + } + if (value.is()) { + return value.as(); + } + if (value.is()) { + return std::to_string(value.as()); + } + if (value.is()) { + return std::to_string(value.as()); + } + if (value.is()) { + return std::to_string(value.as()); + } + if (value.is()) { + return std::to_string(value.as()); + } + if (value.is()) { + return value.as() ? "true" : "false"; + } + return ""; +} + +bool normalizePluginConfigValue(ov::Any& value, const PluginConfigNormalizationRule& rule) { + if (!value.is()) { + return false; + } + + const auto& stringValue = value.as(); + switch (rule.valueType) { + case PluginConfigValueType::INT64: { + auto parsedValue = ovms::stoi64(stringValue); + if (!parsedValue.has_value()) { + return false; + } + value = parsedValue.value(); + return true; + } + case PluginConfigValueType::BOOL: { + auto normalized = ovms::toLower(stringValue); + if (normalized == "true") { + value = true; + return true; + } + if (normalized == "false") { + value = false; + return true; + } + return false; + } + } + return false; +} + +template +void normalizeTypedPluginConfigValues(MapType& pluginConfig) { + for (const auto& rule : TYPED_PLUGIN_CONFIG_NORMALIZATION_RULES) { + auto it = pluginConfig.find(rule.key); + if (it == pluginConfig.end()) { + continue; + } + if (!normalizePluginConfigValue(it->second, rule) && it->second.template is()) { + SPDLOG_LOGGER_DEBUG(ovms::modelmanager_logger, "Keeping plugin config key: {} as string value: {}", rule.key, it->second.template as()); + } + } + + auto devicePropertiesIt = pluginConfig.find("DEVICE_PROPERTIES"); + if (devicePropertiesIt == pluginConfig.end() || !devicePropertiesIt->second.template is()) { + return; + } + + ov::AnyMap deviceProperties = devicePropertiesIt->second.template as(); + for (auto& devicePropertiesEntry : deviceProperties) { + auto& devicePropertiesAny = devicePropertiesEntry.second; + if (!devicePropertiesAny.is()) { + continue; + } + ov::AnyMap nestedDeviceProperties = devicePropertiesAny.as(); + normalizeTypedPluginConfigValues(nestedDeviceProperties); + devicePropertiesAny = nestedDeviceProperties; + } + devicePropertiesIt->second = deviceProperties; +} } // namespace namespace ov { @@ -910,6 +1014,7 @@ void ModelInstance::loadCompiledModelPtr(const plugin_config_t& pluginConfig) { plugin_config_t ModelInstance::prepareDefaultPluginConfig(const ModelConfig& config) { plugin_config_t pluginConfig = config.getPluginConfig(); + normalizeTypedPluginConfigValues(pluginConfig); if ((pluginConfig.count("NUM_STREAMS") == 1) || (pluginConfig.count("PERFORMANCE_HINT") == 1)) { return pluginConfig; } else { @@ -958,7 +1063,7 @@ Status ModelInstance::loadOVCompiledModel(const ModelConfig& config) { for (const auto& pair : pluginConfig) { const auto& key = pair.first; const auto& value = pair.second; - SPDLOG_LOGGER_INFO(modelmanager_logger, "OVMS set plugin settings key: {}; value: {};", key, value.as()); + SPDLOG_LOGGER_INFO(modelmanager_logger, "OVMS set plugin settings key: {}; value: {};", key, pluginConfigValueToString(value)); } logOVPluginConfig([this](const std::string& key) { OV_LOGGER("ov::CompiledModel:{} get_property({})", reinterpret_cast(this->model.get()), key); diff --git a/src/test/modelinstance_test.cpp b/src/test/modelinstance_test.cpp index c6418375bb..fa296683e2 100644 --- a/src/test/modelinstance_test.cpp +++ b/src/test/modelinstance_test.cpp @@ -1216,6 +1216,60 @@ TEST(CpuThroughputStreamsNotSpecified, NotSetWhenPerfHintSpecified) { EXPECT_EQ(pluginConfig.count("CPU_THROUGHPUT_STREAMS"), 0); } +TEST(PluginConfigNormalization, ConvertsKnownTopLevelStringValuesToTypedValues) { + ovms::ModelConfig config; + config.setPluginConfig({{"NUM_STREAMS", "4"}, + {"INFERENCE_NUM_THREADS", "2"}, + {"AUTO_BATCH_TIMEOUT", "7"}, + {"ENABLE_CPU_PINNING", "false"}, + {"PERFORMANCE_HINT", "LATENCY"}}); + + ovms::plugin_config_t pluginConfig = ovms::ModelInstance::prepareDefaultPluginConfig(config); + + ASSERT_EQ(pluginConfig.count("NUM_STREAMS"), 1); + ASSERT_TRUE(pluginConfig.at("NUM_STREAMS").is()); + EXPECT_EQ(pluginConfig.at("NUM_STREAMS").as(), 4); + ASSERT_TRUE(pluginConfig.at("INFERENCE_NUM_THREADS").is()); + EXPECT_EQ(pluginConfig.at("INFERENCE_NUM_THREADS").as(), 2); + ASSERT_TRUE(pluginConfig.at("AUTO_BATCH_TIMEOUT").is()); + EXPECT_EQ(pluginConfig.at("AUTO_BATCH_TIMEOUT").as(), 7); + ASSERT_TRUE(pluginConfig.at("ENABLE_CPU_PINNING").is()); + EXPECT_FALSE(pluginConfig.at("ENABLE_CPU_PINNING").as()); + ASSERT_TRUE(pluginConfig.at("PERFORMANCE_HINT").is()); + EXPECT_EQ(pluginConfig.at("PERFORMANCE_HINT").as(), "LATENCY"); +} + +TEST(PluginConfigNormalization, ConvertsKnownNestedDevicePropertiesStringValuesToTypedValues) { + ovms::ModelConfig config; + ov::AnyMap cpuProperties; + cpuProperties["NUM_STREAMS"] = std::string("3"); + cpuProperties["INFERENCE_NUM_THREADS"] = std::string("5"); + cpuProperties["ENABLE_CPU_PINNING"] = std::string("true"); + cpuProperties["PERFORMANCE_HINT"] = std::string("LATENCY"); + + ov::AnyMap deviceProperties; + deviceProperties["CPU"] = cpuProperties; + + ovms::plugin_config_t rawPluginConfig; + rawPluginConfig["DEVICE_PROPERTIES"] = deviceProperties; + config.setPluginConfig(rawPluginConfig); + + ovms::plugin_config_t pluginConfig = ovms::ModelInstance::prepareDefaultPluginConfig(config); + + ASSERT_TRUE(pluginConfig.at("DEVICE_PROPERTIES").is()); + auto normalizedDeviceProperties = pluginConfig.at("DEVICE_PROPERTIES").as(); + ASSERT_TRUE(normalizedDeviceProperties.at("CPU").is()); + auto normalizedCpuProperties = normalizedDeviceProperties.at("CPU").as(); + ASSERT_TRUE(normalizedCpuProperties.at("NUM_STREAMS").is()); + EXPECT_EQ(normalizedCpuProperties.at("NUM_STREAMS").as(), 3); + ASSERT_TRUE(normalizedCpuProperties.at("INFERENCE_NUM_THREADS").is()); + EXPECT_EQ(normalizedCpuProperties.at("INFERENCE_NUM_THREADS").as(), 5); + ASSERT_TRUE(normalizedCpuProperties.at("ENABLE_CPU_PINNING").is()); + EXPECT_TRUE(normalizedCpuProperties.at("ENABLE_CPU_PINNING").as()); + ASSERT_TRUE(normalizedCpuProperties.at("PERFORMANCE_HINT").is()); + EXPECT_EQ(normalizedCpuProperties.at("PERFORMANCE_HINT").as(), "LATENCY"); +} + TEST(TensorMap, TestProcessingHintFromShape) { auto servableInputs = ovms::tensor_map_t({ {"Input_FP32_1_224_224_3_NHWC", diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index e9044ad2cd..17ac5027d7 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -892,7 +892,27 @@ TEST_F(HfDownloadModelModule, TestInvalidProxyTimeout) { ConstructorEnabledConfig config; { EnvGuard eGuard; + // prepareLibgit2Opts() in hf_pull_model_module.cpp only applies the + // GIT_OPT_SET_SERVER_CONNECT_TIMEOUT option when https_proxy is empty, + // so we always clear https_proxy here. + // + // To make the timeout actually fire we need the destination to be + // unreachable. The behavior depends on the host's network setup: + // * Host originally used a proxy (https_proxy was set in the + // environment): the host is on a proxy-only network where a + // direct connection to huggingface.co will hang and hit the + // timeout. Keep the default HF_ENDPOINT. + // * Host has no proxy configured (direct internet access): a direct + // connection to huggingface.co would succeed within the 1 s + // timeout and the assertion below would fail. Redirect the clone + // to an unroutable RFC 5737 TEST-NET-1 address so the connect + // must time out. + const char* hostHttpsProxy = std::getenv("https_proxy"); + const bool hostHadProxy = (hostHttpsProxy != nullptr) && (std::string(hostHttpsProxy) != ""); eGuard.set("https_proxy", ""); + if (!hostHadProxy) { + eGuard.set("HF_ENDPOINT", "https://192.0.2.1/"); + } const std::string timeoutConnectVal = "1000"; eGuard.set(ovms::HfPullModelModule::GIT_SERVER_CONNECT_TIMEOUT_ENV, timeoutConnectVal); config.parse(arg_count, const_cast(n_argv));