diff --git a/src/BUILD b/src/BUILD index 41f7ac8116..c7f9631bfd 100644 --- a/src/BUILD +++ b/src/BUILD @@ -643,6 +643,7 @@ ovms_cc_library( "//src/dags:pipelinedefinition", "//src/filesystem:libovmsfilesystem", "//src/filesystem:libovmsfilesystemfactory", + "//src/graph_export:graph_export", "//src/metrics:libovms_metric_provider", "//src/metrics:libovmsmetrics", "@com_github_tencent_rapidjson//:rapidjson", @@ -931,6 +932,7 @@ ovms_cc_library( "//src/kfserving_api:kfserving_api_cpp", "capimodule", "//src/pull_module:hf_pull_model_module", + "//src/graph_export:graph_export", "//src/servables_config_manager_module:servablesconfigmanagermodule", "predict_request_validation_utils", # to be removed when capi has its own lib and added there @atobisze "kfs_backend_impl", diff --git a/src/capi_frontend/server_settings.hpp b/src/capi_frontend/server_settings.hpp index 5b8a3dce54..db5bbe1597 100644 --- a/src/capi_frontend/server_settings.hpp +++ b/src/capi_frontend/server_settings.hpp @@ -90,6 +90,7 @@ enum OvmsServerMode : int { HF_PULL_AND_START_MODE, LIST_MODELS_MODE, MODIFY_CONFIG_MODE, + GENAI_CONFIGURE_AND_START, UNKNOWN_MODE }; @@ -171,7 +172,7 @@ struct HFSettingsImpl { std::string downloadPath = ""; bool overwriteModels = false; ModelDownlaodType downloadType = GIT_CLONE_DOWNLOAD; - GraphExportType task = TEXT_GENERATION_GRAPH; + GraphExportType task = UNKNOWN_GRAPH; std::variant graphSettings; }; diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp index 9437fd9755..98134f3f4b 100644 --- a/src/cli_parser.cpp +++ b/src/cli_parser.cpp @@ -344,8 +344,8 @@ std::variant> CLIParser::parse(int argc, char* result = std::make_unique(options->parse(argc, argv)); - // HF pull mode or pull and start mode - if (isHFPullOrPullAndStart(this->result)) { + // HF pull mode or pull and start mode or starting from local folder with graph created in memory + if (isHFPullOrPullAndStart(this->result) || isGenAIConfigureAndStart(this->result)) { std::vector unmatchedOptions; GraphExportType task; if (result->count("task")) { @@ -692,13 +692,27 @@ void CLIParser::prepareModel(ModelsSettingsImpl& modelsSettings, HFSettingsImpl& } bool CLIParser::isHFPullOrPullAndStart(const std::unique_ptr& result) { + // Keep `--task` in the broad mutually exclusive task/pull CLI category so + // parse-time checks that rely on this helper continue to reject combining + // task-based flows with config-management modes. More specific mode + // differentiation is handled by isGenAIConfigureAndStart(). return (result->count("pull") || result->count("task")); } +bool CLIParser::isGenAIConfigureAndStart(const std::unique_ptr& result) { + return (result->count("task") && !result->count("source_model") && !result->count("pull")); +} + void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& hfSettings, const std::string& modelName) { + // Always propagate source_model so validation can detect misuse + if (result->count("source_model")) { + hfSettings.sourceModel = result->operator[]("source_model").as(); + } // Ovms Pull models mode || pull and start models mode - if (isHFPullOrPullAndStart(this->result)) { - if (result->count("pull")) { + if (isHFPullOrPullAndStart(this->result) || isGenAIConfigureAndStart(this->result)) { + if (isGenAIConfigureAndStart(this->result)) { + serverSettings.serverMode = GENAI_CONFIGURE_AND_START; + } else if (result->count("pull")) { serverSettings.serverMode = HF_PULL_MODE; } else { serverSettings.serverMode = HF_PULL_AND_START_MODE; @@ -711,8 +725,11 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& hfSettings.overwriteModels = result->operator[]("overwrite_models").as(); } if (result->count("source_model")) { + // Already set above, but keep the original flow for downloadType logic hfSettings.sourceModel = result->operator[]("source_model").as(); - } else if (result->count("model_name")) { + } else if (result->count("model_name") && !result->count("model_path")) { + // Only use model_name as source_model when model_path is not set + // (when model_path is set, user wants to use local model without HF pull) hfSettings.sourceModel = result->operator[]("model_name").as(); } if ((result->count("weight-format") || result->count("extra_quantization_params")) && isOptimumCliDownload(hfSettings.sourceModel, hfSettings.ggufFilename)) { @@ -732,6 +749,11 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& if (result->count("vocoder")) hfSettings.exportSettings.vocoder = result->operator[]("vocoder").as(); hfSettings.downloadPath = result->operator[]("model_repository_path").as(); + // When --task is used with --model_path but without --pull/--source_model, + // use model_path as the model location (no HF download needed) + if (!result->count("pull") && !result->count("source_model") && result->count("model_path")) { + hfSettings.exportSettings.modelPath = result->operator[]("model_path").as(); + } if (result->count("task")) { hfSettings.task = stringToEnum(result->operator[]("task").as()); switch (hfSettings.task) { @@ -798,7 +820,8 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& if (!serverSettings.cacheDir.empty()) { hfSettings.exportSettings.pluginConfig.cacheDir = serverSettings.cacheDir; } - // No pull nor pull and start mode + + // No pull nor pull and start mode and no start with local model_path } else { if (result->count("weight-format")) { throw std::logic_error("--weight-format parameter unsupported for Openvino huggingface organization models."); @@ -844,7 +867,10 @@ void CLIParser::prepareGraphStart(HFSettingsImpl& hfSettings, ModelsSettingsImpl modelsSettings.modelName = hfSettings.sourceModel; } - modelsSettings.modelPath = FileSystem::joinPath({hfSettings.downloadPath, hfSettings.sourceModel}); + // Only override modelPath if it wasn't already set via --model_path + if (!result->count("model_path")) { + modelsSettings.modelPath = FileSystem::joinPath({hfSettings.downloadPath, hfSettings.sourceModel}); + } } void CLIParser::prepare(ServerSettingsImpl* serverSettings, ModelsSettingsImpl* modelsSettings) { diff --git a/src/cli_parser.hpp b/src/cli_parser.hpp index f7a1dd0df9..ba2ddd81e4 100644 --- a/src/cli_parser.hpp +++ b/src/cli_parser.hpp @@ -51,6 +51,7 @@ class CLIParser { void prepareGraphStart(HFSettingsImpl& hfSettings, ModelsSettingsImpl& modelsSettings); void prepareConfigExport(ModelsSettingsImpl& modelsSettings); bool isHFPullOrPullAndStart(const std::unique_ptr& result); + bool isGenAIConfigureAndStart(const std::unique_ptr& result); }; } // namespace ovms diff --git a/src/config.cpp b/src/config.cpp index 3222e775b1..748418c33e 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -148,18 +148,22 @@ bool Config::validateUserSettingsInConfigAddRemoveModel(const ModelsSettingsImpl } bool Config::validate() { - if (this->serverSettings.serverMode == HF_PULL_MODE || this->serverSettings.serverMode == HF_PULL_AND_START_MODE) { - if (!serverSettings.hfSettings.sourceModel.size()) { - std::cerr << "source_model parameter is required for pull mode"; - return false; - } - if (!serverSettings.hfSettings.downloadPath.size()) { - std::cerr << "model_repository_path parameter is required for pull mode"; - return false; - } - if (this->serverSettings.hfSettings.task == UNKNOWN_GRAPH) { - std::cerr << "Error: --task parameter not set." << std::endl; - return false; + if (!this->serverSettings.hfSettings.sourceModel.empty() && this->serverSettings.hfSettings.task == UNKNOWN_GRAPH) { + std::cerr << "--source_model should be used combined with --task" << std::endl; + return false; + } + if (this->serverSettings.serverMode == HF_PULL_MODE || this->serverSettings.serverMode == HF_PULL_AND_START_MODE || this->serverSettings.serverMode == GENAI_CONFIGURE_AND_START) { + // When --task is used with --model_path (no HF pulling), sourceModel and downloadPath are not required + bool taskWithModelPath = (this->serverSettings.serverMode == HF_PULL_AND_START_MODE || this->serverSettings.serverMode == GENAI_CONFIGURE_AND_START) && !this->modelsSettings.modelPath.empty(); + if (!taskWithModelPath) { + if (!serverSettings.hfSettings.sourceModel.size()) { + std::cerr << "source_model parameter is required for pull mode"; + return false; + } + if (!serverSettings.hfSettings.downloadPath.size()) { + std::cerr << "model_repository_path parameter is required for pull mode"; + return false; + } } if (this->serverSettings.hfSettings.task == TEXT_GENERATION_GRAPH) { if (!std::holds_alternative(this->serverSettings.hfSettings.graphSettings)) { diff --git a/src/graph_export/graph_export.cpp b/src/graph_export/graph_export.cpp index ee273dbb0c..df59dd33bc 100644 --- a/src/graph_export/graph_export.cpp +++ b/src/graph_export/graph_export.cpp @@ -52,6 +52,20 @@ #endif namespace ovms { +static std::string inMemoryGraphContent; + +bool GraphExport::hasInMemoryGraphContent() { + return !inMemoryGraphContent.empty(); +} + +const std::string& GraphExport::getInMemoryGraphContent() { + return inMemoryGraphContent; +} + +void GraphExport::clearInMemoryGraphContent() { + inMemoryGraphContent.clear(); +} + static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n"); static std::string constructModelsPath(const std::string& modelPath, const std::optional& ggufFilenameOpt) { @@ -91,22 +105,26 @@ std::string GraphExport::getDraftModelDirectoryPath(const std::string& directory } \ auto pluginConfigOpt = std::get>(pluginConfigOrStatus) -static Status createPbtxtFile(const std::string& directoryPath, const std::string& pbtxtContent) { +static Status createPbtxtFile(const std::string& directoryPath, const std::string& pbtxtContent, bool writeToFile) { #if (MEDIAPIPE_DISABLE == 0) ::mediapipe::CalculatorGraphConfig config; - SPDLOG_TRACE("Generated pbtxt: {}", pbtxtContent); + SPDLOG_TRACE("Created graph config file:\n{}", pbtxtContent); bool success = ::google::protobuf::TextFormat::ParseFromString(pbtxtContent, &config); if (!success) { SPDLOG_ERROR("Created graph config file couldn't be parsed - check used task parameters values."); return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; } #endif + if (!writeToFile) { + inMemoryGraphContent = pbtxtContent; + return StatusCode::OK; + } // clang-format on std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); return FileSystem::createFileOverwrite(fullPath, pbtxtContent); } -static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (!std::holds_alternative(hfSettings.graphSettings)) { SPDLOG_ERROR("Graph options not initialized for text generation."); return StatusCode::INTERNAL_ERROR; @@ -198,10 +216,10 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath } } })"; - return createPbtxtFile(directoryPath, oss.str()); + return createPbtxtFile(directoryPath, oss.str(), writeToFile); } -static Status createRerankGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +static Status createRerankGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (!std::holds_alternative(hfSettings.graphSettings)) { SPDLOG_ERROR("Graph options not initialized for reranking."); return StatusCode::INTERNAL_ERROR; @@ -242,10 +260,10 @@ node { } } })"; - return createPbtxtFile(directoryPath, oss.str()); + return createPbtxtFile(directoryPath, oss.str(), writeToFile); } -static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (!std::holds_alternative(hfSettings.graphSettings)) { SPDLOG_ERROR("Graph options not initialized for embeddings."); return StatusCode::INTERNAL_ERROR; @@ -289,10 +307,10 @@ node { oss << R"(} } })"; - return createPbtxtFile(directoryPath, oss.str()); + return createPbtxtFile(directoryPath, oss.str(), writeToFile); } -static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (!std::holds_alternative(hfSettings.graphSettings)) { SPDLOG_ERROR("Graph options not initialized for speech generation."); return StatusCode::INTERNAL_ERROR; @@ -339,11 +357,15 @@ node { } #endif // clang-format on + if (!writeToFile) { + inMemoryGraphContent = oss.str(); + return StatusCode::OK; + } std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); return FileSystem::createFileOverwrite(fullPath, oss.str()); } -static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (!std::holds_alternative(hfSettings.graphSettings)) { SPDLOG_ERROR("Graph options not initialized for speech to text."); return StatusCode::INTERNAL_ERROR; @@ -405,11 +427,15 @@ node { } #endif // clang-format on + if (!writeToFile) { + inMemoryGraphContent = oss.str(); + return StatusCode::OK; + } std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); return FileSystem::createFileOverwrite(fullPath, oss.str()); } -static Status createImageGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +static Status createImageGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (!std::holds_alternative(hfSettings.graphSettings)) { SPDLOG_ERROR("Graph options not initialized for image generation."); return StatusCode::INTERNAL_ERROR; @@ -489,13 +515,13 @@ node: { } )"; // clang-format on - return createPbtxtFile(directoryPath, oss.str()); + return createPbtxtFile(directoryPath, oss.str(), writeToFile); } GraphExport::GraphExport() { } -Status GraphExport::createServableConfig(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { +Status GraphExport::createServableConfig(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { if (directoryPath.empty()) { SPDLOG_ERROR("Directory path empty: {}", directoryPath); return StatusCode::PATH_INVALID; @@ -518,17 +544,17 @@ Status GraphExport::createServableConfig(const std::string& directoryPath, const } } if (hfSettings.task == TEXT_GENERATION_GRAPH) { - return createTextGenerationGraphTemplate(directoryPath, hfSettings); + return createTextGenerationGraphTemplate(directoryPath, hfSettings, writeToFile); } else if (hfSettings.task == EMBEDDINGS_GRAPH) { - return createEmbeddingsGraphTemplate(directoryPath, hfSettings); + return createEmbeddingsGraphTemplate(directoryPath, hfSettings, writeToFile); } else if (hfSettings.task == RERANK_GRAPH) { - return createRerankGraphTemplate(directoryPath, hfSettings); + return createRerankGraphTemplate(directoryPath, hfSettings, writeToFile); } else if (hfSettings.task == IMAGE_GENERATION_GRAPH) { - return createImageGenerationGraphTemplate(directoryPath, hfSettings); + return createImageGenerationGraphTemplate(directoryPath, hfSettings, writeToFile); } else if (hfSettings.task == TEXT_TO_SPEECH_GRAPH) { - return createTextToSpeechGraphTemplate(directoryPath, hfSettings); + return createTextToSpeechGraphTemplate(directoryPath, hfSettings, writeToFile); } else if (hfSettings.task == SPEECH_TO_TEXT_GRAPH) { - return createSpeechToTextGraphTemplate(directoryPath, hfSettings); + return createSpeechToTextGraphTemplate(directoryPath, hfSettings, writeToFile); } else if (hfSettings.task == UNKNOWN_GRAPH) { SPDLOG_ERROR("Graph options not initialized."); return StatusCode::INTERNAL_ERROR; diff --git a/src/graph_export/graph_export.hpp b/src/graph_export/graph_export.hpp index e6f9fdcbef..4b44193610 100644 --- a/src/graph_export/graph_export.hpp +++ b/src/graph_export/graph_export.hpp @@ -27,9 +27,13 @@ class Status; class GraphExport { public: GraphExport(); - Status createServableConfig(const std::string& directoryPath, const HFSettingsImpl& graphSettings); + Status createServableConfig(const std::string& directoryPath, const HFSettingsImpl& graphSettings, bool writeToFile = true); static std::variant, Status> createPluginString(const ExportSettings& exportSettings); static std::string getDraftModelDirectoryName(std::string draftModel); static std::string getDraftModelDirectoryPath(const std::string& directoryPath, const std::string& draftModel); + + static bool hasInMemoryGraphContent(); + static const std::string& getInMemoryGraphContent(); + static void clearInMemoryGraphContent(); }; } // namespace ovms diff --git a/src/mediapipe_internal/BUILD b/src/mediapipe_internal/BUILD index a01a96b807..35456ec281 100644 --- a/src/mediapipe_internal/BUILD +++ b/src/mediapipe_internal/BUILD @@ -79,6 +79,7 @@ ovms_cc_library( "//src:libovms_servable_name_checker", "//src/metrics:libovms_metric_provider", "//src/filesystem:libovmsfilesystem", + "//src/graph_export:graph_export", "//src:libovms_version", "//src:libovms_execution_context", "//src:libovmstimer", diff --git a/src/mediapipe_internal/mediapipegraphconfig.cpp b/src/mediapipe_internal/mediapipegraphconfig.cpp index d767942535..200de9c289 100644 --- a/src/mediapipe_internal/mediapipegraphconfig.cpp +++ b/src/mediapipe_internal/mediapipegraphconfig.cpp @@ -25,6 +25,7 @@ #include #include "src/filesystem/filesystem.hpp" +#include "src/graph_export/graph_export.hpp" #include "../status.hpp" namespace ovms { @@ -129,6 +130,10 @@ Status MediapipeGraphConfig::parseNode(const rapidjson::Value& v) { } void MediapipeGraphConfig::logGraphConfigContent() const { + if (GraphExport::hasInMemoryGraphContent()) { + SPDLOG_DEBUG("Content of in-memory graph config:\n{}", GraphExport::getInMemoryGraphContent()); + return; + } std::ifstream fileStream(this->graphPath); if (!fileStream.is_open()) { SPDLOG_ERROR("Failed to open file: {}", this->graphPath); diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index d26806edab..06e9552f4b 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -25,7 +25,9 @@ #include #include "../execution_context.hpp" +#include "../config.hpp" #include "src/filesystem/filesystem.hpp" +#include "src/graph_export/graph_export.hpp" #include "src/metrics/metric.hpp" #include "../model_metric_reporter.hpp" #include "../ov_utils.hpp" @@ -60,6 +62,13 @@ const tensor_map_t MediapipeGraphDefinition::getOutputsInfo() const { } Status MediapipeGraphDefinition::validateForConfigFileExistence() { + if (GraphExport::hasInMemoryGraphContent() && ovms::Config::instance().getServerSettings().serverMode == GENAI_CONFIGURE_AND_START) { + const std::string& content = GraphExport::getInMemoryGraphContent(); + this->chosenConfig = content; + this->mgconfig.setCurrentGraphPbTxtMD5(ovms::FileSystem::getStringMD5(content)); + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Using in-memory graph content for mediapipe graph definition: {}", this->getName()); + return StatusCode::OK; + } std::ifstream ifs(this->mgconfig.getGraphPath()); if (!ifs.is_open()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to open mediapipe graph definition: {}, file: {}\n", this->getName(), this->mgconfig.getGraphPath()); diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index 67a0a38399..fd14f267d1 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -55,6 +55,7 @@ #include "dags/pipelinedefinition.hpp" #include "filesystem/filesystem.hpp" #include "filesystem/filesystemfactory.hpp" +#include "graph_export/graph_export.hpp" #include "logging.hpp" #if (MEDIAPIPE_DISABLE == 0) #include "mediapipe_internal/mediapipefactory.hpp" @@ -228,7 +229,8 @@ Status ModelManager::startFromConfig() { std::vector mediapipesInConfigFile; std::ifstream ifs(mpConfig.getGraphPath()); - if (ifs.is_open()) { + bool graphAvailable = ifs.is_open() || (GraphExport::hasInMemoryGraphContent() && config.getServerSettings().serverMode == GENAI_CONFIGURE_AND_START); + if (graphAvailable) { // Single model with graph.pbtxt, check if user passed model unsupported model parameters in cmd arguments status = ModelManager::validateUserSettingsInSingleModelCliGraphStart(config.getModelSettings()); if (!status.ok()) @@ -405,10 +407,13 @@ bool ModelManager::CheckStartFromGraph(std::string inputPath, MediapipeGraphConf if (ifs.is_open()) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Graph: {} path: {} exists", mpConfig.getGraphName(), mpConfig.getGraphPath()); return true; - } else { - SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Graph: {} path: {} does not exist", mpConfig.getGraphName(), mpConfig.getGraphPath()); - return false; } + if (GraphExport::hasInMemoryGraphContent() && Config::instance().getServerSettings().serverMode == GENAI_CONFIGURE_AND_START) { + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Graph: {} using in-memory graph content", mpConfig.getGraphName()); + return true; + } + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Graph: {} path: {} does not exist", mpConfig.getGraphName(), mpConfig.getGraphPath()); + return false; } Status ModelManager::validateUserSettingsInSingleModelCliGraphStart(const ModelsSettingsImpl& modelsSettings) { diff --git a/src/pull_module/hf_pull_model_module.cpp b/src/pull_module/hf_pull_model_module.cpp index b73cad6638..24b39686cc 100644 --- a/src/pull_module/hf_pull_model_module.cpp +++ b/src/pull_module/hf_pull_model_module.cpp @@ -151,12 +151,11 @@ Status HfPullModelModule::clone() const { } GraphExport graphExporter; - status = graphExporter.createServableConfig(graphDirectory, this->hfSettings); + status = graphExporter.createServableConfig(graphDirectory, this->hfSettings, true); // when downloading from HF we always create config file, but when using local model with --task we create config in memory without writing to file if (!status.ok()) { return status; } std::cout << "Graph: graph.pbtxt created in: " << graphDirectory << std::endl; - return StatusCode::OK; } diff --git a/src/server.cpp b/src/server.cpp index bdf572f76c..25243f352b 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -53,6 +53,7 @@ #include "capi_frontend/server_settings.hpp" #include "cli_parser.hpp" #include "config.hpp" +#include "graph_export/graph_export.hpp" #include "grpcservermodule.hpp" #include "http_server.hpp" #include "httpservermodule.hpp" @@ -419,9 +420,20 @@ Status Server::startModules(ovms::Config& config) { } auto hfModule = dynamic_cast(it->second.get()); status = hfModule->clone(); + // Return only on clone error; otherwise start the rest of modules + if (!status.ok()) + return status; + } + if (config.getServerSettings().serverMode == GENAI_CONFIGURE_AND_START) { + // --task with --model_path: create graph in memory without HF download + GraphExport graphExporter; + const auto& hfSettings = config.getServerSettings().hfSettings; + status = graphExporter.createServableConfig(config.modelPath(), hfSettings, false); if (!status.ok()) { + SPDLOG_ERROR("Failed to create in-memory graph config: {}", status.string()); return status; } + SPDLOG_INFO("Graph config created in memory from model_path: {}", config.modelPath()); } GET_MODULE(SERVABLE_MANAGER_MODULE_NAME, it); START_MODULE(it); @@ -488,6 +500,7 @@ void Server::shutdownModules() { ensureModuleShutdown(PYTHON_INTERPRETER_MODULE_NAME); } #endif + GraphExport::clearInMemoryGraphContent(); // we need to be able to quickly start grpc or start it without port // this is because the OS can have a delay between freeing up port before it can be requested and used again std::shared_lock lock(modulesMtx); @@ -533,7 +546,7 @@ int Server::startServerFromSettings(ServerSettingsImpl& serverSettings, ModelsSe return statusToExitCode(ret); } while (!getShutdownStatus() && - (serverSettings.serverMode == HF_PULL_AND_START_MODE || serverSettings.serverMode == SERVING_MODELS_MODE)) { + (serverSettings.serverMode == HF_PULL_AND_START_MODE || serverSettings.serverMode == SERVING_MODELS_MODE || serverSettings.serverMode == GENAI_CONFIGURE_AND_START)) { std::this_thread::sleep_for(std::chrono::milliseconds(200)); } } catch (const std::exception& e) { diff --git a/src/test/graph_export_test.cpp b/src/test/graph_export_test.cpp index 70961e3493..2c31388542 100644 --- a/src/test/graph_export_test.cpp +++ b/src/test/graph_export_test.cpp @@ -551,6 +551,7 @@ class GraphCreationTest : public TestWithTempDir { TEST_F(GraphCreationTest, positiveDefaultWithVersionString) { ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::unique_ptr graphExporter = std::make_unique(); auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); @@ -640,6 +641,7 @@ TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { TEST_F(GraphCreationTest, positiveDefault) { ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::unique_ptr graphExporter = std::make_unique(); auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); @@ -651,6 +653,7 @@ TEST_F(GraphCreationTest, positiveDefault) { TEST_F(GraphCreationTest, positiveDraftAndFuse) { ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; ovms::TextGenGraphSettingsImpl graphSettings; graphSettings.draftModelDirName = "/ovms/src/test/llm_testing/facebook/opt-125m"; graphSettings.dynamicSplitFuse = "false"; @@ -668,6 +671,7 @@ TEST_F(GraphCreationTest, positiveDraftAndFuse) { TEST_F(GraphCreationTest, positiveGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::unique_ptr graphExporter = std::make_unique(); @@ -681,6 +685,7 @@ TEST_F(GraphCreationTest, positiveGGUF) { TEST_F(GraphCreationTest, WillOverwriteExistingGraphPbtxtGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::unique_ptr graphExporter = std::make_unique(); @@ -931,6 +936,7 @@ TEST_F(GraphCreationTest, speechToTextCreatedPbtxtInvalid) { TEST_F(GraphCreationTest, positivePluginConfigAll) { ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; ovms::TextGenGraphSettingsImpl graphSettings; hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; hfSettings.exportSettings.pluginConfig.maxPromptLength = 123; @@ -949,6 +955,7 @@ TEST_F(GraphCreationTest, positivePluginConfigAll) { TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; ovms::TextGenGraphSettingsImpl graphSettings; graphSettings.reasoningParser = "REASONING_PARSER"; graphSettings.toolParser = "TOOL_PARSER"; @@ -967,6 +974,7 @@ TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { TEST_F(GraphCreationTest, positivePluginConfigOne) { ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; ovms::TextGenGraphSettingsImpl graphSettings; hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; hfSettings.graphSettings = std::move(graphSettings); diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index ad69a9692f..a7c8e3e975 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -46,6 +46,7 @@ #include "../../llm/text_utils.hpp" #include "../../ov_utils.hpp" #include "../../server.hpp" +#include "src/graph_export/graph_export.hpp" #include "rapidjson/document.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" @@ -4532,3 +4533,132 @@ TEST_F(IsolatedServableTests, PromtSizeBetweenDefaultAndNonDefaultMaxPromptLenNP } // TODO: Add missing tests for reading max prompt len property from configuration + +class LLMStartWithTaskParameter : public ::testing::Test { +protected: + static std::unique_ptr t; + std::string srcModelDir = getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"); +#ifdef __linux__ + std::string tempDir; + std::string modelDir; + std::string graphPath; +#else + std::string modelDir = srcModelDir; + std::string graphPath = modelDir + "/graph.pbtxt"; + std::string graphPathRenamed = modelDir + "/graph.pbtxt.bak"; +#endif + + void SetUp() override { + GraphExport::clearInMemoryGraphContent(); +#ifdef __linux__ + tempDir = std::filesystem::temp_directory_path().string() + "/LLMStartWithTaskParameter_" + ::testing::UnitTest::GetInstance()->current_test_info()->name(); + std::filesystem::remove_all(tempDir); + std::filesystem::copy(srcModelDir, tempDir, std::filesystem::copy_options::recursive); + modelDir = tempDir; + graphPath = modelDir + "/graph.pbtxt"; +#endif + } + void TearDown() override { + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(1); + if (t && t->joinable()) + t->join(); + server.setShutdownRequest(0); + GraphExport::clearInMemoryGraphContent(); +#ifdef __linux__ + std::filesystem::remove_all(tempDir); +#else + // Restore graph.pbtxt if it was renamed + if (std::filesystem::exists(graphPathRenamed)) { + if (std::filesystem::exists(graphPath)) { + std::filesystem::remove(graphPath); + } + std::filesystem::rename(graphPathRenamed, graphPath); + } +#endif + } +}; + +std::unique_ptr LLMStartWithTaskParameter::t = nullptr; + +TEST_F(LLMStartWithTaskParameter, StartWithModelPathAndTaskWithoutGraphFile) { +#ifdef __linux__ + // On Linux models are on readonly FS - we use a temp copy with graph.pbtxt removed + std::filesystem::remove(graphPath); +#else + // On Windows models are on RW FS - rename graph.pbtxt so we can check it's not recreated + if (std::filesystem::exists(graphPath)) { + std::filesystem::rename(graphPath, graphPathRenamed); + } +#endif + + std::string port = "9173"; + ovms::Server& server = ovms::Server::instance(); + ::SetUpServer(t, server, port, + modelDir.c_str(), + "SmolLM2", + 60, + "text_generation"); + ASSERT_EQ(server.getModuleState(ovms::SERVABLE_MANAGER_MODULE_NAME), ovms::ModuleState::INITIALIZED); + ASSERT_FALSE(std::filesystem::exists(graphPath)) << "graph.pbtxt should not be created when using --task with --model_path"; +} + +TEST_F(LLMStartWithTaskParameter, StartWithModelPathAndTaskDoesNotModifyExistingGraph) { + ASSERT_TRUE(std::filesystem::exists(graphPath)) << "graph.pbtxt must exist for this test"; + auto modTimeBefore = std::filesystem::last_write_time(graphPath); + + std::string port = "9174"; + ovms::Server& server = ovms::Server::instance(); + ::SetUpServer(t, server, port, + modelDir.c_str(), + "SmolLM2", + 60, + "text_generation"); + ASSERT_EQ(server.getModuleState(ovms::SERVABLE_MANAGER_MODULE_NAME), ovms::ModuleState::INITIALIZED); + + auto modTimeAfter = std::filesystem::last_write_time(graphPath); + ASSERT_EQ(modTimeBefore, modTimeAfter) << "graph.pbtxt should not be modified when using --task with --model_path"; +} + +TEST_F(LLMStartWithTaskParameter, StartWithModelPathAndTaskAndValidPipelineType) { + std::string port = "9175"; + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(0); + randomizeAndEnsureFree(port); + std::string fullModelPath = getGenericFullPathForSrcTest(modelDir.c_str()); + char* argv[] = {(char*)"ovms", + (char*)"--model_name", (char*)"SmolLM2", + (char*)"--model_path", (char*)fullModelPath.c_str(), + (char*)"--port", (char*)port.c_str(), + (char*)"--task", (char*)"text_generation", + (char*)"--pipeline_type", (char*)"LM_CB"}; + int argc = 11; + t.reset(new std::thread([&argc, &argv, &server]() { + EXPECT_EQ(EXIT_SUCCESS, server.start(argc, argv)); + })); + EnsureServerStartedWithTimeout(server, 60); + ASSERT_EQ(server.getModuleState(ovms::SERVABLE_MANAGER_MODULE_NAME), ovms::ModuleState::INITIALIZED); +} + +TEST_F(LLMStartWithTaskParameter, StartWithModelPathAndTaskAndInvalidPipelineType) { + std::string port = "9176"; + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(0); + randomizeAndEnsureFree(port); + std::string fullModelPath = getGenericFullPathForSrcTest(modelDir.c_str()); + char* argv[] = {(char*)"ovms", + (char*)"--model_name", (char*)"SmolLM2", + (char*)"--model_path", (char*)fullModelPath.c_str(), + (char*)"--port", (char*)port.c_str(), + (char*)"--task", (char*)"text_generation", + (char*)"--pipeline_type", (char*)"invalid"}; + int argc = 11; + t.reset(new std::thread([&argc, &argv, &server]() { + EXPECT_NE(EXIT_SUCCESS, server.start(argc, argv)); + })); + // Validation failure should complete quickly + if (t && t->joinable()) + t->join(); + ASSERT_NE(server.getModuleState(ovms::SERVABLE_MANAGER_MODULE_NAME), ovms::ModuleState::INITIALIZED) + << "Server should not start with invalid pipeline_type"; +} diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index 6b76d0e2fa..e7ad8d8175 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -786,6 +786,18 @@ TEST_F(OvmsConfigDeathTest, hfPullNoSourceModel) { EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "source_model parameter is required for pull mode"); } +TEST_F(OvmsConfigDeathTest, hfSourceModelWithoutTask) { + char* n_argv[] = { + "ovms", + "--source_model", + "some/model", + "--model_repository_path", + "/some/path", + }; + int arg_count = 5; + EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "--source_model should be used combined with --task"); +} + TEST_F(OvmsConfigDeathTest, hfPullNoRepositoryPath) { char* n_argv[] = { "ovms", diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index e9044ad2cd..726883112b 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -33,6 +33,7 @@ #include "src/test/test_with_temp_dir.hpp" #include "src/filesystem/filesystem.hpp" #include "src/pull_module/hf_pull_model_module.hpp" +#include "src/graph_export/graph_export.hpp" #include "src/pull_module/libgit2.hpp" #include "src/pull_module/optimum_export.hpp" #include "src/servables_config_manager_module/listmodels.hpp" @@ -357,7 +358,6 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; } @@ -421,7 +421,6 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStartModelOutsideOvOrg) { ASSERT_EQ(std::filesystem::exists(modelPath), true) << modelPath; ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; } @@ -1028,8 +1027,8 @@ TEST(ServerModulesBehaviorTests, PullAndStartModeErrorAndExpectFailAndCheckOther DefaultEmptyValuesConfig config; config.getServerSettings().serverMode = ovms::HF_PULL_AND_START_MODE; auto retCode = server.startModules(config); - // Empty config.getServerSettings().hfSettings.downloadPath - // [error][libit2.cpp:336] Libgit2 clone error: 6 message: cannot pick working directory for non-bare repository that isn't a '.git' directory + // Empty sourceModel: takes task+model_path path, but model_path is empty + // -> GraphExport::createServableConfig fails with PATH_INVALID EXPECT_TRUE(!retCode.ok()) << retCode.string(); serverGuard = std::make_unique(server); EXPECT_TRUE(server.getModule(ovms::HF_MODEL_PULL_MODULE_NAME) != nullptr); diff --git a/src/test/test_utils.cpp b/src/test/test_utils.cpp index 5c21e15158..a9e22659f9 100644 --- a/src/test/test_utils.cpp +++ b/src/test/test_utils.cpp @@ -875,11 +875,12 @@ void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::str void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::string& port, const char* modelPath, const char* modelName, int timeoutSeconds) { server.setShutdownRequest(0); randomizeAndEnsureFree(port); + std::string fullModelPath = getGenericFullPathForSrcTest(modelPath); char* argv[] = {(char*)"ovms", (char*)"--model_name", (char*)modelName, (char*)"--model_path", - (char*)getGenericFullPathForSrcTest(modelPath).c_str(), + (char*)fullModelPath.c_str(), (char*)"--port", (char*)port.c_str()}; int argc = 7; @@ -889,6 +890,26 @@ void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::str EnsureServerStartedWithTimeout(server, timeoutSeconds); } +void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::string& port, const char* modelPath, const char* modelName, int timeoutSeconds, const char* task) { + server.setShutdownRequest(0); + randomizeAndEnsureFree(port); + std::string fullModelPath = getGenericFullPathForSrcTest(modelPath); + char* argv[] = {(char*)"ovms", + (char*)"--model_name", + (char*)modelName, + (char*)"--model_path", + (char*)fullModelPath.c_str(), + (char*)"--port", + (char*)port.c_str(), + (char*)"--task", + (char*)task}; + int argc = 9; + t.reset(new std::thread([&argc, &argv, &server]() { + EXPECT_EQ(EXIT_SUCCESS, server.start(argc, argv)); + })); + EnsureServerStartedWithTimeout(server, timeoutSeconds); +} + std::shared_ptr createTensorInfoCopyWithPrecision(std::shared_ptr src, ovms::Precision newPrecision) { return std::make_shared( src->getName(), diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp index 7f5c80202d..8fb0885cda 100644 --- a/src/test/test_utils.hpp +++ b/src/test/test_utils.hpp @@ -796,6 +796,7 @@ void SetUpServerForDownloadAndStart(std::unique_ptr& t, ovms::Serve */ void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::string& port, const char* configPath, int timeoutSeconds = SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS, std::string apiKeyFile = ""); void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::string& port, const char* modelPath, const char* modelName, int timeoutSeconds = SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS); +void SetUpServer(std::unique_ptr& t, ovms::Server& server, std::string& port, const char* modelPath, const char* modelName, int timeoutSeconds, const char* task); class ConstructorEnabledConfig : public ovms::Config { public: