-
Notifications
You must be signed in to change notification settings - Fork 251
configuring genai model deployment with model readonly filesystem #4139
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4e9850c
933e65a
84f9cbb
2fc1646
02846f3
8167677
eafe795
fdd58ed
d00e994
5916e53
e34585a
030a31e
826d183
2f668f4
2fd8c77
14d1017
b35e4b2
eb95ca7
67ef822
af16bb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -344,8 +344,8 @@ std::variant<bool, std::pair<int, std::string>> CLIParser::parse(int argc, char* | |
|
|
||
| result = std::make_unique<cxxopts::ParseResult>(options->parse(argc, argv)); | ||
|
|
||
| // HF pull mode or pull and start mode | ||
| if (isHFPullOrPullAndStart(this->result)) { | ||
| // HF pull mode or pull and start mode or starting from local folder with graph created in memory | ||
| if (isHFPullOrPullAndStart(this->result) || isGenAIConfigureAndStart(this->result)) { | ||
| std::vector<std::string> unmatchedOptions; | ||
| GraphExportType task; | ||
| if (result->count("task")) { | ||
|
|
@@ -692,13 +692,23 @@ void CLIParser::prepareModel(ModelsSettingsImpl& modelsSettings, HFSettingsImpl& | |
| } | ||
|
|
||
| bool CLIParser::isHFPullOrPullAndStart(const std::unique_ptr<cxxopts::ParseResult>& result) { | ||
| return (result->count("pull") || result->count("task")); | ||
| return (result->count("pull") || (result->count("task") && result->count("source_model"))); | ||
| } | ||
|
|
||
| bool CLIParser::isGenAIConfigureAndStart(const std::unique_ptr<cxxopts::ParseResult>& result) { | ||
| return (result->count("task") && !result->count("source_model") && !result->count("pull")); | ||
| } | ||
|
|
||
| void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& hfSettings, const std::string& modelName) { | ||
| // Always propagate source_model so validation can detect misuse | ||
| if (result->count("source_model")) { | ||
| hfSettings.sourceModel = result->operator[]("source_model").as<std::string>(); | ||
| } | ||
| // Ovms Pull models mode || pull and start models mode | ||
| if (isHFPullOrPullAndStart(this->result)) { | ||
| if (result->count("pull")) { | ||
| if (isHFPullOrPullAndStart(this->result) || isGenAIConfigureAndStart(this->result)) { | ||
| if (isGenAIConfigureAndStart(this->result)) { | ||
| serverSettings.serverMode = GENAI_CONFIGURE_AND_START; | ||
| } else if (result->count("pull")) { | ||
| serverSettings.serverMode = HF_PULL_MODE; | ||
| } else { | ||
| serverSettings.serverMode = HF_PULL_AND_START_MODE; | ||
|
|
@@ -711,8 +721,11 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& | |
| hfSettings.overwriteModels = result->operator[]("overwrite_models").as<bool>(); | ||
| } | ||
| if (result->count("source_model")) { | ||
| // Already set above, but keep the original flow for downloadType logic | ||
| hfSettings.sourceModel = result->operator[]("source_model").as<std::string>(); | ||
| } else if (result->count("model_name")) { | ||
| } else if (result->count("model_name") && !result->count("model_path")) { | ||
| // Only use model_name as source_model when model_path is not set | ||
| // (when model_path is set, user wants to use local model without HF pull) | ||
| hfSettings.sourceModel = result->operator[]("model_name").as<std::string>(); | ||
| } | ||
| if ((result->count("weight-format") || result->count("extra_quantization_params")) && isOptimumCliDownload(hfSettings.sourceModel, hfSettings.ggufFilename)) { | ||
|
|
@@ -732,6 +745,11 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& | |
| if (result->count("vocoder")) | ||
| hfSettings.exportSettings.vocoder = result->operator[]("vocoder").as<std::string>(); | ||
| hfSettings.downloadPath = result->operator[]("model_repository_path").as<std::string>(); | ||
| // When --task is used with --model_path but without --pull/--source_model, | ||
| // use model_path as the model location (no HF download needed) | ||
| if (!result->count("pull") && !result->count("source_model") && result->count("model_path")) { | ||
| hfSettings.exportSettings.modelPath = result->operator[]("model_path").as<std::string>(); | ||
| } | ||
| if (result->count("task")) { | ||
| hfSettings.task = stringToEnum(result->operator[]("task").as<std::string>()); | ||
| switch (hfSettings.task) { | ||
|
|
@@ -798,7 +816,8 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& | |
| if (!serverSettings.cacheDir.empty()) { | ||
| hfSettings.exportSettings.pluginConfig.cacheDir = serverSettings.cacheDir; | ||
| } | ||
| // No pull nor pull and start mode | ||
|
|
||
| // No pull nor pull and start mode and no start with local model_path | ||
| } else { | ||
| if (result->count("weight-format")) { | ||
| throw std::logic_error("--weight-format parameter unsupported for Openvino huggingface organization models."); | ||
|
|
@@ -840,11 +859,14 @@ void CLIParser::prepareGraphStart(HFSettingsImpl& hfSettings, ModelsSettingsImpl | |
| // Model settings | ||
| if (result->count("model_name")) { | ||
| modelsSettings.modelName = result->operator[]("model_name").as<std::string>(); | ||
| } else { | ||
| } else if (!hfSettings.sourceModel.empty()) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why this change? What happens with |
||
| modelsSettings.modelName = hfSettings.sourceModel; | ||
| } | ||
|
|
||
| modelsSettings.modelPath = FileSystem::joinPath({hfSettings.downloadPath, hfSettings.sourceModel}); | ||
| // Only override modelPath if it wasn't already set via --model_path | ||
| if (!result->count("model_path")) { | ||
| modelsSettings.modelPath = FileSystem::joinPath({hfSettings.downloadPath, hfSettings.sourceModel}); | ||
| } | ||
| } | ||
|
|
||
| void CLIParser::prepare(ServerSettingsImpl* serverSettings, ModelsSettingsImpl* modelsSettings) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -52,6 +52,20 @@ | |||||
| #endif | ||||||
| namespace ovms { | ||||||
|
|
||||||
| static std::string s_inMemoryGraphContent; | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why |
||||||
|
|
||||||
| bool GraphExport::hasInMemoryGraphContent() { | ||||||
| return !s_inMemoryGraphContent.empty(); | ||||||
| } | ||||||
|
|
||||||
| const std::string& GraphExport::getInMemoryGraphContent() { | ||||||
| return s_inMemoryGraphContent; | ||||||
| } | ||||||
|
|
||||||
| void GraphExport::clearInMemoryGraphContent() { | ||||||
| s_inMemoryGraphContent.clear(); | ||||||
| } | ||||||
|
|
||||||
| static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n"); | ||||||
|
|
||||||
| static std::string constructModelsPath(const std::string& modelPath, const std::optional<std::string>& ggufFilenameOpt) { | ||||||
|
|
@@ -91,22 +105,26 @@ std::string GraphExport::getDraftModelDirectoryPath(const std::string& directory | |||||
| } \ | ||||||
| auto pluginConfigOpt = std::get<std::optional<std::string>>(pluginConfigOrStatus) | ||||||
|
|
||||||
| static Status createPbtxtFile(const std::string& directoryPath, const std::string& pbtxtContent) { | ||||||
| static Status createPbtxtFile(const std::string& directoryPath, const std::string& pbtxtContent, bool writeToFile) { | ||||||
| #if (MEDIAPIPE_DISABLE == 0) | ||||||
| ::mediapipe::CalculatorGraphConfig config; | ||||||
| SPDLOG_TRACE("Generated pbtxt: {}", pbtxtContent); | ||||||
| SPDLOG_TRACE("Generated pbtxt\n: {}", pbtxtContent); | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| bool success = ::google::protobuf::TextFormat::ParseFromString(pbtxtContent, &config); | ||||||
| if (!success) { | ||||||
| SPDLOG_ERROR("Created graph config file couldn't be parsed - check used task parameters values."); | ||||||
| return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; | ||||||
| } | ||||||
| #endif | ||||||
| if (!writeToFile) { | ||||||
| s_inMemoryGraphContent = pbtxtContent; | ||||||
| return StatusCode::OK; | ||||||
| } | ||||||
| // clang-format on | ||||||
| std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); | ||||||
| return FileSystem::createFileOverwrite(fullPath, pbtxtContent); | ||||||
|
Comment on lines
+118
to
124
|
||||||
| } | ||||||
|
|
||||||
| static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (!std::holds_alternative<TextGenGraphSettingsImpl>(hfSettings.graphSettings)) { | ||||||
| SPDLOG_ERROR("Graph options not initialized for text generation."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
@@ -198,10 +216,10 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath | |||||
| } | ||||||
| } | ||||||
| })"; | ||||||
| return createPbtxtFile(directoryPath, oss.str()); | ||||||
| return createPbtxtFile(directoryPath, oss.str(), writeToFile); | ||||||
| } | ||||||
|
|
||||||
| static Status createRerankGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| static Status createRerankGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (!std::holds_alternative<RerankGraphSettingsImpl>(hfSettings.graphSettings)) { | ||||||
| SPDLOG_ERROR("Graph options not initialized for reranking."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
@@ -242,10 +260,10 @@ node { | |||||
| } | ||||||
| } | ||||||
| })"; | ||||||
| return createPbtxtFile(directoryPath, oss.str()); | ||||||
| return createPbtxtFile(directoryPath, oss.str(), writeToFile); | ||||||
| } | ||||||
|
|
||||||
| static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (!std::holds_alternative<EmbeddingsGraphSettingsImpl>(hfSettings.graphSettings)) { | ||||||
| SPDLOG_ERROR("Graph options not initialized for embeddings."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
@@ -289,10 +307,10 @@ node { | |||||
| oss << R"(} | ||||||
| } | ||||||
| })"; | ||||||
| return createPbtxtFile(directoryPath, oss.str()); | ||||||
| return createPbtxtFile(directoryPath, oss.str(), writeToFile); | ||||||
| } | ||||||
|
|
||||||
| static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (!std::holds_alternative<TextToSpeechGraphSettingsImpl>(hfSettings.graphSettings)) { | ||||||
| SPDLOG_ERROR("Graph options not initialized for speech generation."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
@@ -339,11 +357,15 @@ node { | |||||
| } | ||||||
| #endif | ||||||
| // clang-format on | ||||||
| if (!writeToFile) { | ||||||
| s_inMemoryGraphContent = oss.str(); | ||||||
| return StatusCode::OK; | ||||||
| } | ||||||
| std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); | ||||||
| return FileSystem::createFileOverwrite(fullPath, oss.str()); | ||||||
| } | ||||||
|
|
||||||
| static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (!std::holds_alternative<SpeechToTextGraphSettingsImpl>(hfSettings.graphSettings)) { | ||||||
| SPDLOG_ERROR("Graph options not initialized for speech to text."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
@@ -405,11 +427,15 @@ node { | |||||
| } | ||||||
| #endif | ||||||
| // clang-format on | ||||||
| if (!writeToFile) { | ||||||
| s_inMemoryGraphContent = oss.str(); | ||||||
| return StatusCode::OK; | ||||||
| } | ||||||
| std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); | ||||||
| return FileSystem::createFileOverwrite(fullPath, oss.str()); | ||||||
| } | ||||||
|
|
||||||
| static Status createImageGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| static Status createImageGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (!std::holds_alternative<ImageGenerationGraphSettingsImpl>(hfSettings.graphSettings)) { | ||||||
| SPDLOG_ERROR("Graph options not initialized for image generation."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
@@ -489,13 +515,13 @@ node: { | |||||
| } | ||||||
| )"; | ||||||
| // clang-format on | ||||||
| return createPbtxtFile(directoryPath, oss.str()); | ||||||
| return createPbtxtFile(directoryPath, oss.str(), writeToFile); | ||||||
| } | ||||||
|
|
||||||
| GraphExport::GraphExport() { | ||||||
| } | ||||||
|
|
||||||
| Status GraphExport::createServableConfig(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { | ||||||
| Status GraphExport::createServableConfig(const std::string& directoryPath, const HFSettingsImpl& hfSettings, bool writeToFile) { | ||||||
| if (directoryPath.empty()) { | ||||||
| SPDLOG_ERROR("Directory path empty: {}", directoryPath); | ||||||
| return StatusCode::PATH_INVALID; | ||||||
|
|
@@ -518,17 +544,17 @@ Status GraphExport::createServableConfig(const std::string& directoryPath, const | |||||
| } | ||||||
| } | ||||||
| if (hfSettings.task == TEXT_GENERATION_GRAPH) { | ||||||
| return createTextGenerationGraphTemplate(directoryPath, hfSettings); | ||||||
| return createTextGenerationGraphTemplate(directoryPath, hfSettings, writeToFile); | ||||||
| } else if (hfSettings.task == EMBEDDINGS_GRAPH) { | ||||||
| return createEmbeddingsGraphTemplate(directoryPath, hfSettings); | ||||||
| return createEmbeddingsGraphTemplate(directoryPath, hfSettings, writeToFile); | ||||||
| } else if (hfSettings.task == RERANK_GRAPH) { | ||||||
| return createRerankGraphTemplate(directoryPath, hfSettings); | ||||||
| return createRerankGraphTemplate(directoryPath, hfSettings, writeToFile); | ||||||
| } else if (hfSettings.task == IMAGE_GENERATION_GRAPH) { | ||||||
| return createImageGenerationGraphTemplate(directoryPath, hfSettings); | ||||||
| return createImageGenerationGraphTemplate(directoryPath, hfSettings, writeToFile); | ||||||
| } else if (hfSettings.task == TEXT_TO_SPEECH_GRAPH) { | ||||||
| return createTextToSpeechGraphTemplate(directoryPath, hfSettings); | ||||||
| return createTextToSpeechGraphTemplate(directoryPath, hfSettings, writeToFile); | ||||||
| } else if (hfSettings.task == SPEECH_TO_TEXT_GRAPH) { | ||||||
| return createSpeechToTextGraphTemplate(directoryPath, hfSettings); | ||||||
| return createSpeechToTextGraphTemplate(directoryPath, hfSettings, writeToFile); | ||||||
| } else if (hfSettings.task == UNKNOWN_GRAPH) { | ||||||
| SPDLOG_ERROR("Graph options not initialized."); | ||||||
| return StatusCode::INTERNAL_ERROR; | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
|
|
||
| #include "../execution_context.hpp" | ||
| #include "src/filesystem/filesystem.hpp" | ||
| #include "src/graph_export/graph_export.hpp" | ||
| #include "src/metrics/metric.hpp" | ||
| #include "../model_metric_reporter.hpp" | ||
| #include "../ov_utils.hpp" | ||
|
|
@@ -60,6 +61,13 @@ const tensor_map_t MediapipeGraphDefinition::getOutputsInfo() const { | |
| } | ||
|
|
||
| Status MediapipeGraphDefinition::validateForConfigFileExistence() { | ||
| if (GraphExport::hasInMemoryGraphContent()) { | ||
| const std::string& content = GraphExport::getInMemoryGraphContent(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it global? Not tied to any specific servable object?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, assumption is that only one graph can be stored in memory. using multiple models requires creating graph.pbtxt in model folder via --pull command. |
||
| this->chosenConfig = content; | ||
| this->mgconfig.setCurrentGraphPbTxtMD5(ovms::FileSystem::getStringMD5(content)); | ||
| SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Using in-memory graph content for mediapipe graph definition: {}", this->getName()); | ||
| return StatusCode::OK; | ||
| } | ||
|
Comment on lines
63
to
+70
|
||
| std::ifstream ifs(this->mgconfig.getGraphPath()); | ||
| if (!ifs.is_open()) { | ||
| SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to open mediapipe graph definition: {}, file: {}\n", this->getName(), this->mgconfig.getGraphPath()); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The name says "GenAI", but it has nothing to do with OpenVINO GenAI; I don't think it's good naming.