Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2587,6 +2587,7 @@ cc_test(
"@mediapipe//mediapipe/calculators/ovms:ovms_calculator",
"@mediapipe//mediapipe/framework:calculator_runner",
":text2image_test",
":lora_adapter_test",
],
"//:disable_mediapipe" :
[
Expand All @@ -2598,6 +2599,22 @@ cc_test(
linkopts = LINKOPTS_ADJUSTED,
)

# Test library for LoRA adapter initialization (src/test/llm/lora_adapter_test.cpp).
# alwayslink keeps the gtest test registrars from being dropped by the linker.
cc_library(
    name = "lora_adapter_test",
    srcs = [
        "test/llm/lora_adapter_test.cpp",
    ],
    copts = COPTS_TESTS,
    linkstatic = 1,
    local_defines = COMMON_LOCAL_DEFINES,
    alwayslink = True,
    deps = [
        ":test_test_with_temp_dir",
        "//src/llm:genai_servables",
        "@com_google_googletest//:gtest",
    ],
)

cc_library(
name = "test_constructor_enabled_model_manager",
hdrs = ["test/constructor_enabled_model_manager.hpp",],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}

status = initializeLoraAdapters(nodeOptions, graphPath, properties);
if (!status.ok()) {
return status;
}

status = JsonParser::parsePluginConfig(nodeOptions.plugin_config(), properties->pluginConfig);
if (!status.ok()) {
SPDLOG_ERROR("Error during llm node plugin_config option parsing to JSON: {}", nodeOptions.plugin_config());
Comment on lines +201 to 208
Expand Down
5 changes: 5 additions & 0 deletions src/llm/language_model/legacy/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ Status LegacyServableInitializer::initialize(std::shared_ptr<GenAiServable>& ser
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}

status = initializeLoraAdapters(nodeOptions, graphPath, properties);
if (!status.ok()) {
return status;
}

status = JsonParser::parsePluginConfig(nodeOptions.plugin_config(), properties->pluginConfig);
if (!status.ok()) {
SPDLOG_ERROR("Error during llm node plugin_config option parsing to JSON: {}", nodeOptions.plugin_config());
Comment on lines +79 to 86
Expand Down
7 changes: 7 additions & 0 deletions src/llm/llm_calculator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ message LLMCalculatorOptions {
optional LLMCalculatorOptions ext = 113473750;
}

// Configuration of one LoRA (Low-Rank Adaptation) adapter applied to the
// base model when the servable is initialized.
message LoraAdapter {
  // Path to the adapter (e.g. a safetensors file); relative paths are
  // resolved against the graph directory.
  required string model_path = 1;
  // Blending weight of the adapter; accepted range is (0.0, 1.0].
  optional float alpha = 2 [default = 1];
}

message KVCrushConfig {
enum AnchorPointMode {
RANDOM = 0;
Expand Down Expand Up @@ -135,4 +140,6 @@ message LLMCalculatorOptions {
optional bool enable_tool_guided_generation = 23 [default = false];

optional SparseAttentionConfig sparse_attention_config = 24;

repeated LoraAdapter lora_adapter = 25;
}
39 changes: 39 additions & 0 deletions src/llm/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <vector>
#include <iterator>

#include <openvino/genai/lora_adapter.hpp>
#include <spdlog/spdlog.h>

#include <fstream>
Expand Down Expand Up @@ -316,6 +317,44 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
}
#endif

// Builds an ov::genai::AdapterConfig out of every lora_adapter entry in the
// calculator options and injects it into the servable's plugin configuration.
// Relative adapter paths are resolved against graphPath. Returns OK when no
// adapters are configured or all of them are registered successfully;
// LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED on an invalid alpha or when
// adapter loading throws.
Status initializeLoraAdapters(const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath, std::shared_ptr<GenAiServableProperties> properties) {
    const int adapterCount = nodeOptions.lora_adapter_size();
    if (adapterCount <= 0) {
        return StatusCode::OK;  // nothing configured; no-op
    }
    SPDLOG_INFO("LoRA adapters will be applied to the model. Number of adapters: {}", adapterCount);
    ov::genai::AdapterConfig config;
    for (int idx = 0; idx < adapterCount; ++idx) {
        const auto& option = nodeOptions.lora_adapter(idx);
        const float alpha = option.alpha();
        SPDLOG_INFO("Processing LoRA adapter number {} with model path: {} alpha: {}", idx, option.model_path(), alpha);
        // Alpha must lie in (0.0, 1.0]; validated before touching the filesystem.
        if (alpha <= 0.0f || alpha > 1.0f) {
            SPDLOG_ERROR("LoRA adapter alpha value {} is out of valid range (0.0, 1.0]", alpha);
            return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
        }
        std::filesystem::path adapterPath{option.model_path()};
        if (adapterPath.is_relative()) {
            adapterPath = std::filesystem::path(graphPath) / adapterPath;
        }
        const std::string resolvedPath = adapterPath.string();
        try {
            config.add(ov::genai::Adapter(resolvedPath), alpha);
            SPDLOG_INFO("Registered LoRA adapter from path: {} with alpha: {}", resolvedPath, alpha);
        } catch (const std::exception& e) {
            SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {} exception: {}", resolvedPath, e.what());
            return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
        } catch (...) {
            SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {}", resolvedPath);
            return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
        }
    }
    // Adapters are applied exactly once at initialization, so the static mode
    // is sufficient and more efficient than dynamic switching.
    config.set_mode(ov::genai::AdapterConfig::MODE_STATIC);
    properties->pluginConfig.insert(ov::genai::adapters(config));
    return StatusCode::OK;
}

Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath) {
auto fsModelsPath = std::filesystem::path(modelsPath);
if (fsModelsPath.is_relative()) {
Expand Down
1 change: 1 addition & 0 deletions src/llm/servable_initializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class GenAiServableInitializer {
virtual Status initialize(std::shared_ptr<GenAiServable>& servable, const mediapipe::LLMCalculatorOptions& nodeOptions, std::string graphPath) = 0;
};
Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath);
Status initializeLoraAdapters(const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath, std::shared_ptr<GenAiServableProperties> properties);
std::optional<uint32_t> parseMaxModelLength(std::string& modelsPath);
Status determinePipelineType(PipelineType& pipelineType, const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath);
Status initializeGenAiServable(std::shared_ptr<GenAiServable>& servable, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath);
Expand Down
5 changes: 5 additions & 0 deletions src/llm/visual_language_model/legacy/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ Status VisualLanguageModelLegacyServableInitializer::initialize(std::shared_ptr<
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}

status = initializeLoraAdapters(nodeOptions, graphPath, properties);
if (!status.ok()) {
return status;
}

status = JsonParser::parsePluginConfig(nodeOptions.plugin_config(), properties->pluginConfig);
if (!status.ok()) {
SPDLOG_ERROR("Error during llm node plugin_config option parsing to JSON: {}", nodeOptions.plugin_config());
Comment on lines +78 to 85
Expand Down
217 changes: 217 additions & 0 deletions src/test/llm/lora_adapter_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
//*****************************************************************************
// Copyright 2026 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>
#include <openvino/genai/lora_adapter.hpp>

#pragma warning(push)
#pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include "mediapipe/framework/calculator_graph.h"
#pragma GCC diagnostic pop
#pragma warning(pop)

#include "src/llm/servable.hpp"
#include "src/llm/servable_initializer.hpp"
#include "src/status.hpp"
#include "src/test/test_with_temp_dir.hpp"

using namespace ovms;

// Fixture for initializeLoraAdapters tests: generates a minimal but valid
// safetensors adapter file inside the per-test temp directory.
class LoraAdapterInitTest : public TestWithTempDir {
protected:
    std::shared_ptr<GenAiServableProperties> properties;  // receives the "adapters" plugin entry
    mediapipe::LLMCalculatorOptions nodeOptions;          // options under test
    std::string loraDir;                                  // directory holding the generated adapter
    std::string loraFilePath;                             // absolute path to the generated file

    // Creates a minimal valid safetensors file that ov::genai::Adapter can load.
    // safetensors layout: 8-byte little-endian header length, JSON header,
    // then the raw tensor data.
    static void createMinimalSafetensorsFile(const std::string& dir) {
        std::filesystem::create_directories(dir);
        std::string path = dir + "/adapter_model.safetensors";

        std::string header =
            R"({"lora_A.weight":{"dtype":"F32","shape":[1,2],"data_offsets":[0,8]},)"
            R"("lora_B.weight":{"dtype":"F32","shape":[2,1],"data_offsets":[8,16]}})";
        // Pad header to 8-byte alignment
        while (header.size() % 8 != 0)
            header += ' ';

        // Serialize the header length explicitly as little-endian bytes; the
        // previous reinterpret_cast of the uint64_t wrote host endianness,
        // which violates the safetensors spec on big-endian hosts.
        uint64_t headerLen = header.size();
        char lenBytes[8];
        for (int i = 0; i < 8; ++i) {
            lenBytes[i] = static_cast<char>((headerLen >> (8 * i)) & 0xFF);
        }
        std::ofstream f(path, std::ios::binary);
        f.write(lenBytes, sizeof(lenBytes));
        f.write(header.data(), static_cast<std::streamsize>(headerLen));
        // 16 bytes of zero tensor data (4 floats of zeros)
        const std::vector<char> zeros(16, 0);
        f.write(zeros.data(), static_cast<std::streamsize>(zeros.size()));
    }

    void SetUp() override {
        TestWithTempDir::SetUp();
        properties = std::make_shared<GenAiServableProperties>();
        loraDir = directoryPath + "/lora_adapter";
        createMinimalSafetensorsFile(loraDir);
        loraFilePath = loraDir + "/adapter_model.safetensors";
    }
};

// --- Protobuf parsing tests ---

// Checks pbtxt parsing of repeated lora_adapter entries, including the
// proto-declared default alpha of 1.0 when the field is omitted.
TEST_F(LoraAdapterInitTest, ProtobufLoraAdapterFieldsParsedCorrectly) {
    const std::string pbtxt = R"(
        models_path: "/some/model"
        lora_adapter { model_path: "/path/to/lora1" alpha: 0.5 }
        lora_adapter { model_path: "/path/to/lora2" }
    )";
    mediapipe::LLMCalculatorOptions parsed;
    ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(pbtxt, &parsed));
    ASSERT_EQ(parsed.lora_adapter_size(), 2);
    EXPECT_EQ(parsed.lora_adapter(0).model_path(), "/path/to/lora1");
    EXPECT_FLOAT_EQ(parsed.lora_adapter(0).alpha(), 0.5f);
    EXPECT_EQ(parsed.lora_adapter(1).model_path(), "/path/to/lora2");
    EXPECT_FLOAT_EQ(parsed.lora_adapter(1).alpha(), 1.0f);  // proto default
}

// --- No adapters ---

// With no adapters configured, initialization is a successful no-op that
// leaves the plugin config untouched.
TEST_F(LoraAdapterInitTest, NoAdaptersReturnsOk) {
    const auto status = initializeLoraAdapters(nodeOptions, "/some/path", properties);
    ASSERT_EQ(status, StatusCode::OK);
    EXPECT_TRUE(properties->pluginConfig.empty());
}

// --- Invalid path ---

// Pointing an adapter at a missing path must fail initialization.
TEST_F(LoraAdapterInitTest, NonExistentPathFails) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(directoryPath + "/nonexistent_lora");
    cfg.set_alpha(0.5f);
    EXPECT_EQ(initializeLoraAdapters(nodeOptions, "", properties),
        StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED);
}

// --- Alpha validation ---

// Alpha must be strictly positive; exactly 0.0 is rejected.
TEST_F(LoraAdapterInitTest, AlphaZeroFails) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(loraFilePath);
    cfg.set_alpha(0.0f);
    EXPECT_EQ(initializeLoraAdapters(nodeOptions, "", properties),
        StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED);
}

// Negative alpha values are outside (0.0, 1.0] and must be rejected.
TEST_F(LoraAdapterInitTest, AlphaNegativeFails) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(loraFilePath);
    cfg.set_alpha(-0.5f);
    EXPECT_EQ(initializeLoraAdapters(nodeOptions, "", properties),
        StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED);
}

// Values above the upper bound of 1.0 must be rejected.
TEST_F(LoraAdapterInitTest, AlphaAboveOneFails) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(loraFilePath);
    cfg.set_alpha(1.5f);
    EXPECT_EQ(initializeLoraAdapters(nodeOptions, "", properties),
        StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED);
}

// --- Happy paths ---

// Happy path: a valid adapter with an explicit alpha produces an
// "adapters" entry in the plugin config.
TEST_F(LoraAdapterInitTest, ValidAdapterWithAlpha) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(loraFilePath);
    cfg.set_alpha(0.5f);
    ASSERT_EQ(initializeLoraAdapters(nodeOptions, "", properties), StatusCode::OK);
    EXPECT_FALSE(properties->pluginConfig.empty());
    EXPECT_EQ(properties->pluginConfig.count("adapters"), 1);
}

// Omitting alpha falls back to the proto default of 1.0, which is valid.
TEST_F(LoraAdapterInitTest, DefaultAlphaSucceeds) {
    nodeOptions.add_lora_adapter()->set_model_path(loraFilePath);
    const auto status = initializeLoraAdapters(nodeOptions, "", properties);
    ASSERT_EQ(status, StatusCode::OK);
    EXPECT_EQ(properties->pluginConfig.count("adapters"), 1);
}

// Boundary check: alpha == 1.0 is inside the accepted range (0.0, 1.0].
TEST_F(LoraAdapterInitTest, AlphaExactlyOneSucceeds) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(loraFilePath);
    cfg.set_alpha(1.0f);
    ASSERT_EQ(initializeLoraAdapters(nodeOptions, "", properties), StatusCode::OK);
    EXPECT_EQ(properties->pluginConfig.count("adapters"), 1);
}

// Several valid adapters collapse into a single "adapters" plugin entry.
TEST_F(LoraAdapterInitTest, MultipleAdaptersRegistered) {
    for (float alpha : {0.3f, 0.7f}) {
        auto& cfg = *nodeOptions.add_lora_adapter();
        cfg.set_model_path(loraFilePath);
        cfg.set_alpha(alpha);
    }
    ASSERT_EQ(initializeLoraAdapters(nodeOptions, "", properties), StatusCode::OK);
    EXPECT_EQ(properties->pluginConfig.count("adapters"), 1);
}

// --- Path resolution ---

// A relative model_path is joined with graphPath (directoryPath here), so
// "lora_adapter/..." resolves to the adapter generated by the fixture.
TEST_F(LoraAdapterInitTest, RelativePathResolvedAgainstGraphPath) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path("lora_adapter/adapter_model.safetensors");
    cfg.set_alpha(0.5f);
    ASSERT_EQ(initializeLoraAdapters(nodeOptions, directoryPath, properties), StatusCode::OK);
    EXPECT_EQ(properties->pluginConfig.count("adapters"), 1);
}

// An absolute model_path is used verbatim; graphPath plays no part.
TEST_F(LoraAdapterInitTest, AbsolutePathIgnoresGraphPath) {
    auto& cfg = *nodeOptions.add_lora_adapter();
    cfg.set_model_path(loraFilePath);  // absolute path
    cfg.set_alpha(0.5f);
    ASSERT_EQ(initializeLoraAdapters(nodeOptions, "/wrong/graph/path", properties), StatusCode::OK);
    EXPECT_EQ(properties->pluginConfig.count("adapters"), 1);
}

// --- Mixed valid/invalid ---

// One bad adapter poisons the whole configuration: a valid first entry does
// not rescue an invalid alpha on the second.
TEST_F(LoraAdapterInitTest, SecondAdapterInvalidAlphaFailsAll) {
    auto& first = *nodeOptions.add_lora_adapter();
    first.set_model_path(loraFilePath);
    first.set_alpha(0.5f);
    auto& second = *nodeOptions.add_lora_adapter();
    second.set_model_path(loraFilePath);
    second.set_alpha(0.0f);  // invalid
    EXPECT_EQ(initializeLoraAdapters(nodeOptions, "", properties),
        StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED);
}

// A broken path on the second adapter fails the whole initialization even
// though the first adapter is loadable.
TEST_F(LoraAdapterInitTest, SecondAdapterInvalidPathFailsAll) {
    auto& first = *nodeOptions.add_lora_adapter();
    first.set_model_path(loraFilePath);
    first.set_alpha(0.5f);
    auto& second = *nodeOptions.add_lora_adapter();
    second.set_model_path(directoryPath + "/no_such_adapter.safetensors");
    second.set_alpha(0.5f);
    EXPECT_EQ(initializeLoraAdapters(nodeOptions, "", properties),
        StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED);
}