11 changes: 10 additions & 1 deletion CMakeLists.txt
@@ -103,6 +103,7 @@ include(${PROJECT_SOURCE_DIR}/tools/cmake/Codegen.cmake)
include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
include(CMakeDependentOption)
include(ExternalProject)
include(FetchContent)
include(GNUInstallDirs)

if(NOT CMAKE_CXX_STANDARD)
@@ -406,6 +407,14 @@ set(_common_include_directories
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/core/portable_type/c10>
)

if(TARGET jinja2cpp)
install(
TARGETS jinja2cpp
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
endif()

#
# The `_<target>_srcs` lists are defined by executorch_load_build_variables.
#
@@ -803,7 +812,7 @@ endif()

if(EXECUTORCH_BUILD_EXTENSION_LLM)
if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
set(SUPPORT_REGEX_LOOKAHEAD ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/chat_template)
# llama/runner/CMakeLists.txt builds a shared library libllama_runner.so
# that transitively depends on tokenizers. Need to build tokenizers with
# -fPIC.
18 changes: 18 additions & 0 deletions extension/llm/chat_template/BUCK
@@ -0,0 +1,18 @@
load("@fbcode_macros//build_defs:build_file_migration.bzl", "fbcode_target", "non_fbcode_target")
oncall("executorch")

# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load(":targets.bzl", "define_common_targets")

non_fbcode_target(_kind = define_common_targets,)

# !!!! fbcode/executorch/extension/llm/chat_template/TARGETS was merged into this file, see https://fburl.com/workplace/xl8l9yuo for more info !!!!

# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load(":targets.bzl", "define_common_targets")

fbcode_target(_kind = define_common_targets,)
116 changes: 116 additions & 0 deletions extension/llm/chat_template/CMakeLists.txt
@@ -0,0 +1,116 @@
if(NOT EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
return()
endif()

include(FetchContent)
cmake_policy(SET CMP0077 NEW)

FetchContent_Declare(
jinja2cpp
GIT_REPOSITORY https://github.com/jinja2cpp/Jinja2Cpp.git
GIT_TAG 1.3.2
GIT_SUBMODULES_RECURSE TRUE
)

set(JINJA2CPP_BUILD_TESTS
OFF
CACHE BOOL ""
FORCE
)
set(JINJA2CPP_BUILD_SHARED
OFF
CACHE BOOL ""
FORCE
)
set(JINJA2CPP_INSTALL
OFF
CACHE BOOL ""
FORCE
)
# Enable PCRE2-based regex lookahead support in Jinja2Cpp. This must be set
# BEFORE FetchContent_MakeAvailable(jinja2cpp) so it propagates to the
# Jinja2Cpp configure step.
set(SUPPORT_REGEX_LOOKAHEAD
ON
CACHE BOOL ""
FORCE
)

FetchContent_MakeAvailable(jinja2cpp)
if(NOT TARGET jinja2cpp)
message(FATAL_ERROR "Jinja2Cpp target not found after FetchContent.")
endif()

if(DEFINED jinja2cpp_SOURCE_DIR)
function(executorch_copy_nonstd_header dep_name target header_name dest_root)
set(_copied FALSE)
if(TARGET ${target})
get_target_property(_aliased ${target} ALIASED_TARGET)
if(_aliased)
set(_resolved_target ${_aliased})
else()
set(_resolved_target ${target})
endif()
get_target_property(
_include_dirs ${_resolved_target} INTERFACE_INCLUDE_DIRECTORIES
)
foreach(_dir IN LISTS _include_dirs)
if(EXISTS "${_dir}/nonstd/${header_name}")
file(MAKE_DIRECTORY "${dest_root}/nonstd")
file(
COPY "${_dir}/nonstd/${header_name}"
DESTINATION "${dest_root}/nonstd"
)
set(_copied TRUE)
break()
endif()
endforeach()
endif()
if(NOT _copied)
set(
_fallback_path
"${CMAKE_BINARY_DIR}/_deps/${dep_name}-src/include/nonstd/${header_name}"
)
if(EXISTS "${_fallback_path}")
file(MAKE_DIRECTORY "${dest_root}/nonstd")
file(COPY "${_fallback_path}" DESTINATION "${dest_root}/nonstd")
endif()
endif()
endfunction()

set(_jinja2cpp_nonstd_root
"${jinja2cpp_SOURCE_DIR}/thirdparty/nonstd"
)
executorch_copy_nonstd_header(
expected-lite
nonstd::expected-lite
expected.hpp
"${_jinja2cpp_nonstd_root}/expected-lite/include"
)
executorch_copy_nonstd_header(
variant-lite
nonstd::variant-lite
variant.hpp
"${_jinja2cpp_nonstd_root}/variant-lite/include"
)
executorch_copy_nonstd_header(
optional-lite
nonstd::optional-lite
optional.hpp
"${_jinja2cpp_nonstd_root}/optional-lite/include"
)
executorch_copy_nonstd_header(
string-view-lite
nonstd::string-view-lite
string_view.hpp
"${_jinja2cpp_nonstd_root}/string-view-lite/include"
)
endif()

# Install the chat_templates.h header so that downstream consumers of the
# installed ExecuTorch SDK can include
# <executorch/extension/llm/chat_template/chat_templates.h>.
install(
FILES chat_templates.h
DESTINATION include/executorch/extension/llm/chat_template
)
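
For reference, a minimal downstream-usage sketch of the include path this install rule enables. It is not part of the PR and assumes only the declarations in the chat_templates.h added below; the main() body is purely illustrative.

// Downstream usage sketch (not part of this PR): relies only on the
// declarations in chat_templates.h as installed by the rule above.
#include <executorch/extension/llm/chat_template/chat_templates.h>

#include <iostream>

int main() {
  using executorch::extension::llm::ChatTemplateType;
  using executorch::extension::llm::kEmbeddedTemplates;
  using executorch::extension::llm::kModelTokens;

  // Both lookup tables are defined inline in the header, so no extra
  // library needs to be linked.
  const auto tmpl = kEmbeddedTemplates.at(ChatTemplateType::Llama3);
  const auto& tokens = kModelTokens.at(ChatTemplateType::Llama3);
  std::cout << "bos=" << tokens.bos_token
            << " template_bytes=" << tmpl.size() << "\n";
  return 0;
}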
51 changes: 51 additions & 0 deletions extension/llm/chat_template/chat_templates.h
@@ -0,0 +1,51 @@
#pragma once

#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

namespace executorch::extension::llm {

enum class ChatTemplateType {
None,
Llama3,
Llama32,
Gemma3,
Custom,
};

constexpr std::string_view kLlama3Template = R"({{ bos_token }}{%- for message in messages -%}<|start_header_id|>{{ message.role }}<|end_header_id|>

{{ message.content }}<|eot_id|>{%- endfor -%}{%- if add_generation_prompt -%}<|start_header_id|>assistant<|end_header_id|>

{%- endif -%})";

constexpr std::string_view kGemma3Template = R"({{ bos_token }}{%- for message in messages -%}{%- if message.role == 'assistant' -%}<start_of_turn>model
{%- else -%}<start_of_turn>{{ message.role }}
{%- endif -%}{{ message.content }}<end_of_turn>{%- endfor -%}{%- if add_generation_prompt -%}<start_of_turn>model
{%- endif -%})";

inline const std::unordered_map<ChatTemplateType, std::string_view>
kEmbeddedTemplates = {
{ChatTemplateType::Llama3, kLlama3Template},
{ChatTemplateType::Llama32, kLlama3Template},
{ChatTemplateType::Gemma3, kGemma3Template},
};

struct ModelTokens {
std::string bos_token;
std::string eos_token;
std::vector<std::string> stop_tokens;
};

inline const std::unordered_map<ChatTemplateType, ModelTokens> kModelTokens = {
{ChatTemplateType::Llama3,
{"<|begin_of_text|>", "<|eot_id|>", {"<|eot_id|>", "<|end_of_text|>"}}},
{ChatTemplateType::Llama32,
{"<|begin_of_text|>", "<|eot_id|>", {"<|eot_id|>", "<|end_of_text|>"}}},
{ChatTemplateType::Gemma3,
{"<bos>", "<end_of_turn>", {"<end_of_turn>", "<eos>"}}},
};

} // namespace executorch::extension::llm
16 changes: 16 additions & 0 deletions extension/llm/chat_template/targets.bzl
@@ -0,0 +1,16 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
runtime.cxx_library(
name = "chat_templates",
exported_headers = [
"chat_templates.h",
],
visibility = ["PUBLIC"],
)
20 changes: 12 additions & 8 deletions extension/llm/runner/CMakeLists.txt
@@ -1,3 +1,3 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
@@ -54,6 +54,14 @@
list(APPEND runner_deps kernels_util_all_deps)

target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
target_link_libraries(extension_llm_runner PRIVATE $<LINK_ONLY:jinja2cpp>)
target_include_directories(
extension_llm_runner PRIVATE
$<TARGET_PROPERTY:jinja2cpp,INTERFACE_INCLUDE_DIRECTORIES>
)
target_compile_definitions(
extension_llm_runner PUBLIC EXECUTORCH_USE_JINJA2CPP
)
set_target_properties(
extension_llm_runner PROPERTIES POSITION_INDEPENDENT_CODE ON
)
@@ -116,23 +124,19 @@
portable_lib ${TORCH_PYTHON_LIBRARY} ${TORCH_LIBRARIES}
)

# Set properties for the Python extension
set_target_properties(
_llm_runner
PROPERTIES POSITION_INDEPENDENT_CODE ON
CXX_VISIBILITY_PRESET "hidden"
INTERPROCEDURAL_OPTIMIZATION TRUE
CXX_STANDARD 20
)
if(APPLE)
set(RPATH
"@loader_path/../../pybindings;@loader_path/../../../../torch/lib"
)
set(RPATH "@loader_path/../../pybindings")
else()
set(RPATH "$ORIGIN/../../pybindings:$ORIGIN/../../../../torch/lib")
set(RPATH "$ORIGIN/../../pybindings")
endif()
set_target_properties(
_llm_runner PROPERTIES BUILD_RPATH "${RPATH}" INSTALL_RPATH "${RPATH}"
)
set_target_properties(_llm_runner PROPERTIES INSTALL_RPATH ${RPATH})
# Add include directories
target_include_directories(
_llm_runner PRIVATE ${_common_include_directories} ${TORCH_INCLUDE_DIRS}
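
Because the EXECUTORCH_USE_JINJA2CPP definition above is declared PUBLIC, it propagates to any target that links extension_llm_runner. The sketch below shows one plausible way such code could branch on it; the helper name and fallback are assumptions for illustration, and the guarded branch follows the Jinja2Cpp README-style API rather than code from this PR.

// Sketch only: compile-time branch enabled by the PUBLIC
// EXECUTORCH_USE_JINJA2CPP definition; not code from this PR.
#include <string>

#ifdef EXECUTORCH_USE_JINJA2CPP
#include <jinja2cpp/template.h>
#endif

inline std::string render_template_sketch(
    const std::string& template_source,
    const std::string& user_name) {
#ifdef EXECUTORCH_USE_JINJA2CPP
  // Expand the template with Jinja2Cpp (README-style API).
  jinja2::Template tpl;
  tpl.Load(template_source);
  // RenderAsString returns a Result-like object; .value() yields the string.
  return tpl.RenderAsString({{"name", user_name}}).value();
#else
  // Without Jinja2Cpp, fall back to returning the source unexpanded.
  (void)user_name;
  return template_source;
#endif
}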
20 changes: 20 additions & 0 deletions extension/llm/runner/chat_types.h
@@ -0,0 +1,20 @@
#pragma once

#include <string>
#include <vector>

namespace executorch::extension::llm {

struct ChatMessage {
std::string role;
std::string content;
};

struct ChatConversation {
std::vector<ChatMessage> messages;
std::string bos_token;
std::string eos_token;
bool add_generation_prompt = true;
};

} // namespace executorch::extension::llm
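
A minimal sketch of how the new chat_types.h structs could be combined with the token tables from chat_templates.h. The helper name and the choice to pre-fill bos_token/eos_token from kModelTokens are assumptions for illustration, not part of the PR.

#include <executorch/extension/llm/chat_template/chat_templates.h>
#include <executorch/extension/llm/runner/chat_types.h>

namespace executorch::extension::llm {

// Hypothetical helper: build a conversation whose special tokens match the
// selected template family. Throws if `type` has no entry in kModelTokens.
inline ChatConversation make_conversation_sketch(ChatTemplateType type) {
  ChatConversation conv;
  const ModelTokens& tokens = kModelTokens.at(type);
  conv.bos_token = tokens.bos_token;
  conv.eos_token = tokens.eos_token;
  conv.add_generation_prompt = true;
  conv.messages.push_back({"system", "You are a helpful assistant."});
  conv.messages.push_back({"user", "Hello!"});
  return conv;
}

} // namespace executorch::extension::llm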