-
Notifications
You must be signed in to change notification settings - Fork 963
Unify MultimodalRunner under IRunner with multimodal prefill #17741
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d69117c
222f76d
5cd9cf5
52d8eb9
e7e5562
297ad1b
06963e2
8a22d1e
6ab9e7c
e0ee4dc
37159cd
bcc7616
f03b171
dd91e4f
f865cff
72d6034
c05c80a
2c44df7
e068f7e
9e6e103
e83bd7e
7e944e9
cb1284a
736fd9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |||||||||
| #include <cstdint> | ||||||||||
| #include <functional> | ||||||||||
| #include <memory> | ||||||||||
| #include <optional> | ||||||||||
| #include <string> | ||||||||||
| #include <unordered_map> | ||||||||||
|
|
||||||||||
|
|
@@ -74,7 +75,7 @@ namespace llm { | |||||||||
| * | ||||||||||
| * runner->generate(inputs, config, token_callback, stats_callback); | ||||||||||
| */ | ||||||||||
| class ET_EXPERIMENTAL MultimodalRunner { | ||||||||||
| class ET_EXPERIMENTAL MultimodalRunner : public IRunner { | ||||||||||
| public: | ||||||||||
| /** | ||||||||||
| * @brief Constructor for MultimodalRunner with dependency injection | ||||||||||
|
|
@@ -105,8 +106,24 @@ class ET_EXPERIMENTAL MultimodalRunner { | |||||||||
| std::unique_ptr<TextTokenGenerator> text_token_generator, | ||||||||||
| std::unique_ptr<Stats> stats); | ||||||||||
|
|
||||||||||
| virtual bool is_loaded(); | ||||||||||
| virtual ::executorch::runtime::Error load(); | ||||||||||
| bool is_loaded() const override; | ||||||||||
| ::executorch::runtime::Error load() override; | ||||||||||
|
|
||||||||||
| /** | ||||||||||
| * Generate tokens from a text prompt. Wraps the prompt as a MultimodalInput | ||||||||||
| * and delegates to generate(vector). Empty prompt is allowed if prefill() | ||||||||||
| * was called beforehand. | ||||||||||
| * @param prompt The text prompt to generate from. | ||||||||||
|
kirklandsign marked this conversation as resolved.
|
||||||||||
| * @param config Generation configuration parameters. | ||||||||||
| * @param token_callback Callback function called for each generated token. | ||||||||||
| * @param stats_callback Callback function for generation statistics. | ||||||||||
| * @return The error code. KV cache position is tracked internally in pos_. | ||||||||||
| */ | ||||||||||
| ::executorch::runtime::Error generate( | ||||||||||
| const std::string& prompt, | ||||||||||
| const GenerationConfig& config, | ||||||||||
| std::function<void(const std::string&)> token_callback = {}, | ||||||||||
| std::function<void(const Stats&)> stats_callback = {}) override; | ||||||||||
|
|
||||||||||
| /** | ||||||||||
| * Generate tokens from the given multimodal inputs using GenerationConfig. | ||||||||||
|
|
@@ -124,24 +141,42 @@ class ET_EXPERIMENTAL MultimodalRunner { | |||||||||
| std::function<void(const Stats&)> stats_callback = {}); | ||||||||||
|
|
||||||||||
| /** | ||||||||||
| * Prefill multimodal inputs, for example to reload chat history. | ||||||||||
| * Prefill multimodal inputs to fill the KV cache, for example to reload | ||||||||||
| * chat history. Call generate() afterwards to start decoding; the prompt | ||||||||||
| * may be empty, since prefill() has already filled the KV cache. | ||||||||||
|
kirklandsign marked this conversation as resolved.
Comment on lines
+145
to
+146
|
||||||||||
| * chat history. Call generate() with a non-empty prompt afterwards to | |
| * start decoding. | |
| * chat history. Call generate() afterwards to start decoding (you may use | |
| * empty inputs to consume the prefilled token). |
Uh oh!
There was an error while loading. Please reload this page.