From a83e7c479568df009375a0154b00123abcf585c7 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Fri, 22 May 2026 12:20:46 -0700
Subject: [PATCH 001/103] Fix 2 broken tests caused by D105910457

Differential Revision: D105973185

Pull Request resolved: https://github.com/pytorch/executorch/pull/19736
---
 backends/vulkan/test/op_tests/utils/gen_computegraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/vulkan/test/op_tests/utils/gen_computegraph.py b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
index a09b4d36b18..507719b8555 100644
--- a/backends/vulkan/test/op_tests/utils/gen_computegraph.py
+++ b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
@@ -286,7 +286,7 @@ def create_aten_fn_call(self) -> str:
     def create_aten_method_call(self) -> str:
         # For functions with only Method variant, we fallback to the function
         # declared in MethodOperators.h
-        cpp_sig = gen_static_dispatch_backend_call_signature(self.f_sig, self.f)
+        cpp_sig = gen_static_dispatch_backend_call_signature(self.f)
         exprs = translate_args(self.f_sig, cpp_sig)
         func_call = f"at::_ops::{self.f_sig.name()}::call({exprs});"
         return func_call

From ec764702419ddc62570c06a282cb34f6d0ed0172 Mon Sep 17 00:00:00 2001
From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com>
Date: Fri, 22 May 2026 22:51:45 +0200
Subject: [PATCH 002/103] Cortex_M backend: Add more model tests (#19720)

Add model tests of currently not supported models
- yolo11
- wav2letter
- silero_vad

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
---
 .../cortex_m/test/models/test_silero_vad.py   | 94 +++++++++++++++++++
 .../cortex_m/test/models/test_wav2letter.py   | 34 +++++++
 backends/cortex_m/test/models/test_yolo11.py  | 45 +++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 backends/cortex_m/test/models/test_silero_vad.py
 create mode 100644 backends/cortex_m/test/models/test_wav2letter.py
 create mode 100644 backends/cortex_m/test/models/test_yolo11.py

diff --git a/backends/cortex_m/test/models/test_silero_vad.py b/backends/cortex_m/test/models/test_silero_vad.py
new file mode 100644
index 00000000000..27b958627bb
--- /dev/null
+++ b/backends/cortex_m/test/models/test_silero_vad.py
@@ -0,0 +1,94 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm.test.common import parametrize
+from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+from executorch.examples.models.silero_vad.export_silero_vad import (
+    CONTEXT_SIZE,
+    HIDDEN_DIM,
+    SileroVAD16k,
+    WINDOW_SIZE,
+)
+
+
+ops_before_transforms: dict[str, int] = {
+    "executorch_exir_dialects_edge__ops_aten_abs_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_add_Tensor": 3,
+    "executorch_exir_dialects_edge__ops_aten_arange_start_step": 1,
+    "executorch_exir_dialects_edge__ops_aten_cat_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_convolution_default": 6,
+    "executorch_exir_dialects_edge__ops_aten_index_Tensor": 1,
+    "executorch_exir_dialects_edge__ops_aten_linear_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_mean_dim": 1,
+    "executorch_exir_dialects_edge__ops_aten_mul_Tensor": 3,
+    "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
+    "executorch_exir_dialects_edge__ops_aten_relu_default": 5,
+    "executorch_exir_dialects_edge__ops_aten_select_copy_int": 2,
+    "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 4,
+    "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_sqrt_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_squeeze_copy_dims": 2,
+    "executorch_exir_dialects_edge__ops_aten_sub_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_tanh_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_unsqueeze_copy_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 12,
+    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 11,
+}
+ops_after_transforms: dict[str, int] = {
+    "executorch_exir_dialects_edge__ops_aten_abs_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_add_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_arange_start_step": 1,
+    "executorch_exir_dialects_edge__ops_aten_cat_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_convolution_default": 6,
+    "executorch_exir_dialects_edge__ops_aten_index_Tensor": 1,
+    "executorch_exir_dialects_edge__ops_aten_linear_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_mean_dim": 1,
+    "executorch_exir_dialects_edge__ops_aten_mul_Tensor": 3,
+    "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
+    "executorch_exir_dialects_edge__ops_aten_relu_default": 5,
+    "executorch_exir_dialects_edge__ops_aten_select_copy_int": 2,
+    "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 4,
+    "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_sqrt_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_squeeze_copy_dims": 2,
+    "executorch_exir_dialects_edge__ops_aten_sub_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_tanh_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_unsqueeze_copy_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_cortex_m_dequantize_per_tensor_default": 6,
+    "executorch_exir_dialects_edge__ops_cortex_m_quantize_per_tensor_default": 6,
+    "executorch_exir_dialects_edge__ops_cortex_m_quantized_add_default": 1,
+}
+
+
+pt_model = SileroVAD16k().eval()
+
+x = torch.randn(
+    1, CONTEXT_SIZE + WINDOW_SIZE
+)  # (1, 576) — 64 context + 512 audio samples
+state = torch.zeros(2, 1, HIDDEN_DIM)  # (2, 1, 128) — [h, c] LSTM state
+
+test_cases = {
+    "silero_vad_16k": McuTestCase(
+        model=pt_model,
+        example_inputs=lambda: (x, state),
+    ),
+}
+
+
+@parametrize("test_case", test_cases)
+def test_dialect_silero_vad_16k(test_case):
+    """This model currently does largely not lower to accelerated kernels due to missing LSTM and conv1d support, this test is to track development progress."""
+    inputs = test_case.get_example_inputs()
+    tester = CortexMTester(test_case.model, inputs)
+    tester.test_dialect(
+        ops_before_transforms,
+        ops_after_transforms,
+        qtol=10,
+    )
diff --git a/backends/cortex_m/test/models/test_wav2letter.py b/backends/cortex_m/test/models/test_wav2letter.py
new file mode 100644
index 00000000000..ddc5354293c
--- /dev/null
+++ b/backends/cortex_m/test/models/test_wav2letter.py
@@ -0,0 +1,34 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.arm.test.common import parametrize
+from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+from executorch.examples.models.wav2letter.model import Wav2LetterModel
+
+
+ops_before_transforms: dict[str, int] = {}
+ops_after_transforms: dict[str, int] = {}
+
+model = Wav2LetterModel()
+pt_model = model.get_eager_model()
+
+test_cases = {
+    "wav2letter": McuTestCase(
+        model=pt_model,
+        example_inputs=lambda: model.get_example_inputs(),
+    ),
+}
+
+
+@parametrize("test_case", test_cases)
+def test_dialect_wav2letter(test_case):
+    """This model currently does largely not lower to accelerated kernels due to missing conv1d support, this test is to track development progress."""
+    inputs = test_case.get_example_inputs()
+    tester = CortexMTester(test_case.model, inputs)
+    tester.test_dialect(
+        ops_before_transforms,
+        ops_after_transforms,
+        qtol=10,
+    )
diff --git a/backends/cortex_m/test/models/test_yolo11.py b/backends/cortex_m/test/models/test_yolo11.py
new file mode 100644
index 00000000000..f17c5ced331
--- /dev/null
+++ b/backends/cortex_m/test/models/test_yolo11.py
@@ -0,0 +1,45 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import pytest
+import torch
+from executorch.backends.arm.test.common import parametrize
+
+from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+
+YOLO = pytest.importorskip(
+    "ultralytics",
+    reason="ultralytics is optional; install it locally to run YOLO tests.",
+).YOLO
+
+
+ops_before_transforms: dict[str, int] = {}
+ops_after_transforms: dict[str, int] = {}
+
+
+WEIGHTS = "yolo11n.pt"
+yolo = YOLO(WEIGHTS)
+pt_model = yolo.model.eval()
+
+test_cases = {
+    "yolo11n": McuTestCase(
+        model=pt_model,
+        example_inputs=lambda: (
+            torch.randn(1, 3, 640, 640).to(memory_format=torch.channels_last),
+        ),
+    ),
+}
+
+
+@parametrize("test_case", test_cases)
+def test_dialect_yolo11(test_case):
+    """This model currently does not lower in the cortex-m backend, this test is to track development progress."""
+    inputs = test_case.get_example_inputs()
+    tester = CortexMTester(test_case.model, inputs)
+    tester.test_dialect(
+        ops_before_transforms,
+        ops_after_transforms,
+        qtol=10,
+    )

From 158c5d8f109479ecfb9ca6ef5e638a4961f5b379 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Fri, 22 May 2026 17:39:32 -0700
Subject: [PATCH 003/103] Convert Android LLM extension from Java to Kotlin
 (#19211)

Differential Revision: D102880053

Pull Request resolved: https://github.com/pytorch/executorch/pull/19211
---
 extension/android/BUCK                        |  11 +-
 .../android/executorch_android/build.gradle   |   1 +
 .../llm/{LlmCallback.java => LlmCallback.kt}  |  27 +-
 .../extension/llm/LlmGenerationConfig.java    | 198 ----
 .../extension/llm/LlmGenerationConfig.kt      |  78 ++
 .../executorch/extension/llm/LlmModule.java   | 823 ----------------
 .../executorch/extension/llm/LlmModule.kt     | 898 ++++++++++++++++++
 .../extension/llm/LlmModuleConfig.java        | 252 -----
 .../extension/llm/LlmModuleConfig.kt          | 134 +++
 .../extension/llm/package-info.java           |  51 -
 10 files changed, 1129 insertions(+), 1344 deletions(-)
 rename extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/{LlmCallback.java => LlmCallback.kt} (53%)
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/package-info.java

diff --git a/extension/android/BUCK b/extension/android/BUCK
index c7e275805e2..110b428575d 100644
--- a/extension/android/BUCK
+++ b/extension/android/BUCK
@@ -47,13 +47,14 @@ non_fbcode_target(_kind = fb_android_library,
     name = "executorch_llama",
     warnings_as_errors = False,
     srcs = [
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt",
     ],
     autoglob = False,
-    language = "JAVA",
+    language = "KOTLIN",
+    extra_kotlinc_arguments = ["-Xjvm-default=all"],
     deps = [
         ":executorch",
         "//fbandroid/java/com/facebook/jni:jni",
diff --git a/extension/android/executorch_android/build.gradle b/extension/android/executorch_android/build.gradle
index 3ee5b5877b3..2dbe0e1fb5f 100644
--- a/extension/android/executorch_android/build.gradle
+++ b/extension/android/executorch_android/build.gradle
@@ -51,6 +51,7 @@ android {
     }
     kotlinOptions {
         jvmTarget = "11"
+        freeCompilerArgs += ["-Xjvm-default=all"]
     }
 }
 
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt
similarity index 53%
rename from extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java
rename to extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt
index 4e834d06721..3b56986bf14 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt
@@ -6,45 +6,42 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-package org.pytorch.executorch.extension.llm;
+package org.pytorch.executorch.extension.llm
 
-import com.facebook.jni.annotations.DoNotStrip;
-import org.pytorch.executorch.annotations.Experimental;
+import com.facebook.jni.annotations.DoNotStrip
+import org.pytorch.executorch.annotations.Experimental
 
 /**
- * Callback interface for Llama model. Users can implement this interface to receive the generated
+ * Callback interface for Llm model. Users can implement this interface to receive the generated
  * tokens and statistics.
  *
- * <p>Warning: These APIs are experimental and subject to change without notice
+ * Warning: These APIs are experimental and subject to change without notice
  */
 @Experimental
-public interface LlmCallback {
+interface LlmCallback {
   /**
    * Called when a new result is available from JNI. Users will keep getting onResult() invocations
    * until generate() finishes.
    *
    * @param result Last generated token
    */
-  @DoNotStrip
-  public void onResult(String result);
+  @DoNotStrip fun onResult(result: String)
 
   /**
    * Called when the statistics for the generate() is available.
    *
-   * <p>The result will be a JSON string. See extension/llm/stats.h for the field definitions.
+   * The result will be a JSON string. See extension/llm/stats.h for the field definitions.
    *
    * @param stats JSON string containing the statistics for the generate()
    */
-  @DoNotStrip
-  default void onStats(String stats) {}
+  @DoNotStrip fun onStats(stats: String) {}
 
   /**
    * Called when an error occurs during generate().
    *
-   * @param errorCode Error code from the ExecuTorch runtime (see {@link
-   *     org.pytorch.executorch.ExecutorchRuntimeException})
+   * @param errorCode Error code from the ExecuTorch runtime (see
+   *   [org.pytorch.executorch.ExecutorchRuntimeException])
    * @param message Human-readable error description
    */
-  @DoNotStrip
-  default void onError(int errorCode, String message) {}
+  @DoNotStrip fun onError(errorCode: Int, message: String) {}
 }
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java
deleted file mode 100644
index db7941aadad..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch.extension.llm;
-
-/**
- * Configuration class for controlling text generation parameters in LLM operations.
- *
- * <p>This class provides settings for text generation behavior including output formatting,
- * generation limits, and sampling parameters. Instances should be created using the {@link
- * #create()} method and the fluent builder pattern.
- */
-public class LlmGenerationConfig {
-  private final boolean echo;
-  private final int maxNewTokens;
-  private final boolean warming;
-  private final int seqLen;
-  private final float temperature;
-  private final int numBos;
-  private final int numEos;
-
-  private LlmGenerationConfig(Builder builder) {
-    this.echo = builder.echo;
-    this.maxNewTokens = builder.maxNewTokens;
-    this.warming = builder.warming;
-    this.seqLen = builder.seqLen;
-    this.temperature = builder.temperature;
-    this.numBos = builder.numBos;
-    this.numEos = builder.numEos;
-  }
-
-  /**
-   * Creates a new Builder instance for constructing generation configurations.
-   *
-   * @return a new Builder with default configuration values
-   */
-  public static Builder create() {
-    return new Builder();
-  }
-
-  /**
-   * @return true if input prompt should be included in the output
-   */
-  public boolean isEcho() {
-    return echo;
-  }
-
-  /**
-   * @return maximum number of tokens to generate (-1 for unlimited)
-   */
-  public int getMaxNewTokens() {
-    return maxNewTokens;
-  }
-
-  /**
-   * @return true if model warming is enabled
-   */
-  public boolean isWarming() {
-    return warming;
-  }
-
-  /**
-   * @return maximum sequence length for generation (-1 for default)
-   */
-  public int getSeqLen() {
-    return seqLen;
-  }
-
-  /**
-   * @return temperature value for sampling (higher = more random)
-   */
-  public float getTemperature() {
-    return temperature;
-  }
-
-  /**
-   * @return number of BOS tokens to prepend
-   */
-  public int getNumBos() {
-    return numBos;
-  }
-
-  /**
-   * @return number of EOS tokens to append
-   */
-  public int getNumEos() {
-    return numEos;
-  }
-
-  /**
-   * Builder class for constructing LlmGenerationConfig instances.
-   *
-   * <p>Provides a fluent interface for configuring generation parameters with sensible defaults.
-   * All methods return the builder instance to enable method chaining.
-   */
-  public static class Builder {
-    private boolean echo = true;
-    private int maxNewTokens = -1;
-    private boolean warming = false;
-    private int seqLen = -1;
-    private float temperature = 0.8f;
-    private int numBos = 0;
-    private int numEos = 0;
-
-    Builder() {}
-
-    /**
-     * Sets whether to include the input prompt in the generated output.
-     *
-     * @param echo true to include input prompt, false to return only new tokens
-     * @return this builder instance
-     */
-    public Builder echo(boolean echo) {
-      this.echo = echo;
-      return this;
-    }
-
-    /**
-     * Sets the maximum number of new tokens to generate.
-     *
-     * @param maxNewTokens the token limit (-1 for unlimited generation)
-     * @return this builder instance
-     */
-    public Builder maxNewTokens(int maxNewTokens) {
-      this.maxNewTokens = maxNewTokens;
-      return this;
-    }
-
-    /**
-     * Enables or disables model warming.
-     *
-     * @param warming true to generate initial tokens for model warmup
-     * @return this builder instance
-     */
-    public Builder warming(boolean warming) {
-      this.warming = warming;
-      return this;
-    }
-
-    /**
-     * Sets the maximum sequence length for generation.
-     *
-     * @param seqLen maximum sequence length (-1 for default behavior)
-     * @return this builder instance
-     */
-    public Builder seqLen(int seqLen) {
-      this.seqLen = seqLen;
-      return this;
-    }
-
-    /**
-     * Sets the temperature for random sampling.
-     *
-     * @param temperature sampling temperature (typical range 0.0-1.0)
-     * @return this builder instance
-     */
-    public Builder temperature(float temperature) {
-      this.temperature = temperature;
-      return this;
-    }
-
-    /**
-     * Sets the number of BOS tokens to prepend.
-     *
-     * @param numBos number of BOS tokens
-     * @return this builder instance
-     */
-    public Builder numBos(int numBos) {
-      this.numBos = numBos;
-      return this;
-    }
-
-    /**
-     * Sets the number of EOS tokens to append.
-     *
-     * @param numEos number of EOS tokens
-     * @return this builder instance
-     */
-    public Builder numEos(int numEos) {
-      this.numEos = numEos;
-      return this;
-    }
-
-    /**
-     * Constructs the LlmGenerationConfig instance with the configured parameters.
-     *
-     * @return new LlmGenerationConfig instance with current builder settings
-     */
-    public LlmGenerationConfig build() {
-      return new LlmGenerationConfig(this);
-    }
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt
new file mode 100644
index 00000000000..c0f8956fb7f
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.extension.llm
+
+/**
+ * Configuration class for controlling text generation parameters in LLM operations.
+ *
+ * This class provides settings for text generation behavior including output formatting, generation
+ * limits, and sampling parameters. Instances should be created using the [create] method and the
+ * fluent builder pattern.
+ */
+class LlmGenerationConfig
+private constructor(
+    @get:JvmName("isEcho") val echo: Boolean,
+    val maxNewTokens: Int,
+    @get:JvmName("isWarming") val warming: Boolean,
+    val seqLen: Int,
+    val temperature: Float,
+    val numBos: Int,
+    val numEos: Int,
+) {
+
+  companion object {
+    /**
+     * Creates a new Builder instance for constructing generation configurations.
+     *
+     * @return a new Builder with default configuration values
+     */
+    @JvmStatic fun create(): Builder = Builder()
+  }
+
+  /**
+   * Builder class for constructing LlmGenerationConfig instances.
+   *
+   * Provides a fluent interface for configuring generation parameters with sensible defaults. All
+   * methods return the builder instance to enable method chaining.
+   */
+  class Builder internal constructor() {
+    private var echo: Boolean = true
+    private var maxNewTokens: Int = -1
+    private var warming: Boolean = false
+    private var seqLen: Int = -1
+    private var temperature: Float = 0.8f
+    private var numBos: Int = 0
+    private var numEos: Int = 0
+
+    /** Sets whether to include the input prompt in the generated output. */
+    fun echo(echo: Boolean): Builder = apply { this.echo = echo }
+
+    /** Sets the maximum number of new tokens to generate. */
+    fun maxNewTokens(maxNewTokens: Int): Builder = apply { this.maxNewTokens = maxNewTokens }
+
+    /** Enables or disables model warming. */
+    fun warming(warming: Boolean): Builder = apply { this.warming = warming }
+
+    /** Sets the maximum sequence length for generation. */
+    fun seqLen(seqLen: Int): Builder = apply { this.seqLen = seqLen }
+
+    /** Sets the temperature for random sampling. */
+    fun temperature(temperature: Float): Builder = apply { this.temperature = temperature }
+
+    /** Sets the number of BOS tokens to prepend. */
+    fun numBos(numBos: Int): Builder = apply { this.numBos = numBos }
+
+    /** Sets the number of EOS tokens to append. */
+    fun numEos(numEos: Int): Builder = apply { this.numEos = numEos }
+
+    /** Constructs the LlmGenerationConfig instance with the configured parameters. */
+    fun build(): LlmGenerationConfig =
+        LlmGenerationConfig(echo, maxNewTokens, warming, seqLen, temperature, numBos, numEos)
+  }
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
deleted file mode 100644
index 0c467b13f44..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
+++ /dev/null
@@ -1,823 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch.extension.llm;
-
-import com.facebook.jni.HybridData;
-import com.facebook.jni.annotations.DoNotStrip;
-import java.io.Closeable;
-import java.nio.ByteBuffer;
-import java.util.List;
-import java.util.concurrent.locks.ReentrantLock;
-import org.pytorch.executorch.ExecuTorchRuntime;
-import org.pytorch.executorch.ExecutorchRuntimeException;
-import org.pytorch.executorch.annotations.Experimental;
-
-/**
- * LlmModule is a wrapper around the Executorch LLM. It provides a simple interface to generate text
- * from the model.
- *
- * <p>Warning: These APIs are experimental and subject to change without notice
- */
-@Experimental
-public class LlmModule implements Closeable {
-
-  public static final int MODEL_TYPE_TEXT = 1;
-  public static final int MODEL_TYPE_TEXT_VISION = 2;
-  public static final int MODEL_TYPE_MULTIMODAL = 2;
-
-  private final HybridData mHybridData;
-  private final ReentrantLock mLock = new ReentrantLock();
-  private volatile boolean mDestroyed = false;
-  private static final int DEFAULT_SEQ_LEN = 128;
-  private static final boolean DEFAULT_ECHO = true;
-  private static final float DEFAULT_TEMPERATURE = -1.0f;
-  private static final int DEFAULT_BOS = 0;
-  private static final int DEFAULT_EOS = 0;
-  private static final int DEFAULT_LOAD_MODE = LlmModuleConfig.LOAD_MODE_MMAP;
-
-  @DoNotStrip
-  private static native HybridData initHybrid(
-      int modelType,
-      String modulePath,
-      String tokenizerPath,
-      float temperature,
-      List<String> dataFiles,
-      int numBos,
-      int numEos,
-      int loadMode);
-
-  private LlmModule(
-      int modelType,
-      String modulePath,
-      String tokenizerPath,
-      float temperature,
-      List<String> dataFiles,
-      int numBos,
-      int numEos,
-      int loadMode) {
-    ExecuTorchRuntime.getRuntime();
-    ExecuTorchRuntime.validateFilePath(modulePath, "model path");
-    ExecuTorchRuntime.validateFilePath(tokenizerPath, "tokenizer path");
-
-    mHybridData =
-        initHybrid(
-            modelType, modulePath, tokenizerPath, temperature, dataFiles, numBos, numEos, loadMode);
-  }
-
-  /**
-   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
-   * dataFiles.
-   */
-  public LlmModule(
-      int modelType,
-      String modulePath,
-      String tokenizerPath,
-      float temperature,
-      List<String> dataFiles,
-      int numBos,
-      int numEos) {
-    this(
-        modelType,
-        modulePath,
-        tokenizerPath,
-        temperature,
-        dataFiles,
-        numBos,
-        numEos,
-        DEFAULT_LOAD_MODE);
-  }
-
-  /**
-   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
-   * dataFiles.
-   */
-  public LlmModule(
-      int modelType,
-      String modulePath,
-      String tokenizerPath,
-      float temperature,
-      List<String> dataFiles) {
-    this(
-        modelType,
-        modulePath,
-        tokenizerPath,
-        temperature,
-        dataFiles,
-        DEFAULT_BOS,
-        DEFAULT_EOS,
-        DEFAULT_LOAD_MODE);
-  }
-
-  /**
-   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
-   * data path.
-   */
-  public LlmModule(
-      int modelType,
-      String modulePath,
-      String tokenizerPath,
-      float temperature,
-      String dataPath,
-      int numBos,
-      int numEos) {
-    this(
-        modelType,
-        modulePath,
-        tokenizerPath,
-        temperature,
-        dataPath != null ? List.of(dataPath) : List.of(),
-        numBos,
-        numEos);
-  }
-
-  /**
-   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
-   * data path.
-   */
-  public LlmModule(
-      int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath) {
-    this(modelType, modulePath, tokenizerPath, temperature, dataPath, DEFAULT_BOS, DEFAULT_EOS);
-  }
-
-  /** Constructs a LLM Module for a model with given model path, tokenizer, temperature. */
-  public LlmModule(String modulePath, String tokenizerPath, float temperature) {
-    this(
-        MODEL_TYPE_TEXT,
-        modulePath,
-        tokenizerPath,
-        temperature,
-        List.of(),
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /**
-   * Constructs a LLM Module for a model with given model path, tokenizer, temperature and data
-   * path.
-   */
-  public LlmModule(String modulePath, String tokenizerPath, float temperature, String dataPath) {
-    this(
-        MODEL_TYPE_TEXT,
-        modulePath,
-        tokenizerPath,
-        temperature,
-        List.of(dataPath),
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /** Constructs a LLM Module for a model with given path, tokenizer, and temperature. */
-  public LlmModule(int modelType, String modulePath, String tokenizerPath, float temperature) {
-    this(modelType, modulePath, tokenizerPath, temperature, List.of(), DEFAULT_BOS, DEFAULT_EOS);
-  }
-
-  /** Constructs a LLM Module for a model with the given LlmModuleConfig */
-  public LlmModule(LlmModuleConfig config) {
-    this(
-        config.getModelType(),
-        config.getModulePath(),
-        config.getTokenizerPath(),
-        config.getTemperature(),
-        config.getDataPath() != null ? List.of(config.getDataPath()) : List.of(),
-        config.getNumBos(),
-        config.getNumEos(),
-        config.getLoadMode());
-  }
-
-  private void checkNotDestroyed() {
-    if (mDestroyed) throw new IllegalStateException("LlmModule has been destroyed");
-  }
-
-  private void checkNotReentrant() {
-    if (mLock.getHoldCount() > 1) {
-      throw new IllegalStateException("Cannot call LlmModule methods from within a callback");
-    }
-  }
-
-  /**
-   * Releases native resources. Callers must ensure no other methods are in-flight. Call {@link
-   * #stop()} and wait for {@link #generate(String, LlmCallback)} to return before calling this
-   * method.
-   */
-  @Override
-  public void close() {
-    if (mLock.tryLock()) {
-      try {
-        if (mLock.getHoldCount() > 1) {
-          throw new IllegalStateException(
-              "Cannot close module from within a callback during execution");
-        }
-        if (!mDestroyed) {
-          mDestroyed = true;
-          mHybridData.resetNative();
-        }
-      } finally {
-        mLock.unlock();
-      }
-    } else {
-      throw new IllegalStateException("Cannot close module while method is executing");
-    }
-  }
-
-  /**
-   * @deprecated Use {@link #close()} instead.
-   */
-  @Deprecated
-  public void resetNative() {
-    close();
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param prompt Input prompt
-   * @param llmCallback callback object to receive results.
-   */
-  public void generate(String prompt, LlmCallback llmCallback) {
-    generate(
-        prompt,
-        DEFAULT_SEQ_LEN,
-        llmCallback,
-        DEFAULT_ECHO,
-        DEFAULT_TEMPERATURE,
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param prompt Input prompt
-   * @param seqLen sequence length
-   * @param llmCallback callback object to receive results.
-   */
-  public void generate(String prompt, int seqLen, LlmCallback llmCallback) {
-    generate(
-        null,
-        0,
-        0,
-        0,
-        prompt,
-        seqLen,
-        llmCallback,
-        DEFAULT_ECHO,
-        DEFAULT_TEMPERATURE,
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param prompt Input prompt
-   * @param llmCallback callback object to receive results
-   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
-   */
-  public void generate(String prompt, LlmCallback llmCallback, boolean echo) {
-    generate(
-        null,
-        0,
-        0,
-        0,
-        prompt,
-        DEFAULT_SEQ_LEN,
-        llmCallback,
-        echo,
-        DEFAULT_TEMPERATURE,
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param prompt Input prompt
-   * @param seqLen sequence length
-   * @param llmCallback callback object to receive results
-   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
-   */
-  public void generate(String prompt, int seqLen, LlmCallback llmCallback, boolean echo) {
-    generate(prompt, seqLen, llmCallback, echo, DEFAULT_TEMPERATURE, DEFAULT_BOS, DEFAULT_EOS);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param prompt Input prompt
-   * @param seqLen sequence length
-   * @param llmCallback callback object to receive results
-   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
-   * @param temperature temperature for sampling (use negative value to use module default)
-   * @param numBos number of BOS tokens to prepend
-   * @param numEos number of EOS tokens to append
-   */
-  public void generate(
-      String prompt,
-      int seqLen,
-      LlmCallback llmCallback,
-      boolean echo,
-      float temperature,
-      int numBos,
-      int numEos) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int err = generateNative(prompt, seqLen, llmCallback, echo, temperature, numBos, numEos);
-      if (err != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(err, "Failed to generate");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native int generateNative(
-      String prompt,
-      int seqLen,
-      LlmCallback llmCallback,
-      boolean echo,
-      float temperature,
-      int numBos,
-      int numEos);
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param prompt Input prompt
-   * @param config the config for generation
-   * @param llmCallback callback object to receive results
-   */
-  public void generate(String prompt, LlmGenerationConfig config, LlmCallback llmCallback) {
-    int seqLen = config.getSeqLen();
-    boolean echo = config.isEcho();
-    float temperature = config.getTemperature();
-    int numBos = config.getNumBos();
-    int numEos = config.getNumEos();
-    generate(null, 0, 0, 0, prompt, seqLen, llmCallback, echo, temperature, numBos, numEos);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param image Input image as a byte array
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @param prompt Input prompt
-   * @param seqLen sequence length
-   * @param llmCallback callback object to receive results.
-   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
-   */
-  public void generate(
-      int[] image,
-      int width,
-      int height,
-      int channels,
-      String prompt,
-      int seqLen,
-      LlmCallback llmCallback,
-      boolean echo) {
-    generate(
-        image,
-        width,
-        height,
-        channels,
-        prompt,
-        seqLen,
-        llmCallback,
-        echo,
-        DEFAULT_TEMPERATURE,
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param image Input image as a byte array
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @param prompt Input prompt
-   * @param seqLen sequence length
-   * @param llmCallback callback object to receive results.
-   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
-   * @param temperature temperature for sampling (use negative value to use module default)
-   */
-  public void generate(
-      int[] image,
-      int width,
-      int height,
-      int channels,
-      String prompt,
-      int seqLen,
-      LlmCallback llmCallback,
-      boolean echo,
-      float temperature) {
-    generate(
-        image,
-        width,
-        height,
-        channels,
-        prompt,
-        seqLen,
-        llmCallback,
-        echo,
-        temperature,
-        DEFAULT_BOS,
-        DEFAULT_EOS);
-  }
-
-  /**
-   * Start generating tokens from the module.
-   *
-   * @param image Input image as a byte array
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @param prompt Input prompt
-   * @param seqLen sequence length
-   * @param llmCallback callback object to receive results.
-   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
-   * @param temperature temperature for sampling (use negative value to use module default)
-   * @param numBos number of BOS tokens to prepend
-   * @param numEos number of EOS tokens to append
-   */
-  public void generate(
-      int[] image,
-      int width,
-      int height,
-      int channels,
-      String prompt,
-      int seqLen,
-      LlmCallback llmCallback,
-      boolean echo,
-      float temperature,
-      int numBos,
-      int numEos) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      if (image != null) {
-        int nativeResult = prefillImagesInput(image, width, height, channels);
-        if (nativeResult != 0) {
-          throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-        }
-      }
-      int err = generateNative(prompt, seqLen, llmCallback, echo, temperature, numBos, numEos);
-      if (err != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(err, "Failed to generate");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  /**
-   * Prefill the KV cache with the given image input.
-   *
-   * @param image Input image as a byte array
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillImages(int[] image, int width, int height, int channels) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int nativeResult = prefillImagesInput(image, width, height, channels);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  /**
-   * Prefill a multimodal Module with the given image input via a direct ByteBuffer. The buffer data
-   * is accessed directly without JNI array copies, unlike {@link #prefillImages(int[], int, int,
-   * int)}. The ByteBuffer must contain raw uint8 pixel data in CHW format with at least channels *
-   * height * width bytes remaining. Only the first channels * height * width bytes from the
-   * buffer's current position are read; the position of the original ByteBuffer is not modified.
-   *
-   * @param image Input image as a direct ByteBuffer containing uint8 pixel data
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @throws IllegalArgumentException if the ByteBuffer is not direct or has insufficient remaining
-   *     bytes
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillImages(ByteBuffer image, int width, int height, int channels) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      if (!image.isDirect()) {
-        throw new IllegalArgumentException("Input ByteBuffer must be direct.");
-      }
-      long expectedBytes;
-      try {
-        long pixels = Math.multiplyExact((long) width, (long) height);
-        expectedBytes = Math.multiplyExact(pixels, (long) channels);
-      } catch (ArithmeticException ex) {
-        throw new IllegalArgumentException(
-            "width*height*channels is too large and overflows the allowed range.", ex);
-      }
-      if (width <= 0
-          || height <= 0
-          || channels <= 0
-          || expectedBytes > Integer.MAX_VALUE
-          || image.remaining() < expectedBytes) {
-        throw new IllegalArgumentException(
-            "ByteBuffer remaining ("
-                + image.remaining()
-                + ") must be at least width*height*channels ("
-                + expectedBytes
-                + ").");
-      }
-      // slice() so that getDirectBufferAddress on the native side returns a pointer
-      // starting at the current position, not the base address.
-      int nativeResult = prefillImagesInputBuffer(image.slice(), width, height, channels);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  /**
-   * Prefill a multimodal Module with the given normalized image input via a direct ByteBuffer. The
-   * buffer data is accessed directly without JNI array copies, unlike {@link
-   * #prefillImages(float[], int, int, int)}. The ByteBuffer must contain normalized float pixel
-   * data in CHW format with at least channels * height * width * 4 bytes remaining. Only the first
-   * channels * height * width floats from the buffer's current position are consumed. The buffer
-   * must use the platform's native byte order (set via {@code
-   * buffer.order(ByteOrder.nativeOrder())}).
-   *
-   * @param image Input normalized image as a direct ByteBuffer containing float pixel data in
-   *     native byte order
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @throws IllegalArgumentException if the ByteBuffer is not direct, has insufficient remaining
-   *     bytes, is not float-aligned, or does not use native byte order
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillNormalizedImage(ByteBuffer image, int width, int height, int channels) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      if (!image.isDirect()) {
-        throw new IllegalArgumentException("Input ByteBuffer must be direct.");
-      }
-      if (image.order() != java.nio.ByteOrder.nativeOrder()) {
-        throw new IllegalArgumentException(
-            "Input ByteBuffer must use native byte order (ByteOrder.nativeOrder()).");
-      }
-      if (image.position() % Float.BYTES != 0) {
-        throw new IllegalArgumentException(
-            "Input ByteBuffer position (" + image.position() + ") must be 4-byte aligned.");
-      }
-      final long expectedBytes;
-      try {
-        int wh = Math.multiplyExact(width, height);
-        long whc = Math.multiplyExact((long) wh, (long) channels);
-        long totalBytes = Math.multiplyExact(whc, (long) Float.BYTES);
-        if (totalBytes > Integer.MAX_VALUE) {
-          throw new IllegalArgumentException(
-              "ByteBuffer size (width*height*channels*4) exceeds Integer.MAX_VALUE bytes: "
-                  + totalBytes);
-        }
-        expectedBytes = totalBytes;
-      } catch (ArithmeticException e) {
-        throw new IllegalArgumentException(
-            "Overflow while computing width*height*channels*4 for ByteBuffer size.", e);
-      }
-      if (width <= 0 || height <= 0 || channels <= 0 || image.remaining() < expectedBytes) {
-        throw new IllegalArgumentException(
-            "ByteBuffer remaining ("
-                + image.remaining()
-                + ") must be at least width*height*channels*4 ("
-                + expectedBytes
-                + ").");
-      }
-      if (image.remaining() % Float.BYTES != 0) {
-        throw new IllegalArgumentException(
-            "ByteBuffer remaining ("
-                + image.remaining()
-                + ") must be a multiple of 4 (float size).");
-      }
-      // slice() so that getDirectBufferAddress on the native side returns a pointer
-      // starting at the current position, not the base address.
-      int nativeResult = prefillNormalizedImagesInputBuffer(image.slice(), width, height, channels);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  private native int prefillImagesInput(int[] image, int width, int height, int channels);
-
-  private native int prefillImagesInputBuffer(
-      ByteBuffer image, int width, int height, int channels);
-
-  private native int prefillNormalizedImagesInputBuffer(
-      ByteBuffer image, int width, int height, int channels);
-
-  /**
-   * Prefill the KV cache with the given normalized image input.
-   *
-   * @param image Input normalized image as a float array
-   * @param width Input image width
-   * @param height Input image height
-   * @param channels Input image number of channels
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillImages(float[] image, int width, int height, int channels) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int nativeResult = prefillNormalizedImagesInput(image, width, height, channels);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  private native int prefillNormalizedImagesInput(
-      float[] image, int width, int height, int channels);
-
-  /**
-   * Prefill the KV cache with the given preprocessed audio input.
-   *
-   * @param audio Input preprocessed audio as a byte array
-   * @param batch_size Input batch size
-   * @param n_bins Input number of bins
-   * @param n_frames Input number of frames
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillAudio(byte[] audio, int batch_size, int n_bins, int n_frames) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int nativeResult = prefillAudioInput(audio, batch_size, n_bins, n_frames);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  private native int prefillAudioInput(byte[] audio, int batch_size, int n_bins, int n_frames);
-
-  /**
-   * Prefill the KV cache with the given preprocessed audio input.
-   *
-   * @param audio Input preprocessed audio as a float array
-   * @param batch_size Input batch size
-   * @param n_bins Input number of bins
-   * @param n_frames Input number of frames
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillAudio(float[] audio, int batch_size, int n_bins, int n_frames) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int nativeResult = prefillAudioInputFloat(audio, batch_size, n_bins, n_frames);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  private native int prefillAudioInputFloat(
-      float[] audio, int batch_size, int n_bins, int n_frames);
-
-  /**
-   * Prefill the KV cache with the given raw audio input.
-   *
-   * @param audio Input raw audio as a byte array
-   * @param batch_size Input batch size
-   * @param n_channels Input number of channels
-   * @param n_samples Input number of samples
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillRawAudio(byte[] audio, int batch_size, int n_channels, int n_samples) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int nativeResult = prefillRawAudioInput(audio, batch_size, n_channels, n_samples);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  private native int prefillRawAudioInput(
-      byte[] audio, int batch_size, int n_channels, int n_samples);
-
-  /**
-   * Prefill the KV cache with the given text prompt.
-   *
-   * @param prompt The text prompt to prefill.
-   * @throws ExecutorchRuntimeException if the prefill failed
-   */
-  @Experimental
-  public void prefillPrompt(String prompt) {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int nativeResult = prefillTextInput(prompt);
-      if (nativeResult != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  // returns status
-  private native int prefillTextInput(String prompt);
-
-  /**
-   * Reset the context of the LLM. This will clear the KV cache and reset the state of the LLM.
-   *
-   * <p>The startPos will be reset to 0.
-   */
-  public void resetContext() {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      resetContextNative();
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native void resetContextNative();
-
-  /** Stop current generate() before it finishes. */
-  public void stop() {
-    if (mDestroyed) return;
-    stopNative();
-  }
-
-  @DoNotStrip
-  private native void stopNative();
-
-  /** Force loading the module. Otherwise the model is loaded during first generate(). */
-  public void load() {
-    mLock.lock();
-    try {
-      checkNotReentrant();
-      checkNotDestroyed();
-      int err = loadNative();
-      if (err != 0) {
-        throw ExecutorchRuntimeException.makeExecutorchException(err, "Failed to load model");
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native int loadNative();
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt
new file mode 100644
index 00000000000..f95e796b83b
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt
@@ -0,0 +1,898 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.extension.llm
+
+import com.facebook.jni.HybridData
+import com.facebook.jni.annotations.DoNotStrip
+import java.io.Closeable
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+import java.util.concurrent.locks.ReentrantLock
+import org.pytorch.executorch.ExecuTorchRuntime
+import org.pytorch.executorch.ExecutorchRuntimeException
+import org.pytorch.executorch.annotations.Experimental
+
+/**
+ * LlmModule is a wrapper around the Executorch LLM. It provides a simple interface to generate text
+ * from the model.
+ *
+ * Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+class LlmModule
+private constructor(
+    modelType: Int,
+    modulePath: String,
+    tokenizerPath: String,
+    temperature: Float,
+    dataFiles: List<String>,
+    numBos: Int,
+    numEos: Int,
+    loadMode: Int,
+) : Closeable {
+
+  private val mHybridData: HybridData
+  private val mLock = ReentrantLock()
+  @Volatile private var mDestroyed = false
+
+  init {
+    ExecuTorchRuntime.getRuntime()
+    ExecuTorchRuntime.validateFilePath(modulePath, "model path")
+    ExecuTorchRuntime.validateFilePath(tokenizerPath, "tokenizer path")
+    mHybridData =
+        initHybrid(
+            modelType,
+            modulePath,
+            tokenizerPath,
+            temperature,
+            dataFiles,
+            numBos,
+            numEos,
+            loadMode,
+        )
+  }
+
+  /**
+   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
+   * dataFiles.
+   */
+  constructor(
+      modelType: Int,
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+      dataFiles: List<String>,
+      numBos: Int,
+      numEos: Int,
+  ) : this(
+      modelType,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      dataFiles,
+      numBos,
+      numEos,
+      DEFAULT_LOAD_MODE,
+  )
+
+  /**
+   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
+   * dataFiles.
+   */
+  constructor(
+      modelType: Int,
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+      dataFiles: List<String>,
+  ) : this(
+      modelType,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      dataFiles,
+      DEFAULT_BOS,
+      DEFAULT_EOS,
+      DEFAULT_LOAD_MODE,
+  )
+
+  /**
+   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
+   * data path.
+   */
+  constructor(
+      modelType: Int,
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+      dataPath: String?,
+      numBos: Int,
+      numEos: Int,
+  ) : this(
+      modelType,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      listOfNotNull(dataPath),
+      numBos,
+      numEos,
+  )
+
+  /**
+   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
+   * data path.
+   */
+  constructor(
+      modelType: Int,
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+      dataPath: String?,
+  ) : this(
+      modelType,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      dataPath,
+      DEFAULT_BOS,
+      DEFAULT_EOS,
+  )
+
+  /** Constructs a LLM Module for a model with given model path, tokenizer, temperature. */
+  constructor(
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+  ) : this(
+      MODEL_TYPE_TEXT,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      emptyList(),
+      DEFAULT_BOS,
+      DEFAULT_EOS,
+  )
+
+  /**
+   * Constructs a LLM Module for a model with given model path, tokenizer, temperature and data
+   * path.
+   */
+  constructor(
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+      dataPath: String,
+  ) : this(
+      MODEL_TYPE_TEXT,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      listOf(dataPath),
+      DEFAULT_BOS,
+      DEFAULT_EOS,
+  )
+
+  /** Constructs a LLM Module for a model with given path, tokenizer, and temperature. */
+  constructor(
+      modelType: Int,
+      modulePath: String,
+      tokenizerPath: String,
+      temperature: Float,
+  ) : this(
+      modelType,
+      modulePath,
+      tokenizerPath,
+      temperature,
+      emptyList(),
+      DEFAULT_BOS,
+      DEFAULT_EOS,
+  )
+
+  /** Constructs a LLM Module for a model with the given LlmModuleConfig */
+  constructor(
+      config: LlmModuleConfig
+  ) : this(
+      config.modelType,
+      config.modulePath,
+      config.tokenizerPath,
+      config.temperature,
+      listOfNotNull(config.dataPath),
+      config.numBos,
+      config.numEos,
+      config.loadMode,
+  )
+
+  private fun checkNotDestroyed() {
+    if (mDestroyed) throw IllegalStateException("LlmModule has been destroyed")
+  }
+
+  private fun checkNotReentrant() {
+    if (mLock.holdCount > 1) {
+      throw IllegalStateException("Cannot call LlmModule methods from within a callback")
+    }
+  }
+
+  /**
+   * Releases native resources. Callers must ensure no other methods are in-flight. Call [stop] and
+   * wait for [generate] to return before calling this method.
+   */
+  override fun close() {
+    if (mLock.tryLock()) {
+      try {
+        if (mLock.holdCount > 1) {
+          throw IllegalStateException("Cannot close module from within a callback during execution")
+        }
+        if (!mDestroyed) {
+          mDestroyed = true
+          mHybridData.resetNative()
+        }
+      } finally {
+        mLock.unlock()
+      }
+    } else {
+      throw IllegalStateException("Cannot close module while method is executing")
+    }
+  }
+
+  /** @deprecated Use [close] instead. */
+  @Deprecated("Use close() instead", replaceWith = ReplaceWith("close()"))
+  fun resetNative() {
+    close()
+  }
+
+  // --- generate overloads ---
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param prompt Input prompt
+   * @param llmCallback callback object to receive results.
+   */
+  fun generate(prompt: String, llmCallback: LlmCallback) {
+    generate(
+        prompt,
+        DEFAULT_SEQ_LEN,
+        llmCallback,
+        DEFAULT_ECHO,
+        DEFAULT_TEMPERATURE,
+        DEFAULT_BOS,
+        DEFAULT_EOS,
+    )
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param prompt Input prompt
+   * @param seqLen sequence length
+   * @param llmCallback callback object to receive results.
+   */
+  fun generate(prompt: String, seqLen: Int, llmCallback: LlmCallback) {
+    generate(
+        null,
+        0,
+        0,
+        0,
+        prompt,
+        seqLen,
+        llmCallback,
+        DEFAULT_ECHO,
+        DEFAULT_TEMPERATURE,
+        DEFAULT_BOS,
+        DEFAULT_EOS,
+    )
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param prompt Input prompt
+   * @param llmCallback callback object to receive results
+   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
+   */
+  fun generate(prompt: String, llmCallback: LlmCallback, echo: Boolean) {
+    generate(
+        null,
+        0,
+        0,
+        0,
+        prompt,
+        DEFAULT_SEQ_LEN,
+        llmCallback,
+        echo,
+        DEFAULT_TEMPERATURE,
+        DEFAULT_BOS,
+        DEFAULT_EOS,
+    )
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param prompt Input prompt
+   * @param seqLen sequence length
+   * @param llmCallback callback object to receive results
+   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
+   */
+  fun generate(prompt: String, seqLen: Int, llmCallback: LlmCallback, echo: Boolean) {
+    generate(prompt, seqLen, llmCallback, echo, DEFAULT_TEMPERATURE, DEFAULT_BOS, DEFAULT_EOS)
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param prompt Input prompt
+   * @param seqLen sequence length
+   * @param llmCallback callback object to receive results
+   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
+   * @param temperature temperature for sampling (use negative value to use module default)
+   * @param numBos number of BOS tokens to prepend
+   * @param numEos number of EOS tokens to append
+   */
+  fun generate(
+      prompt: String,
+      seqLen: Int,
+      llmCallback: LlmCallback,
+      echo: Boolean,
+      temperature: Float,
+      numBos: Int,
+      numEos: Int,
+  ) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val err = generateNative(prompt, seqLen, llmCallback, echo, temperature, numBos, numEos)
+      if (err != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(err, "Failed to generate")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip
+  private external fun generateNative(
+      prompt: String,
+      seqLen: Int,
+      llmCallback: LlmCallback,
+      echo: Boolean,
+      temperature: Float,
+      numBos: Int,
+      numEos: Int,
+  ): Int
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param prompt Input prompt
+   * @param config the config for generation
+   * @param llmCallback callback object to receive results
+   */
+  fun generate(prompt: String, config: LlmGenerationConfig, llmCallback: LlmCallback) {
+    generate(
+        null,
+        0,
+        0,
+        0,
+        prompt,
+        config.seqLen,
+        llmCallback,
+        config.echo,
+        config.temperature,
+        config.numBos,
+        config.numEos,
+    )
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param image Input image as a byte array
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @param prompt Input prompt
+   * @param seqLen sequence length
+   * @param llmCallback callback object to receive results.
+   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
+   */
+  fun generate(
+      image: IntArray?,
+      width: Int,
+      height: Int,
+      channels: Int,
+      prompt: String,
+      seqLen: Int,
+      llmCallback: LlmCallback,
+      echo: Boolean,
+  ) {
+    generate(
+        image,
+        width,
+        height,
+        channels,
+        prompt,
+        seqLen,
+        llmCallback,
+        echo,
+        DEFAULT_TEMPERATURE,
+        DEFAULT_BOS,
+        DEFAULT_EOS,
+    )
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param image Input image as a byte array
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @param prompt Input prompt
+   * @param seqLen sequence length
+   * @param llmCallback callback object to receive results.
+   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
+   * @param temperature temperature for sampling (use negative value to use module default)
+   */
+  fun generate(
+      image: IntArray?,
+      width: Int,
+      height: Int,
+      channels: Int,
+      prompt: String,
+      seqLen: Int,
+      llmCallback: LlmCallback,
+      echo: Boolean,
+      temperature: Float,
+  ) {
+    generate(
+        image,
+        width,
+        height,
+        channels,
+        prompt,
+        seqLen,
+        llmCallback,
+        echo,
+        temperature,
+        DEFAULT_BOS,
+        DEFAULT_EOS,
+    )
+  }
+
+  /**
+   * Start generating tokens from the module.
+   *
+   * @param image Input image as a byte array
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @param prompt Input prompt
+   * @param seqLen sequence length
+   * @param llmCallback callback object to receive results.
+   * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
+   * @param temperature temperature for sampling (use negative value to use module default)
+   * @param numBos number of BOS tokens to prepend
+   * @param numEos number of EOS tokens to append
+   */
+  fun generate(
+      image: IntArray?,
+      width: Int,
+      height: Int,
+      channels: Int,
+      prompt: String,
+      seqLen: Int,
+      llmCallback: LlmCallback,
+      echo: Boolean,
+      temperature: Float,
+      numBos: Int,
+      numEos: Int,
+  ) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      if (image != null) {
+        val nativeResult = prefillImagesInput(image, width, height, channels)
+        if (nativeResult != 0) {
+          throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+        }
+      }
+      val err = generateNative(prompt, seqLen, llmCallback, echo, temperature, numBos, numEos)
+      if (err != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(err, "Failed to generate")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  // --- prefill methods ---
+
+  /**
+   * Prefill the KV cache with the given image input.
+   *
+   * @param image Input image as a byte array
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillImages(image: IntArray, width: Int, height: Int, channels: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val nativeResult = prefillImagesInput(image, width, height, channels)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  /**
+   * Prefill a multimodal Module with the given image input via a direct ByteBuffer. The buffer data
+   * is accessed directly without JNI array copies, unlike [prefillImages]. The ByteBuffer must
+   * contain raw uint8 pixel data in CHW format with at least channels * height * width bytes
+   * remaining. Only the first channels * height * width bytes from the buffer's current position
+   * are read; the position of the original ByteBuffer is not modified.
+   *
+   * @param image Input image as a direct ByteBuffer containing uint8 pixel data
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @throws IllegalArgumentException if the ByteBuffer is not direct or has insufficient remaining
+   *   bytes
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillImages(image: ByteBuffer, width: Int, height: Int, channels: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      require(image.isDirect) { "Input ByteBuffer must be direct." }
+      val expectedBytes: Long
+      try {
+        val pixels = Math.multiplyExact(width.toLong(), height.toLong())
+        expectedBytes = Math.multiplyExact(pixels, channels.toLong())
+      } catch (ex: ArithmeticException) {
+        throw IllegalArgumentException(
+            "width*height*channels is too large and overflows the allowed range.",
+            ex,
+        )
+      }
+      require(
+          width > 0 &&
+              height > 0 &&
+              channels > 0 &&
+              expectedBytes <= Int.MAX_VALUE.toLong() &&
+              image.remaining().toLong() >= expectedBytes
+      ) {
+        "ByteBuffer remaining (${image.remaining()}) must be at least width*height*channels ($expectedBytes)."
+      }
+      // slice() so that getDirectBufferAddress on the native side returns a pointer
+      // starting at the current position, not the base address.
+      val nativeResult = prefillImagesInputBuffer(image.slice(), width, height, channels)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  /**
+   * Prefill a multimodal Module with the given normalized image input via a direct ByteBuffer. The
+   * buffer data is accessed directly without JNI array copies, unlike [prefillImages]. The
+   * ByteBuffer must contain normalized float pixel data in CHW format with at least channels *
+   * height * width * 4 bytes remaining. Only the first channels * height * width floats from the
+   * buffer's current position are consumed. The buffer must use the platform's native byte order
+   * (set via `buffer.order(ByteOrder.nativeOrder())`).
+   *
+   * @param image Input normalized image as a direct ByteBuffer containing float pixel data in
+   *   native byte order
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @throws IllegalArgumentException if the ByteBuffer is not direct, has insufficient remaining
+   *   bytes, is not float-aligned, or does not use native byte order
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillNormalizedImage(image: ByteBuffer, width: Int, height: Int, channels: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      require(image.isDirect) { "Input ByteBuffer must be direct." }
+      require(image.order() == ByteOrder.nativeOrder()) {
+        "Input ByteBuffer must use native byte order (ByteOrder.nativeOrder())."
+      }
+      require(image.position() % Float.SIZE_BYTES == 0) {
+        "Input ByteBuffer position (${image.position()}) must be 4-byte aligned."
+      }
+      val expectedBytes: Long
+      try {
+        val wh = Math.multiplyExact(width, height)
+        val whc = Math.multiplyExact(wh.toLong(), channels.toLong())
+        val totalBytes = Math.multiplyExact(whc, Float.SIZE_BYTES.toLong())
+        if (totalBytes > Int.MAX_VALUE.toLong()) {
+          throw IllegalArgumentException(
+              "ByteBuffer size (width*height*channels*4) exceeds Integer.MAX_VALUE bytes: $totalBytes",
+          )
+        }
+        expectedBytes = totalBytes
+      } catch (e: ArithmeticException) {
+        throw IllegalArgumentException(
+            "Overflow while computing width*height*channels*4 for ByteBuffer size.",
+            e,
+        )
+      }
+      require(
+          width > 0 && height > 0 && channels > 0 && image.remaining().toLong() >= expectedBytes
+      ) {
+        "ByteBuffer remaining (${image.remaining()}) must be at least width*height*channels*4 ($expectedBytes)."
+      }
+      require(image.remaining() % Float.SIZE_BYTES == 0) {
+        "ByteBuffer remaining (${image.remaining()}) must be a multiple of 4 (float size)."
+      }
+      // slice() so that getDirectBufferAddress on the native side returns a pointer
+      // starting at the current position, not the base address.
+      val nativeResult = prefillNormalizedImagesInputBuffer(image.slice(), width, height, channels)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  private external fun prefillImagesInput(
+      image: IntArray,
+      width: Int,
+      height: Int,
+      channels: Int,
+  ): Int
+
+  private external fun prefillImagesInputBuffer(
+      image: ByteBuffer,
+      width: Int,
+      height: Int,
+      channels: Int,
+  ): Int
+
+  private external fun prefillNormalizedImagesInputBuffer(
+      image: ByteBuffer,
+      width: Int,
+      height: Int,
+      channels: Int,
+  ): Int
+
+  /**
+   * Prefill the KV cache with the given normalized image input.
+   *
+   * @param image Input normalized image as a float array
+   * @param width Input image width
+   * @param height Input image height
+   * @param channels Input image number of channels
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillImages(image: FloatArray, width: Int, height: Int, channels: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val nativeResult = prefillNormalizedImagesInput(image, width, height, channels)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  private external fun prefillNormalizedImagesInput(
+      image: FloatArray,
+      width: Int,
+      height: Int,
+      channels: Int,
+  ): Int
+
+  /**
+   * Prefill the KV cache with the given preprocessed audio input.
+   *
+   * @param audio Input preprocessed audio as a byte array
+   * @param batchSize Input batch size
+   * @param nBins Input number of bins
+   * @param nFrames Input number of frames
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillAudio(audio: ByteArray, batchSize: Int, nBins: Int, nFrames: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val nativeResult = prefillAudioInput(audio, batchSize, nBins, nFrames)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  private external fun prefillAudioInput(
+      audio: ByteArray,
+      batchSize: Int,
+      nBins: Int,
+      nFrames: Int,
+  ): Int
+
+  /**
+   * Prefill the KV cache with the given preprocessed audio input.
+   *
+   * @param audio Input preprocessed audio as a float array
+   * @param batchSize Input batch size
+   * @param nBins Input number of bins
+   * @param nFrames Input number of frames
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillAudio(audio: FloatArray, batchSize: Int, nBins: Int, nFrames: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val nativeResult = prefillAudioInputFloat(audio, batchSize, nBins, nFrames)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  private external fun prefillAudioInputFloat(
+      audio: FloatArray,
+      batchSize: Int,
+      nBins: Int,
+      nFrames: Int,
+  ): Int
+
+  /**
+   * Prefill the KV cache with the given raw audio input.
+   *
+   * @param audio Input raw audio as a byte array
+   * @param batchSize Input batch size
+   * @param nChannels Input number of channels
+   * @param nSamples Input number of samples
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillRawAudio(audio: ByteArray, batchSize: Int, nChannels: Int, nSamples: Int) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val nativeResult = prefillRawAudioInput(audio, batchSize, nChannels, nSamples)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  private external fun prefillRawAudioInput(
+      audio: ByteArray,
+      batchSize: Int,
+      nChannels: Int,
+      nSamples: Int,
+  ): Int
+
+  /**
+   * Prefill the KV cache with the given text prompt.
+   *
+   * @param prompt The text prompt to prefill.
+   * @throws ExecutorchRuntimeException if the prefill failed
+   */
+  @Experimental
+  fun prefillPrompt(prompt: String) {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val nativeResult = prefillTextInput(prompt)
+      if (nativeResult != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(nativeResult, "Prefill failed")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  // returns status
+  private external fun prefillTextInput(prompt: String): Int
+
+  /**
+   * Reset the context of the LLM. This will clear the KV cache and reset the state of the LLM.
+   *
+   * The startPos will be reset to 0.
+   */
+  fun resetContext() {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      resetContextNative()
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun resetContextNative()
+
+  /** Stop current generate() before it finishes. */
+  fun stop() {
+    if (mDestroyed) return
+    stopNative()
+  }
+
+  @DoNotStrip private external fun stopNative()
+
+  /** Force loading the module. Otherwise the model is loaded during first generate(). */
+  fun load() {
+    mLock.lock()
+    try {
+      checkNotReentrant()
+      checkNotDestroyed()
+      val err = loadNative()
+      if (err != 0) {
+        throw ExecutorchRuntimeException.makeExecutorchException(err, "Failed to load model")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun loadNative(): Int
+
+  companion object {
+    const val MODEL_TYPE_TEXT = 1
+    const val MODEL_TYPE_TEXT_VISION = 2
+    const val MODEL_TYPE_MULTIMODAL = 2
+
+    private const val DEFAULT_SEQ_LEN = 128
+    private const val DEFAULT_ECHO = true
+    private const val DEFAULT_TEMPERATURE = -1.0f
+    private const val DEFAULT_BOS = 0
+    private const val DEFAULT_EOS = 0
+    private const val DEFAULT_LOAD_MODE = LlmModuleConfig.LOAD_MODE_MMAP
+
+    @DoNotStrip
+    @JvmStatic
+    private external fun initHybrid(
+        modelType: Int,
+        modulePath: String,
+        tokenizerPath: String,
+        temperature: Float,
+        dataFiles: List<String>,
+        numBos: Int,
+        numEos: Int,
+        loadMode: Int,
+    ): HybridData
+  }
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java
deleted file mode 100644
index feb52a2b34b..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch.extension.llm;
-
-/**
- * Configuration class for initializing a LlmModule.
- *
- * <p>{@link #create()} method and the fluent builder pattern.
- */
-public class LlmModuleConfig {
-  private final String modulePath;
-  private final String tokenizerPath;
-  private final float temperature;
-  private final String dataPath;
-  private final int modelType;
-  private final int numBos;
-  private final int numEos;
-  private final int loadMode;
-
-  /** Load entire model file into a buffer (no mmap). */
-  public static final int LOAD_MODE_FILE = 0;
-
-  /** Load model via mmap without mlock (default). Pages faulted in on demand. */
-  public static final int LOAD_MODE_MMAP = 1;
-
-  /** Load model via mmap and pin all pages with mlock. */
-  public static final int LOAD_MODE_MMAP_USE_MLOCK = 2;
-
-  /** Load model via mmap and attempt mlock, ignoring mlock failures. */
-  public static final int LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS = 3;
-
-  private LlmModuleConfig(Builder builder) {
-    this.modulePath = builder.modulePath;
-    this.tokenizerPath = builder.tokenizerPath;
-    this.temperature = builder.temperature;
-    this.dataPath = builder.dataPath;
-    this.modelType = builder.modelType;
-    this.numBos = builder.numBos;
-    this.numEos = builder.numEos;
-    this.loadMode = builder.loadMode;
-  }
-
-  /** Model type constant for text-only models. */
-  public static final int MODEL_TYPE_TEXT = 1;
-
-  /** Model type constant for text-and-vision multimodal models. */
-  public static final int MODEL_TYPE_TEXT_VISION = 2;
-
-  /** Model type constant for generic multimodal models. */
-  public static final int MODEL_TYPE_MULTIMODAL = 2;
-
-  /**
-   * Creates a new Builder instance for constructing LlmModuleConfig objects.
-   *
-   * @return a new Builder instance with default configuration values
-   */
-  public static Builder create() {
-    return new Builder();
-  }
-
-  // Getters with documentation
-  /**
-   * @return Path to the compiled model module (.pte file)
-   */
-  public String getModulePath() {
-    return modulePath;
-  }
-
-  /**
-   * @return Path to the tokenizer file or directory
-   */
-  public String getTokenizerPath() {
-    return tokenizerPath;
-  }
-
-  /**
-   * @return Temperature value for sampling (higher = more random)
-   */
-  public float getTemperature() {
-    return temperature;
-  }
-
-  /**
-   * @return Optional path to additional data files
-   */
-  public String getDataPath() {
-    return dataPath;
-  }
-
-  /**
-   * @return Type of model (text-only or text-vision)
-   */
-  public int getModelType() {
-    return modelType;
-  }
-
-  /**
-   * @return Number of BOS tokens to prepend
-   */
-  public int getNumBos() {
-    return numBos;
-  }
-
-  /**
-   * @return Number of EOS tokens to append
-   */
-  public int getNumEos() {
-    return numEos;
-  }
-
-  /**
-   * @return Load mode for the model file (one of LOAD_MODE_* constants)
-   */
-  public int getLoadMode() {
-    return loadMode;
-  }
-
-  /**
-   * Builder class for constructing LlmModuleConfig instances with optional parameters.
-   *
-   * <p>The builder provides a fluent interface for configuring model parameters and validates
-   * required fields before construction.
-   */
-  public static class Builder {
-    private String modulePath;
-    private String tokenizerPath;
-    private float temperature = 0.8f;
-    private String dataPath = "";
-    private int modelType = MODEL_TYPE_TEXT;
-    private int numBos = 0;
-    private int numEos = 0;
-    private int loadMode = LOAD_MODE_MMAP;
-
-    Builder() {}
-
-    /**
-     * Sets the path to the module.
-     *
-     * @param modulePath Path to module
-     * @return This builder instance for method chaining
-     */
-    public Builder modulePath(String modulePath) {
-      this.modulePath = modulePath;
-      return this;
-    }
-
-    /**
-     * Sets the path to the tokenizer.
-     *
-     * @param tokenizerPath Path to tokenizer
-     * @return This builder instance for method chaining
-     */
-    public Builder tokenizerPath(String tokenizerPath) {
-      this.tokenizerPath = tokenizerPath;
-      return this;
-    }
-
-    /**
-     * Sets the temperature for sampling generation.
-     *
-     * @param temperature Temperature value (typical range 0.0-1.0)
-     * @return This builder instance for method chaining
-     */
-    public Builder temperature(float temperature) {
-      this.temperature = temperature;
-      return this;
-    }
-
-    /**
-     * Sets the path to optional additional data files.
-     *
-     * @param dataPath Path to supplementary data resources
-     * @return This builder instance for method chaining
-     */
-    public Builder dataPath(String dataPath) {
-      this.dataPath = dataPath;
-      return this;
-    }
-
-    /**
-     * Sets the model type (text-only or multimodal).
-     *
-     * @param modelType One of MODEL_TYPE_TEXT, MODEL_TYPE_TEXT_VISION, MODEL_TYPE_MULTIMODAL
-     * @return This builder instance for method chaining
-     */
-    public Builder modelType(int modelType) {
-      this.modelType = modelType;
-      return this;
-    }
-
-    /**
-     * Sets the number of BOS tokens to prepend.
-     *
-     * @param numBos number of BOS tokens
-     * @return This builder instance for method chaining
-     */
-    public Builder numBos(int numBos) {
-      this.numBos = numBos;
-      return this;
-    }
-
-    /**
-     * Sets the number of EOS tokens to append.
-     *
-     * @param numEos number of EOS tokens
-     * @return This builder instance for method chaining
-     */
-    public Builder numEos(int numEos) {
-      this.numEos = numEos;
-      return this;
-    }
-
-    /**
-     * Sets the load mode for the model file. Defaults to {@link #LOAD_MODE_MMAP} (mmap without
-     * mlock), which avoids pinning model pages in RAM.
-     *
-     * @param loadMode One of LOAD_MODE_FILE, LOAD_MODE_MMAP, LOAD_MODE_MMAP_USE_MLOCK,
-     *     LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS
-     * @return This builder instance for method chaining
-     * @throws IllegalArgumentException if {@code loadMode} is not one of the supported constants
-     */
-    public Builder loadMode(int loadMode) {
-      if (loadMode != LOAD_MODE_FILE
-          && loadMode != LOAD_MODE_MMAP
-          && loadMode != LOAD_MODE_MMAP_USE_MLOCK
-          && loadMode != LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS) {
-        throw new IllegalArgumentException("Unknown load mode: " + loadMode);
-      }
-      this.loadMode = loadMode;
-      return this;
-    }
-
-    /**
-     * Constructs the LlmModuleConfig instance with validated parameters.
-     *
-     * @return New LlmModuleConfig instance with configured values
-     * @throws IllegalArgumentException if required fields are missing
-     */
-    public LlmModuleConfig build() {
-      if (modulePath == null || tokenizerPath == null) {
-        throw new IllegalArgumentException("Module path and tokenizer path are required");
-      }
-      return new LlmModuleConfig(this);
-    }
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt
new file mode 100644
index 00000000000..2d65633bb9f
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.extension.llm
+
+/**
+ * Configuration class for initializing a LlmModule.
+ *
+ * Use [create] method and the fluent builder pattern.
+ */
+class LlmModuleConfig
+private constructor(
+    val modulePath: String,
+    val tokenizerPath: String,
+    val temperature: Float,
+    val dataPath: String?,
+    val modelType: Int,
+    val numBos: Int,
+    val numEos: Int,
+    val loadMode: Int,
+) {
+
+  companion object {
+    /** Load entire model file into a buffer (no mmap). */
+    const val LOAD_MODE_FILE = 0
+
+    /** Load model via mmap without mlock (default). Pages faulted in on demand. */
+    const val LOAD_MODE_MMAP = 1
+
+    /** Load model via mmap and pin all pages with mlock. */
+    const val LOAD_MODE_MMAP_USE_MLOCK = 2
+
+    /** Load model via mmap and attempt mlock, ignoring mlock failures. */
+    const val LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS = 3
+
+    /** Model type constant for text-only models. */
+    const val MODEL_TYPE_TEXT = 1
+
+    /** Model type constant for text-and-vision multimodal models. */
+    const val MODEL_TYPE_TEXT_VISION = 2
+
+    /** Model type constant for generic multimodal models. */
+    const val MODEL_TYPE_MULTIMODAL = 2
+
+    /**
+     * Creates a new Builder instance for constructing LlmModuleConfig objects.
+     *
+     * @return a new Builder instance with default configuration values
+     */
+    @JvmStatic fun create(): Builder = Builder()
+  }
+
+  /**
+   * Builder class for constructing LlmModuleConfig instances with optional parameters.
+   *
+   * The builder provides a fluent interface for configuring model parameters and validates required
+   * fields before construction.
+   */
+  class Builder internal constructor() {
+    private var modulePath: String? = null
+    private var tokenizerPath: String? = null
+    private var temperature: Float = 0.8f
+    private var dataPath: String? = ""
+    private var modelType: Int = MODEL_TYPE_TEXT
+    private var numBos: Int = 0
+    private var numEos: Int = 0
+    private var loadMode: Int = LOAD_MODE_MMAP
+
+    /** Sets the path to the module. */
+    fun modulePath(modulePath: String): Builder = apply { this.modulePath = modulePath }
+
+    /** Sets the path to the tokenizer. */
+    fun tokenizerPath(tokenizerPath: String): Builder = apply { this.tokenizerPath = tokenizerPath }
+
+    /** Sets the temperature for sampling generation. */
+    fun temperature(temperature: Float): Builder = apply { this.temperature = temperature }
+
+    /** Sets the path to optional additional data files. */
+    fun dataPath(dataPath: String?): Builder = apply { this.dataPath = dataPath }
+
+    /** Sets the model type (text-only or multimodal). */
+    fun modelType(modelType: Int): Builder = apply { this.modelType = modelType }
+
+    /** Sets the number of BOS tokens to prepend. */
+    fun numBos(numBos: Int): Builder = apply { this.numBos = numBos }
+
+    /** Sets the number of EOS tokens to append. */
+    fun numEos(numEos: Int): Builder = apply { this.numEos = numEos }
+
+    /**
+     * Sets the load mode for the model file. Defaults to [LOAD_MODE_MMAP] (mmap without mlock),
+     * which avoids pinning model pages in RAM.
+     *
+     * @throws IllegalArgumentException if loadMode is not one of the supported constants
+     */
+    fun loadMode(loadMode: Int): Builder {
+      require(
+          loadMode == LOAD_MODE_FILE ||
+              loadMode == LOAD_MODE_MMAP ||
+              loadMode == LOAD_MODE_MMAP_USE_MLOCK ||
+              loadMode == LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS
+      ) {
+        "Unknown load mode: $loadMode"
+      }
+      return apply { this.loadMode = loadMode }
+    }
+
+    /**
+     * Constructs the LlmModuleConfig instance with validated parameters.
+     *
+     * @throws IllegalArgumentException if required fields are missing
+     */
+    fun build(): LlmModuleConfig {
+      require(modulePath != null && tokenizerPath != null) {
+        "Module path and tokenizer path are required"
+      }
+      return LlmModuleConfig(
+          modulePath!!,
+          tokenizerPath!!,
+          temperature,
+          dataPath,
+          modelType,
+          numBos,
+          numEos,
+          loadMode,
+      )
+    }
+  }
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/package-info.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/package-info.java
deleted file mode 100644
index 86e19d09133..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/package-info.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * ExecuTorch LLM extension for Android.
- *
- * <p>This package provides Java bindings for running large language models (LLMs) on Android using
- * ExecuTorch. It supports text generation, tokenization, and streaming token callbacks.
- *
- * <h2>Quick Start</h2>
- *
- * <pre>{@code
- * import org.pytorch.executorch.extension.llm.LlmModule;
- *
- * // Load a Llama model
- * LlmModule llm = new LlmModule(
- *     "/data/local/tmp/llama.pte",
- *     "/data/local/tmp/tokenizer.bin",
- *     0.8f
- * );
- * llm.load();
- *
- * // Generate text token by token
- * llm.generate("Hello, my name is", 200, new LlmCallback() {
- *     public void onResult(String token) {
- *         System.out.print(token);
- *     }
- *     public void onStats(String stats) {
- *         System.out.println("\nStats: " + stats);
- *     }
- * });
- * }</pre>
- *
- * <h2>Key Classes</h2>
- *
- * <ul>
- *   <li>{@link org.pytorch.executorch.extension.llm.LlmModule} — load and run an LLM
- *   <li>{@link org.pytorch.executorch.extension.llm.LlmModuleConfig} — configure model paths and
- *       settings
- *   <li>{@link org.pytorch.executorch.extension.llm.LlmGenerationConfig} — control generation
- *       (temperature, seq length)
- * </ul>
- *
- * <h2>More Resources</h2>
- *
- * <ul>
- *   <li><a
- *       href="https://github.com/meta-pytorch/executorch-examples/tree/main/llm/android/LlamaDemo">
- *       Llama Android Demo App</a> — full working app with UI
- *   <li><a href="https://pytorch.org/executorch/main/using-executorch-android.html">Using
- *       ExecuTorch on Android</a>
- * </ul>
- */
-package org.pytorch.executorch.extension.llm;

From 6bda6c490ed8c2e2ac02049725b9a454dc92ec07 Mon Sep 17 00:00:00 2001
From: Gregory Comer <gjcomer@meta.com>
Date: Fri, 22 May 2026 18:25:34 -0700
Subject: [PATCH 004/103] Globally serialize XNNPACK execution, add logging
 (#19742)

Differential Revision: D106123930

Pull Request resolved: https://github.com/pytorch/executorch/pull/19742
---
 backends/xnnpack/runtime/XNNPACKBackend.cpp | 53 ++++++++++++++++++++-
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
index c20fa985f46..2fe1e4d162e 100644
--- a/backends/xnnpack/runtime/XNNPACKBackend.cpp
+++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp
@@ -16,6 +16,7 @@
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/executor/pte_data_map.h>
 
+#include <cinttypes>
 #include <memory>
 #include <mutex>
 
@@ -41,6 +42,13 @@ using executorch::runtime::FreeableBuffer;
 using executorch::runtime::Result;
 using executorch::runtime::Span;
 
+// Global mutex for all XNNPACK operations. This is temporary, tracked by
+// T272407942.
+static std::mutex& global_xnnpack_mutex() {
+  static std::mutex m;
+  return m;
+}
+
 class XnnpackBackend final
     : public ::executorch::ET_RUNTIME_NAMESPACE::BackendInterface {
  public:
@@ -66,6 +74,8 @@ class XnnpackBackend final
       BackendInitContext& context,
       FreeableBuffer* processed,
       ArrayRef<CompileSpec> compile_specs) const override {
+    const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
+
     auto executor = context.get_runtime_allocator()
                         ->allocateInstance<xnnpack::delegate::XNNExecutor>();
     if (executor == nullptr) {
@@ -129,6 +139,17 @@ class XnnpackBackend final
           Error, "XNNCompiler::compileModel failed: 0x%x", (unsigned int)err);
       return err;
     }
+
+    ET_LOG(
+        Info,
+        "XnnpackBackend::init delegate=%p workspace_id=%" PRIu64
+        " workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s",
+        (void*)executor,
+        workspace->id(),
+        (void*)workspace_ptr,
+        program_id,
+        use_weight_cache ? "true" : "false");
+
     return executor;
   }
 
@@ -136,15 +157,27 @@ class XnnpackBackend final
       BackendExecutionContext& context,
       DelegateHandle* handle,
       Span<EValue*> args) const override {
+    const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
+
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
+    auto workspace = executor->get_workspace();
+    ET_LOG(
+        Info,
+        "XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64
+        " num_args=%zu weight_cache=%s",
+        (void*)executor,
+        workspace->id(),
+        (size_t)args.size(),
+        executor->uses_weight_cache() ? "true" : "false");
+
     std::unique_lock<std::mutex> lock_weights_cache(
         weights_cache_mutex_, std::defer_lock);
     if (executor->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
 
-    auto [raii_lock, _] = executor->get_workspace()->acquire();
+    auto [raii_lock, _] = workspace->acquire();
 
     // Prepare Inputs/Outputs and Propagate Input Shapes
     Error err = executor->prepare_args(args);
@@ -161,12 +194,29 @@ class XnnpackBackend final
     // Convert output data types if necessary (e.g., int32 -> int64 for Long)
     err = executor->convert_outputs(args);
 
+    ET_LOG(
+        Info,
+        "XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64
+        " err=0x%x",
+        (void*)executor,
+        workspace->id(),
+        (unsigned int)err);
+
     return err;
   }
 
   void destroy(DelegateHandle* handle) const override {
     if (handle != nullptr) {
+      const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
+
       auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
+      auto workspace = executor->get_workspace();
+
+      ET_LOG(
+          Info,
+          "XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64,
+          (void*)executor,
+          workspace->id());
 
 #ifdef ENABLE_XNNPACK_PROFILING
       executor->print_avg_op_timings();
@@ -183,7 +233,6 @@ class XnnpackBackend final
       // the same backend instance. Make sure to hold onto the workspace
       // shared_ptr, as the pointer in the executor is freed, which includes
       // the mutex referenced by raii_lock.
-      auto workspace = executor->get_workspace();
       auto [raii_lock, _] = workspace->acquire();
 
       // XNNExecutor is not trivially destructible. Since this was constructed

From 12f62f2eb869eddbe4c612efe3f957bfc965aff0 Mon Sep 17 00:00:00 2001
From: Gasoonjia <gasoonjia@icloud.com>
Date: Fri, 22 May 2026 20:48:11 -0700
Subject: [PATCH 005/103] [ET Device Support] Module: allocate device memory
 for planned buffers (#19746)

https://github.com/pytorch/executorch/pull/18476 clone version due to
bot crash
---
 extension/module/module.cpp                   |  78 ++++++-
 extension/module/module.h                     |   9 +
 extension/module/targets.bzl                  |   1 +
 .../module/test/module_device_memory_test.cpp | 218 ++++++++++++++++++
 extension/module/test/targets.bzl             |  22 +-
 .../executorch/build/build_variables.bzl      |   2 +
 test/models/targets.bzl                       |   1 +
 7 files changed, 328 insertions(+), 3 deletions(-)
 create mode 100644 extension/module/test/module_device_memory_test.cpp

diff --git a/extension/module/module.cpp b/extension/module/module.cpp
index 5422fb15b71..11fea031603 100644
--- a/extension/module/module.cpp
+++ b/extension/module/module.cpp
@@ -13,6 +13,7 @@
 #include <executorch/extension/flat_tensor/flat_tensor_data_map.h>
 #include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
 #include <executorch/extension/named_data_map/merged_data_map.h>
+#include <executorch/runtime/core/device_memory_buffer.h>
 #include <executorch/runtime/platform/runtime.h>
 
 namespace executorch {
@@ -367,6 +368,51 @@ Module::make_planned_memory_with_shared_arenas(
   return planned;
 }
 
+std::unique_ptr<Module::PlannedMemory> Module::make_planned_memory_with_devices(
+    const ET_RUNTIME_NAMESPACE::MethodMeta& method_meta) {
+  auto planned = std::make_unique<PlannedMemory>();
+  const size_t num_buffers = method_meta.num_memory_planned_buffers();
+  planned->planned_buffers.reserve(num_buffers);
+  planned->planned_spans.reserve(num_buffers);
+  planned->device_buffers.reserve(num_buffers);
+  planned->planned_devices.reserve(num_buffers);
+
+  for (size_t i = 0; i < num_buffers; ++i) {
+    auto size = method_meta.memory_planned_buffer_size(i);
+    ET_CHECK_MSG(size.ok(), "Failed to get buffer size for index %zu", i);
+    auto device = method_meta.memory_planned_buffer_device(i);
+    ET_CHECK_MSG(device.ok(), "Failed to get buffer device for index %zu", i);
+    planned->planned_devices.push_back(device.get());
+
+    if (device->is_cpu()) {
+      planned->planned_buffers.emplace_back(size.get());
+      planned->planned_spans.emplace_back(
+          planned->planned_buffers.back().data(), size.get());
+    } else {
+      // Allocate device memory via DeviceAllocator and store the RAII buffer.
+      planned->planned_buffers.emplace_back(); // empty CPU placeholder
+      auto dmb = runtime::DeviceMemoryBuffer::create(
+          size.get(), device->type(), device->index());
+      ET_CHECK_MSG(
+          dmb.ok(),
+          "Failed to allocate device memory for buffer %zu (device_type=%d)",
+          i,
+          static_cast<int>(device->type()));
+      planned->planned_spans.emplace_back(dmb->as_span());
+      planned->device_buffers.push_back(std::move(dmb.get()));
+    }
+  }
+
+  // HierarchicalAllocator owns the per-buffer Device metadata so the
+  // MemoryManager can later expose it via planned_buffer_devices().
+  planned->planned_memory = std::make_unique<runtime::HierarchicalAllocator>(
+      runtime::Span<runtime::Span<uint8_t>>(
+          planned->planned_spans.data(), planned->planned_spans.size()),
+      runtime::Span<const runtime::etensor::Device>(
+          planned->planned_devices.data(), planned->planned_devices.size()));
+  return planned;
+}
+
 runtime::Result<std::vector<size_t>> Module::get_mem_planned_buffer_sizes(
     const std::string& method_name) {
   auto meta_res = program_->method_meta(method_name.c_str());
@@ -422,10 +468,38 @@ runtime::Error Module::load_method(
     MethodHolder method_holder;
 
     if (!planned_memory) {
-      if (!share_memory_arenas_) {
+      // Check if any buffers need device memory allocation.
+      auto meta_res = program_->method_meta(method_name.c_str());
+      ET_CHECK_OK_OR_RETURN_ERROR(meta_res.error());
+      auto& meta = meta_res.get();
+
+      bool has_device_buffers = false;
+      for (size_t i = 0; i < meta.num_memory_planned_buffers(); ++i) {
+        auto dev = meta.memory_planned_buffer_device(i);
+        if (dev.ok() && !dev->is_cpu()) {
+          has_device_buffers = true;
+          break;
+        }
+      }
+
+      if (has_device_buffers) {
+        // Device memory with shared arenas is not yet supported.
+        ET_CHECK_OR_RETURN_ERROR(
+            !share_memory_arenas_,
+            NotSupported,
+            "Device memory buffers are not yet compatible with "
+            "share_memory_arenas. Please disable share_memory_arenas "
+            "when using models with device-planned memory.");
+
+        // Device-aware path: allocate CPU and device buffers. The device
+        // span is owned by the HierarchicalAllocator inside PlannedMemory.
+        method_holder.planned_memory = make_planned_memory_with_devices(meta);
+        planned_memory = method_holder.planned_memory->planned_memory.get();
+      } else if (!share_memory_arenas_) {
         auto sizes_res = get_mem_planned_buffer_sizes(method_name);
         ET_CHECK_OK_OR_RETURN_ERROR(sizes_res.error());
         method_holder.planned_memory = make_planned_memory(sizes_res.get());
+        planned_memory = method_holder.planned_memory->planned_memory.get();
       } else {
         auto sizes_res = get_mem_planned_buffer_sizes(method_name);
         ET_CHECK_OK_OR_RETURN_ERROR(sizes_res.error());
@@ -442,8 +516,8 @@ runtime::Error Module::load_method(
         }
         method_holder.planned_memory =
             make_planned_memory_with_shared_arenas(sizes, shared_arenas_);
+        planned_memory = method_holder.planned_memory->planned_memory.get();
       }
-      planned_memory = method_holder.planned_memory->planned_memory.get();
     }
 
     method_holder.memory_manager = std::make_unique<runtime::MemoryManager>(
diff --git a/extension/module/module.h b/extension/module/module.h
index 47ead23032e..91c7feaad9b 100644
--- a/extension/module/module.h
+++ b/extension/module/module.h
@@ -18,6 +18,8 @@
 #include <executorch/runtime/backend/options.h>
 #include <executorch/runtime/executor/program.h>
 
+#include <executorch/runtime/core/device_memory_buffer.h>
+
 #ifdef USE_ATEN_LIB
 #define ET_MODULE_NAMESPACE module::aten
 #else // !USE_ATEN_LIB
@@ -716,6 +718,11 @@ class Module {
   struct PlannedMemory {
     std::vector<std::vector<uint8_t>> planned_buffers;
     std::vector<runtime::Span<uint8_t>> planned_spans;
+    std::vector<runtime::DeviceMemoryBuffer> device_buffers;
+    /// Per-buffer Device (type + index) metadata used by
+    /// HierarchicalAllocator. Owns the storage backing the device span the
+    /// allocator references, so it must outlive `planned_memory`.
+    std::vector<runtime::etensor::Device> planned_devices;
     std::unique_ptr<runtime::HierarchicalAllocator> planned_memory;
   };
   std::unique_ptr<PlannedMemory> make_planned_memory(
@@ -723,6 +730,8 @@ class Module {
   std::unique_ptr<PlannedMemory> make_planned_memory_with_shared_arenas(
       const std::vector<size_t>& buffer_sizes,
       std::vector<std::vector<uint8_t>>& shared_arenas);
+  std::unique_ptr<PlannedMemory> make_planned_memory_with_devices(
+      const ET_RUNTIME_NAMESPACE::MethodMeta& method_meta);
   runtime::Result<std::vector<size_t>> get_mem_planned_buffer_sizes(
       const std::string& method_name);
   runtime::Result<std::vector<size_t>> get_max_mem_planned_buffer_sizes();
diff --git a/extension/module/targets.bzl b/extension/module/targets.bzl
index fa80203831a..e622b138ff6 100644
--- a/extension/module/targets.bzl
+++ b/extension/module/targets.bzl
@@ -30,6 +30,7 @@ def define_common_targets():
                 "//executorch/runtime/backend:backend_options",
                 "//executorch/runtime/backend:backend_options_map",
                 "//executorch/runtime/executor:program_no_prim_ops" + aten_suffix,
+                "//executorch/runtime/core:device_memory_buffer",
             ],
         )
 
diff --git a/extension/module/test/module_device_memory_test.cpp b/extension/module/test/module_device_memory_test.cpp
new file mode 100644
index 00000000000..5031273ac2b
--- /dev/null
+++ b/extension/module/test/module_device_memory_test.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/**
+ * Tests that Module's device-aware memory allocation path works correctly.
+ *
+ * Uses ModuleAddWithDevice.pte which has:
+ *   non_const_buffer_sizes: [0, 48]  (1 buffer, index 0 reserved)
+ *   non_const_buffer_device: [{buffer_idx=1, device_type=CUDA, device_index=0}]
+ *
+ * Since we don't have a real CUDA backend, we test that:
+ * 1. CPU-only models load through Module without invoking device allocator
+ * 2. Device-annotated models trigger DeviceMemoryBuffer::create via a mock
+ */
+
+#include <executorch/extension/module/module.h>
+
+#include <gtest/gtest.h>
+
+#include <executorch/runtime/core/device_allocator.h>
+#include <executorch/runtime/core/device_memory_buffer.h>
+#include <executorch/runtime/platform/runtime.h>
+
+using executorch::extension::Module;
+using executorch::runtime::DeviceAllocator;
+using executorch::runtime::DeviceMemoryBuffer;
+using executorch::runtime::Error;
+using executorch::runtime::register_device_allocator;
+using executorch::runtime::Result;
+using executorch::runtime::etensor::DeviceIndex;
+using executorch::runtime::etensor::DeviceType;
+
+namespace {
+
+class MockCudaAllocator : public DeviceAllocator {
+ public:
+  Result<void*> allocate(
+      size_t nbytes,
+      DeviceIndex index,
+      size_t alignment = kDefaultAlignment) override {
+    (void)alignment;
+    allocate_count_++;
+    last_allocate_size_ = nbytes;
+    last_allocate_index_ = index;
+    buffer_ = std::make_unique<uint8_t[]>(nbytes);
+    return static_cast<void*>(buffer_.get());
+  }
+
+  void deallocate(void* ptr, DeviceIndex index) override {
+    deallocate_count_++;
+    buffer_.reset();
+  }
+
+  Error copy_host_to_device(void*, const void*, size_t, DeviceIndex) override {
+    return Error::Ok;
+  }
+
+  Error copy_device_to_host(void*, const void*, size_t, DeviceIndex) override {
+    return Error::Ok;
+  }
+
+  DeviceType device_type() const override {
+    return DeviceType::CUDA;
+  }
+
+  int allocate_count_ = 0;
+  int deallocate_count_ = 0;
+  size_t last_allocate_size_ = 0;
+  DeviceIndex last_allocate_index_ = -1;
+
+ private:
+  std::unique_ptr<uint8_t[]> buffer_;
+};
+
+} // namespace
+
+static MockCudaAllocator g_mock_cuda;
+
+class ModuleDeviceMemoryTest : public ::testing::Test {
+ protected:
+  static void SetUpTestSuite() {
+    executorch::runtime::runtime_init();
+    register_device_allocator(&g_mock_cuda);
+  }
+
+  void SetUp() override {
+    g_mock_cuda.allocate_count_ = 0;
+    g_mock_cuda.deallocate_count_ = 0;
+    g_mock_cuda.last_allocate_size_ = 0;
+    g_mock_cuda.last_allocate_index_ = -1;
+  }
+};
+
+TEST_F(ModuleDeviceMemoryTest, CpuOnlyModelDoesNotAllocateDeviceMemory) {
+  const char* path = std::getenv("ET_MODULE_ADD_PATH");
+  ASSERT_NE(path, nullptr) << "ET_MODULE_ADD_PATH not set";
+
+  Module module(path);
+  auto err = module.load_method("forward");
+  ASSERT_EQ(err, Error::Ok);
+
+  EXPECT_EQ(g_mock_cuda.allocate_count_, 0)
+      << "CPU-only model should not allocate device memory";
+}
+
+TEST_F(ModuleDeviceMemoryTest, DeviceMemoryBufferCreateCallsAllocator) {
+  // Directly test DeviceMemoryBuffer::create with the registered mock.
+  // This verifies the RAII allocation/deallocation path that Module uses.
+  {
+    auto result = DeviceMemoryBuffer::create(48, DeviceType::CUDA, 0);
+    ASSERT_TRUE(result.ok());
+    auto buf = std::move(result.get());
+
+    EXPECT_EQ(g_mock_cuda.allocate_count_, 1);
+    EXPECT_EQ(g_mock_cuda.last_allocate_size_, 48);
+    EXPECT_EQ(g_mock_cuda.last_allocate_index_, 0);
+    EXPECT_NE(buf.data(), nullptr);
+    EXPECT_EQ(buf.size(), 48);
+
+    // as_span() wraps the device pointer for HierarchicalAllocator.
+    auto span = buf.as_span();
+    EXPECT_EQ(span.data(), static_cast<uint8_t*>(buf.data()));
+    EXPECT_EQ(span.size(), 48);
+
+    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
+  }
+  // RAII deallocation on scope exit.
+  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
+}
+
+TEST_F(ModuleDeviceMemoryTest, DeviceModelMethodMetaReportsCudaBuffer) {
+  // Verify MethodMeta reports the correct device for buffers in the
+  // device-annotated model, without needing to load the full method.
+  const char* path = std::getenv("ET_MODULE_ADD_WITH_DEVICE_PATH");
+  ASSERT_NE(path, nullptr) << "ET_MODULE_ADD_WITH_DEVICE_PATH not set";
+
+  Module module(path);
+  auto err = module.load();
+  ASSERT_EQ(err, Error::Ok);
+
+  auto meta = module.method_meta("forward");
+  ASSERT_TRUE(meta.ok());
+
+  // ModuleAddWithDevice has 1 planned buffer (48 bytes) on CUDA.
+  ASSERT_EQ(meta->num_memory_planned_buffers(), 1);
+
+  auto size = meta->memory_planned_buffer_size(0);
+  ASSERT_TRUE(size.ok());
+  EXPECT_EQ(size.get(), 48);
+
+  auto device = meta->memory_planned_buffer_device(0);
+  ASSERT_TRUE(device.ok());
+  EXPECT_EQ(device->type(), DeviceType::CUDA);
+  EXPECT_EQ(device->index(), 0);
+}
+
+TEST_F(ModuleDeviceMemoryTest, DeviceModelWithSharedArenasReturnsNotSupported) {
+  const char* path = std::getenv("ET_MODULE_ADD_WITH_DEVICE_PATH");
+  ASSERT_NE(path, nullptr) << "ET_MODULE_ADD_WITH_DEVICE_PATH not set";
+
+  // share_memory_arenas = true with a device-annotated model should fail.
+  Module module(
+      path,
+      Module::LoadMode::File,
+      /*event_tracer=*/nullptr,
+      /*memory_allocator=*/nullptr,
+      /*temp_allocator=*/nullptr,
+      /*share_memory_arenas=*/true);
+
+  auto err = module.load_method("forward");
+  EXPECT_EQ(err, Error::NotSupported);
+}
+
+TEST_F(
+    ModuleDeviceMemoryTest,
+    LoadMethodAllocatesDeviceMemoryAndDeallocatesOnDestroy) {
+  const char* path = std::getenv("ET_MODULE_ADD_WITH_DEVICE_PATH");
+  ASSERT_NE(path, nullptr) << "ET_MODULE_ADD_WITH_DEVICE_PATH not set";
+
+  {
+    Module module(path);
+    auto err = module.load_method("forward");
+
+    // Regardless of whether load_method succeeds or fails (e.g. due to
+    // backend init issues), the device-aware memory allocation path
+    // (make_planned_memory_with_devices) runs BEFORE backend init.
+    EXPECT_EQ(g_mock_cuda.allocate_count_, 1)
+        << "Expected 1 device allocation for the CUDA buffer"
+        << " (actual: " << g_mock_cuda.allocate_count_ << ")"
+        << ", deallocate_count=" << g_mock_cuda.deallocate_count_
+        << ", load_method returned error=" << static_cast<int>(err);
+    EXPECT_EQ(g_mock_cuda.last_allocate_size_, 48)
+        << "Expected 48 bytes allocated (3 CUDA tensors sharing one buffer)";
+    EXPECT_EQ(g_mock_cuda.last_allocate_index_, 0)
+        << "Expected device_index=0 (cuda:0)";
+
+    if (err == Error::Ok) {
+      // Success path: MethodHolder moved into methods_ map.
+      // DeviceMemoryBuffer is alive as long as Module is alive.
+      EXPECT_EQ(g_mock_cuda.deallocate_count_, 0)
+          << "No deallocation while method is loaded";
+    } else {
+      // Error path: local MethodHolder destroyed on return from load_method.
+      // RAII deallocation already happened.
+      EXPECT_EQ(g_mock_cuda.deallocate_count_, 1)
+          << "RAII deallocation on error path";
+    }
+  }
+
+  // After Module destroyed, all device memory must be freed.
+  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1)
+      << "Expected deallocation after Module destroyed";
+}
diff --git a/extension/module/test/targets.bzl b/extension/module/test/targets.bzl
index f0d7e449efd..4dc3fb537f3 100644
--- a/extension/module/test/targets.bzl
+++ b/extension/module/test/targets.bzl
@@ -28,7 +28,7 @@ def define_common_targets(is_fbcode=False):
             aten_suffix = ("_aten" if aten_mode else "")
 
             runtime.cxx_test(
-                name = "test" + aten_suffix,
+                name = "module_test" + aten_suffix,
                 srcs = [
                     "module_test.cpp",
                 ],
@@ -68,6 +68,26 @@ def define_common_targets(is_fbcode=False):
                 ],
             )
 
+            runtime.cxx_test(
+                name = "module_device_memory_test" + aten_suffix,
+                srcs = [
+                    "module_device_memory_test.cpp",
+                ],
+                deps = [
+                    "//executorch/kernels/portable:generated_lib" + aten_suffix,
+                    "//executorch/extension/module:module" + aten_suffix,
+                    "//executorch/runtime/core:device_allocator",
+                    "//executorch/runtime/core:device_memory_buffer",
+                ],
+                env = {
+                    "ET_MODULE_ADD_WITH_DEVICE_PATH": "$(location fbcode//executorch/test/models:exported_program_with_device_info[ModuleAddWithDevice.pte])",
+                    "ET_MODULE_ADD_PATH": "$(location fbcode//executorch/test/models:exported_programs[ModuleAdd.pte])",
+                },
+                compiler_flags = [
+                    "-Wno-error=deprecated-declarations",
+                ],
+            )
+
     runtime.filegroup(
         name = "resources",
         srcs = native.glob([
diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl
index b0545b8ce18..659a128994f 100644
--- a/shim_et/xplat/executorch/build/build_variables.bzl
+++ b/shim_et/xplat/executorch/build/build_variables.bzl
@@ -50,6 +50,8 @@ PLATFORM_SRCS = [
 
 EXECUTORCH_CORE_SRCS = sorted([
     "runtime/backend/interface.cpp",
+    "runtime/core/device_allocator.cpp",
+    "runtime/core/device_memory_buffer.cpp",
     "runtime/core/evalue.cpp",
     "runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp",
     "runtime/core/exec_aten/util/tensor_util_portable.cpp",
diff --git a/test/models/targets.bzl b/test/models/targets.bzl
index c9fb67b7d31..a80244b1383 100644
--- a/test/models/targets.bzl
+++ b/test/models/targets.bzl
@@ -226,6 +226,7 @@ def define_common_targets():
         default_outs = ["."],
         visibility = [
             "//executorch/runtime/executor/test/...",
+            "//executorch/extension/module/test/...",
         ],
     )
 

From c27cc5d5bb872603ec90378c486049bc2c77a382 Mon Sep 17 00:00:00 2001
From: Gasoonjia <gasoonjia@icloud.com>
Date: Fri, 22 May 2026 20:54:37 -0700
Subject: [PATCH 006/103] [ET Device Support] CudaAllocator: device memory
 allocator for CUDA backend (#19747)

clone https://github.com/pytorch/executorch/pull/18477 due to bot crash
---
 backends/aoti/slim/core/storage.h        |  44 ++--
 backends/aoti/slim/core/targets.bzl      |   1 +
 backends/cuda/runtime/TARGETS            |  29 +++
 backends/cuda/runtime/cuda_allocator.cpp | 258 +++++++++++++++++++++++
 backends/cuda/runtime/cuda_allocator.h   |  84 ++++++++
 backends/cuda/runtime/cuda_backend.cpp   |   9 +
 6 files changed, 395 insertions(+), 30 deletions(-)
 create mode 100644 backends/cuda/runtime/cuda_allocator.cpp
 create mode 100644 backends/cuda/runtime/cuda_allocator.h

diff --git a/backends/aoti/slim/core/storage.h b/backends/aoti/slim/core/storage.h
index 73c4d32d955..a3d17a89903 100644
--- a/backends/aoti/slim/core/storage.h
+++ b/backends/aoti/slim/core/storage.h
@@ -13,6 +13,7 @@
 #ifdef CUDA_AVAILABLE
 #include <executorch/backends/aoti/slim/c10/cuda/Exception.h>
 #include <executorch/backends/aoti/slim/cuda/guard.h>
+#include <executorch/backends/cuda/runtime/cuda_allocator.h>
 #endif
 
 #include <executorch/backends/aoti/slim/c10/core/Device.h>
@@ -107,9 +108,6 @@ struct DeviceTraits<c10::DeviceType::CUDA> {
   /// @param device The target CUDA device (used to get the stream).
   /// @return Pointer to allocated device memory.
   static void* allocate(size_t nbytes, const c10::Device& device) {
-    // Get the current stream for this device (set by CUDAStreamGuard if any)
-    // This follows PyTorch's pattern where the allocator assumes the caller
-    // has already set the correct device via CUDAStreamGuard.
     auto stream_result =
         executorch::backends::cuda::getCurrentCUDAStream(device.index());
     ET_CHECK_MSG(
@@ -118,31 +116,23 @@ struct DeviceTraits<c10::DeviceType::CUDA> {
         static_cast<int>(device.index()));
 
     cudaStream_t stream = stream_result.get();
-    void* data = nullptr;
-    ET_CUDA_CHECK(cudaMallocAsync(&data, nbytes, stream));
-    return data;
+    auto result = executorch::backends::cuda::CudaAllocator::allocate_async(
+        nbytes, device.index(), stream);
+    ET_CHECK_MSG(
+        result.ok(),
+        "CudaAllocator::allocate_async failed for %zu bytes on device %d",
+        nbytes,
+        static_cast<int>(device.index()));
+    return result.get();
   }
 
-  /// Frees CUDA device memory on the current stream.
-  /// @param ptr Pointer to device memory to free.
   static void free(void* ptr) {
-    // Get the current stream for the current device
-    // Currently all cuda slimtensors should be on the same device same stream,
-    // so we can just use the stream on current device.
-    // TODO(gasoonjia): add cuda stream as a member of MaybeOwningStorage to
-    // support multiple devices.
     auto stream_result = executorch::backends::cuda::getCurrentCUDAStream(-1);
     ET_CHECK_MSG(stream_result.ok(), "Failed to get current CUDA stream");
-    ET_CUDA_LOG_WARN(cudaFreeAsync(ptr, stream_result.get()));
+    executorch::backends::cuda::CudaAllocator::deallocate_async(
+        ptr, -1, stream_result.get());
   }
 
-  /// Copies memory between CPU and CUDA or CUDA and CUDA asynchronously.
-  /// @param dst Destination pointer.
-  /// @param src Source pointer.
-  /// @param nbytes Number of bytes to copy.
-  /// @param dst_device Destination device.
-  /// @param src_device Source device.
-  /// @param stream CUDA stream for async copy.
   static void memcpy_async(
       void* dst,
       const void* src,
@@ -151,7 +141,6 @@ struct DeviceTraits<c10::DeviceType::CUDA> {
       const c10::Device& src_device,
       cudaStream_t stream) {
     cudaMemcpyKind direction = cudaMemcpyDeviceToDevice;
-
     if (src_device.is_cpu()) {
       direction = cudaMemcpyHostToDevice;
     } else if (dst_device.is_cpu()) {
@@ -164,15 +153,11 @@ struct DeviceTraits<c10::DeviceType::CUDA> {
           static_cast<int>(dst_device.index()));
     }
 
-    ET_CUDA_CHECK(cudaMemcpyAsync(dst, src, nbytes, direction, stream));
+    auto err = executorch::backends::cuda::CudaAllocator::memcpy_async(
+        dst, src, nbytes, direction, stream);
+    ET_CHECK_MSG(err == executorch::runtime::Error::Ok, "memcpy_async failed");
   }
 
-  /// Copies memory between CPU and CUDA or CUDA and CUDA synchronously.
-  /// @param dst Destination pointer.
-  /// @param src Source pointer.
-  /// @param nbytes Number of bytes to copy.
-  /// @param dst_device Destination device.
-  /// @param src_device Source device.
   static void memcpy(
       void* dst,
       const void* src,
@@ -180,7 +165,6 @@ struct DeviceTraits<c10::DeviceType::CUDA> {
       const c10::Device& dst_device,
       const c10::Device& src_device) {
     cudaMemcpyKind direction = cudaMemcpyDeviceToDevice;
-
     if (src_device.is_cpu()) {
       direction = cudaMemcpyHostToDevice;
     } else if (dst_device.is_cpu()) {
diff --git a/backends/aoti/slim/core/targets.bzl b/backends/aoti/slim/core/targets.bzl
index b9148305c91..42a7b79da6e 100644
--- a/backends/aoti/slim/core/targets.bzl
+++ b/backends/aoti/slim/core/targets.bzl
@@ -19,6 +19,7 @@ def define_common_targets():
             "//executorch/runtime/platform:platform",
             "//executorch/backends/aoti/slim/c10/cuda:exception",
             "//executorch/backends/aoti/slim/cuda:guard",
+            "//executorch/backends/cuda/runtime:cuda_allocator",
         ],
     )
 
diff --git a/backends/cuda/runtime/TARGETS b/backends/cuda/runtime/TARGETS
index f13f41ab8b7..c8449a95718 100644
--- a/backends/cuda/runtime/TARGETS
+++ b/backends/cuda/runtime/TARGETS
@@ -74,6 +74,33 @@ runtime.cxx_library(
     ],
 )
 
+runtime.cxx_library(
+    name = "cuda_allocator",
+    srcs = [
+        "cuda_allocator.cpp",
+    ],
+    headers = [
+        "cuda_allocator.h",
+    ],
+    # @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
+    link_whole = True,
+    supports_python_dlopen = True,
+    visibility = ["PUBLIC"],
+    exported_deps = [
+        "//executorch/runtime/core:device_allocator",
+    ],
+    deps = [
+        "//executorch/runtime/platform:platform",
+    ],
+    nvcc_flags = get_nvcc_arch_args() + [
+        "-_NVCC_HOST_COMPILER_FLAG_",
+        "gcc",
+    ],
+    external_deps = [
+        ("cuda", None, "cuda-lazy"),
+    ],
+)
+
 runtime.cxx_library(
     name = "cuda_backend",
     srcs = [
@@ -92,6 +119,8 @@ runtime.cxx_library(
     deps = [
         ":cuda_platform",
         ":runtime_shims",
+        ":cuda_allocator",
+        ":cuda_platform",
         "//executorch/backends/aoti:aoti_common_slim",
         "//executorch/backends/aoti/slim/core:slimtensor",
         "//executorch/backends/aoti/slim/factory:empty",
diff --git a/backends/cuda/runtime/cuda_allocator.cpp b/backends/cuda/runtime/cuda_allocator.cpp
new file mode 100644
index 00000000000..94294b08fa0
--- /dev/null
+++ b/backends/cuda/runtime/cuda_allocator.cpp
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cuda/runtime/cuda_allocator.h>
+
+#include <cuda_runtime.h>
+
+#include <executorch/runtime/platform/log.h>
+
+namespace executorch::backends::cuda {
+
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+using executorch::runtime::etensor::DeviceIndex;
+using executorch::runtime::etensor::DeviceType;
+
+Result<void*>
+CudaAllocator::allocate(size_t nbytes, DeviceIndex index, size_t alignment) {
+  // index == -1 means "use the current CUDA device"; any value < -1 is invalid.
+  ET_CHECK_OR_RETURN_ERROR(
+      index >= -1,
+      InvalidArgument,
+      "CudaAllocator::allocate: invalid device index %d (must be >= -1)",
+      static_cast<int>(index));
+
+  // Alignment must be a non-zero power of 2.
+  ET_CHECK_OR_RETURN_ERROR(
+      alignment != 0 && (alignment & (alignment - 1)) == 0,
+      InvalidArgument,
+      "CudaAllocator::allocate: alignment must be a power of 2, got %zu",
+      alignment);
+
+  // cudaMalloc is documented to return memory aligned to at least 256 bytes,
+  // which trivially satisfies kDefaultAlignment (alignof(void*)). For any
+  // requested alignment <= 256 bytes, the returned pointer is already aligned.
+  // Stricter alignment would require over-allocation plus bookkeeping that
+  // deallocate() does not currently support, so reject that case.
+  constexpr size_t kCudaMallocAlignment = 256;
+  ET_CHECK_OR_RETURN_ERROR(
+      alignment <= kCudaMallocAlignment,
+      NotSupported,
+      "CudaAllocator::allocate: requested alignment %zu exceeds cudaMalloc's "
+      "guaranteed alignment of %zu bytes; stricter alignment is not supported",
+      alignment,
+      kCudaMallocAlignment);
+
+  void* ptr = nullptr;
+  int prev_device = 0;
+  cudaError_t prev_device_err = cudaGetDevice(&prev_device);
+
+  // If index == -1, fall back to the current device returned by cudaGetDevice
+  // and skip the set/restore round-trip.
+  const bool switch_device = index >= 0 && prev_device_err == cudaSuccess &&
+      static_cast<int>(index) != prev_device;
+  if (switch_device) {
+    cudaSetDevice(index);
+  }
+
+  cudaError_t err = cudaMalloc(&ptr, nbytes);
+
+  if (switch_device) {
+    cudaSetDevice(prev_device);
+  }
+
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaMalloc failed: %s (requested %zu bytes on device %d)",
+        cudaGetErrorString(err),
+        nbytes,
+        static_cast<int>(index));
+    return Error::MemoryAllocationFailed;
+  }
+
+  // Sanity check: the pointer returned by cudaMalloc should already meet the
+  // requested alignment. If a future CUDA runtime weakens this guarantee, we
+  // want to fail loudly rather than silently return a misaligned pointer.
+  if ((reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) != 0) {
+    ET_LOG(
+        Error,
+        "cudaMalloc returned pointer %p not aligned to %zu bytes",
+        ptr,
+        alignment);
+    cudaFree(ptr);
+    return Error::MemoryAllocationFailed;
+  }
+
+  return ptr;
+}
+
+void CudaAllocator::deallocate(void* ptr, DeviceIndex index) {
+  if (ptr == nullptr) {
+    return;
+  }
+
+  int prev_device = 0;
+  cudaError_t prev_device_err = cudaSuccess;
+
+  if (index >= 0) {
+    prev_device_err = cudaGetDevice(&prev_device);
+    if (prev_device_err == cudaSuccess) {
+      cudaSetDevice(index);
+    }
+  }
+
+  cudaError_t err = cudaFree(ptr);
+
+  if (index >= 0 && prev_device_err == cudaSuccess) {
+    cudaSetDevice(prev_device);
+  }
+
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaFree failed: %s (ptr=%p, device %d)",
+        cudaGetErrorString(err),
+        ptr,
+        static_cast<int>(index));
+  }
+}
+
+// TODO(gasoonjia): Add support for async copy
+Error CudaAllocator::copy_host_to_device(
+    void* dst,
+    const void* src,
+    size_t nbytes,
+    DeviceIndex index) {
+  int prev_device = 0;
+  cudaError_t prev_device_err = cudaSuccess;
+
+  if (index >= 0) {
+    prev_device_err = cudaGetDevice(&prev_device);
+    if (prev_device_err == cudaSuccess) {
+      cudaSetDevice(index);
+    }
+  }
+
+  cudaError_t err = cudaMemcpy(dst, src, nbytes, cudaMemcpyHostToDevice);
+
+  if (index >= 0 && prev_device_err == cudaSuccess) {
+    cudaSetDevice(prev_device);
+  }
+
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaMemcpy H2D failed: %s (%zu bytes, device %d)",
+        cudaGetErrorString(err),
+        nbytes,
+        static_cast<int>(index));
+    return Error::Internal;
+  }
+  return Error::Ok;
+}
+
+// TODO(gasoonjia): Add support for async copy
+Error CudaAllocator::copy_device_to_host(
+    void* dst,
+    const void* src,
+    size_t nbytes,
+    DeviceIndex index) {
+  int prev_device = 0;
+  cudaError_t prev_device_err = cudaSuccess;
+
+  if (index >= 0) {
+    prev_device_err = cudaGetDevice(&prev_device);
+    if (prev_device_err == cudaSuccess) {
+      cudaSetDevice(index);
+    }
+  }
+
+  cudaError_t err = cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToHost);
+
+  if (index >= 0 && prev_device_err == cudaSuccess) {
+    cudaSetDevice(prev_device);
+  }
+
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaMemcpy D2H failed: %s (%zu bytes, device %d)",
+        cudaGetErrorString(err),
+        nbytes,
+        static_cast<int>(index));
+    return Error::Internal;
+  }
+  return Error::Ok;
+}
+
+DeviceType CudaAllocator::device_type() const {
+  return DeviceType::CUDA;
+}
+
+CudaAllocator& CudaAllocator::instance() {
+  static CudaAllocator allocator;
+  return allocator;
+}
+
+Result<void*> CudaAllocator::allocate_async(
+    size_t nbytes,
+    DeviceIndex index,
+    cudaStream_t stream) {
+  void* ptr = nullptr;
+  cudaError_t err = cudaMallocAsync(&ptr, nbytes, stream);
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaMallocAsync failed: %s (requested %zu bytes on device %d)",
+        cudaGetErrorString(err),
+        nbytes,
+        static_cast<int>(index));
+    return Error::MemoryAllocationFailed;
+  }
+  return ptr;
+}
+
+void CudaAllocator::deallocate_async(
+    void* ptr,
+    DeviceIndex index,
+    cudaStream_t stream) {
+  if (ptr == nullptr) {
+    return;
+  }
+  cudaError_t err = cudaFreeAsync(ptr, stream);
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaFreeAsync failed: %s (ptr=%p, device %d)",
+        cudaGetErrorString(err),
+        ptr,
+        static_cast<int>(index));
+  }
+}
+
+Error CudaAllocator::memcpy_async(
+    void* dst,
+    const void* src,
+    size_t nbytes,
+    cudaMemcpyKind direction,
+    cudaStream_t stream) {
+  cudaError_t err = cudaMemcpyAsync(dst, src, nbytes, direction, stream);
+  if (err != cudaSuccess) {
+    ET_LOG(
+        Error,
+        "cudaMemcpyAsync failed: %s (%zu bytes)",
+        cudaGetErrorString(err),
+        nbytes);
+    return Error::Internal;
+  }
+  return Error::Ok;
+}
+
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/cuda_allocator.h b/backends/cuda/runtime/cuda_allocator.h
new file mode 100644
index 00000000000..fcd8224305a
--- /dev/null
+++ b/backends/cuda/runtime/cuda_allocator.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include <executorch/runtime/core/device_allocator.h>
+
+namespace executorch::backends::cuda {
+
+/**
+ * CUDA implementation of DeviceAllocator.
+ *
+ * Uses cudaMalloc/cudaFree for allocation and cudaMemcpy for host-device
+ * transfers. This allocator is automatically registered as a singleton
+ * with the DeviceAllocatorRegistry when the CUDA backend library is linked.
+ *
+ * All CUDA memory operations in the CUDA backend should go through this
+ * allocator for consistent memory management.
+ */
+class CudaAllocator final : public executorch::runtime::DeviceAllocator {
+ public:
+  executorch::runtime::Result<void*> allocate(
+      size_t nbytes,
+      executorch::runtime::etensor::DeviceIndex index,
+      size_t alignment = kDefaultAlignment) override;
+
+  void deallocate(void* ptr, executorch::runtime::etensor::DeviceIndex index)
+      override;
+
+  executorch::runtime::Error copy_host_to_device(
+      void* dst,
+      const void* src,
+      size_t nbytes,
+      executorch::runtime::etensor::DeviceIndex index) override;
+
+  executorch::runtime::Error copy_device_to_host(
+      void* dst,
+      const void* src,
+      size_t nbytes,
+      executorch::runtime::etensor::DeviceIndex index) override;
+
+  executorch::runtime::etensor::DeviceType device_type() const override;
+
+  /// Returns the global CudaAllocator singleton.
+  static CudaAllocator& instance();
+
+  // --- Async (stream-based) operations for SlimTensor/Storage layer ---
+
+  /**
+   * Allocate device memory asynchronously on the given CUDA stream.
+   */
+  static executorch::runtime::Result<void*> allocate_async(
+      size_t nbytes,
+      executorch::runtime::etensor::DeviceIndex index,
+      cudaStream_t stream);
+
+  /**
+   * Deallocate device memory asynchronously on the given CUDA stream.
+   */
+  static void deallocate_async(
+      void* ptr,
+      executorch::runtime::etensor::DeviceIndex index,
+      cudaStream_t stream);
+
+  /**
+   * Copy memory asynchronously on the given CUDA stream.
+   * Supports H2D, D2H, and D2D based on src/dst device types.
+   */
+  static executorch::runtime::Error memcpy_async(
+      void* dst,
+      const void* src,
+      size_t nbytes,
+      cudaMemcpyKind direction,
+      cudaStream_t stream);
+};
+
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp
index 1497ba1e376..d2738f7a976 100644
--- a/backends/cuda/runtime/cuda_backend.cpp
+++ b/backends/cuda/runtime/cuda_backend.cpp
@@ -40,6 +40,7 @@
 // Include our shim layer headers
 #include <executorch/backends/aoti/aoti_delegate_handle.h>
 #include <executorch/backends/aoti/utils.h>
+#include <executorch/backends/cuda/runtime/cuda_allocator.h>
 #include <executorch/backends/cuda/runtime/cuda_delegate_handle.h>
 #include <executorch/backends/cuda/runtime/platform/platform.h>
 #include <executorch/backends/cuda/runtime/shims/memory.h>
@@ -1273,5 +1274,13 @@ auto cls = cuda::CudaBackend();
 executorch::runtime::Backend backend{"CudaBackend", &cls};
 static executorch::runtime::Error success_with_compiler =
     register_backend(backend);
+
+// Auto-register the CudaAllocator so that DeviceMemoryBuffer::create(CUDA)
+// works whenever the CUDA backend library is linked.
+static bool cuda_allocator_registered = [] {
+  executorch::runtime::register_device_allocator(
+      &cuda::CudaAllocator::instance());
+  return true;
+}();
 } // namespace
 } // namespace executorch::backends

From 7d8063f9e6221ad8724f122ad3ec4cbb1aae2fc6 Mon Sep 17 00:00:00 2001
From: Gasoonjia <gasoonjia@icloud.com>
Date: Fri, 22 May 2026 20:56:14 -0700
Subject: [PATCH 007/103] [ET Device Support] Define AOT device copy ops
 registry (#19748)

clone https://github.com/pytorch/executorch/pull/18728 due to bot crash
---
 exir/passes/BUCK                         |  8 +++
 exir/passes/_device_copy_ops_registry.py | 58 +++++++++++++++++++
 exir/tests/TARGETS                       | 11 ++++
 exir/tests/test_device_copy_ops.py       | 73 ++++++++++++++++++++++++
 4 files changed, 150 insertions(+)
 create mode 100644 exir/passes/_device_copy_ops_registry.py
 create mode 100644 exir/tests/test_device_copy_ops.py

diff --git a/exir/passes/BUCK b/exir/passes/BUCK
index 954f1cfdb4f..4647388b388 100644
--- a/exir/passes/BUCK
+++ b/exir/passes/BUCK
@@ -381,6 +381,14 @@ fbcode_target(_kind = runtime.python_library,
     ],
 )
 
+fbcode_target(_kind = runtime.python_library,
+    name = "device_copy_ops_registry",
+    srcs = ["_device_copy_ops_registry.py"],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
 fbcode_target(_kind = runtime.python_library,
     name = "memory_format_ops_pass",
     srcs = [
diff --git a/exir/passes/_device_copy_ops_registry.py b/exir/passes/_device_copy_ops_registry.py
new file mode 100644
index 00000000000..a62b88d4234
--- /dev/null
+++ b/exir/passes/_device_copy_ops_registry.py
@@ -0,0 +1,58 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Registry for device copy ops used to insert explicit H2D (host-to-device)
+and D2H (device-to-host) data transfer operations at delegate boundaries.
+
+These ops are inserted by PropagateDevicePass when enable_non_cpu_memory_planning
+is True, making the graph functional by explicitly transferring data between
+CPU and device memory.
+
+Follows the same registration pattern as dim_order_ops_registry.py.
+"""
+
+import torch
+from torch.library import impl, Library
+
+lib = Library("et_copy", "DEF")
+
+# _h2d_copy: copies a CPU tensor to device memory.
+# At tracing time, this is a clone (both on CPU). At runtime, the out tensor
+# is memory-planned on device, and the kernel calls
+# DeviceAllocator::copy_host_to_device.
+lib.define("_h2d_copy(Tensor self) -> Tensor")
+lib.define("_h2d_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)")
+
+# _d2h_copy: copies a device tensor to CPU memory.
+# At tracing time, this is a clone (both on CPU). At runtime, the self tensor
+# has device memory, and the kernel calls DeviceAllocator::copy_device_to_host.
+lib.define("_d2h_copy(Tensor self) -> Tensor")
+lib.define("_d2h_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)")
+
+
+@impl(lib, "_h2d_copy", "CompositeImplicitAutograd")
+def _h2d_copy_impl(self: torch.Tensor) -> torch.Tensor:
+    # During tracing, both tensors are on CPU. Just clone to represent the transfer.
+    return self.clone()
+
+
+@impl(lib, "_h2d_copy.out", "CompositeImplicitAutograd")
+def _h2d_copy_out_impl(self: torch.Tensor, *, out: torch.Tensor) -> torch.Tensor:
+    out.copy_(self)
+    return out
+
+
+@impl(lib, "_d2h_copy", "CompositeImplicitAutograd")
+def _d2h_copy_impl(self: torch.Tensor) -> torch.Tensor:
+    # During tracing, both tensors are on CPU. Just clone to represent the transfer.
+    return self.clone()
+
+
+@impl(lib, "_d2h_copy.out", "CompositeImplicitAutograd")
+def _d2h_copy_out_impl(self: torch.Tensor, *, out: torch.Tensor) -> torch.Tensor:
+    out.copy_(self)
+    return out
diff --git a/exir/tests/TARGETS b/exir/tests/TARGETS
index 322f72c870a..21493a69644 100644
--- a/exir/tests/TARGETS
+++ b/exir/tests/TARGETS
@@ -504,3 +504,14 @@ python_unittest(
         "//executorch/exir/passes:propagate_device_pass",
     ],
 )
+
+python_unittest(
+    name = "device_copy_ops",
+    srcs = [
+        "test_device_copy_ops.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/exir/passes:device_copy_ops_registry",
+    ],
+)
diff --git a/exir/tests/test_device_copy_ops.py b/exir/tests/test_device_copy_ops.py
new file mode 100644
index 00000000000..805159d9d81
--- /dev/null
+++ b/exir/tests/test_device_copy_ops.py
@@ -0,0 +1,73 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+# Import the registry to register the ops
+import executorch.exir.passes._device_copy_ops_registry  # noqa: F401
+
+import torch
+
+
+class DeviceCopyOpsRegistryTest(unittest.TestCase):
+    """Tests that et_copy._h2d_copy and et_copy._d2h_copy ops are correctly
+    registered and produce expected outputs during tracing (CPU-only)."""
+
+    def test_h2d_copy_functional(self):
+        """_h2d_copy should return a clone of the input tensor."""
+        x = torch.randn(2, 3)
+        result = torch.ops.et_copy._h2d_copy(x)
+        self.assertEqual(result.shape, x.shape)
+        self.assertEqual(result.dtype, x.dtype)
+        self.assertTrue(torch.equal(result, x))
+        # Should be a new tensor, not the same object
+        self.assertFalse(result.data_ptr() == x.data_ptr())
+
+    def test_d2h_copy_functional(self):
+        """_d2h_copy should return a clone of the input tensor."""
+        x = torch.randn(4, 5)
+        result = torch.ops.et_copy._d2h_copy(x)
+        self.assertEqual(result.shape, x.shape)
+        self.assertEqual(result.dtype, x.dtype)
+        self.assertTrue(torch.equal(result, x))
+        self.assertFalse(result.data_ptr() == x.data_ptr())
+
+    def test_h2d_copy_out_variant(self):
+        """_h2d_copy.out should copy data into the provided out tensor."""
+        x = torch.randn(3, 3)
+        out = torch.empty(3, 3)
+        result = torch.ops.et_copy._h2d_copy.out(x, out=out)
+        self.assertTrue(result is out)
+        self.assertTrue(torch.equal(out, x))
+
+    def test_d2h_copy_out_variant(self):
+        """_d2h_copy.out should copy data into the provided out tensor."""
+        x = torch.randn(2, 4)
+        out = torch.empty(2, 4)
+        result = torch.ops.et_copy._d2h_copy.out(x, out=out)
+        self.assertTrue(result is out)
+        self.assertTrue(torch.equal(out, x))
+
+    def test_h2d_copy_preserves_dtype(self):
+        """_h2d_copy should work with various dtypes."""
+        for dtype in [torch.float32, torch.float16, torch.int32, torch.int64]:
+            x = torch.ones(2, 2, dtype=dtype)
+            result = torch.ops.et_copy._h2d_copy(x)
+            self.assertEqual(result.dtype, dtype)
+            self.assertTrue(torch.equal(result, x))
+
+    def test_h2d_copy_scalar_tensor(self):
+        """_h2d_copy should handle 0-dim tensors."""
+        x = torch.tensor(3.14)
+        result = torch.ops.et_copy._h2d_copy(x)
+        self.assertEqual(result.shape, torch.Size([]))
+        self.assertTrue(torch.equal(result, x))
+
+    def test_d2h_copy_empty_tensor(self):
+        """_d2h_copy should handle empty tensors."""
+        x = torch.empty(0, 3)
+        result = torch.ops.et_copy._d2h_copy(x)
+        self.assertEqual(result.shape, torch.Size([0, 3]))

From d757776f51bc41aedac47fe51dd020474726774c Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Sat, 23 May 2026 11:50:33 -0700
Subject: [PATCH 008/103] Add extension_llm_runner to CMake deps (#19749)

Differential Revision: D106162684

Pull Request resolved: https://github.com/pytorch/executorch/pull/19749
---
 examples/models/parakeet/main.cpp          |  9 +++++----
 extension/asr/runner/CMakeLists.txt        |  2 +-
 extension/asr/runner/transducer_runner.cpp | 16 ++++++++++++----
 extension/asr/runner/transducer_runner.h   | 13 +++++++++++--
 4 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/examples/models/parakeet/main.cpp b/examples/models/parakeet/main.cpp
index 249e8fd14d4..b8a052004e4 100644
--- a/examples/models/parakeet/main.cpp
+++ b/examples/models/parakeet/main.cpp
@@ -152,13 +152,14 @@ int main(int argc, char** argv) {
     ET_LOG(Error, "Preprocessing failed.");
     return 1;
   }
-  auto mel_features = preprocess_result.get();
+  auto preprocess_out = preprocess_result.get();
 
   // --- Transcribe ---
   ET_LOG(Info, "Running TDT greedy decode...");
-  auto result = runner.transcribe(mel_features, [](const std::string& piece) {
-    std::cout << piece << std::flush;
-  });
+  auto result = runner.transcribe(
+      preprocess_out.features,
+      [](const std::string& piece) { std::cout << piece << std::flush; },
+      preprocess_out.length);
 
   if (!result.ok()) {
     ET_LOG(Error, "Transcription failed.");
diff --git a/extension/asr/runner/CMakeLists.txt b/extension/asr/runner/CMakeLists.txt
index 66974aa2a24..b47cddaf48c 100644
--- a/extension/asr/runner/CMakeLists.txt
+++ b/extension/asr/runner/CMakeLists.txt
@@ -22,7 +22,7 @@ endif()
 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 
 set(runner_deps executorch_core extension_module extension_tensor
-                tokenizers::tokenizers
+                extension_llm_runner tokenizers::tokenizers
 )
 
 # Define runner library
diff --git a/extension/asr/runner/transducer_runner.cpp b/extension/asr/runner/transducer_runner.cpp
index 3461cb09cc1..7b9298845a9 100644
--- a/extension/asr/runner/transducer_runner.cpp
+++ b/extension/asr/runner/transducer_runner.cpp
@@ -200,7 +200,7 @@ Error TransducerRunner::load() {
   return Error::Ok;
 }
 
-Result<::executorch::extension::TensorPtr> TransducerRunner::preprocess(
+Result<PreprocessResult> TransducerRunner::preprocess(
     ::executorch::extension::TensorPtr raw_audio) {
   if (!is_loaded()) {
     ET_CHECK_OK_OR_RETURN_ERROR(load());
@@ -229,12 +229,18 @@ Result<::executorch::extension::TensorPtr> TransducerRunner::preprocess(
       "Preprocessor returned unexpected output.");
 
   auto mel = outputs[0].toTensor();
-  return std::make_shared<::executorch::aten::Tensor>(std::move(mel));
+  int64_t mel_len = mel.sizes()[1]; // default to tensor dim
+  if (outputs.size() >= 2 && outputs[1].isTensor()) {
+    mel_len = outputs[1].toTensor().const_data_ptr<int64_t>()[0];
+  }
+  return PreprocessResult{
+      std::make_shared<::executorch::aten::Tensor>(std::move(mel)), mel_len};
 }
 
 Result<std::vector<Token>> TransducerRunner::transcribe(
     ::executorch::extension::TensorPtr preprocessed_features,
-    std::function<void(const std::string&)> token_callback) {
+    std::function<void(const std::string&)> token_callback,
+    int64_t features_length) {
   if (!is_loaded()) {
     ET_CHECK_OK_OR_RETURN_ERROR(load());
   }
@@ -242,7 +248,9 @@ Result<std::vector<Token>> TransducerRunner::transcribe(
   stats_.inference_start_ms = ::executorch::extension::llm::time_in_ms();
 
   // --- Encode ---
-  int64_t mel_len_value = preprocessed_features->size(1);
+  // Use provided length, or fall back to tensor dimension
+  int64_t mel_len_value =
+      features_length > 0 ? features_length : preprocessed_features->size(1);
   std::vector<int64_t> mel_len_data = {mel_len_value};
   auto mel_len = ::executorch::extension::from_blob(
       mel_len_data.data(), {1}, ::executorch::aten::ScalarType::Long);
diff --git a/extension/asr/runner/transducer_runner.h b/extension/asr/runner/transducer_runner.h
index ee819590141..aed0ad84cd6 100644
--- a/extension/asr/runner/transducer_runner.h
+++ b/extension/asr/runner/transducer_runner.h
@@ -29,6 +29,14 @@ using ::executorch::extension::llm::Stats;
 using ::executorch::runtime::Error;
 using ::executorch::runtime::Result;
 
+/**
+ * Preprocessed audio features with actual (unpadded) length.
+ */
+struct PreprocessResult {
+  ::executorch::extension::TensorPtr features;
+  int64_t length; // Actual number of valid frames (excluding padding)
+};
+
 /**
  * A decoded token with frame-level timing information.
  */
@@ -97,7 +105,7 @@ class ET_EXPERIMENTAL TransducerRunner {
    * @returns Preprocessed features tensor (e.g., mel spectrogram),
    *   ready to pass to transcribe().
    */
-  Result<::executorch::extension::TensorPtr> preprocess(
+  Result<PreprocessResult> preprocess(
       ::executorch::extension::TensorPtr raw_audio);
 
   /**
@@ -112,7 +120,8 @@ class ET_EXPERIMENTAL TransducerRunner {
    */
   Result<std::vector<Token>> transcribe(
       ::executorch::extension::TensorPtr preprocessed_features,
-      std::function<void(const std::string&)> token_callback = {});
+      std::function<void(const std::string&)> token_callback = {},
+      int64_t features_length = -1);
 
   /**
    * Returns a reference to the loaded tokenizer, or nullptr if not loaded.

From b69cbcd6ffefe6e13fa25c4ea9285786b04692ca Mon Sep 17 00:00:00 2001
From: roman-janik-nxp <roman.janik@nxp.com>
Date: Sun, 24 May 2026 11:43:13 +0200
Subject: [PATCH 009/103] NXP backend: Enable Add Tensor with new Neutron flow
 (#19550)

### Summary
Add tests verifying correct support for add.tensor by the Neutron
backend using the new Neutron MLIR flow.

### Test plan
Unit tests provided.

cc @robert-kalmar
---
 .../ops_converters/add_tensor_converter.py    |  42 ++-
 .../test_add_tensor_converter.py              | 263 +++++++++++++++++-
 backends/nxp/tests/models.py                  |   4 +-
 backends/nxp/tests/ops_aliases.py             |   1 +
 4 files changed, 293 insertions(+), 17 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py
index fd28b077b8a..673af19310f 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py
@@ -3,6 +3,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import torch
+
+from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     NodeConverter,
@@ -23,11 +26,33 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        if NodeConverter.uses_shape_broadcasting(node):
-            # Shape broadcasting may require the addition of `Transpose` ops during conversion.
-            return False
+        if custom_delegation_options.use_new_flow_neutron_c:
+            if not NodeConverter.at_least_one_input_shape_matches_the_output_shape(
+                node
+            ):
+                return False
 
-        return True
+            # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes
+            # Transpose is currently not supported for new flow
+            if any(
+                input_node.meta[NXP_NODE_FORMAT].is_channels_first()
+                for input_node in node.all_input_nodes
+            ) and NodeConverter._node_inputs_ranks_not_equal(node):
+                return False
+
+            supported_types = [torch.int8, torch.uint8]
+            if not NodeConverter.uses_quantization_type_for_io(
+                node, supported_types, [0, 1], [0]
+            ):
+                return False
+
+            return True
+        else:
+            if NodeConverter.uses_shape_broadcasting(node):
+                # Shape broadcasting may require the addition of `Transpose` ops during conversion.
+                return False
+
+            return True
 
     @staticmethod
     def _is_supported_in_IR(
@@ -43,12 +68,13 @@ def _is_supported_in_IR(
 
         return True
 
-    # add.Tensor Node format: (Tensor self, Tensor other, *, Scalar alpha=1)
     def convert(self, node: Node):
-        """Convert 'add_tensor' operator to TFLite 'add'."""
+        """Convert 'add_tensor' operator to NeutronIR 'Add'.
+        The ExecuTorch schema is:
+            add.Tensor(Tensor self, Tensor other, Scalar alpha=1)
+        """
         self.assert_convertible(node)
-
         t_op = self._create_tflite_op_with_io_tensors(node)
-
         t_op.builtin_options = add_options.Add()
+
         self.builder.append_operators([t_op])
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py
index 1aa58ab5d95..4a656eb9517 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py
@@ -1,7 +1,8 @@
-# Copyright 2025 NXP
+# Copyright 2025-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+
 import numpy as np
 import pytest
 import torch
@@ -9,17 +10,29 @@
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
-from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    ModelInputSpec,
+    to_quantized_edge_program,
+)
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
+    graph_contains_any_of_ops,
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
 from executorch.backends.nxp.tests.models import (
     AddTensorConvModule,
     AddTensorModule,
     AddTensorOneInputModule,
 )
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    AddTensor,
+    Convolution,
+    ExecutorchDelegateCall,
+)
 from torch.export import ExportedProgram
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
@@ -92,20 +105,26 @@ def test_add_tensor_one_input_quant_conversion(mocker, input_shape, use_qat):
 
 
 @pytest.mark.parametrize(
-    "input_shape",
+    "x_input_shape",
     [
         pytest.param((1, 4, 8, 8), id="4D."),
         pytest.param((1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."),
     ],
 )
-def test_add_tensor_w_conv_quant_conversion(mocker, input_shape, use_qat):
+def test_add_tensor_w_conv_quant_conversion(mocker, x_input_shape, use_qat):
     model = AddTensorConvModule()
 
     converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
 
+    n, c, h, w = x_input_shape
+    y_input_shape = (n, 8, h, w)
+
     # Run conversion
     _ = to_quantized_edge_program(
-        model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False
+        model,
+        [x_input_shape, y_input_shape],
+        use_qat=use_qat,
+        use_neutron_for_format_conversion=False,
     )
 
     # Capture generated model
@@ -114,7 +133,13 @@ def test_add_tensor_w_conv_quant_conversion(mocker, input_shape, use_qat):
     # Capture converted program
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
 
-    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    input_data_1 = (np.random.random(x_input_shape).astype(np.float32) * 50).astype(
+        np.int8
+    )
+    input_data_2 = (np.random.random(y_input_shape).astype(np.float32) * 50).astype(
+        np.int8
+    )
+    input_data = {0: input_data_1, 1: input_data_2}
 
     convert_run_compare(
         exported_program,
@@ -149,7 +174,7 @@ def test_add_tensor_broadcasting_unsupported_quant_conversion(
     nodes = list(edge_program.graph.nodes)
 
     # Broadcast is not supported, node is not converted
-    assert nodes[6].target.__name__ == "aten.add.Tensor"  # Add Tensor is not delegated.
+    assert nodes[6].target == AddTensor  # Add Tensor is not delegated.
 
     # Capture converted program
     # exported_program: ExportedProgram = converter_spy.call_args.args[1]
@@ -159,3 +184,227 @@ def test_add_tensor_broadcasting_unsupported_quant_conversion(
     # input_data = {0: x_input_data, 1: y_input_data}
     #
     # convert_run_compare(exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data)
+
+
+class TestAddTensorNewNeutronFlow:
+    @pytest.mark.parametrize(
+        "x_input_shape",
+        [
+            pytest.param((1,), id="1D."),
+            pytest.param((6, 5), id="2D."),
+            pytest.param((1, 4, 7), id="3D."),
+            pytest.param((2, 4, 3, 15), id="4D."),
+            pytest.param(
+                (6, 82),
+                id="2D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                (1, 68, 7),
+                id="3D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                (1, 4, 9, 11, 4),
+                id="5D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__basic_nsys_inference(self, x_input_shape, mocker):
+        x_input_spec = ModelInputSpec(x_input_shape)
+        model = AddTensorModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={}
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            [x_input_spec, x_input_spec],
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "x_input_shape",
+        [
+            pytest.param((1,), id="1D."),
+            pytest.param((6, 5), id="2D."),
+            pytest.param((1, 4, 7), id="3D."),
+            pytest.param((2, 4, 3, 15), id="4D."),
+            pytest.param(
+                (1, 4, 9, 11, 4),
+                id="5D.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__basic_nsys_inference_qat(self, x_input_shape, mocker):
+        x_input_spec = ModelInputSpec(x_input_shape)
+        model = AddTensorModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={}
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            [x_input_spec, x_input_spec],
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+            use_qat=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D."
+            ),
+            pytest.param(
+                [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))],
+                id="2 inputs 3D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D + 2D."
+            ),
+            pytest.param(
+                [ModelInputSpec((69, 73)), ModelInputSpec((1, 73))],
+                id="2 inputs 2D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__broadcast(self, input_spec, mocker):
+        model = AddTensorModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={}
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            input_spec,
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((4, 1)), ModelInputSpec((1, 6))], id="2 inputs 2D."
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 3, 4)), ModelInputSpec((5, 3, 1))],
+                id="2 inputs 3D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((6, 4)), ModelInputSpec((6, 6, 1))],
+                id="2 inputs 2D + 3D.",
+            ),
+        ],
+    )
+    def test__broadcast_unsupported(self, input_spec):
+        # Broadcast where at least one of the inputs is not equal to output is not supported
+        model = AddTensorModule()
+
+        delegated_ep = to_quantized_edge_program(
+            model, input_spec, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `add.Tensor` was NOT delegated.
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [AddTensor])
+
+    @pytest.mark.parametrize(
+        "x_input_shape",
+        [
+            pytest.param(
+                (1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."
+            ),
+        ],
+    )
+    def test__w_conv(self, x_input_shape, mocker):
+        model = AddTensorConvModule()
+
+        n, c, h, w = x_input_shape
+        y_input_spec = ModelInputSpec((n, 8, h, w))
+        x_input_spec = ModelInputSpec(x_input_shape)
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={AddTensor: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            [x_input_spec, y_input_spec],
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 8, 5, 1))],
+                id="2 inputs 4D + 4D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 67)), ModelInputSpec((1, 8, 5, 1))],
+                id="2 inputs 4D + 4D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__w_conv_broadcast(self, input_spec, mocker):
+        model = AddTensorConvModule()
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={AddTensor: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            input_spec,
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
+                id="2 inputs 4D + 2D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
+                id="2 inputs 4D + 3D.",
+            ),
+        ],
+    )
+    def test__w_conv_unsupported(self, input_spec):
+        model = AddTensorConvModule()
+
+        delegated_ep = to_quantized_edge_program(
+            model, input_spec, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `add.Tensor` was NOT delegated.
+        assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
+        assert graph_contains_any_of_ops(delegated_ep.graph, [AddTensor])
diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py
index 045dcfaba40..1292c4cf17d 100644
--- a/backends/nxp/tests/models.py
+++ b/backends/nxp/tests/models.py
@@ -656,9 +656,9 @@ def __init__(self):
         super().__init__()
         self.conv = Conv2dModule(padding=1, stride=1)
 
-    def forward(self, x):
+    def forward(self, x, y):
         x = self.conv(x)
-        return x + x
+        return x + y
 
 
 class AddTensorOneInputModule(torch.nn.Module):
diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py
index ec58072658d..9e6bedc5dba 100644
--- a/backends/nxp/tests/ops_aliases.py
+++ b/backends/nxp/tests/ops_aliases.py
@@ -13,6 +13,7 @@
 
 Abs = exir_ops.edge.aten.abs.default
 AdaptiveAvgPool2D = exir_ops.edge.aten._adaptive_avg_pool2d.default
+AddTensor = exir_ops.edge.aten.add.Tensor
 AvgPool2D = exir_ops.edge.aten.avg_pool2d.default
 Bmm = exir_ops.edge.aten.bmm.default
 ConstantPadND = exir_ops.edge.aten.constant_pad_nd.default

From ba6074c3868abb8f602a22565445b52f8b5bdfb1 Mon Sep 17 00:00:00 2001
From: Julian Chan <128482247+julianchan-meta@users.noreply.github.com>
Date: Sun, 24 May 2026 23:53:19 -0700
Subject: [PATCH 010/103] Back out "Globally serialize XNNPACK execution, add
 logging" (#19752)

Differential Revision: D106254596

Pull Request resolved: https://github.com/pytorch/executorch/pull/19752
---
 backends/xnnpack/runtime/XNNPACKBackend.cpp | 53 +--------------------
 1 file changed, 2 insertions(+), 51 deletions(-)

diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
index 2fe1e4d162e..c20fa985f46 100644
--- a/backends/xnnpack/runtime/XNNPACKBackend.cpp
+++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp
@@ -16,7 +16,6 @@
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/executor/pte_data_map.h>
 
-#include <cinttypes>
 #include <memory>
 #include <mutex>
 
@@ -42,13 +41,6 @@ using executorch::runtime::FreeableBuffer;
 using executorch::runtime::Result;
 using executorch::runtime::Span;
 
-// Global mutex for all XNNPACK operations. This is temporary, tracked by
-// T272407942.
-static std::mutex& global_xnnpack_mutex() {
-  static std::mutex m;
-  return m;
-}
-
 class XnnpackBackend final
     : public ::executorch::ET_RUNTIME_NAMESPACE::BackendInterface {
  public:
@@ -74,8 +66,6 @@ class XnnpackBackend final
       BackendInitContext& context,
       FreeableBuffer* processed,
       ArrayRef<CompileSpec> compile_specs) const override {
-    const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
-
     auto executor = context.get_runtime_allocator()
                         ->allocateInstance<xnnpack::delegate::XNNExecutor>();
     if (executor == nullptr) {
@@ -139,17 +129,6 @@ class XnnpackBackend final
           Error, "XNNCompiler::compileModel failed: 0x%x", (unsigned int)err);
       return err;
     }
-
-    ET_LOG(
-        Info,
-        "XnnpackBackend::init delegate=%p workspace_id=%" PRIu64
-        " workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s",
-        (void*)executor,
-        workspace->id(),
-        (void*)workspace_ptr,
-        program_id,
-        use_weight_cache ? "true" : "false");
-
     return executor;
   }
 
@@ -157,27 +136,15 @@ class XnnpackBackend final
       BackendExecutionContext& context,
       DelegateHandle* handle,
       Span<EValue*> args) const override {
-    const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
-
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
-    auto workspace = executor->get_workspace();
-    ET_LOG(
-        Info,
-        "XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64
-        " num_args=%zu weight_cache=%s",
-        (void*)executor,
-        workspace->id(),
-        (size_t)args.size(),
-        executor->uses_weight_cache() ? "true" : "false");
-
     std::unique_lock<std::mutex> lock_weights_cache(
         weights_cache_mutex_, std::defer_lock);
     if (executor->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
 
-    auto [raii_lock, _] = workspace->acquire();
+    auto [raii_lock, _] = executor->get_workspace()->acquire();
 
     // Prepare Inputs/Outputs and Propagate Input Shapes
     Error err = executor->prepare_args(args);
@@ -194,29 +161,12 @@ class XnnpackBackend final
     // Convert output data types if necessary (e.g., int32 -> int64 for Long)
     err = executor->convert_outputs(args);
 
-    ET_LOG(
-        Info,
-        "XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64
-        " err=0x%x",
-        (void*)executor,
-        workspace->id(),
-        (unsigned int)err);
-
     return err;
   }
 
   void destroy(DelegateHandle* handle) const override {
     if (handle != nullptr) {
-      const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
-
       auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
-      auto workspace = executor->get_workspace();
-
-      ET_LOG(
-          Info,
-          "XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64,
-          (void*)executor,
-          workspace->id());
 
 #ifdef ENABLE_XNNPACK_PROFILING
       executor->print_avg_op_timings();
@@ -233,6 +183,7 @@ class XnnpackBackend final
       // the same backend instance. Make sure to hold onto the workspace
       // shared_ptr, as the pointer in the executor is freed, which includes
       // the mutex referenced by raii_lock.
+      auto workspace = executor->get_workspace();
       auto [raii_lock, _] = workspace->acquire();
 
       // XNNExecutor is not trivially destructible. Since this was constructed

From ee4c90ad03f33398cbfa93cfed09caf04fca6099 Mon Sep 17 00:00:00 2001
From: Per Held <per.held@arm.com>
Date: Mon, 25 May 2026 08:59:44 +0200
Subject: [PATCH 011/103] Arm backend: Exclude build metadata from license
 checks

Treat BUCK and TARGETS files as build metadata in the Arm
pre-push license check so they do not need copyright headers.

Signed-off-by: Per Held <per.held@arm.com>
Change-Id: I4b3bbd1e03ba4b9c38fd06225156344985f0cc70
---
 backends/arm/scripts/pre-push | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/arm/scripts/pre-push b/backends/arm/scripts/pre-push
index 8e26463cd94..6aa32d07286 100755
--- a/backends/arm/scripts/pre-push
+++ b/backends/arm/scripts/pre-push
@@ -177,7 +177,7 @@ for COMMIT in ${COMMITS}; do
     for committed_file in "${license_files[@]}"; do
         # Skip files with certain extensions
         case "$committed_file" in
-            *.md|*.md.in|*.json|*.yml|*.yaml|*.cmake|*.patch|.gitignore|*.bzl)
+            *.md|*.md.in|*.json|*.yml|*.yaml|*.cmake|*.patch|.gitignore|*.bzl|BUCK|*/BUCK|TARGETS|*/TARGETS)
                 echo -e "${INFO} Skipping license check for ${committed_file} (excluded extension)"
                 continue
                 ;;

From b73df0b4696885c6e03f3789daeece8376078364 Mon Sep 17 00:00:00 2001
From: roman-janik-nxp <roman.janik@nxp.com>
Date: Mon, 25 May 2026 13:49:04 +0200
Subject: [PATCH 012/103] NXP backend: Enable Sub Tensor with new Neutron flow
 (#19588)

### Summary
Add tests verifying correct support for sub.tensor by the Neutron
backend using the new Neutron MLIR flow.

### Test plan
Unit tests provided.


cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 .../ops_converters/sub_tensor_converter.py    |  40 ++-
 .../test_avg_pool2d_converter.py              |   9 +-
 .../test_max_pool_2d_converter.py             |   7 +-
 .../test_mul_tensor_converter.py              |   5 -
 .../test_sub_tensor_converter.py              | 260 +++++++++++++++++-
 backends/nxp/tests/ops_aliases.py             |   1 +
 6 files changed, 289 insertions(+), 33 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py
index e97f4bf63c2..79dbcbcc012 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py
@@ -3,6 +3,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import torch
+
+from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     NodeConverter,
@@ -23,11 +26,33 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        if NodeConverter.uses_shape_broadcasting(node):
-            # Shape broadcasting may require the addition of `Transpose` ops during conversion.
-            return False
+        if custom_delegation_options.use_new_flow_neutron_c:
+            if not NodeConverter.at_least_one_input_shape_matches_the_output_shape(
+                node
+            ):
+                return False
 
-        return True
+            # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes
+            # Transpose is currently not supported for new flow
+            if any(
+                input_node.meta[NXP_NODE_FORMAT].is_channels_first()
+                for input_node in node.all_input_nodes
+            ) and NodeConverter._node_inputs_ranks_not_equal(node):
+                return False
+
+            supported_types = [torch.int8, torch.uint8]
+            if not NodeConverter.uses_quantization_type_for_io(
+                node, supported_types, [0, 1], [0]
+            ):
+                return False
+
+            return True
+        else:
+            if NodeConverter.uses_shape_broadcasting(node):
+                # Shape broadcasting may require the addition of `Transpose` ops during conversion.
+                return False
+
+            return True
 
     @staticmethod
     def _is_supported_in_IR(
@@ -45,9 +70,12 @@ def _is_supported_in_IR(
 
         return True
 
-    # sub.Tensor Node format: (Tensor self, Tensor other, *, Scalar alpha=1)
     def convert(self, node: Node):
-        """Convert 'sub_tensor' operator to NeutronIR 'Sub'."""
+        """Convert 'sub_tensor' operator to NeutronIR 'Sub'.
+        The ExecuTorch schema is:
+            sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1)
+        """
+
         self.assert_convertible(node)
 
         t_op = self._create_tflite_op_with_io_tensors(node)
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py
index 2c73ccd8092..193b7ecf9ab 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pytest
 import torch
+
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
@@ -29,13 +30,8 @@
     ToNHWCPreprocess,
 )
 from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
-from executorch.backends.nxp.tests.model_output_comparator import (
-    NumericalStatsOutputComparator,
-)
 from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule
-
 from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
-
 from executorch.backends.nxp.tests.ops_aliases import (
     AvgPool2D,
     ExecutorchDelegateCall,
@@ -45,6 +41,7 @@
     Unsqueeze,
     ViewCopy,
 )
+
 from torch.export import ExportedProgram
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
@@ -320,7 +317,6 @@ def test__basic_nsys_inference(self, mocker):
     def test__basic_nsys_inference_qat(self, mocker):
         input_shape = (2, 9, 6, 15)
         model = AvgPool2dModule(False, 0)
-        comparator = NumericalStatsOutputComparator()
         graph_verifier = DetailedGraphVerifier(
             mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={}
         )
@@ -329,7 +325,6 @@ def test__basic_nsys_inference_qat(self, mocker):
             model,
             input_shape,
             graph_verifier,
-            output_comparator=comparator,
             use_new_flow_neutron_c=True,
             use_qat=True,
         )
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py
index 583dc2bfd04..9062d5efbfc 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
@@ -17,9 +18,6 @@
     ToChannelLastPreprocess,
 )
 from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
-from executorch.backends.nxp.tests.model_output_comparator import (
-    NumericalStatsOutputComparator,
-)
 from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
 from executorch.backends.nxp.tests.ops_aliases import (
     ExecutorchDelegateCall,
@@ -32,7 +30,6 @@
     ViewCopy,
 )
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
-import pytest
 
 
 class MaxPool1DModule(torch.nn.Module):
@@ -286,7 +283,6 @@ def test__basic_nsys_inference(self, mocker):
     def test__basic_nsys_inference_qat(self, mocker):
         input_shape = (2, 11, 7, 16)  # The old flow limited the batch size to 1.
         model = MaxPool2dModule()
-        comparator = NumericalStatsOutputComparator()
         graph_verifier = DetailedGraphVerifier(
             mocker,
             expected_delegated_ops={MaxPool2DWithIndices: 1, GetItem: 1},
@@ -297,7 +293,6 @@ def test__basic_nsys_inference_qat(self, mocker):
             model,
             input_shape,
             graph_verifier,
-            output_comparator=comparator,
             use_new_flow_neutron_c=True,
             use_qat=True,
         )
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py
index 927af47bbf5..90113f484ad 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py
@@ -21,9 +21,6 @@
     ToChannelLastPreprocess,
 )
 from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
-from executorch.backends.nxp.tests.model_output_comparator import (
-    NumericalStatsOutputComparator,
-)
 from executorch.backends.nxp.tests.models import (
     MulTensorConvModule,
     MulTensorModule,
@@ -256,7 +253,6 @@ def test__basic_nsys_inference(self, x_input_shape, mocker):
     def test__basic_nsys_inference_qat(self, x_input_shape, mocker):
         x_input_spec = ModelInputSpec(x_input_shape)
         model = MulTensorModule()
-        comparator = NumericalStatsOutputComparator()
         graph_verifier = DetailedGraphVerifier(
             mocker, expected_delegated_ops={MulTensor: 1}, expected_non_delegated_ops={}
         )
@@ -265,7 +261,6 @@ def test__basic_nsys_inference_qat(self, x_input_shape, mocker):
             model,
             [x_input_spec, x_input_spec],
             graph_verifier,
-            output_comparator=comparator,
             use_new_flow_neutron_c=True,
             use_qat=True,
         )
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py
index 9ce3e93f39b..2734e89bc5d 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py
@@ -1,7 +1,8 @@
-# Copyright 2025 NXP
+# Copyright 2025-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+
 import numpy as np
 import pytest
 import torch
@@ -9,18 +10,29 @@
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
-from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    ModelInputSpec,
+    to_quantized_edge_program,
+)
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
+    graph_contains_any_of_ops,
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
 from executorch.backends.nxp.tests.models import (
     SubTensorConvModule,
     SubTensorModule,
     SubTensorOneInputModule,
 )
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    Convolution,
+    ExecutorchDelegateCall,
+    SubTensor,
+)
 from torch.export import ExportedProgram
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
@@ -63,7 +75,7 @@ def test_sub_tensor_quant_conversion(mocker, input_shape, use_qat):
     input_data = {0: input_data_1, 1: input_data_2}
 
     nodes = list(exported_program.graph.nodes)
-    assert nodes[4].target == exir_ops.edge.aten.sub.Tensor
+    assert nodes[4].target == SubTensor
 
     convert_run_compare(
         exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data
@@ -96,7 +108,7 @@ def test_sub_tensor_one_input_quant_conversion(mocker, input_shape, use_qat):
     input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
 
     nodes = list(exported_program.graph.nodes)
-    assert nodes[2].target == exir_ops.edge.aten.sub.Tensor
+    assert nodes[2].target == SubTensor
 
     convert_run_compare(
         exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data
@@ -141,7 +153,7 @@ def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape, use_qat):
     input_data = {0: input_data_1, 1: input_data_2}
 
     nodes = list(exported_program.graph.nodes)
-    assert nodes[15].target == exir_ops.edge.aten.sub.Tensor
+    assert nodes[15].target == SubTensor
 
     convert_run_compare(
         exported_program,
@@ -176,6 +188,236 @@ def test_sub_tensor_broadcasting_unsupported_quant_conversion(
     nodes = list(edge_program.graph.nodes)
 
     # Broadcast is not supported, node is not converted
-    assert (
-        nodes[6].target == exir_ops.edge.aten.sub.Tensor
-    )  # Sub Tensor is not delegated.
+    assert nodes[6].target == SubTensor  # Sub Tensor is not delegated.
+
+
+class TestSubTensorNewNeutronFlow:
+    @pytest.mark.parametrize(
+        "x_input_shape",
+        [
+            pytest.param((1,), id="1D."),
+            pytest.param((6, 5), id="2D."),
+            pytest.param((1, 4, 7), id="3D."),
+            pytest.param(
+                (6, 82),
+                id="2D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                (1, 68, 7),
+                id="3D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                (2, 4, 3, 15),
+                id="4D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                (1, 4, 9, 11, 4),
+                id="5D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__basic_nsys_inference(self, x_input_shape, mocker):
+        x_input_spec = ModelInputSpec(x_input_shape)
+        model = SubTensorModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={}
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            [x_input_spec, x_input_spec],
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "x_input_shape",
+        [
+            pytest.param((1,), id="1D."),
+            pytest.param((6, 5), id="2D."),
+            pytest.param((2, 4, 3, 15), id="4D."),
+            pytest.param(
+                (1, 4, 7),
+                id="3D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                (1, 4, 9, 11, 4),
+                id="5D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__basic_nsys_inference_qat(self, x_input_shape, mocker):
+        x_input_spec = ModelInputSpec(x_input_shape)
+        model = SubTensorModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={}
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            [x_input_spec, x_input_spec],
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+            use_qat=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D."
+            ),
+            pytest.param(
+                [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D + 2D."
+            ),
+            pytest.param(
+                [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))],
+                id="2 inputs 3D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+            pytest.param(
+                [ModelInputSpec((69, 73)), ModelInputSpec((1, 73))],
+                id="2 inputs 2D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__broadcast(self, input_spec, mocker):
+        model = SubTensorModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={}
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            input_spec,
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((4, 1)), ModelInputSpec((1, 6))], id="2 inputs 2D."
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 3, 4)), ModelInputSpec((5, 3, 1))],
+                id="2 inputs 3D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((6, 4)), ModelInputSpec((6, 6, 1))],
+                id="2 inputs 2D+3D.",
+            ),
+        ],
+    )
+    def test__broadcast_unsupported(self, input_spec):
+        # Broadcast where at least one of the inputs is not equal to output is not supported
+        model = SubTensorModule()
+
+        delegated_ep = to_quantized_edge_program(
+            model, input_spec, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `sub.Tensor` was NOT delegated.
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [SubTensor])
+
+    @pytest.mark.parametrize(
+        "x_input_shape",
+        [
+            pytest.param(
+                (1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."
+            ),
+        ],
+    )
+    def test__w_conv(self, x_input_shape, mocker):
+        model = SubTensorConvModule()
+
+        n, c, h, w = x_input_shape
+        y_input_spec = ModelInputSpec((n, 8, h, w))
+        x_input_spec = ModelInputSpec(x_input_shape)
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={SubTensor: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            [x_input_spec, y_input_spec],
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((1, 4, 7, 1)), ModelInputSpec((1, 8, 1, 1))],
+                id="2 inputs 4D + 4D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 8, 5, 1))],
+                id="2 inputs 4D + 4D incorrect.",
+                marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"),
+            ),
+        ],
+    )
+    def test__w_conv_broadcast(self, input_spec, mocker):
+        model = SubTensorConvModule()
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={SubTensor: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+        dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0)
+
+        lower_run_compare(
+            model,
+            input_spec,
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,
+        )
+
+    @pytest.mark.parametrize(
+        "input_spec",
+        [
+            pytest.param(
+                [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
+                id="2 inputs 4D + 2D.",
+            ),
+            pytest.param(
+                [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
+                id="2 inputs 4D + 3D.",
+            ),
+        ],
+    )
+    def test__w_conv_unsupported(self, input_spec):
+        model = SubTensorConvModule()
+
+        delegated_ep = to_quantized_edge_program(
+            model, input_spec, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `sub.Tensor` was NOT delegated.
+        assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
+        assert graph_contains_any_of_ops(delegated_ep.graph, [SubTensor])
diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py
index 9e6bedc5dba..7f855dd63af 100644
--- a/backends/nxp/tests/ops_aliases.py
+++ b/backends/nxp/tests/ops_aliases.py
@@ -37,6 +37,7 @@
 Squeeze = exir_ops.edge.aten.squeeze.default
 SqueezeDim = exir_ops.edge.aten.squeeze.dim
 SqueezeDims = exir_ops.edge.aten.squeeze.dims
+SubTensor = exir_ops.edge.aten.sub.Tensor
 Unsqueeze = exir_ops.edge.aten.unsqueeze.default
 UpsampleBilinear2D = exir_ops.edge.aten.upsample_bilinear2d.vec
 UpsampleNearest2D = exir_ops.edge.aten.upsample_nearest2d.vec

From 03e14ef8b3964deb589f3f172b4bbee7d206795a Mon Sep 17 00:00:00 2001
From: Youngsik Yang <vacu9708@gmail.com>
Date: Tue, 26 May 2026 01:55:50 +0900
Subject: [PATCH 013/103] Arm backend: Add bf16 support for aten.index_select
 and aten.unfold_copy (#19751)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #17097, which added BF16 support to the TOSA GATHER op.
`aten.index_select` and `aten.unfold_copy` both lower via TOSA GATHER
but their support checks were not updated at the time.

In both decompositions(`DecomposeIndexSelectToGatherPass()` and
`DecomposeUnfoldToGatherPass()`),
the bf16 values tensor flows through dtype-agnostic reshape ops and
`tosa.GATHER`, which accepts `BF16`.
The support check was the only blocker.

| Op                  | bf16 before | bf16 after |
|---------------------|:-----------:|:----------:|
| `aten.gather`       | ✅          | ✅         |
| `aten.index.Tensor` | ✅          | ✅         |
| `aten.slice_copy`   | ✅          | ✅         |
| `aten.index_select` | ❌          | ✅         |
| `aten.unfold_copy`  | ❌          | ✅         |

Changes:
- `index_select_support.py`, `unfold_copy_support.py`: extend float
branch
to include `bfloat16`; add bf16 extension guard; update rejection
message.
- `test_index_select.py`, `test_unfold_copy.py`: add isolated
  `_tosa_FP_bf16` test functions using
  `TosaPipelineFP(..., tosa_extensions=["bf16"])`.

### Test plan

`test_index_select_tosa_FP_bf16` and `test_unfold_copy_tosa_FP_bf16`
exercise the bf16 path end-to-end through `TosaPipelineFP` with the bf16
extension enabled, following the same pattern of the existing
`test_slice_tensor_tosa_FP_bf16` from #17492
---
 .../operator_support/index_select_support.py  | 14 ++++++--
 .../operator_support/unfold_copy_support.py   | 14 ++++++--
 backends/arm/test/ops/test_index_select.py    | 32 +++++++++++++++++++
 backends/arm/test/ops/test_unfold_copy.py     | 24 ++++++++++++++
 4 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/backends/arm/operator_support/index_select_support.py b/backends/arm/operator_support/index_select_support.py
index a3188e739c7..285b2cfe79f 100644
--- a/backends/arm/operator_support/index_select_support.py
+++ b/backends/arm/operator_support/index_select_support.py
@@ -77,8 +77,16 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires INT profile.",
                 )
                 return False
-        # fp16/fp32: either FP profile, or INT profile (via quantization)
-        elif values_dtype in (torch.float16, torch.float32):
+        # fp16/fp32/bf16: either FP profile, or INT profile (via quantization)
+        elif values_dtype in (torch.float16, torch.float32, torch.bfloat16):
+            if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
+                "bf16"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires bf16 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
@@ -90,7 +98,7 @@ def is_node_tosa_supported(
             self.reporter.report_reject(
                 node,
                 f"{node.target}: unsupported values dtype {values_dtype}; "
-                "expected bool/int8/int16/int32/float16/float32.",
+                "expected bool/int8/int16/int32/float16/bfloat16/float32.",
             )
             return False
 
diff --git a/backends/arm/operator_support/unfold_copy_support.py b/backends/arm/operator_support/unfold_copy_support.py
index bf6c1cad22e..ac9fc7d0ee3 100644
--- a/backends/arm/operator_support/unfold_copy_support.py
+++ b/backends/arm/operator_support/unfold_copy_support.py
@@ -84,8 +84,16 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires INT profile.",
                 )
                 return False
-        # fp16/fp32: either FP profile, or INT profile (via quantization)
-        elif values_dtype in (torch.float16, torch.float32):
+        # fp16/fp32/bf16: either FP profile, or INT profile (via quantization)
+        elif values_dtype in (torch.float16, torch.float32, torch.bfloat16):
+            if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
+                "bf16"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires bf16 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
@@ -97,7 +105,7 @@ def is_node_tosa_supported(
             self.reporter.report_reject(
                 node,
                 f"{node.target}: unsupported values dtype {values_dtype}; "
-                "expected bool/int8/int16/int32/float16/float32.",
+                "expected bool/int8/int16/int32/float16/bfloat16/float32.",
             )
             return False
 
diff --git a/backends/arm/test/ops/test_index_select.py b/backends/arm/test/ops/test_index_select.py
index bb5f0a92c51..4de19d30daf 100644
--- a/backends/arm/test/ops/test_index_select.py
+++ b/backends/arm/test/ops/test_index_select.py
@@ -61,6 +61,26 @@ def forward(self, input_: torch.Tensor, dim: int, index_: torch.Tensor):
         torch.tensor([3, 1], dtype=torch.int32),  # [W=2]
     ),
 }
+test_data_fp_bf16: dict[str, input_params] = {
+    # Rank-2: [K, C] -> index_select dim=0 => [W, C]
+    "test_bf16_rank2_dim0": (
+        torch.tensor(
+            [[0.5, 1.25, 2.5], [3.5, 4.25, 5.75], [6.5, 7.25, 8.75]],
+            dtype=torch.bfloat16,
+        ),  # [K=3, C=3]
+        0,
+        torch.tensor([2, 0], dtype=torch.int32),  # [W=2]
+    ),
+    # Rank-3: [N, K, C] -> index_select dim=-1 => [N, K, W]
+    "test_bf16_rank3_dim_neg1": (
+        torch.tensor(
+            [[[0.5, 1.5], [2.5, 3.5]], [[4.5, 5.5], [6.5, 7.5]]],
+            dtype=torch.bfloat16,
+        ),  # [N=2, K=2, C=2]
+        -1,
+        torch.tensor([1, 0], dtype=torch.int32),  # [W=2]
+    ),
+}
 
 # ---- INT profile: integer inputs + bool ----
 test_data_int: dict[str, input_params] = {
@@ -104,6 +124,18 @@ def test_index_select_tosa_FP(test_data: input_params):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_fp_bf16)
+def test_index_select_tosa_FP_bf16(test_data: input_params):
+    pipeline = TosaPipelineFP[input_params](
+        IndexSelect(),
+        test_data,
+        aten_op=IndexSelect.aten_op,
+        exir_op=IndexSelect.exir_op,
+        tosa_extensions=["bf16"],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_int | test_data_fp)
 def test_index_select_tosa_INT(test_data: input_params):
     # INT profile runs quantized, so we test both int inputs and float inputs here.
diff --git a/backends/arm/test/ops/test_unfold_copy.py b/backends/arm/test/ops/test_unfold_copy.py
index 2b502a9be10..baa4b7f64bc 100644
--- a/backends/arm/test/ops/test_unfold_copy.py
+++ b/backends/arm/test/ops/test_unfold_copy.py
@@ -120,6 +120,18 @@ def forward(self, input_: torch.Tensor, dim_: int, size_: int, step_: int):
     ),
 }
 
+test_data_bf16: dict[str, input_params] = {
+    "test_bf16_2d_dim1": (
+        torch.tensor(
+            [[0.1, 0.2, 0.3, 0.4, 0.5], [1.1, 1.2, 1.3, 1.4, 1.5]],
+            dtype=torch.bfloat16,
+        ),  # [B=2, T=5]
+        1,
+        3,
+        2,  # U=(5-3)//2+1=2 -> [B=2, U=2, C=3]
+    ),
+}
+
 
 @common.parametrize("test_data", test_data_fp)
 def test_unfold_copy_tosa_FP(test_data: input_params):
@@ -132,6 +144,18 @@ def test_unfold_copy_tosa_FP(test_data: input_params):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_bf16)
+def test_unfold_copy_tosa_FP_bf16(test_data: input_params):
+    pipeline = TosaPipelineFP[input_params](
+        UnfoldCopy(),
+        test_data,
+        aten_op=UnfoldCopy.aten_op,
+        exir_op=UnfoldCopy.exir_op,
+        tosa_extensions=["bf16"],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_int | test_data_fp)
 def test_unfold_copy_tosa_INT(test_data: input_params):
     pipeline = TosaPipelineINT[input_params](

From b581615fa86dd2357d866064427a0b93b2ad947f Mon Sep 17 00:00:00 2001
From: Erik Lundell <erik.lundell@arm.com>
Date: Tue, 26 May 2026 09:50:10 +0200
Subject: [PATCH 014/103] Cortex-M backend: Add AoT scratch-buffer planning.
 (#19636)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is done for conv, depthwise conv, transpose conv, and bmm.

Add scratch tensors to the operator signatures, which are then
assigned exir.memory.alloc. These allocs are automatically memory
planned by ExecuTorch.

Introduce `required_cmsis_buffer_size`which computes the buffer
size from node properties + the Cortex-M configuration.
The function uses functions registered by target in
backends/cortex_m/passes/scratch_buffer_sizes.py
This is used to set the size of the allocs in ConvertToCortexMPass

Finally, modify the kernels to use the new scratch tensor instead
of allocating temporary memory. Add a new macro
CORTEX_M_ENABLE_RUNTIME_CHECKS
to do a safety check that the aot computed buffer size is equal to the
buffer size computed at runtime. Use this when testing.


cc @psiddh @AdrianLundell @digantdesai @rascani @freddan80 @per @zingo
@oscarandersson8218 @mansnils @Sebastian-Larsson @robell

---------

Signed-off-by: Erik Lundell <erik.lundell@arm.com>
Co-authored-by: Måns Nilsson <mans.nilsson@arm.com>
---
 backends/arm/scripts/build_executorch.sh      |   8 +
 backends/cortex_m/CMakeLists.txt              |   9 +
 .../ops/op_quantized_batch_matmul.cpp         |  35 +--
 backends/cortex_m/ops/op_quantized_conv2d.cpp |  34 +--
 .../ops/op_quantized_depthwise_conv2d.cpp     |  31 +-
 .../ops/op_quantized_transpose_conv2d.cpp     |  44 +--
 backends/cortex_m/ops/operators.py            |  28 +-
 backends/cortex_m/ops/operators.yaml          |   9 +-
 backends/cortex_m/passes/__init__.py          |   1 +
 .../passes/convert_to_cortex_m_pass.py        |  64 ++++-
 .../cortex_m/passes/scratch_buffer_sizes.py   | 266 ++++++++++++++++++
 backends/cortex_m/test/build_test_runner.sh   |   4 +-
 12 files changed, 451 insertions(+), 82 deletions(-)
 create mode 100644 backends/cortex_m/passes/scratch_buffer_sizes.py

diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh
index 54d2091d1f4..5ac2674f964 100755
--- a/backends/arm/scripts/build_executorch.sh
+++ b/backends/arm/scripts/build_executorch.sh
@@ -7,6 +7,7 @@
 # Optional parameter:
 # --build_type= "Release" | "Debug" | "RelWithDebInfo" | "UndefinedSanitizer" | "AddressSanitizer"
 # --etdump      build with devtools-etdump support
+# --cmake-args= Additional arguments passed to cmake configure
 
 set -eu
 
@@ -24,6 +25,7 @@ build_type="Release"
 build_devtools=OFF
 build_with_etdump=OFF
 is_linux_musl=0
+extra_cmake_args=()
 target_cpu=""
 
 help() {
@@ -33,6 +35,7 @@ help() {
     echo "  --build_type=<TYPE>       Build with Release, Debug, RelWithDebInfo, UndefinedSanitizer or AddressSanitizer, default is ${build_type}"
     echo "  --devtools                Build Devtools libs"
     echo "  --etdump                  Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
+    echo "  --cmake-args=<ARGS>       Additional arguments passed to cmake configure"
     echo "  --toolchain=<TOOLCHAIN>   Toolchain can be specified (arm-none-eabi-gcc, arm-zephyr-eabi-gcc, aarch64-linux-musl-gcc). Default: ${toolchain}"
     echo "  --target_cpu=<CPU>        Override the toolchain's default TARGET_CPU (e.g. cortex-m4). Switching target_cpu reuses the same cmake-out dir, so clear ${et_build_root}/cmake-out first to avoid stale per-CPU artifacts. Default: unset (toolchain default)."
     exit 0
@@ -45,6 +48,10 @@ for arg in "$@"; do
       --build_type=*) build_type="${arg#*=}";;
       --devtools) build_devtools=ON ;;
       --etdump) build_with_etdump=ON ;;
+      --cmake-args=*)
+        # shellcheck disable=SC2206
+        extra_cmake_args=(${arg#*=})
+        ;;
       --toolchain=*) toolchain="${arg#*=}";;
       --target_cpu=*) target_cpu="${arg#*=}";;
       *)
@@ -89,6 +96,7 @@ cmake_args=(
     -DEXECUTORCH_BUILD_DEVTOOLS=${build_devtools}
     -DEXECUTORCH_BUILD_ARM_ETDUMP=${build_with_etdump}
     -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF
+    "${extra_cmake_args[@]}"
 )
 
 if [[ -n "${target_cpu}" ]]; then
diff --git a/backends/cortex_m/CMakeLists.txt b/backends/cortex_m/CMakeLists.txt
index 876c65982e6..627406c1935 100644
--- a/backends/cortex_m/CMakeLists.txt
+++ b/backends/cortex_m/CMakeLists.txt
@@ -30,6 +30,10 @@ set(CMSIS_NN_LOCAL_PATH
     ""
     CACHE PATH "Path to existing local CMSIS-NN installation"
 )
+option(CORTEX_M_ENABLE_RUNTIME_CHECKS
+       "Enable additional Cortex-M runtime assertions and validation checks"
+       OFF
+)
 
 # Try to find existing / local CMSIS-NN installation. This is useful for
 # debugging and testing with local changes. This is not common, as the CMSIS-NN
@@ -107,6 +111,11 @@ target_link_libraries(
   PRIVATE executorch
   PRIVATE kernels_util_all_deps
 )
+target_compile_definitions(
+  cortex_m_kernels
+  PRIVATE
+    $<$<BOOL:${CORTEX_M_ENABLE_RUNTIME_CHECKS}>:CORTEX_M_ENABLE_RUNTIME_CHECKS>
+)
 
 # Include directories for cortex_m_kernels
 target_include_directories(
diff --git a/backends/cortex_m/ops/op_quantized_batch_matmul.cpp b/backends/cortex_m/ops/op_quantized_batch_matmul.cpp
index e6bc5a949ce..345753ca8fc 100644
--- a/backends/cortex_m/ops/op_quantized_batch_matmul.cpp
+++ b/backends/cortex_m/ops/op_quantized_batch_matmul.cpp
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
+ * Copyright 2026 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -71,6 +72,7 @@ Tensor& quantized_batch_matmul_out(
     int64_t output_offset,
     int64_t output_multiplier,
     int64_t output_shift,
+    const Tensor& scratch,
     Tensor& out) {
   if (!validate_batch_matmul_arguments(context, lhs, rhs_transposed, out)) {
     return out;
@@ -100,25 +102,26 @@ Tensor& quantized_batch_matmul_out(
   quant_params.multiplier = static_cast<int32_t>(output_multiplier);
   quant_params.shift = static_cast<int32_t>(output_shift);
 
-  const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&out_dims);
-
   cmsis_nn_context ctx;
   ctx.buf = nullptr;
-  ctx.size = 0;
-
-  if (buf_size > 0) {
-    auto buffer_or_error = context.allocate_temp(buf_size);
-    if (!buffer_or_error.ok()) {
-      ET_LOG(
-          Error,
-          "quantized_batch_matmul: failed to allocate scratch buffer (%d bytes)",
-          buf_size);
-      context.fail(buffer_or_error.error());
-      return out;
-    }
-    ctx.buf = buffer_or_error.get();
-    ctx.size = buf_size;
+  ctx.size = scratch.nbytes();
+  if (ctx.size > 0) {
+    ctx.buf = scratch.mutable_data_ptr<int8_t>();
+  }
+
+#ifdef CORTEX_M_ENABLE_RUNTIME_CHECKS
+  const int32_t runtime_buffer_bytes =
+      arm_fully_connected_s8_get_buffer_size(&out_dims);
+  if (ctx.size != static_cast<size_t>(runtime_buffer_bytes)) {
+    ET_LOG(
+        Error,
+        "quantized_batch_matmul: scratch buffer size incorrect - actual: (%d) needed: (%d)",
+        static_cast<int>(ctx.size),
+        runtime_buffer_bytes);
+    context.fail(Error::Internal);
+    return out;
   }
+#endif
 
   const arm_cmsis_nn_status status = arm_batch_matmul_s8(
       &ctx,
diff --git a/backends/cortex_m/ops/op_quantized_conv2d.cpp b/backends/cortex_m/ops/op_quantized_conv2d.cpp
index 7d4433690f6..8af374c03f8 100644
--- a/backends/cortex_m/ops/op_quantized_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_conv2d.cpp
@@ -112,6 +112,7 @@ Tensor& quantized_conv2d_out(
     const Tensor& requantize_shifts,
     const int64_t activation_min,
     const int64_t activation_max,
+    const Tensor& scratch,
     Tensor& out) {
   if (!validate_conv2d_arguments(
           context,
@@ -182,31 +183,30 @@ Tensor& quantized_conv2d_out(
 
   cmsis_nn_context cmsis_context;
   cmsis_context.buf = nullptr;
-  cmsis_context.size = 0;
+  cmsis_context.size = scratch.nbytes();
+  if (cmsis_context.size > 0) {
+    cmsis_context.buf = scratch.mutable_data_ptr<int8_t>();
+  }
 
-  const int32_t buffer_bytes = arm_convolve_wrapper_s8_get_buffer_size(
+#ifdef CORTEX_M_ENABLE_RUNTIME_CHECKS
+  const int32_t runtime_buffer_bytes = arm_convolve_wrapper_s8_get_buffer_size(
       &conv_params, &input_dims, &filter_dims, &output_dims);
-  if (buffer_bytes < 0) {
+  if (runtime_buffer_bytes < 0) {
     ET_LOG(
         Error, "quantized_conv2d_out: CMSIS-NN buffer size calculation failed");
     context.fail(Error::Internal);
     return out;
   }
-  if (buffer_bytes > 0) {
-    auto buffer_or_error =
-        context.allocate_temp(buffer_bytes, kCortexMMveAlignment);
-    if (!buffer_or_error.ok()) {
-      ET_LOG(
-          Error,
-          "quantized_conv2d_out: failed to allocate scratch buffer (%d bytes, error %d)",
-          static_cast<int>(buffer_bytes),
-          static_cast<int>(buffer_or_error.error()));
-      context.fail(buffer_or_error.error());
-      return out;
-    }
-    cmsis_context.buf = buffer_or_error.get();
-    cmsis_context.size = buffer_bytes;
+  if (scratch.nbytes() != static_cast<size_t>(runtime_buffer_bytes)) {
+    ET_LOG(
+        Error,
+        "quantized_conv2d_out: scratch buffer size incorrect - actual: (%d) needed: (%d)",
+        static_cast<int>(scratch.nbytes()),
+        static_cast<int>(runtime_buffer_bytes));
+    context.fail(Error::Internal);
+    return out;
   }
+#endif
 
   const arm_cmsis_nn_status status = arm_convolve_wrapper_s8(
       &cmsis_context,
diff --git a/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp b/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
index 8dec61e0af1..21d4f257501 100644
--- a/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
@@ -150,6 +150,7 @@ Tensor& quantized_depthwise_conv2d_out(
     const Tensor& requantize_shifts,
     const int64_t activation_min,
     const int64_t activation_max,
+    const Tensor& scratch,
     Tensor& out) {
   if (!validate_depthwise_conv2d_arguments(
           context,
@@ -220,32 +221,32 @@ Tensor& quantized_depthwise_conv2d_out(
 
   cmsis_nn_context cmsis_context;
   cmsis_context.buf = nullptr;
-  cmsis_context.size = 0;
+  cmsis_context.size = scratch.nbytes();
+  if (cmsis_context.size > 0) {
+    cmsis_context.buf = scratch.mutable_data_ptr<int8_t>();
+  }
 
-  const int32_t buffer_bytes = arm_depthwise_conv_wrapper_s8_get_buffer_size(
-      &dw_conv_params, &input_dims, &filter_dims, &output_dims);
-  if (buffer_bytes < 0) {
+#ifdef CORTEX_M_ENABLE_RUNTIME_CHECKS
+  const int32_t runtime_buffer_bytes =
+      arm_depthwise_conv_wrapper_s8_get_buffer_size(
+          &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+  if (runtime_buffer_bytes < 0) {
     ET_LOG(
         Error,
         "quantized_depthwise_conv2d_out: CMSIS-NN buffer size calculation failed");
     context.fail(Error::Internal);
     return out;
   }
-
-  auto buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
-  if (!buffer_or_error.ok()) {
+  if (scratch.nbytes() != static_cast<size_t>(runtime_buffer_bytes)) {
     ET_LOG(
         Error,
-        "quantized_depthwise_conv2d_out: failed to allocate scratch buffer (%d bytes, error %d)",
-        static_cast<int>(buffer_bytes),
-        static_cast<int>(buffer_or_error.error()));
-    context.fail(buffer_or_error.error());
+        "quantized_depthwise_conv2d_out: scratch buffer size incorrect - actual: (%d) needed: (%d)",
+        static_cast<int>(scratch.nbytes()),
+        static_cast<int>(runtime_buffer_bytes));
+    context.fail(Error::Internal);
     return out;
   }
-  cmsis_context.buf = buffer_or_error.get();
-  cmsis_context.size = buffer_bytes;
-
+#endif
   const arm_cmsis_nn_status status = arm_depthwise_conv_wrapper_s8(
       &cmsis_context,
       &dw_conv_params,
diff --git a/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp b/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
index e3f6135c7b9..d2b66b18802 100644
--- a/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
+ * Copyright 2026 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -97,6 +98,8 @@ Tensor& quantized_transpose_conv2d_out(
     const Tensor& requantize_shifts,
     const int64_t activation_min,
     const int64_t activation_max,
+    const Tensor& scratch,
+    const Tensor& output_scratch,
     Tensor& out) {
   if (!validate_transpose_conv2d_arguments(
           context,
@@ -179,44 +182,43 @@ Tensor& quantized_transpose_conv2d_out(
 
   cmsis_nn_context cmsis_context;
   cmsis_context.buf = nullptr;
-  cmsis_context.size = 0;
+  cmsis_context.size = scratch.nbytes();
+  if (cmsis_context.size > 0) {
+    cmsis_context.buf = scratch.mutable_data_ptr<int8_t>();
+  }
 
   cmsis_nn_context output_context;
   output_context.buf = nullptr;
-  output_context.size = 0;
-
+  output_context.size = output_scratch.nbytes();
+  if (output_context.size > 0) {
+    output_context.buf = output_scratch.mutable_data_ptr<int8_t>();
+  }
+#ifdef CORTEX_M_ENABLE_RUNTIME_CHECKS
   const int32_t buffer_bytes = arm_transpose_conv_s8_get_buffer_size(
       &transpose_conv_params, &input_dims, &filter_dims, &output_dims);
-  auto buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
-  if (!buffer_or_error.ok()) {
+  if (scratch.nbytes() != static_cast<size_t>(buffer_bytes)) {
     ET_LOG(
         Error,
-        "quantized_transpose_conv2d_out: failed to allocate scratch buffer (%d bytes, error %d)",
-        buffer_bytes,
-        static_cast<int>(buffer_or_error.error()));
-    context.fail(buffer_or_error.error());
+        "quantized_transpose_conv2d_out: scratch buffer size incorrect - actual: (%d) needed: (%d)",
+        static_cast<int>(scratch.nbytes()),
+        buffer_bytes);
+    context.fail(Error::Internal);
     return out;
   }
-  cmsis_context.buf = buffer_or_error.get();
-  cmsis_context.size = buffer_bytes;
 
   const int32_t output_buffer_bytes =
       arm_transpose_conv_s8_get_reverse_conv_buffer_size(
           &transpose_conv_params, &input_dims, &filter_dims);
-  auto output_buffer_or_error = context.allocate_temp(
-      static_cast<size_t>(output_buffer_bytes), kCortexMMveAlignment);
-  if (!output_buffer_or_error.ok()) {
+  if (output_scratch.nbytes() != static_cast<size_t>(output_buffer_bytes)) {
     ET_LOG(
         Error,
-        "quantized_transpose_conv2d_out: failed to allocate output scratch buffer (%d bytes, error %d)",
-        output_buffer_bytes,
-        static_cast<int>(output_buffer_or_error.error()));
-    context.fail(output_buffer_or_error.error());
+        "quantized_transpose_conv2d_out: output scratch buffer size incorrect - actual: (%d) needed: (%d)",
+        static_cast<int>(output_scratch.nbytes()),
+        output_buffer_bytes);
+    context.fail(Error::Internal);
     return out;
   }
-  output_context.buf = output_buffer_or_error.get();
-  output_context.size = output_buffer_bytes;
+#endif
 
   const arm_cmsis_nn_status status = arm_transpose_conv_wrapper_s8(
       &cmsis_context,
diff --git a/backends/cortex_m/ops/operators.py b/backends/cortex_m/ops/operators.py
index 2c35ed8730b..d4393bc7ada 100644
--- a/backends/cortex_m/ops/operators.py
+++ b/backends/cortex_m/ops/operators.py
@@ -271,13 +271,15 @@ def quantized_mul_impl(
     "quantized_batch_matmul("
     "Tensor lhs, int lhs_zero_point, "
     "Tensor rhs_transposed, int rhs_zero_point, "
-    "int output_zero_point, int output_multiplier, int output_shift) -> Tensor"
+    "int output_zero_point, int output_multiplier, int output_shift, "
+    "Tensor scratch) -> Tensor"
 )
 lib.define(
     "quantized_batch_matmul.out("
     "Tensor lhs, int lhs_zero_point, "
     "Tensor rhs_transposed, int rhs_zero_point, "
     "int output_zero_point, int output_multiplier, int output_shift, "
+    "Tensor scratch, "
     "*, Tensor(a!) out) -> Tensor(a!)"
 )
 
@@ -291,6 +293,7 @@ def quantized_batch_matmul_meta(
     output_zero_point: int,
     output_multiplier: int,
     output_shift: int,
+    scratch: torch.Tensor,
 ) -> torch.Tensor:
     batch, lhs_rows, inner = lhs.shape
     batch_rhs, rhs_cols, inner_rhs = rhs_transposed.shape
@@ -307,6 +310,7 @@ def quantized_batch_matmul_impl(
     output_zero_point: int,
     output_multiplier: int,
     output_shift: int,
+    scratch: torch.Tensor,
 ) -> torch.Tensor:
     # Offsets are negated zero points (CMSIS-NN convention)
     lhs_fp = lhs.to(torch.float32) + float(lhs_zero_point)
@@ -638,7 +642,8 @@ def pad_impl(
     "Tensor requantize_multipliers, "
     "Tensor requantize_shifts, "
     "int activation_min, "
-    "int activation_max"
+    "int activation_max, "
+    "Tensor scratch"
     ") -> Tensor"
 )
 
@@ -657,6 +662,7 @@ def pad_impl(
     "Tensor requantize_shifts, "
     "int activation_min, "
     "int activation_max, "
+    "Tensor scratch, "
     "*, Tensor(a!) out"
     ") -> Tensor(a!)"
 )
@@ -733,6 +739,7 @@ def quantized_conv2d_meta(
     requantize_shifts: torch.Tensor,
     activation_min: int,
     activation_max: int,
+    scratch: torch.Tensor,
 ) -> torch.Tensor:
     stride_vals = list(stride)
     padding_vals = list(padding)
@@ -762,6 +769,7 @@ def quantized_conv2d_impl(
     requantize_shifts: torch.Tensor,
     activation_min: int,
     activation_max: int,
+    scratch: torch.Tensor,
 ) -> torch.Tensor:
     if input.dim() != 4 or weight.dim() != 4:
         raise RuntimeError("quantized_conv2d expects 4D input and weight tensors")
@@ -830,7 +838,8 @@ def quantized_conv2d_impl(
     "Tensor requantize_multipliers, "
     "Tensor requantize_shifts, "
     "int activation_min, "
-    "int activation_max"
+    "int activation_max, "
+    "Tensor scratch"
     ") -> Tensor"
 )
 
@@ -850,6 +859,7 @@ def quantized_conv2d_impl(
     "Tensor requantize_shifts, "
     "int activation_min, "
     "int activation_max, "
+    "Tensor scratch, "
     "*, Tensor(a!) out"
     ") -> Tensor(a!)"
 )
@@ -870,6 +880,7 @@ def quantized_depthwise_conv2d_meta(
     requantize_shifts: torch.Tensor,
     activation_min: int,
     activation_max: int,
+    scratch: torch.Tensor,
 ) -> torch.Tensor:
     stride_vals = list(stride)
     padding_vals = list(padding)
@@ -900,6 +911,7 @@ def quantized_depthwise_conv2d_impl(
     requantize_shifts: torch.Tensor,
     activation_min: int,
     activation_max: int,
+    scratch: torch.Tensor,
 ) -> torch.Tensor:
     if input.dim() != 4 or weight.dim() != 4:
         raise RuntimeError(
@@ -973,7 +985,9 @@ def quantized_depthwise_conv2d_impl(
     "Tensor requantize_multipliers, "
     "Tensor requantize_shifts, "
     "int activation_min, "
-    "int activation_max"
+    "int activation_max, "
+    "Tensor scratch, "
+    "Tensor output_scratch"
     ") -> Tensor"
 )
 
@@ -992,6 +1006,8 @@ def quantized_depthwise_conv2d_impl(
     "Tensor requantize_shifts, "
     "int activation_min, "
     "int activation_max, "
+    "Tensor scratch, "
+    "Tensor output_scratch, "
     "*, Tensor(a!) out) -> Tensor(a!)"
 )
 
@@ -1057,6 +1073,8 @@ def quantized_transpose_conv2d_meta(
     requantize_shifts: torch.Tensor,
     activation_min: int,
     activation_max: int,
+    scratch: torch.Tensor,
+    output_scratch: torch.Tensor,
 ) -> torch.Tensor:
     stride_vals = list(stride)
     padding_vals = list(padding)
@@ -1095,6 +1113,8 @@ def quantized_transpose_conv2d_impl(
     requantize_shifts: torch.Tensor,
     activation_min: int,
     activation_max: int,
+    scratch: torch.Tensor,
+    output_scratch: torch.Tensor,
 ) -> torch.Tensor:
     """
     Reference implementation of quantized transposed convolution.
diff --git a/backends/cortex_m/ops/operators.yaml b/backends/cortex_m/ops/operators.yaml
index e0ebbfab868..8db109dea43 100644
--- a/backends/cortex_m/ops/operators.yaml
+++ b/backends/cortex_m/ops/operators.yaml
@@ -65,19 +65,20 @@
     - arg_meta: null
       kernel_name: cortex_m::pad_out
 
-- func: cortex_m::quantized_conv2d.out(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int input_offset, int output_offset, Tensor requantize_multipliers, Tensor requantize_shifts, int activation_min, int activation_max, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::quantized_conv2d.out(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int input_offset, int output_offset, Tensor requantize_multipliers, Tensor requantize_shifts, int activation_min, int activation_max, Tensor scratch, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
       kernel_name: cortex_m::quantized_conv2d_out
 
-- func: cortex_m::quantized_depthwise_conv2d.out(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int depth_multiplier, int input_offset, int output_offset, Tensor requantize_multipliers, Tensor requantize_shifts, int activation_min, int activation_max, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: cortex_m::quantized_depthwise_conv2d.out(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int depth_multiplier, int input_offset, int output_offset, Tensor requantize_multipliers, Tensor requantize_shifts, int activation_min, int activation_max, Tensor scratch, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
       kernel_name: cortex_m::quantized_depthwise_conv2d_out
 
-- func: cortex_m::quantized_transpose_conv2d.out(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] output_padding, int[] dilation, int input_offset, int output_offset, Tensor requantize_multipliers, Tensor requantize_shifts, int activation_min, int activation_max, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::quantized_transpose_conv2d.out(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] output_padding, int[] dilation, int input_offset, int output_offset, Tensor requantize_multipliers, Tensor requantize_shifts, int activation_min, int activation_max, Tensor scratch, Tensor output_scratch, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
@@ -94,7 +95,7 @@
     - arg_meta: null
       kernel_name: cortex_m::quantized_max_pool2d_out
 
-- func: cortex_m::quantized_batch_matmul.out(Tensor lhs, int lhs_zero_point, Tensor rhs_transposed, int rhs_zero_point, int output_zero_point, int output_multiplier, int output_shift, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::quantized_batch_matmul.out(Tensor lhs, int lhs_zero_point, Tensor rhs_transposed, int rhs_zero_point, int output_zero_point, int output_multiplier, int output_shift, Tensor scratch, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
diff --git a/backends/cortex_m/passes/__init__.py b/backends/cortex_m/passes/__init__.py
index 92179ec6654..c379461949f 100644
--- a/backends/cortex_m/passes/__init__.py
+++ b/backends/cortex_m/passes/__init__.py
@@ -33,6 +33,7 @@ def _ensure_cortex_m_dependencies() -> None:
 
 _ensure_cortex_m_dependencies()
 
+from .cortex_m_pass import CortexMPass  # noqa  # usort: skip
 from .activation_fusion_pass import ActivationFusionPass  # noqa
 from .clamp_hardswish_pass import ClampHardswishPass  # noqa
 from .convert_to_cortex_m_pass import ConvertToCortexMPass  # noqa
diff --git a/backends/cortex_m/passes/convert_to_cortex_m_pass.py b/backends/cortex_m/passes/convert_to_cortex_m_pass.py
index 418f6cd63ff..e61ddaf63bc 100644
--- a/backends/cortex_m/passes/convert_to_cortex_m_pass.py
+++ b/backends/cortex_m/passes/convert_to_cortex_m_pass.py
@@ -6,25 +6,32 @@
 # LICENSE file in the root directory of this source tree.
 
 import executorch.backends.cortex_m.ops.operators  # noqa
+import executorch.exir as exir
 
 import torch
 import torch.fx
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
+
+from executorch.backends.cortex_m.passes import CortexMPass
 from executorch.backends.cortex_m.passes.passes_utils import quantize_multiplier_aot
+from executorch.backends.cortex_m.passes.scratch_buffer_sizes import (
+    required_cmsis_nn_buffer_sizes,
+)
 
 from executorch.backends.transforms.utils import (
     create_constant_placeholder,
     get_param_tensor,
     is_param_node,
 )
-
-from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass
 from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.passes import make_alloc_node
+from torch._subclasses.fake_tensor import FakeTensorMode
+
 from torch.export.graph_signature import InputKind
 from torch.fx.passes.infra.pass_manager import PassResult
 
 
-class ConvertToCortexMPass(XNNPACKPass):
+class ConvertToCortexMPass(CortexMPass):
     """
     Cortex-M backend pass for replacing supported quantized kernels with Cortex-M
     accelerated kernels.
@@ -33,6 +40,15 @@ class ConvertToCortexMPass(XNNPACKPass):
     by call_operator.
     """
 
+    def _create_uninitialized_alloc_node(self):
+        """Create an unitialized alloc node to be initialize at a later point."""
+        with FakeTensorMode() as mode:
+            return make_alloc_node(
+                self.exported_program.graph_module,
+                mode.from_tensor(torch.empty(0)),
+                None,
+            )
+
     def _compute_kernel_sum(self, weights, bias, input_offset, weight_offset):
         """
         Computes the precomputed kernel sum term (bias optional)
@@ -238,6 +254,9 @@ def _get_convolution_replacement(self, node):
                 torch.tensor(quantized_shifts, dtype=torch.int32),
             )
 
+        with node.graph.inserting_before(node):
+            scratch = self._create_uninitialized_alloc_node()
+
         if use_depthwise_conv:
             # Compute depth_multiplier for depthwise convolution
             # For depthwise: output_channels = input_channels * depth_multiplier
@@ -263,6 +282,7 @@ def _get_convolution_replacement(self, node):
                 quantized_shift_tensor,
                 output_qmin,
                 output_qmax,
+                scratch,
             )
             return exir_ops.edge.cortex_m.quantized_depthwise_conv2d.default, new_args
         else:
@@ -280,9 +300,36 @@ def _get_convolution_replacement(self, node):
                 quantized_shift_tensor,
                 output_qmin,
                 output_qmax,
+                scratch,
             )
             return exir_ops.edge.cortex_m.quantized_conv2d.default, new_args
 
+    def _initialize_alloc_node_size(self, node: torch.fx.Node) -> None:
+        """For nodes with a registered buffer size function for node.target, set the buffer sizes
+        of the last n args, which should be exir.memory.alloc nodes. For nodes without a
+        registered function, do nothing.
+        """
+
+        scratch_buffer_sizes = required_cmsis_nn_buffer_sizes(
+            node, self.target_config.backend
+        )
+        if scratch_buffer_sizes is None:
+            return
+
+        # Assume that scratch_buffer_sizes are given from left to right in the call signature of node.target.
+        for i, scratch_buffer_size in enumerate(reversed(scratch_buffer_sizes)):
+            scratch_arg = node.args[-(i + 1)]
+            if (
+                not isinstance(scratch_arg, torch.fx.Node)
+                or scratch_arg.target != exir.memory.alloc
+            ):
+                raise RuntimeError(
+                    f"Expected scratch alloc node as final argument(s) for {node.target}, got {scratch_arg}."
+                )
+
+            # buffer size is given in bytes, always use uint8 as dtype.
+            scratch_arg.args = (((scratch_buffer_size,), torch.uint8),)
+
     def _get_transpose_conv2d_replacement(self, node):
         """
         Transform aten.convolution with transposed=True to cortex_m.quantized_transpose_conv2d
@@ -363,6 +410,10 @@ def _get_transpose_conv2d_replacement(self, node):
                 torch.tensor(quantized_shifts, dtype=torch.int32),
             )
 
+        with node.graph.inserting_before(node):
+            scratch = self._create_uninitialized_alloc_node()
+            output_scratch = self._create_uninitialized_alloc_node()
+
         new_args = (
             x,
             weight_nhwc,
@@ -377,6 +428,8 @@ def _get_transpose_conv2d_replacement(self, node):
             quantized_shift_tensor,
             output_qmin,
             output_qmax,
+            scratch,
+            output_scratch,
         )
         return exir_ops.edge.cortex_m.quantized_transpose_conv2d.default, new_args
 
@@ -415,6 +468,9 @@ def _get_bmm_replacement(self, node):
                     args=(rhs_node, [0, 2, 1]),
                 )
 
+        with node.graph.inserting_before(node):
+            scratch = self._create_uninitialized_alloc_node()
+
         args = (
             lhs_node,
             -lhs_zp,
@@ -423,6 +479,7 @@ def _get_bmm_replacement(self, node):
             output_zp,
             output_mult,
             output_shift,
+            scratch,
         )
         return exir_ops.edge.cortex_m.quantized_batch_matmul.default, args
 
@@ -459,6 +516,7 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
                     args=args,
                     kwargs={},
                 )
+                self._initialize_alloc_node_size(cortex_m_op)
 
                 node.replace_all_uses_with(cortex_m_op)
                 graph_module.graph.erase_node(node)
diff --git a/backends/cortex_m/passes/scratch_buffer_sizes.py b/backends/cortex_m/passes/scratch_buffer_sizes.py
new file mode 100644
index 00000000000..36f3f8bbc17
--- /dev/null
+++ b/backends/cortex_m/passes/scratch_buffer_sizes.py
@@ -0,0 +1,266 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from collections.abc import Callable
+from typing import Any, cast
+
+import cmsis_nn  # type: ignore[import-not-found, import-untyped]
+import executorch.backends.cortex_m.ops.operators  # noqa
+
+import torch
+import torch.fx
+
+from executorch.exir.dialects._ops import ops as exir_ops
+
+BufferSizeFunction = Callable[[cmsis_nn.Backend, torch.fx.Node], list[int]]
+
+
+def _tensor_from_node(node: torch.fx.Node) -> torch.Tensor:
+    if "val" in node.meta:
+        return node.meta["val"]
+    elif node.op == "call_function":
+        args = (
+            _tensor_from_node(arg) if isinstance(arg, torch.fx.Node) else arg
+            for arg in node.args
+        )
+        return node.target(*args, **node.kwargs)  # type: ignore[operator]
+    else:
+        raise RuntimeError("Encountered non-call_function without 'val' meta.")
+
+
+def _shape_from_node(node: torch.fx.Node) -> torch.Size:
+    return _tensor_from_node(node).shape
+
+
+def _get_common_conv_buffer_size_inputs(
+    conv_node: torch.fx.Node,
+    *,
+    stride_arg_idx: int = 3,
+    padding_arg_idx: int = 4,
+    dilation_arg_idx: int = 5,
+) -> tuple[
+    list[int],
+    list[int],
+    list[int],
+    list[int],
+    list[int],
+    list[int],
+]:
+    x = cast(torch.fx.Node, conv_node.args[0])
+    weight = cast(torch.fx.Node, conv_node.args[1])
+    stride = cast(list[int], conv_node.args[stride_arg_idx])
+    padding = cast(list[int], conv_node.args[padding_arg_idx])
+    dilation = cast(list[int], conv_node.args[dilation_arg_idx])
+
+    # Input is NCHW (PyTorch); CMSIS-NN wants NHWC dims.
+    n, c_in, height, width = _shape_from_node(x)
+
+    weight_shape = _shape_from_node(weight)
+
+    # Output is NCHW; convert to NHWC dims.
+    out_n, out_c, out_h, out_w = _shape_from_node(conv_node)
+
+    input_nhwc = [n, height, width, c_in]
+    output_nhwc = [out_n, out_h, out_w, out_c]
+    stride_hw = [int(stride[0]), int(stride[1])]
+    padding_hw = [int(padding[0]), int(padding[1])]
+    dilation_hw = [int(dilation[0]), int(dilation[1])]
+
+    return (
+        input_nhwc,
+        list(weight_shape),
+        output_nhwc,
+        stride_hw,
+        padding_hw,
+        dilation_hw,
+    )
+
+
+def cmsis_nn_conv_buffer_size(
+    backend: cmsis_nn.Backend,
+    conv_node: torch.fx.Node,
+) -> list[int]:
+    (
+        input_nhwc,
+        weight_shape,
+        output_nhwc,
+        stride_hw,
+        padding_hw,
+        dilation_hw,
+    ) = _get_common_conv_buffer_size_inputs(conv_node=conv_node)
+    input_offset = cast(int, conv_node.args[6])
+    output_offset = cast(int, conv_node.args[7])
+    output_qmin = cast(int, conv_node.args[10])
+    output_qmax = cast(int, conv_node.args[11])
+
+    # Weight is in OHWI layout after conversion.
+    c_out, kernel_h, kernel_w, c_in = weight_shape
+    filter_nhwc = [c_out, kernel_h, kernel_w, c_in]
+
+    return [
+        int(
+            cmsis_nn.convolve_wrapper_buffer_size(
+                backend,
+                cmsis_nn.DataType.A8W8,
+                input_nhwc=input_nhwc,
+                filter_nhwc=filter_nhwc,
+                output_nhwc=output_nhwc,
+                padding_hw=padding_hw,
+                stride_hw=stride_hw,
+                dilation_hw=dilation_hw,
+                input_offset=input_offset,
+                output_offset=output_offset,
+                activation_min=output_qmin,
+                activation_max=output_qmax,
+            )
+        )
+    ]
+
+
+def cmsis_nn_depthwise_conv_buffer_size(
+    backend: cmsis_nn.Backend,
+    conv_node: torch.fx.Node,
+) -> list[int]:
+    (
+        input_nhwc,
+        weight_shape,
+        output_nhwc,
+        stride_hw,
+        padding_hw,
+        dilation_hw,
+    ) = _get_common_conv_buffer_size_inputs(conv_node=conv_node)
+    depth_multiplier = cast(int, conv_node.args[6])
+    input_offset = cast(int, conv_node.args[7])
+    output_offset = cast(int, conv_node.args[8])
+    output_qmin = cast(int, conv_node.args[11])
+    output_qmax = cast(int, conv_node.args[12])
+
+    # Weight is in IHWO layout after conversion.
+    _, kernel_h, kernel_w, c_out = weight_shape
+    filter_nhwc = [c_out, kernel_h, kernel_w, 1]
+
+    return [
+        int(
+            cmsis_nn.depthwise_conv_wrapper_buffer_size(
+                backend,
+                cmsis_nn.DataType.A8W8,
+                input_nhwc=input_nhwc,
+                filter_nhwc=filter_nhwc,
+                output_nhwc=output_nhwc,
+                padding_hw=padding_hw,
+                stride_hw=stride_hw,
+                dilation_hw=dilation_hw,
+                ch_mult=depth_multiplier,
+                input_offset=input_offset,
+                output_offset=output_offset,
+                activation_min=output_qmin,
+                activation_max=output_qmax,
+            )
+        )
+    ]
+
+
+def cmsis_nn_batch_matmul_buffer_size(
+    backend: cmsis_nn.Backend,
+    matmul_node: torch.fx.Node,
+) -> list[int]:
+    rhs_transposed = cast(torch.fx.Node, matmul_node.args[2])
+    rhs_shape = _shape_from_node(rhs_transposed)
+
+    _, rhs_cols, inner = rhs_shape
+
+    return [
+        int(
+            cmsis_nn.fully_connected_buffer_size(
+                backend,
+                cmsis_nn.DataType.A8W8,
+                filter_nhwc=[inner, -1, -1, rhs_cols],  # H and W values are unused.
+            )
+        )
+    ]
+
+
+def cmsis_nn_transpose_conv_buffer_size(
+    backend: cmsis_nn.Backend,
+    conv_node: torch.fx.Node,
+) -> list[int]:
+    (
+        input_nhwc,
+        weight_shape,
+        output_nhwc,
+        stride_hw,
+        padding_hw,
+        dilation_hw,
+    ) = _get_common_conv_buffer_size_inputs(
+        conv_node=conv_node,
+        stride_arg_idx=3,
+        padding_arg_idx=4,
+        dilation_arg_idx=6,
+    )
+    output_padding = cast(list[int], conv_node.args[5])
+    input_offset = cast(int, conv_node.args[7])
+    output_offset = cast(int, conv_node.args[8])
+    output_qmin = cast(int, conv_node.args[11])
+    output_qmax = cast(int, conv_node.args[12])
+    c_out, kernel_h, kernel_w, kernel_c_in = weight_shape
+    filter_nhwc = [c_out, kernel_h, kernel_w, kernel_c_in]
+    padding_offsets_hw = [int(output_padding[0]), int(output_padding[1])]
+
+    return [
+        int(
+            cmsis_nn.transpose_conv_buffer_size(
+                backend,
+                cmsis_nn.DataType.A8W8,
+                input_nhwc=input_nhwc,
+                filter_nhwc=filter_nhwc,
+                output_nhwc=output_nhwc,
+                padding_hw=padding_hw,
+                stride_hw=stride_hw,
+                dilation_hw=dilation_hw,
+                padding_offsets_hw=padding_offsets_hw,
+                input_offset=input_offset,
+                output_offset=output_offset,
+                activation_min=output_qmin,
+                activation_max=output_qmax,
+            )
+        ),
+        int(
+            cmsis_nn.transpose_conv_reverse_conv_buffer_size(
+                backend,
+                cmsis_nn.DataType.A8W8,
+                input_nhwc=input_nhwc,
+                filter_nhwc=filter_nhwc,
+                padding_hw=padding_hw,
+                stride_hw=stride_hw,
+                dilation_hw=dilation_hw,
+                padding_offsets_hw=padding_offsets_hw,
+                input_offset=input_offset,
+                output_offset=output_offset,
+                activation_min=output_qmin,
+                activation_max=output_qmax,
+            )
+        ),
+    ]
+
+
+_target_to_buffer_sizes_registry: dict[Any, BufferSizeFunction] = {
+    exir_ops.edge.cortex_m.quantized_conv2d.default: cmsis_nn_conv_buffer_size,
+    exir_ops.edge.cortex_m.quantized_depthwise_conv2d.default: cmsis_nn_depthwise_conv_buffer_size,
+    exir_ops.edge.cortex_m.quantized_batch_matmul.default: cmsis_nn_batch_matmul_buffer_size,
+    exir_ops.edge.cortex_m.quantized_transpose_conv2d.default: cmsis_nn_transpose_conv_buffer_size,
+}
+
+
+def required_cmsis_nn_buffer_sizes(
+    node: torch.fx.Node, backend: cmsis_nn.Backend
+) -> list[int] | None:
+    """Returns a sequence of scratch buffer sizes required by node, in bytes.
+    If no function is registered to compute this for the target of the node, return None.
+    """
+    if node.target not in _target_to_buffer_sizes_registry:
+        return None
+
+    buffer_size_function = _target_to_buffer_sizes_registry[node.target]
+    return buffer_size_function(backend, node)
diff --git a/backends/cortex_m/test/build_test_runner.sh b/backends/cortex_m/test/build_test_runner.sh
index bdca1a21e7c..a67c5a907a4 100755
--- a/backends/cortex_m/test/build_test_runner.sh
+++ b/backends/cortex_m/test/build_test_runner.sh
@@ -28,7 +28,7 @@ fi
 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../../..")
 build_executorch="${et_root_dir}/backends/arm/scripts/build_executorch.sh"
-${build_executorch} --devtools --target_cpu="${target_cpu}"
+${build_executorch} --devtools --target_cpu="${target_cpu}" --cmake-args="-DCORTEX_M_ENABLE_RUNTIME_CHECKS=ON"
 
 # Build executor runner with selected aten ops and semi hosting
 build_dir="${et_root_dir}/arm_test"
@@ -48,4 +48,4 @@ aten::unsqueeze_copy.out,\
 aten::select_copy.int_out,\
 aten::amax.out"
 
-${build_executor_runner} --pte=semihosting --bundleio --target="${target}" --output="${build_root_test_dir}" --select_ops_list="${select_ops_list}" --extra_build_flags="-DET_ATOL=5.0 -DET_RTOL=1.0"
+${build_executor_runner} --pte=semihosting --bundleio --target="${target}" --output="${build_root_test_dir}" --select_ops_list="${select_ops_list}" --extra_build_flags="-DET_ATOL=5.0 -DET_RTOL=1.0 -DET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=0"

From 5fc929fa88e3b76c7ef26a482c896b344054ef48 Mon Sep 17 00:00:00 2001
From: qti-chenweng <168707118+chenweng-quic@users.noreply.github.com>
Date: Tue, 26 May 2026 16:55:09 +0800
Subject: [PATCH 015/103] Qualcomm AI Engine Direct - Refactor llama runner for
 dynamic IO dtypes (#19146)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Summary
To enable GPU backend support in the Llama runner, refactoring is
required because the dtypes of kv_cache, attention_mask, and logits are
currently hardcoded, preventing floating‑point models from running.
This PR focuses on removing the hardcode dtype for them.

#### Key changes
- Remove template parameter <typename T> from KVManager,
LhdTokenGenerator,
  MultimodalPromptProcessor, and related runner classes
- Detect kv_cache and attention_mask dtypes dynamically from MethodMeta
at
  construction time instead of compile-time bitwidth detection
- Switch to std::byte* pointer arithmetic with getDtypeSize() for all
buffer
  offsets; add fill_mask() helper for multi-dtype attention mask filling
- Update spec_prop pass for custom llama op for sharding case greater
than 1


### Test plan
```
python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder /local/mnt/workspace/chenweng/executorch/executorch/build-android  --device acfa9311 --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --use_fp16
```
<img width="1977" height="468" alt="image"
src="https://github.com/user-attachments/assets/8bf3bffa-9b9f-4655-9cbc-b20127c2468a"
/>


cc @cccclai @cbilgin @abhinaykukkadapu
---
 backends/qualcomm/_passes/build_quant_io.py   |  48 +--
 backends/qualcomm/tests/test_qnn_delegate.py  |  18 +-
 backends/qualcomm/tests/utils.py              |   1 +
 .../stories260k_hybrid_llama_qnn.pte          | Bin 1355520 -> 1350272 bytes
 .../llama/decoder_runtime_evaluator.py        |   2 +-
 .../oss_scripts/llama/decoder_utils.py        |   6 +-
 examples/qualcomm/oss_scripts/llama/llama.py  |  70 +++-
 .../oss_scripts/llama/qnn_llama_runner.cpp    |  25 +-
 .../llama/qnn_multimodal_runner.cpp           |  38 +-
 .../oss_scripts/llama/runner/decoder_runner.h |  28 +-
 .../oss_scripts/llama/runner/kv_manager.cpp   | 366 +++++++++++-------
 .../oss_scripts/llama/runner/kv_manager.h     |  43 +-
 .../llama/runner/lhd_token_generator.cpp      |  29 +-
 .../llama/runner/lhd_token_generator.h        |  18 +-
 .../multimodal_lhd_token_generator.cpp        |  26 +-
 .../multimodal_lhd_token_generator.h          |  18 +-
 .../multimodal_prompt_processor.cpp           |  53 ++-
 .../multimodal_prompt_processor.h             |  51 ++-
 .../multimodal_runner/multimodal_runner.cpp   |  73 ++--
 .../multimodal_runner/multimodal_runner.h     |  12 +-
 .../multimodal_token_generator.cpp            |  50 +--
 .../multimodal_token_generator.h              |  43 +-
 .../llama/runner/prompt_processor.cpp         |  84 ++--
 .../llama/runner/prompt_processor.h           |  30 +-
 .../oss_scripts/llama/runner/runner.cpp       |  71 ++--
 .../oss_scripts/llama/runner/runner.h         |  13 +-
 .../llama/runner/token_generator.cpp          |  80 ++--
 .../llama/runner/token_generator.h            |  30 +-
 .../qualcomm/oss_scripts/llama/runner/utils.h |  41 ++
 .../llama/wrappers/attention_sink_wrappers.py |   2 +
 .../llama/wrappers/llm_wrappers.py            |  46 ++-
 exir/passes/spec_prop_pass.py                 |  15 +-
 extension/android/jni/jni_layer_llama.cpp     |  43 +-
 extension/llm/custom_ops/model_sharding.py    |  24 +-
 extension/llm/custom_ops/op_fallback.py       |  29 ++
 35 files changed, 820 insertions(+), 706 deletions(-)
 create mode 100644 extension/llm/custom_ops/op_fallback.py

diff --git a/backends/qualcomm/_passes/build_quant_io.py b/backends/qualcomm/_passes/build_quant_io.py
index d43842e84a5..057dcc0f864 100644
--- a/backends/qualcomm/_passes/build_quant_io.py
+++ b/backends/qualcomm/_passes/build_quant_io.py
@@ -5,11 +5,10 @@
 # LICENSE file in the root directory of this source tree.
 import torch
 from executorch.backends.qualcomm.utils.constants import QCOM_QUANTIZED_IO
-from executorch.exir.delegate import executorch_call_delegate
 
-from executorch.exir.pass_base import ExportPass, ProxyValue
+from executorch.exir.delegate import executorch_call_delegate
+from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.tensor import TensorSpec
-from torch.utils import _pytree as pytree
 
 
 class BuildQuantIo(ExportPass):
@@ -28,22 +27,27 @@ def _make_spec(self, x):
         else:
             return None
 
-    def placeholder(self, name: str, arg, meta):
-        if quantized_dtype := meta.data.get(QCOM_QUANTIZED_IO, None):
-            arg = arg.to(dtype=quantized_dtype)
-            meta["spec"] = self._make_spec(arg)
-        return super().placeholder(name, arg, meta)
-
-    def call_getitem(self, value, key: int, meta):
-        meta["spec"] = value.node.meta["spec"][key]
-        return super().call_getitem(value, key, meta)
-
-    def call_delegate(self, lowered_module, args, kwargs, meta):
-        args_data, _ = pytree.tree_map_only(
-            ProxyValue, lambda x: x.data, (args, kwargs)
-        )
-        meta["spec"] = pytree.tree_map(
-            self._make_spec,
-            executorch_call_delegate(lowered_module, *args_data),
-        )
-        return super().call_delegate(lowered_module, args, kwargs, meta)
+    def _build(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        # Forcedly update delegate node's meta['spec'] to get correct output
+        # tensor size in runtime
+        call_delegates = [
+            node
+            for node in graph_module.graph.nodes
+            if node.op == "call_function" and node.target == executorch_call_delegate
+        ]
+        for n in graph_module.graph.nodes:
+            if QCOM_QUANTIZED_IO in n.meta:
+                n.meta["val"] = n.meta["val"].to(dtype=n.meta[QCOM_QUANTIZED_IO])
+                n.meta["spec"] = self._make_spec(n.meta["val"])
+
+        for call_delegate in call_delegates:
+            spec = []
+            for user in list(call_delegate.users):
+                spec.append(self._make_spec(user.meta["val"]))
+            call_delegate.meta["spec"] = tuple(spec)
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        self._build(graph_module)
+        graph_module.graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 6d5b44d7a35..ee6678fa499 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -7730,8 +7730,11 @@ def test_llama_stories_110m(self):
             "--max_context_len",
             "128",
         ]
+        if self.use_fp16:
+            cmds.append("--use_fp16")
         self.add_default_cmds(cmds)
-
+        print(" ".join(cmds))
+        exit(0)
         golden_start_with = "Once upon a time,"
         p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
         with Listener((self.ip, self.port)) as listener:
@@ -7750,7 +7753,10 @@ def test_llama_stories_110m(self):
                 # x86 does not allow weight sharing, so we don't check pte size
                 if not self.enable_x86_64:
                     pte_size = msg["pte_size"]
-                    self.assertLessEqual(pte_size, 135_000_000)  # 135MB
+                    if self.use_fp16:
+                        self.assertLessEqual(pte_size, 275_000_000)  # 275MB
+                    else:
+                        self.assertLessEqual(pte_size, 135_000_000)  # 135MB
                 if not self.compile_only and not self.enable_x86_64:
                     self.assertGreaterEqual(msg["inference_speed"], 220)  # Lanai
 
@@ -10087,6 +10093,13 @@ def setup_environment():
         choices=["wikitext_ppl", "hellaswag_acc_norm", "sqnr"],
         type=str,
     )
+    parser.add_argument(
+        "-F",
+        "--use_fp16",
+        help="If specified, will run in fp16 precision and discard ptq setting",
+        action="store_true",
+        default=False,
+    )
 
     args, ns_args = parser.parse_known_args(namespace=unittest)
     TestQNN.host = args.host
@@ -10114,6 +10127,7 @@ def setup_environment():
     TestQNN.backend = args.backend
     TestQNN.static_llm_eval_method = args.static_llm_eval_method
     TestQNN.direct_build_folder = args.direct_build_folder
+    TestQNN.use_fp16 = args.use_fp16
 
     return sys.argv[:1] + ns_args
 
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index d8802f74e68..c22ee8371e0 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -221,6 +221,7 @@ class TestQNN(unittest.TestCase):
     static_llm_eval_method = ""
     direct_build_folder: str = ""
     dsp_heap_profile_filename = "htp_heap_usage.txt"
+    use_fp16 = False
 
     @classmethod
     def setUpClass(cls):
diff --git a/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte b/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte
index ad6bee06146c78f8fe1df1c77c610d72dcda8c13..5903c5b5c32277c0eaa795ae65c54370451900e8 100644
GIT binary patch
delta 306914
zcmcG%dt6j?{>Og~!;Gk-q5`4}7X|OAsAyEESX5Scjmip<iqZ-%6_pjrSX6GISwnuh
zpkYy2k?BTdyK1y+Wi6Q%l@%$qRBn-JQCR`=d%n;444k(8?eDvP{CITo^?Khwm-9KF
z&zTYZ2j}^ppLbrSVHl_7j-PPZ;<U6RaKxGXj~~DO)0i7&3;2Je5zGIh_|t1d7~%XW
z%rH{8vK?36o_o%lP<s5!U=X486VLy5o@f|JqYcA1$}mQZG>r5ShLJJcFfxZ3M%GXw
z5N1ZQVdM@mjDo?2QJ7>HixUl_D8Vq66QI~<7%SrVbC6-|9YEG-s_bVNjeUsI%P=Z>
z7{<|VhLPhoj4fftSPC?ZeXj+BQeK+lngcG>S1W(!g&Bsa2A}O25NH}7(d*7vgF!E1
z>k4dlo!V>L%fVm-5`H|F2Uf=nxTK*=61MJxU}r;H9P+%w!C;$%zkr?J3PP{dbw&)|
zha7QIT+H959oF`F=5sfN-(rqj`o{lMepN<y@7{l_zI4vtI&*nE-voowEj_u4mtAw|
z;<49Vv9vhDDDJWe8~agLd&(X1wy%T1N(Vm=J3n#cr?yKTz7#p4x({0_r>*&0u$$F&
zG-yzF8q~Y~e`=77$D42dTZ?)f-P6^g#{+}o23!)wnCTG;+xJOV{izOl<+s6Lri1^4
zonMOQC$~!;-n-GNKP&3`H>Zuza?1ZpFUsG#?|&@6yZ5bstG;yo>z%cFPCxpe$D_8(
zCT!`!uJ+VB<eB(3I`{_c{8noWqqR#Oz7;uQfAG=3w7su)jmL%kXwa;8{-*}{GrN2D
z|67B4r7=LE+vCf?D~SUx$?cMa_5Q4@{33_E@%vz~#K9+H7b?G^OCCN8xs?BA?rYN)
zYdOR5YYf#NdGCL!US`u%@BUl)rTdwZq4ID4@js78YnM&f)_-<2NJcj_?@I9XIrs<I
zg&LIJB@aJ>oCX!Vf9pNdBHrp6j|byukt`<v!+KRWxx4rMe{az8w>yjS3_Jdx8&uI{
z6E=&5S}KyC8i(A6?@kB59y`BfWf%=z^6+)`Lf`b;qBo~)dAqAYCneD!Sxx?j2FVb8
z{o%j2D3U=74bf}A{^u5@ciDs;Vd)LED90h+iEo~R$Na0SMa5n6@Uh5gXx-WO-ZpK<
zyIn2%IE5C;`tm=v=+2M-y+zv^x>|I3`+sgxLzhk1Iu`Cwi&`A=Jbc?6`~~bnV-&^A
z(7hJE4>>LJ_xkPiX@?uST6EJ$S|lrsv?$ea<$w$H)yAKN@{kOSpB>k0*r&g5%(&7r
z^ozOV!72~ha4zo=+sg2Gti6Iq%m47?VX#AYQ14@1jl|qouO|X9ES#tkx7uN!PY%Cf
zu!rDhXO|-UdK8GV%!O;FNf1fEnlWnZE#}Y|)|3C#SSenv<y>`v)n;O^eTSS4eP?|C
zz_ye7rZAsF^)CH^{5N2)e`>TYJ8;ZAJFq^@H#Fw+|DLP}&z*L%rvH#EiFu=wg|DM?
zu&#Ulf$V8pv^+a7ef*_Ed$XYYw-$9Jd5yMv31VFVX;GaSIk=t)9ct3;-*;l8O3Ur$
zlud{o>S_MJX4*}|f1jzq(>IC!5~1OeInm9KIl+&4KTljKK$>$-pnPKF(2oC)L|udQ
z0*(KDBAPUGCv!B^r1O5J<Cwjnp9-zBo8?cB8v4^0|GimV*|t*G@3XC++Bb#UCzNa!
zYbN{8&YyCvvy-J|Mg;tmyhD$$V*a-g>q_`IrTspkG_Qu48OnJ2FZSvYN?E3LcE;tC
zqlSL@)ql@cgsVLXH&NH`v-#uuCNVQZ$@1w-Q)gPC<#wXN6C#Ixz_R<_n$?wP9VPuf
zkxax2re`RVbYv*gVlB5b<z_?<eT@bB|CVVLHT^!5Ohjg9DA82<&##*_U(4-8*(b&h
z-Tn1{Pt-M1*HhB(6Uj)GFg-(=Vpyv~nIvJZ*4f#zPKp{TtM~t{Sxc$w57`D6Gd)Au
z#{SmTfitw+&Xj&~?9i<(|2<PzyDq1uKV%wO#Jmh;@~~=aZ1rQd*4f$8GNXn*{;i!Y
z4SndEBR!z675uq3u-cW}>k{hvL$;A4=sA%ZVIB^h;nu>DC0zexGjF8=opFkH=q8>3
z|F@nzkJ|o_ac~|JG}OMBj$kmfexzx6cHo^U7Y$wa-TyVq>H2YKW$IjcbC{f=EYgdi
z37@3p*@0K4UNm$S8+~a@sORKQXYZB3>#g29iF$tD7XP$<qk9<L=`+a_>O-&Y!_Iun
zUAzBYnRe00RXk}VUj|nJCUUiLji3|ctX#n%dq!^t!#*=_JE40L6<V>eis0fu>tWrj
z<L(bEnf;YFmYF0Pe#}6@oY=tn>ET1KB$>Fb(O1ZIKE|Q0DG0nY{ZMHH>+1e+!>Hsc
z<Vxjgb+L4Dm2u^8MR7G!Vl`J0S2~xJzK5%vE0@d1)l9Y;u3|2E141aHnyIHVx-0cG
zj6}{)9ASBDVTKt1t93~tCinq|QwcLk0(b+{=fwpIP7ROtY6(L3HivdVVCN+f(OC{{
zvqNk97CW@D%$`tW+jomYo8!>hzKss8-=VdABeg`Ts&!~>-)x8WutRJ6u5f65%(hTf
zwr`C?JIA55eVZKGvfpXTPm73-(h{j^#~-va9NLyYXiFSgnTw&S1_UJXHitI%587sj
zc8x<D7ic;?JUUiOq^g~N(B?R_NB^MpJG3cG^H5cG<XVSz{=aKW4?DC?V7W!~WOOm@
ztVt+;GM9}cu2>xb=#JF4lw<P#Vea}d%yIZ+|K2C@KPJi04k@nZM$OpkIXQ*YKHdp-
zy%CHdYzkL8S01&=<)zLHF1a7;Svvf(ba0YLjQ>wd#tNSM(oy!3QNzQT5b`5S#*k6=
zi$~t{S!>yP{*+5h<Q6`Txo&k>pk~hDQXgwZBbqH-|9`9*IXq;gT7KNwV8<}Yni1$f
zFFM-qut{m_ng)r{wGPdEhh{)v{k(|i!wzk^Lz@`w(@e6~IW)H49EY~mp-qe~b7<s|
z8k&%{-wuaXj+G&8Vswi`v)-Yx{gSjqs@c=29ehqwltXd6Q!#y>?Y73DP2~w|r@J~g
zF?y#%Q}8>@QHQ4DcbXK<q=>!0)692h+J2`gcWBb|9>yDZ<=n*RI)|pvp-Ju=8tnTW
z(!}Tyno0J0r)F>-EDEt`y!P|gIu6T3v0F^OoJxm(1=ul-{%|A8To2`tsC#~I9~X$6
z7nSa}!h3noYd`qAN{BRgUEW!OGf}{Ier#`ftuJ-SW!>gmJ%OVOB31I)o)vxGdV0lq
z=biQOJ4ed4#~O2@<nez<rJe1GRs}u6!=LN6aF%Jl7C5iqP4mIP;EPpQYQMnlOD6<c
zE?(U4qivB>{ZyLDW1Z0EU41Rp{g*86yZiF5GDUSjtCk1)UiwhqN4A|~Ih65$6^FM2
zyDoh?thTr1R#~_rtiKIsT(-F1(+TBPgf%>1#P+z?vTD6-ao<DJ>@ex9@}fFFu;lWm
z!!mmY@^6?B$XQg_x9Gq$D~~MwV(?Jll|{<z@!SQAe>PoI9XRL8+sxg8kFLDKMM;56
zue!}_3;gS<HDQlW2wYoqn~U6m-}sY6&)vFYLReqFY!UcbuTQr_o_lSXC(QgP@YdB&
z6X?cko;DwOZs@hHF!Sla$m^aq4?Xw(b?2C7QDEc^{~+&=H^hl?-<$5h_yPX^$a8~l
zRwl+TmE3{x^p$<Yc<!nfG45U(8QHf$o&)xX$)Ag>1EcOfe`J%MsrpZ`@r^aDXNcf>
zHgbNh!_|SO?#?eQ7-1Ss!%ZWP>%cJ6$mQBU)HHIq_NJJ8He(uf$)+)fi;r3i-x$-_
zk%}GH(b1++&NYH-&nVL<;A$Ib8uxRha~&Xd30Ge_q;-gCG;q~&Rd5w^<qk1leECgt
zOsspx>+Qk#z^*5UyNf}-z|~I;>yeR0x@zcv!1S7--6k3P!F^GshVN|;_6cl$YJ>o9
z;4K?O1di4CB6$9kAnk@aIgs_VTTDDS4G%20i6PN23xA_M_-mrt+H59xMu2}w4D34;
z*PV~5a$v_314o`tc7uLMR0&_0!#q{se<i9}|1?t~CK&awe<lW&){GXL&k_ThYKFVt
zY!4nx47^#BEa>Az_3IbJE(U)@j4vey_<(o~Q_H_J!#(@q@9LQ2jW$@lrR_0^rl<a+
zJ@_U?Ju}=5+M5{o^_g*k_9O<zZnb1}f`;@Mk8v}Kor!AQS7eE-Zx8OURTEKVLTeKP
z2k`-IvlWt{0$P)(rXI$p4!p%yoq(zZ`dDIM-Lu2pZ?y*>5tI@!-pGbLoT!e7$8vC`
zt(a(3LLNvAOx<SLZ?MIaj8?>fM78N(*eC6453aWq-A*uaAa@5o+v)R8Sh#p016_<@
zU8jH+RU%m1DVV@U9zl7hfQGdoxT{mZxJSL+9=x+tKnpVw{HaqgnT-H~H6rLGX=!LV
zf;(vTpOf97+Y?p(H|7veBlxz&z$&Es+k<6^YO6?7!M8FVw)B=n)gjUnq^lDH3C~%!
zeg?AzX+6@L8OrB|ON1K)+2fS*PJ8g$M76Jl9+?APW~*eJN}yK}sdl&<w8&P-I5j{o
zO;oGC#m9KJJ-E<T$vCA#^QozJv>UWQ&@gL0ltc4uO5Yhfp%*0v@}IZtF0>VNnc;h{
zJ$ON4U^fcTd|N?J870v3spf^z;h=K^bN>?GGs7CI2KYIhoM{W6+sP*wDGlwx*}}bI
zKhem6&*XM`VPZHaC(w7ts2*}_8#|z9SyX1Q(E^<T9p_GZzdd+*;OZUYJh`CkL^bx9
zIn44YgP&^gQJz}xDOx9OY=%$g7C2^(@<e^m9?a4@X=XNjn_#qg=zl;2}p4W44_
zx=Wunz)z<BWOp0rM9|=F<BgOL+k+=qc!H4wo@C*PMlpCIIKJm(t9L5l<2(6yV-I{>
zC!b)nz{hm*iAL0c_TcDFo^E8qM|Sc_Mj?E7C!cJT!-op@O1=|}TKEtq$BE`JcN2Ip
z(>vJ{@lks)AyLiBM4AqcN1D`~Zq9=YVwz`|gL}xNGu9v&Xld!gZIBqPl`b_J5ky;B
zjKe=}5BB3`_>rJ#;J$(JyOTZhLA~j|Q<0W|dm&A<=3ou12hq+W7t1O~W(3_ut*U-v
z^)o(UY1Vpq<6xvBaErdXl$Hw%C%4a=X7#L5g22@&pfS}5!a4;MT#vx)6cD5pfgyrk
z5`&rGYh)vt5J;#S;>iYfB&Z`lbH5gW+Y<sy5mx})64cb=h<Ad2O9<>n+zk9RL9G+9
z_fsAh34z|P4DqA`k0+>uBF+Q<oDj%ITn79pL5=+daSiy#guqtB4ZzlfK*uYI-MItW
zU_S^O;z>E!9{fH*?GyVP@G;wn78Jvd3XAJyPcWkr$#-26`e6@}BV7_Yq6NvfT@vC%
z9cmA@B&glLk|7iPO+sMm--mb#fnN(u?2cPG>|Zv+r51KLAux82Rc#~eE1Qurys16-
zWrEuJ8{tyHUx;RiCl}aE_r7Yml)(OJGZ`(|=Qfj}f_-K)87Ej%g4)zZINxXO!9${9
zh=2!eBi5uM*rzt*KCOT?G6=6(sdmCXu^FkFVIMPC?S%7w&hu9^3>NT!ZNyZ_gMDZ-
zmV`3c2R0*B4eWiJk*WdKkf2s|5UvgU9z9FAlz;N{7HIWd4(uJ9$*{rp+l*9|u(xeS
zsy(oM32JGOa4q1s5&}nFAL5B>=JB5pIJnnpP$ujjHlsm>us3Zc!vuT7W-?5$y$Sc_
zhw<P7zAlO(p71Z)gRj{(v?mqzs?BImF6{3K_jTY=0(>PQu<DH=o@(IB32K&!xE{PK
zA+Qf|D{u!xV}_0Lq<q;P{4>*3a4xteL3Ic&13%7m4+GbNs}fYc;AZfH32L3-sIS_C
z6$xs$;B4?ZZau-p;57*<!3C}c-$Dt38^9$AYL(#d!|lQAnQem8!OOYh1Q&v@Oi;bU
z!4=>_?pwio!1?sB;8yT_>KB~yulC?P>KB{~&Y^z6W#H4P-wmz>XHmc4X7EYWFF5M!
z_Fy{o3(f|QrGCN1;L+6Y37g=q0gnoNwr`Xw^@O?IP0$h0aqg&Z+JnPH5t#P2+dTu8
zBFtWbOQ1sn+ut6gW_QDD8#D<z&fN@35JjNRez!ZeMRvw8+bah;D6o3}D3ut2mmeAn
z9p|nE#fTzs2(QDiC}Fmj?_1Vz%j-Mwngfjt%zbB^yA0IZ@_P0iw|fW7YkA2N(P)A8
z2*kfTT$U}vcSM%)_;*LUb3hT6oLOm<z`6xiBCi2?0-tL8MwlBm+U@;LmUNMeeJ0Em
znDX9ecM&Kou<<=RekII=jds_A43YN``!*O)vX1vgc~XzEcE<-cy*I3fJl^x5?eT&9
zhEbk!V4JOxZOl&SZ}EZMXj*{3#s`i7eaBe9;{yrrkMiUIkH-gQ0ZV{C#|KscYk)t+
z2etwmfj`Cv4g$U3vwX(~dVes=lL`DGJ}?zn1pGceuoPGcJVtV0J@9CJU>~pz_#Jfv
zQ-5Hkjt`9eaFizx_$@U9%YiNNfpx&0z;CGc!%^-Q;Makx4-M;ig0#`s%8DCrJxzOB
zgE9lbXSC;|5#7>^6_8KFIPmZRw`U9NLv1U)U^F0jH(srZ3>)PR|B-dZjw`*I4&7_n
z4)YX%|89qzV62Di5<9i9Z<yOt2Yo4C?LLoPXbZSj>_)kLKeY$9+R<cg<UqI3oga^K
zmjEBJG?V4&UjwbAOVKm}H`p3^?t6dcWI(rmGRmC^ywlc58;hW2bS;`n;3``qEvkpE
zpnK7@0k5$&vb9S+-X1KXiyKF|^MIGz8tK7uXaU`fW+(7M(e#+i3Tm`Ka|7Qtj&}Qg
zX%Eh_71B95&>4Z5pN@8yfKIU$tj9(TG&ArFibl|6TOl3b{k1)q7U*?wv^x_t3dL|a
zQyN9El)w_?m7oMmE{(5;`2uep9PU028W=d%G`L%ak@TDFy9OUh^kjpg;?<Fuuwmx%
zz_G(oftd#fsH_2D;qEF_y#kvLajpY(3miKXr`C(&DAWT@^hC9B>oI5p!iKmr!3O3-
zBPJV*!9V!|Q=3vECK(lwqmYC21{Ljr9`UKAv0=kJN5S9t)Pu1+6Jp!h0QdrfK1=Xq
z0>8xLvtgdaz%MN4!1~X^J@-Qo*@4rIJ&=!xG>~Po75t%3Z5l|_qz<00KK16nu(6(8
z@H;-$A#y+XEni^k=k{5t2Kt83VV(oPS8a`qS9p-8mQQURgi{)Lm#vcF&x5|?3*`TE
zm}d>}d0QissTTTYpURKJsR{h7FVMSLPPJa6BUmG3m?s1Hq%EJwH$^*wkNZ?_9}erm
zkJ^e!#!g6;FR&Yj!@!M}oKtO-*%7P^Y(M7no?!1-GZ8%0DUjzlf(JVV%&>9<4|EFT
zX^!CjP65-l5rOIy(8Tbt4(^^%7Dg-;!F?j=C247DE`kdBG+rL?;D9f%>PsH*z<Yga
zR)Rd>!T0zA`w+JR*E2FA_PIKOcl!bfU$OTBuJfrQA}#{2^#zt9t^ls_si}!{N*%b&
z7dY~jJzgzPztCZx*zk^EiLH_G%7(5WmN+d37uzZsuS)20UtrVWVV=FfB3mQl)e60e
zx{{bZDejKol@_*-x4G~uXrbuJz>Bp`lGVZ&G0ub0HG?nLI>{F0=?GrtQ@cf%4KB2G
z*14}3eo0{Pckw;bt<kH57j$w?a`604&PfiQC)_LcoaDN71TQ4^kgy4!9PkCykQ_Er
zB_^|a`VpLO3wp}>Vbmfx*Ak5N90s3bEA>-4qJtxKdyD-%%z@2;u?T@?TXI>PYGAVh
z3%+Fy0?oAKvd($CcW}(!g*+2<wk4Mvy9jnxApQtT5NHOa59Z0S2YkAP#~Ur+Y;dwW
zsz(RQomfmXGQnBk_@4I5nnL*0PCni!hi7*32}Ujaq)tB3XoP2Ua^Af4>|n(V#hYZL
z!qYqXWFr?oLAdpD=met#KF+5S&NqjP>sY3Gl59lwLq_}5s`EK{wt+`k3VJui+Yua2
ze_nuM4tN-fWX}pvGEzB?%1UiiBVgY}Fs4mCB#|k4QRuLG96>_B<n>?Oh_G-^Qm>9+
zoYq^1)f@zaL~osSi(vyS^)Po8I0j9!XD_I~c9Ib=S`qXUC+mdk>)pX2TJGdY_Ii<x
zAhJ_HONtQm=@c;N6$pBF3aEQ0f?k~hhQAqsw^P8R_x9-s_Usf;MmmBXodRyPJOtgT
zDo!Ph4vTP?K_i6T`(uQ=2GY&aEf-w_)FU)72W1<~Z7J(SnG)H-_D+;lD05&gOBtDp
zvKSg>DUYJ8gqeYvKP5#l6nh|s5P3t<0tv<iX8pv8sBcHGBTns3WeLgzw~M&*99)Q`
ztxJMyIg;PHBy?9Tl3%+d^j9O2U*ZB&f98nPuOoOoPHi1S_*C%EBDT(Qxv-yXhD!<T
z$GE`Q<JQ5f8rEtvQr5$0@7Uw4qrmUu)TXhrj`r^e9*YYk{9?Ie!;aca#uWCQ&167f
zM{Fje3Hw$UFC2g^acbQ-!g-@Qg5TIiGJdeHZ6<>U`&V3`_per}GT338k*Wsvl`vjC
z0Kbe=tI`P92L8e}V#1_EcLbYl#%#%f{nKWoDu#V-Gg4K;K8p)<5N;2!DNZdNPq-HF
zArV`B7sZ!m4%&<cWx_tS8L0|kjW#1yIqZ|Tz>(jEcxr(k$Eo}YglhzU6c;$yW;G~$
zKu7R^&1g_6>_eN;ce$_+Y$n45dtVq&I$%Sbnl+Jd_2BnxBO25Sd)H<($QRoY+#eTM
z)z0G%xGzo}5tkxxeOzE4;tJrKacXKh;+^1E>GuxK62O<4S3uvuj^Ing0OkOniwkT8
zmH?k253mOK1brCfECJliyaIX$u{DSbECpr)?~4m;0u}-9CUbC<S*h-cFr)74WyX2x
zk=`12-&kH&wgFeh-Iotcjq3>B5O?1?U>@-5xchdSquk}dtM2>O9PQo-x=h8Jz!u;_
zHB+F^$CF8|6qo}%M?E931UOTDDzFB4hUygtYy?hMQv`bBJAx;xB?2>n8ERvgIniAV
zJVBW*bDVn%aFV)MU?Xs%dPQJlLPv1C8XOMH298t5!c7jx;ilWY0zSs#)&gqOz(;HS
zNpg6Gj}*@7Ig#~Tz2e4owj0+O@SzsBT}$A}T7R;fpy7jsbA$#a+NwkkuCYlS!FbzM
z_5(%^JWlH~<robgsP6ILS_>SYj(KQuGcZQra9N>@sKFh<Xp5@--OO-zHayB!GY^eo
zXn&!}Dr*WWUp2g+&6f*rfcLd|mGJN(tok<JFFYOI$Ko5NLJOh2ZFQRP3V1J@7YN@2
z_u72D@K$(Fo7V|XNoKjX`BCAy@a{HGng%a}N7#H0+*1qh7N_#3$^HrKiBszYN2RdX
z$En?dv%%rqErN@|t~iyD1+E5%apMVY0Gn}YmEiE9tne(gg44mlLF%C3LU6|*)q6U)
z0^B}GO%=Qc+%`xp72FE`ZIId|IAs{?`5?7Va4z_lL8?P=8Tj}hHTD$Fx3%D()v*|J
zSkFl^9L?|_2B~$Yu=$J}&T2bI<@Yw_(3}N3Y+0&4r?N>~4sRZ0?E!kqM#`u}aL6(r
z<*o;Rq{GT2XoJ5`xeGWCj~Kx^qQg!w=EL5!!%pjCx;<;)uj#O?T0E5yyn=Z))i#6w
zYKKjib7IuUj^Oiy)NavbgP+wp*>e=bpB$tTPD57>epKsN$&3d0#zAV8=)y;FWY9X+
zJR==`4?QlrLhxN$CwD*v{5G0#I=VgJQmvErx596v=S7z?x+8e4)=97C!k5tdqALS0
z(mH8iE&O8IcLus<@I_i@-8-r5dugBOvca>pPIhF)@H1)O40P4tQ$;u2`s|<qI)&zm
zDtwHrI+jX$B^^4R)`_YR%pxLQ(kT_tku>hiuwm{x@G!MH%1m}21`bi1V@&JgV((a4
zE#8VUhr2U?NtU}xisr^%3?HP9MVl$zC!`q_uqdLP6_()H0q#%Iv&^^%hOH6WPi>4Q
zc=$N$_KY@%c018XgSaHXa8DkveV|H+VZAN~|29y~Vvc%tfR7JUH^uNaxCz`kP~CYp
z8N$<;wgc7HvzfMO;O{NpVH<MTj26IK@IRZ+qE9f&VPC=q5P2uKd7xS~fXIizpAS?|
z4KT;LedBp5tGNkgqVmU@;TyQzGmw33%OYp85ER2d=(15+Gvy@%!go7`5$<~UzD^#l
zwn?Pp2;a1YbrLCM0yExWbW|eELHIXYm^RRi@RYz`9;l`cq|`0oofeh@c?0Yv8ZnEe
z8xuQ%FKEp~BNg`CKy^fX=7YCs%_O4?_RK&vb~ZlSz)#s4PUVfTC)GW}%+3#+!_zy0
zTe>8&uR!udmqhj!NFMK!$o>M!V_gz%kZL5=T@r4mdL)l_No2!;<Pl2}`pnrkiA}&j
zbuf+r&jwe~o;kAX06$C%e8?-nn`psY*>-?8(i<Xg23HPL>qPFI%no9p+9&dK@Poue
z?#=_>C;iOhC!WDs1HZ@O*2Ak7y4F_v=P@iz@ITpVxv#w^upZdzqvD+bUu~;pxhRCL
zwAE?n;JqGxqwwMGoxl}pWdhI37T^s6?dPR0qa%2|`cyPI!0T*{WGjIdt0{?SYJk_;
z8cEa$y+&;mjrT-$H?~G@>P+Y|)t+c3dkTR|2CClY((H2ZRcdAu;#%Mp1696=8^Mdz
z#w2ztQ77>-K$(Miw$A_-s*44d054Im2;2rNP=kk<<2=p4d^-9(9xhQQcLWz$*natt
z4bNlHL{|*HNb96Q)$j|s7tcr60KP!$q)FkK9l`nB(V|NSpKt4|mk)*TbCsFG0#Xh<
zM_>}Sf>8^bYcpnLBW$+K$QC|Dc5PVty!J=)Mk<oCyCnGMA~~~5LW~k5XLL!3Q;p=b
zE(tTc9?7X)5~^=SGQCT}%=S&~2u`yky(BwBlZ_;kj=6y4b~*TDnfJ+_`+*sV<!Egk
zy!RkDL3H+k50*YqEnN{hdB;wZ{iCI(+$>m{R$B+}A_U`XHMLj3QY|%0Jb1MFY&ba&
z14n2tc?23!S+a*jXP?(Ip($D~V`D5vkZkEyS{_U6dU%pHu=WBw5hPj$*4e!onjlql
zemw1+E?dh^34NcA#MdREF?mShx+K(JhGbBegj6+1*n);4Q(Xg+*e(g}X+tugOG4kL
zoFZFP+83wz8WyK-c(lzU^JQ^|N7;Ov=qus<wY})~!24Nz!)Vkk(7xJS)KRC(9@Ppz
zNA#KSKDPNn)P>OAs(mzbmJu$8^|F|(IJGb@cjH328^JwA!*eoQwxvLy^-TucQjv7;
zlE~ym6451*$%~|$C9ysZm&uF7!yQ*Z_<FEgeVa=7R$#cmB)+X>_)e4Ou*Go6hJ~q5
z$Jo^t!Ay%uwG}XfmG@#<dBJr2!7;M(f;(c>zOkltEcKqw(_B*-X=t0KGR)B5G?g)i
z{u-;sUP8<o@Gq8%GdcLUHj_bx{;a8t=o!3(h*i7C*%@=7KWa+GVrXluO1PAmmEa#N
z6$f+h_u7of)B-)GDKjZ*25%xXC1WP^J59-02t5+3dKVJ29Q<vp+L}h&Yr!oRwodqs
z&~G%Qb>V07Izm%2rb7RvDH(I2hhtTT#4G`S6{|LlCuTMH%UHE;g5A1$=ogyOx>jhj
zrZRkI@lrxl89(UfvFgZW#4G}TW~q2;f}6A%t=kDbq$#ayhJFgY953(Lyn0|nCd#uD
z{1Kxp@;vZ|v1*mb%fRnqvxw!o7F-{z@&z}8Un7p-sGN>q9UUk*8~hil5?l<fWpoyU
ztHE1i)hxjc;4Q=#96pns0Uaec9b8Ebf(yYL7=<gq72x%;YO3Hp;5%d0Qo*g@GI~;Q
z%B+syDyEI#T<{8dTW}fp8tT6iTnjFue!<P)%c);*)NG!3)Gs(2d?ED<E(YgP|5e~>
z@GR;V+yI_I{er{i@YJCGtN7qM6P&I3o@5SF&t{n2-OJ&p*u2k)@M?IL&6f#30H12}
zw}pG>^5hUcN*yZV7@h?`N%%N-F)+hcEjx)5{1*6Ri`(zY8{z5dGjV-(39gazczZ0I
zR}R25TQ%)udF22fYjNAP2A*o`OPAu>1RtfgpNwnNIXpjXRi8|G<p3XMyE2tnhu|q{
z{1kJzs>)=WR0B`8xV6b@gboqP?j=`_7h-8WH!`6~LX%a~6c*AVc%scCr^>nvPq2BG
z@OpT>&6f*rgZpe=B|P<9d7@bH_6yI053=}%Y0z@$KwF(Ad?!5C<^{rA-~(*FUbyc(
zd5l=$>V)UOqiudvcnLg8wNH}`8?e6`lLc%9_EQT4de4^!$6A4zz(}=AU=gs7`c_~i
zu(yhz4y*_EQZog%0ljLaz|{HjjCe+19<Yb{RA4!<yXtica3?TAO%d1v?536o^j*Mn
zKy4J519Ypm1eO59)#hxzKRA(v#HfKA>f$NpV6XMbuW<muZv#|Ew)I9J;zAZ#wfZcX
z$ER|1&ww98dnf0OLU79f)%$wRnHAtKExVo*WlOUM{`mm4^m<MoN5P-!h!c#Yi&#<Y
zh~Hh$JBB&%_poHQ)6?1vl_1z>nU8YUfcM&AC&*r>5&pLUD*rTI?0NGzB3Zf~6J!cz
z!e3CkPNR`Uz-?N^x@T0vpHlIsqpAm1YnAMC+TfeiN>QaQ;FCtJk{wPS{9g5`sLFxm
zTE%*6?1bO0rksJQ1?bl*=}=#ONAM=KQB*m=Vy$8gH%j13RlBHafQz+CTHOf0L@k(s
z%Da%=w5aUQ$uptnsa>Ke0?x5D(ixS|vsC<<XzGF4wnn<54LVh=6isS@tQJ-@>4`k(
zWcBHp98cB&Cn__?9NaD4*ajJ=#-C*-dK!RZ2dGsgY+8+r<((wnqupu1;c7c&x$}U7
zm3cN>tE82jdHnDs;fbDVVB!EZYo%;Hz=OnNXtxQ*aY&?1`0nT>GL5Au>)R^1&<I9+
zB^mu-w^Wtv-U2kGrnv6X+<K^?X6Eqf;5d-Ga@`E3WmOK}oE&i}n}`_u{GVpz!hW#T
z*7sz};KyRr*itzPgTISW7nX*NbvJ{*QSZ#-9l*AkCP(*D8WUN_Gr3d9#t#0^P9CnB
zuyij*c*qu}`PuGOz&~*q75Ir%hwwvNSTIYL7WjJ(qxG|h6nj}m@NHYzL?ll(ygo)9
z@l)$^@Eb8IVK#3y#v7He*DS^-!{EPb%|xRWRu`i-t;Q$m@{Zsxt(j!x!2S}W_RZ#Y
zT>*a4*6<Rb8uo&EC66)8nnQ8*@aJv5{uWx)3a`~%^u9&BXs~&c=(FK})?D;O@a;BF
zyH$DwzD@JF=y$@OwYeX?ry0JLF}jtB;$6(E3bl4F$CPwnO^iAy;ym!v>QfPy0iR+7
z%MjOqpHyS!A#MO}iBYRW+y;I^twiijxk6qsFs{k!Xc+@B2fj)8aCZstL0gq{4&A>6
zzQN+w!>9pzudS}S4Nv1rKJ!<r&%rYlc&Dw}FP`(^w_DtLvsMPZjS6pPBTx%2v#@<W
zZie4VTSXUjRY&j^t&^1`8@`(H7F{veuXVCERl`e})pww40I#xj*7-QRh%XeV@fSKy
z3rKG2lCa?9B3aQTVRb4&a=j(#JS`w8CdV2+q;CXYOD${okp9^<tj&>2c+p@B><{U)
z5iGL=V?8UtOKhdyJs`MB^_tJZWi4sYD}}P8E#>`#t&t@%6S`O}5ls<rk*$#>w-S1}
zdP_9*z{_lnEL?5SLN(?BG^xvEt)%JJ*(w)SU^ALr0?QYc?5+mp*$O(k9(JKHpY`no
z3T#C(ze|F%Z@DbBoi>!3jU=~ALYyKb^SUGyU4djymxO`aiDXungz}q_<a9|`qrF$l
zYl!U^@YI_mueps31T*9YN!+lOnO6Wk&C;umwWh~i4$rpK_Dd`1bX&b@ZJ5X11kVz6
zvOD4$-V9r+z_h2`9wQx|X?x0q%Y&Y1t6#1`y#~&9GM`0o&N>orgHN&}Py3Q5zR`#v
z-7>H;hhHn7U`gSD@18-O22T@pvU@&ojHT*1NygbILy&5_O6nTuXsIMlEx(&Jq5(e2
z=KJ$mBii62ZC)q(lw#R)TJ}i`(dWSV*h~B)*P}0n53~7l(O1Ix7)<m9qTd5gv3Z^7
zTj0r3PMq2%`l#z<Q)=<Z0`!^i!4}_e59&f_lC3TfbvZoI=5s_}3s125Hqkf2<859g
z`ta*zXDZ=0T#Pyu8mAUqtfwt(kj-S;!UhUU_EZC7xgqamO{)hF5OL=#fmS3jT@snJ
zH^_#x(?%vOlBh0;Oj;!U)yzu>UjghFqoxK3zZ2Y7#MUc;W>}=naPh8?4eFFj?P}9u
zy=_LyJXkMbEX+VJ%Tk3b%;27COrh=40PA5hT-spWZAOJDH_C?9vLsawtedc8PchKL
zGwMFVRf65N5qHoYSh&r&ds<*F6@QtXD(WVAePJ<DWx`Bhybc8#F{<|lPO{}-Ix68Z
z&ez<vwa^Ytxn&!n?V6G;{AONXXiB<N=x@=g_vM^|bHTqxs}6}-0{+F4Tkip?p~p3)
zUG>nPH6>#!^e0Wp=qq7oO&p1t4Q`EAM-(xOz(0T&+3x`=px<js<90%iX-Y>nLyu}I
zgSL{*jYT=dgO5b3gZIlR9{jB~qjhD_7ENhg4fN}1wQez$G=L9Bt5px+)dv2O2qI5e
z#rub7wNK<Z;6u@B>=k^xQUd-cTICC_0l!Bau)7ibmfCfNEdHe(!PnKd0yBZHsQ4>^
zMZmwRnF1?;&#RRJ>w(XzX9TtZpH!a;O!do$2)(WX<^eaVDFVxZN-Yt%6L^o>D6j>1
zmwHQ}Z?(L+Zx@&YELCHQfF-~i)dGPvz-!f7fsMc=YL`IoE%HeER$wOZVimsxSOmOC
z%@kM(JXft0SPz`1o>{`n^zd8dz4Yp(9J~80<$#<4pKWpbGx;L;Oj|$gVb0c7@Uw-F
zb2k9bRCAZfVP9M$%H)N#aN9K#ewwZCQ-$kt_^HCjxvPQG)!^lF*k6Y00r)hF+pgZ*
z<Y8dx4~c6Q{AA(0M*yB^t4f#Sx&?lM?JAoOqY*yI)<3%$*T~!Dyl@OxPd0Eo6KJz+
zTEJ-*wl_P~@Nv;<)+4fM0gu%>rl%2p2M>s7wN-TK;8Y9SeuePS+#Zjjs{oJEI`P{B
z9~rHdimnwr!os#+${HRD%uLbcf`@6H_?5wjGE=M3)q+#BPTJQDPiD4?F6vJ_2(*qH
z!pMdXrf$&{gOgYa9s^f{6KR{^25<s*is0}&S^23~a5~t>T_d;<97mfT2UmaxMXRZT
z_kahIPjD+ZHd<{GoN^cIJ#7)33yxuZ5L^b1rX5d!Yr#?6ae|w{{b_^XsB#weXtf*c
z&Ib2Y+i%bxHx)yBsn2dO2dk-D*eq1RBh=~}n85piVPZMjW2|LGj8ePr;jEAf{v}HF
zUcs%L2mV2A_M1b!6Vha>QjXxeD3!mNy^FPV*@56|%X+xHTRseJwj9)v6{g1%yN<Vz
z+TQxWA{)UcQR>K(lv)gaKNMa1z8b;XQEJvxC>y|Ugp@2?M)=*Fd!p1<QKo};g_JU)
zg$Q1xjHgjnfVYQ~va{TS;OQu}RFtjY$3jY(Cn@Xsq%lhE6J;*=fsm5c7-a|oQEF@r
z%3AQ+kdo!tXhv{Hlv*dssC(p-LETZZ1I|WpGwl>*G59)B+8^InLzmIYXHYeOudr3p
zJ>mCu1TUqHqDluZuvOAGh0yb9;Z{@?;JLO+dS(ywY}zNPR`6-IO8O-wz;Z$BwsO3l
z1I|+8SFu}L0nAi${T#2QgDRmXtCg!b>g)xcWcjQ8t2pW$ho`H}rF`m7s+0<DYQjAm
zmf)TN9-~H<nu+crV5*u~O5_UQaLb-A(;IcLcwxicEx-X*VEgNy_sQ0!T{JU*{iH7W
ztEI~!y`{25PX(}7l$yGo+Uvkx@gD6y3iPOV=uNk8gS-~7I#vcG2innJja@A#UT|A~
zb>V7rtfvNiyuVr}@&n*j3)|n*4p%%y`>U;g=KP!nZqYiK8hP-;{ngZ4@LL0J);j6f
zT6j}`mH!-mP2fgbXPwWy_sh3q2A6RLjl7jU%z%GjacdV{2z}31r`59SUJu`It0n7B
z=v%hBPP`Ap-?Y_|IO+j;q*}q7#5)VVNBD4e5%BNo*xl@L)1GG&S^@u?&DWQ)3*8BS
z#pZs|H^b{}-XwbOgM10W=KDpT4&QC_wA;|<!FSm_=>_y<@SQgIi@pZ_SGD~%GoJ4g
z?T7qDNTRzH_>wK3V8lMu5!_)DzD*5z(I)9e0ptZW{dPRcfY00V$;K8)txZla>LKhB
zZfA$r3Vgc1+VmnDMqj180caO-Ht^B@>WGMozz?gLcOb3+KEz<|K)e&I)J73E0|Shm
zh`k%-4Zyc|Fv&83_o(<az#`y!HB(?E@NTtIU_Ef1dPZOyaIN}OVCp7$SJ>-Mz&zkx
zYKp*e;GfjRYuU={zu{kt<c=;0leZDcZCw%;pYVrybI>JWYNR4rt$N+bj-wFhS6AOj
z^a|iAfg`zn_CQK(GQntt++>l6iAG8lF9iCleShV7kqf?|zv>WN2ELw~W+%87d>xZo
za5K1=ju9NSnaxsvwOepD_!_!ba54Dm{wiS?3r{t8nUtICt_Lm=Xnknk3cJc?w95C0
zEO?@mWgr{2h+w;9tiYG4XV%GB0WW2&cFS0SFIMd$ZU!!-BoTWbmDO?P-H6kH7xh;M
zMVtq|Ky5_qE(4yY`rc~}QKg&N<7|VUE3-8@?7*X{_r2!Oz|n*J#q02DSxk{9T6bqE
zbe8QF$=@IH81oU#wDc;ejvLMoKg%{)A1ekm2+p(v$o<;@J>Aj=3iwMvhVhuJ$kXqk
zq!B<q36hfP28wGgg6X2ObC*E*BnZ8|gWCe1VyV^gS4h1d&L=^lmuHO820zhum&_@T
z%W?jld#(F#2IK^rj5mrPlWoG^WrIu-lIX4lrrYv#;{ar$O(q#_kO?-KY$QD)=l+WW
zcw__9)Ukk-V=;uUu?pdDK0wCWWP-5`!WUY_qXC#|%hQeH5WbTp9+M2;7I_C?k;z6D
zWTaYM!NOk%9MNC(-ot>FgNLiPDj4Tl;4o&Vh#SE}Rs4O3!=Gex#|kduRB*CdBH~=&
z5azguOTdHGE)iD)llrS!uOhAoC#qf>5VryoSW86gdrBT!3q+g^^f5n0Tm+6&&xp7J
zIEeZA8seScfvR1^&A?dZr-;2z%j1qOq%c2$G0aa9=YgZuMiG|*qqy;2M_dE$uf7#=
z1F&CzwMxWo;J#|g{fJX)IH7a<iZ}<{N39icF|apx;9kU);9lxe5$^$d`>UlQZUJ*N
ze*kgRGn~u0k42mb?ygpfxDXgYf4_ma9NbO4CE{A3hyE6EBiOCtA4D9!mD4!=E#g$L
zODz#`E-;M#7I6vKRJ%l64K(QQH#t1kgL%^TeuzV5E4ZVd+9Yz{vmC(tseF-VgWLM4
zBO)&X|JF}!6?p~t*M4g1KalSP|I$x&h`bs6b3e6C<lb!@tNW>0mB`b<KlD=vMV<#f
zMlO+;fm`~ivGrl2Odcv5`4pkFKCHXhtUeR`EZ7sZo$mtnQ@gS8WP=;1M{qHCUq6-b
z7QWSL^Cs+<y@hXsdPnfXU{Cm;c_vabHlB3w3)CRE5WJ1H?*mtWpQ0^-_kgSWsilHj
z!JDX4aLRLhAk|Op6Pydam-d3)W#Dpk>|s6@U<ud;yIt6DcO%emD_CYDYGvuS%vc%H
zVZ}CMyH@~Psy0{QR1REhE7;xbfL&rS*~T=(7Sgl(`AFOMyu4bS{s_;QG$RL+XOr<p
z3FJZ{9I1ixZ8?9>338s0p*`)t&*ObTUZ-{n#v7Rk=5+}s7)1!?SOT@|ab9~?!e`n1
zVd3@g9Gkx_ybXSq&4a>IUzE3~l0l7o0-gsy-Qw0?VoNv55oC7>_`8b;PU#X%Hd+v5
zbqP)|d^==CKK2;HyYMj{B02CW7PnXE5_qO?nh!kLQoVTX<L1nP-5=~{81YH`GA;G#
zt9&$m^kp-XzglA$Gx@b4d(2UR$V;+RCBGfleOF(@7_GL~nKyfuM;ZoyZ%*sS1bXq<
zt$YQWx8<MrP}^TIZ+6%6n*$fA@qYvM2lA#^;BkJ5S+N@bci{QJd1|}BOn%Y$YBhe3
zd4u~QeqG?gJD%BNzWB_Zuv;f@2!?%RX1w@uFzks$^UfD<Npzhb8C}Lx#Lvag*<4xt
z8zVE4Upz9-bzx$({6?=Kma6NwdY%2^g4wP=hjq`&zHZ5q8)q*cuX@jO#i;#Z-YK23
z34C7T@^#XQ>h5{2=oh!nbA4|5=p26-zqc^l`rU<Le$)EXMJm%!HMy=v^Gfxnb6s)0
z|IBYNl;6^}Je>cXCckoRm^yH-%UfE{OkK}E(z53a4yXK5IQgw!p`ZN#CtrvDe)+|5
z{M^9RQp`W4au27x3k~B%3>R4%`+s+0=j8FU{xCKJQNO_D-EA02Ek>zPco9{h%+!I!
z-GiZ%KTXxMxlTcMkgJa04!h+H(^xT&8G<|!`_Ruyu#=y}4&r56e-#EMIg-vajjRVv
zBbH!0!b|x@G7jfvu#>Ol7fp5$paSQ?IJd4OX(P!;7{(Za-bmr0pER(OKgzF8^Kd<Z
z!O8gUttPQM%!nFi7<b^?CcXvQTYA6}XkOY~T4@-&;Ai8U_9}_x*U<9Qfb%RFNB#3K
zzD}ONM7;|CedrgcSI&3M=rsgL6zk`D6*J$Jpb8?qkt%z>D^{(E@J6ZS^IfUtA+>S7
zYk>K*dVap^4D%7yE^N1&-rYOE{OH9o7r2s5&XTHkPM??;{rRpvrdqzhm8p_^UY}}P
z<cd_0`K|(0l}FH~1um~3pQ?-VW~-(=SCZOafJuSRJ4Y2Pz~p!TD^SbxT;VS+&v!ML
zir9JL9I2`zsWP(A6{QNIyeX=#fT&IJ$f~^FaJ7GtD?%NO2W^Y<MyUPb5xKyXE=h`h
zAKyy%yCjr8Ly~w^QePtMjPvG-bF7-Pz?CA-QJn?n{nuD^bVDHxT`#rnzu1)~lzfFW
zIaGhFB%wnhy>tx?)j12O-^yMt9!D3sMyTbB=t82<Q_fEMUFlZ;(3@WAnoRMF-4G+m
zo&6F1zxRxqv&5CrL*~zZYmTXVm$*JK|Dvu~>PqX&w7=nqt3Q)1DU7)^^K$jnQdhqD
zy6V2nHDWs2n?qWr*{sX$$tIKTCgg;(yvi5aONz|omB_ibvLxE*+3K2Qu5oFHJWESL
z=9t`tISH+~TN*NlKR(-*f34nG=2|#-z<f6Eh`C#>-%Hu#=Jy9;4vm~Z^e@%i<*vBW
zSj>hw%<4WdjY}|-CW+Z>T{LUHahb!tR~~mVGFEfHY1G&A>kqTC;4?ffqYyjNT4_w;
zvoKg?#PDYU@fUT)Ip65r70AfJ`ErMI7|x>{&VHP)Kp!e@jxLVDF&68fz3Ilve09pz
zu3`Nmdbo_Mkh5q+ihRTA>ZYq*i5C#&N{4y09svF|tj@Oh37OW%Wh}vtb;EL3ryKm+
zSB4?!ct{#a#3|Bo=F(U5E>hoL?Hb!}q^HhUh8(*{k#Cx>CS2p1T>4Nxn<)H-=m(;I
z$H7b<SH|(~E@LkCQl|V)-T-;nZ}Gdu?Wd#s6JK>7^D82CF0qN~s@WJpo#&vFIz?B3
zzr2DA)!Bsayd$nstKY<6uI@KUaIOw|6#0X?z+_$E^}esH0uNJQt}amQ){}s}omC*V
zh7?#plJgz0`a23tqQLWxl-d<afO`m#fm5hLKh786ECVb0dAdO9t@%3WXUHGW1v0Vy
zsEoUwd)q4T00my83lzJx#6JnWG*E5=DbOe-$%T$!{Tvm>QsDx0(uERW9RV`&2^E-x
zZ=Uwuj{ZJf$WUF#qO63eGL-jI$U<F+*sUhcROC`fDaKYM{*O!}-w~|0qmbjhUB+}>
zhy=KW09p8j3aP?3OZ)CZzd;u=Oc&zb-q$K*3x%Aj3lY1r)7e*?LLjLGW4lWTc8Vid
zA4eeuJ#v~ZL;~DKA=!>XlJL#ezHgvcx{%?zkSQ_0O_3hiK_O>Yg_IhRVt5CE_|GZ?
zW4lKVn8xXjV3CeOA}HicU5NOv!Fh(GkScs<Xy5nI->(Z9p$i$^Kf>yfy@gbAwpED{
zDTa3v=qz0b##SL&1Ut(StgoYxND7&$3laaj@Xv7+Qh;xc8go56g4t>=e?-s1nuRk`
z?va(ucj*tGGFPzGc;b3j?^3#OyiO+T#9CeXC|&v8W_PRdSPD<Wj=RBHPp%;GTtdlE
zVrvg2$tOBuc^rlBU`|ayCzD(9T#5fV_zen8swA8zY3BtPtjkxkZg9n1HiE?II<d%$
za6DHh9<3ACeP|jdIDES~64#M<vQ8}iOYlD*zfj&Ld{0z!Z+0d3&cIB%O77H~Gt{p)
zxQ3NBQP#=GWmORQIhXT4e&msMl1_OAVei)cma6;hm%J3K-!dqF3U<<OVz-R+?5wSR
z!`7a8l#uC&7133Gsj-*(r=gUNlK{&JumHzU{Z%+mMK1-4K35m$!Dg@ydL8ohy1+5I
zK-Z8ftO93HU=nsxpx9kQeDYfbj^Tc_rzA;|9l^SHRcP#|z#-_QKnZXy0WNkFSb*<P
z^irVc&(#G2jT9YoHRK*$AWt_xD&yI)!>9VlZ_KB{;TTGVVt5@1E=4X=PacKV!b*~1
zj$l0;1s<Tlk?5pA@xLDb%kT^JU=qF~&`W`$KTj9P-pD8&t%I&baj&kBgMuHOaozV5
ztu9<lfupdK0>y3x2^OK30<pCUtoy(;#yDd2bQIV`fvM=EK=Hp3|10pLz*6fW--Pox
z45UIaIA2#7j?q{h^a11nT_A5z{HTn#(|1_w-wF!si=7lGb~lsYD)dqyw)P|>N2DW`
z*D8?yPr%?X74}Cd6-s~-0xUru>cT3V`=OTtML%B`2sfg2(8rKh=mPm*z>muKVMUVF
zgKH=-3OgxK>{bzf8T#&p{10P$5|U(qBUmp-7apa+7<5vg1SloI)%b-9EWmdldMQx!
z7w7`nkQsbc{w7!dUhH=KLE5_`7(8L3ntPLLfIAKkQ{9u~O*99owKutvr^h3~C(=WV
zzn;lt$Dh--5vMO%U9aPkk&*2aTOhw|ONH0Po2<OY$TgD>y!@u$Fy7?%%}zuW8SXL?
zb?6?1-brXV$^XBFmJiHicV#4)MuK|d2G)5KcUippBE3Pc^Eb*mpXhFShmh+18?ka}
zs{vTusIB<@<5=}d!YT|kwvp4+XG>{6&~U3aR^k}09Y-Q6?Fzrjl^XkZ)99wPGTT>o
zg<a)J=x<b^_h`M;eJd;LO4rc-MgfZMT5&Rp+cMPFRjy&Pl8{FrM-=%!12&FHBjY3!
z@hH3Vp6FzpMfd$_1VkPh=cJcRqlfklqW?5s4Jl<89?Sgbg`9USks`m~40U0tE1_T2
zizfe;ot;bM=PgusmbwO&X7qI#eRLj?Uw8(a2Azjm{gI5R)a2)?BSvo>qbCFXFGj|L
zTwbZ9GqA_d@wkW--N5;V@h9YP>qKPWd(`3EC(rUNl{1YohsOTkaCYH*mv)wOOw|jf
z@x8-2(ss6dbMgJr;Tw+cT8D2PzOC9fhQhAoX6l7L(`u7?#P1r^Zw;OBGd9v`Vv~1<
z+UKX!nw~d}pB%n7FH{|3zh!{S_yxHPj@U0aLycNZXJ(P>xVBG1ewEHO-R?i*!7m=2
zGKJiS$@iPiCw2?z^Z;@>=U{7XVzbEot0Pv4UIeD0KS2I=onQP5@V`&)O_?28Tva%?
zY3DH*6fIN-S2H_i^m7?Oomk|ToS|ZFaV3-*&zVMt!+xbsCfoXmSeNmcP9{E=lI#KG
zl1pZD0lrPz_eAt9j#-hxtoSE7W<jLr1~E7f;RCnN$=glib2%)*ErY&}k)6-Te&KL-
z<GfKj%dGI@-0Vo)&*7Vg?^h0A555mOe5>&NQu~_jQocv07i?}bKX%4|(XU#Eors<;
z<6jucf^G7ke2w*XGkS*Jz}8x?lL&UWGniov)x9V8@iz9c-=LGJBL1)A|0wd%eVm2w
z*K)%mi~OE0dOBb2zSR{|dYt3*w>q)N|BmBh7)fHFmAGk}X|y<eQyhu6koY^DSp4_k
z^aOsP#0H6vXy1RLe@b@%qwB}4)cE?!pY8J~g}kE+5yLtHJ&A=Bg0cNfrI7uOV9Aa`
zswm_=U5NO<g8$R_g$k*{_g(GVfc{CV5cO`Et6%8>a=)*0i*6Ub&*<DUbnYbl8yx;a
z9J%i&_lG*S`0vL5Sx4?Hd_T~>)#xA7op5HT6F!{YW^F9)r;v|yA!4|lK-+a87+alC
z^(_BLgd<pjqmUI8@`)}){Qr#qbB;pl@cmf(ZbSdL-3g!HMkj0`_oq6y=xXtOUgthb
z=T5@E(cz!y$n7WhA)Q<NpU3}2NA4_q4{G0?=%3J?aCWE@zVZySHZMCU<P}|r7`{lL
zmvkW*+f#BY|H6hNSdycVH5Br9U5NPa!2hp~LJSJ|oA%v{ev91+-{0=)cg0?Ezp8VK
z?l1W6(z$bV?kaNcarh5*<X%ti*L803|1174J95|I`x<&v%{|$h*!vA&PwvQbFj<?S
z9=wASlArSS>hRfw*r@wyrd~1<7Dig#6icc9z)qG7v3r12f7hjAYjslr>E3k2igA>h
zL8)({;|8<*AH@Gvxg$e&j34KE?L3DBn{*+wbRqkGnr{^{f<oTbg@|1xfnL{zU~3hU
zM67*|SOXk|WKqa=UC5m9(X^NVZ{QdzBn#(l7)VdabL)-_``CS>+I)vAu{7&x(|AtD
z&LjWBy3X0U&bNm4wCc>D&Oc)(9cI~)re4>Htv#=vF^%UPv0@!{&Y;d(bkbqsUxoiZ
zXNMWo`J#4SEC#xeIl7Sgn{TrUnL!~h=t9Kq5dz6)Pz1LAr9wG?RZ+-Gj#vX7h0LLl
z9lDUk^3l$t1b7#xP>0pw{8#N<OoGk2kh!{$cWx=N3YkwKf6;}A-DAXQ(1l=YPa$IM
za>N?sC}ci`>`d2{NPx!)@PVTcKh7_smywf2{7-u1ZkCZtDrMky>(GAb2VLeoUFP=p
z?9<F*%G`{d44l|)A<IX)Ol<85My4u9tT;!R1(f+HIvF_ee-i&s@GA}VR~F8XXy<Ya
z9@T|#Tl%pxI-d1e{k4KZ9@B-0-O~j66uk_b+`=+&O*N)b?TF=b6tb8?p3sHhUux74
z;1EusLJSIdTsz-Sf@)nzt}Z0zgxOXhYbfMNU5MCiCC+EWkwUPw3aO%yEsj|6jzX4G
z$kRtA+Fc?6o+ZFP9fj24{1nbIaI%=(r3dZ;>y~p@Jz*NpsAtMK$kb@73$VJo^Tv!{
z>q;D0rA*^lttPo~Pv_{1x}Sf8X>8T%%TNb0)WLGsuz^X)@6qzBkl&}>d`jF(tL`(6
z^{CNA?)7lsrjJ6$nd9uo0_bF<Mc0F7eS=}Bf9uwo#=YA2d-MnK?S=ka-M^8Ga;A{Y
z$l!Q$A9k`+7rV=tyx$^c=Ui&NEjtmC#plQsj$r5O4wl{Q47T1%7a{>JC%|{er4Va3
zi|+>QD<8am=h)3=vYUMXT{n`8E^fYI9K#RJZnl)Q;V%BUD2H=@$Bwjs9(c&%+zsa+
zw6pAHv+#Y;;Tz@fEyQ=D!#4uoA0560_*Rx`=kAoVgceKpou|96Zp7Qx!D9>i|A(=Y
z?i0JkXBfuM*wKArYaKlNd~&|Y5o?w1y=mxUISZ^tC&MWISK$8(a_PO8EdJMovtK()
zKP_FTcHhlzHi8!tx9Y?qzY53Su$RQ=b4?ZBKbgiY4&PEo;u$2qO(z!rBK+I&k;GYY
zk+)3yCZS)ZJAjVzqb@Z*Joe<2Q$dR<<PKek7%nAH5V;hBvGvxy>JHPm-4V>M3z7M=
zf<peJ3$YU6Z<<~6$5>+;YqW1F@xQm`PpN%K&SC=Hg%WwBD1Bsf;S98nL;l-M<4%Wj
zv||n|W)7@%_;$xv-dl*T%%3ED%N@Qk4&S0uobPrx_rSTE!#NA*b=tX=h&%Q2#@gX0
zwsGdcH|^7q*JWJ&oxR|T-A|{pv=K*^H(6(7ovpjgG?rrnj1-$m^xnmxbs#O$Wn7C=
zCXgt9CO}UDgw}y3oUhT&U!ec1F62U8$Sv{BRw2hZmR+X{5xXDM_<LRbOZgVFQS5L}
z*CUreF(XKQgHA0jt)%YbNL_{R_1gCX^ncN*FVd;!FB@we1rC$?Mx9#hjuNM@-3O)C
z8*k}^B!aDQ1e@q6B#A<9)`dubV+4?wQK3G_!uKZa`#SoUbRi6=pU}qS<VdTK0~E4S
z7b14wo~}*_a9*e@;~%PX_)l=;jwN@g&MiJi$UVT3o3D`@tF*5||Gdt<K<DnU@OAq<
zMDB}qZn67}I0HkSfUz|bvIth-2sYMH$Z;NYm+C?!z~=;ra}-j5?<LyzQS>k9Lioe#
zgpBjwu?pEjA(!bw#IE^tHLpTC;a1Zqboh^R<TmJpMLM_md_nF+NA4zkFW0`$qJL56
zCWMvS{6`<_VZVdiSLobg_Z4vlhdKdcyAueu*bywvQAh-ZT%`+<0EY>X>?ouF-z&B6
zU(xT-g%s#Q@(vqTA$uugi7rI!zCK-Da-XZ;nFjtv4*&6v+>zv7rgMwWH{>4f$eo1m
zQuK%-PqE&T_6w&;a}mkj<5uv-4E5!Gyj4nKtmf!6ClKnHe3h_)ML(9Z&Ot8QLy^Bh
ziqUre7#HjAsk?=b%N@SM93}2xj+~2577Ouz6aO)e5}WYN)xM{q->SRk61#g!ja5}!
ztVuG0LeAHPh+#c}#_2*Zw#La%u=5<jhC2$WqmT=9A>#iQ{^K2mRN*@xy>!e<;ysih
z=O8sfxrUTh(T^ADxU=znRu_4x9>evs-?D1VpvDWalg<*mx5<&NYsA)`OXOMLh&94d
z<6dgaLnoaj{`>KtZ0dJ{@^mY}d7*Z`2!n0;{GC-sX$Fb&bz+gfi(`gPT&NQp_)d5D
zj&vm6PvR_{Sp46^|0GA}B;k9i_FaU2yY2vXmVUyQ8o6uiPhRI!$SJxIF?^pu@+CZZ
zxX3nC*3~*bw>r%cY?Pyr0~C_23laYh@So}^qzT_MwC}a(|7;af${tV;h3Ap6*6p;I
zLQdC(h~WVOW$8k&wWkZg&U6GD?I@&)LT2bf#Q!7wPjM7dh40zgx73ZnbG-9=fT37H
z;<I$(QskfDn5`2p(uuQho*8n+Ce@MnFo|<?V)1Xp|8z&<0(@s{-@DM)>Y-R18j6G?
zY4$OOLT2eg#PA@2X6QmNwtB!I*my^<F^)oxQb?LEMEnoof0m<=Bzz~LmmB74;wkHf
zaq&@&`u09oQf#SdOwj5ZP(NU)`8ZFsiSDctK0#N%Jjfv@U0eMLtB1AKjcWWua>yy-
zvvRGz8};VSAfoP9fO@i4^X}YuG((Mg$Td6`@rhde5aP$Q7r&(nuhOQgOd|s|n#jL0
zF~`$`p}mdK&1Ia7PVPX_4WZHJI2zq_scD>~eZN3|2w&NxU!kYp%P(DKopI)K#+ibh
zY|_Q<>N7Y85r><&)cUwo&Nu~^@KL%W*nHjlvPYfI8E2X<L;_qxfb(@Ba>nuFJ5~E0
zL;s`WjFZh4buc;xEmCyx^V!+q3%AcWbr+jPlEZm`W1qR4ZDz8=xhKvSX=mA^7T`O?
z;T!AlEyj1K!`F+ie2qa8i?1Kwlv3^7MrF%sv2@>+y8GU|_Brc}vzHzIaO|Y}#IBgu
zFC>txf!JC{jj94ZMR&wnt$S}8`ZetEN20^k^1lv$a$DQ8(ku*wjnK{>Hf>kyLax$<
zyfbUFRmg}amoZuwB6c?rh$&zdf~{4EK_R0Yv2Jk`vV%g#=t9JQ#TktMWx5h6gkNl7
zq-tmR{@XRWkRn~k)MsC@&OB=<WSlNU>~5lvMY<4dtus#@eLdC@>sDQe%%{T?($^}4
zZxu;^n+YIa!2nu^=A?W+QrFIj2#-4^)C?w6e{|jGPSGVWuG~bOXP&A&K2dl04s^_k
z70iifhi|V1I4{x8GAXifj&e8;ayXaZJiy`H8{cIP-vWGNwC`Ui?2UZ2?_n0<1DtUN
zB9{lT$m413)%Y<VtgiydW2lK)9qaHtMfX+$y8GE!#i5h=BmN2aUyDD`8hV4ggS79R
z2w&BU@KSr!N{t(?bX$w?{S*?f3lYP<1iDTag0Zy-S5b)15p1fXkQEe?s0$JQe)!+u
zD5MVG1nv6<`ZujY)aO-fpAVDU)VW2Mgzt?y_cEP3OKxq4|EZ4LRpfT*+~PkN|C=4T
z3-Aroz8lcLraNJIs1r86^qsxGrx3R;L=2+{v{Dy>vDFDp7xEVn9KohJ3h`4&H(iMM
zN8``arE4@O#G`!=p#O*63Eyv~6ONO+yUs1TWPDfa-15|tncydPgu_4Ek$Vfdd+OZc
zpMpQT!Oq-O`1a7gkDz~DcfvKHP6#tnt(mZcLVD>!#BczCI3!wA5@Y*uOfat_SeB!Z
zH5AfE7b5<#`16jVvydcwdu!jX(AV3Y5cVjY5YdYd_H}O24a1igHCFCxb?z#j5Jw&U
zr#W(OBlizFxA+go|1L-FI()y^zFX1n)tyjmcfwL*#^wfVChVmXe$<7C;UEI>*3c>h
zW2+Og2-fNdHr-LkdJ6ej7b5;~_}}d)qyXQalv&NL;up+hYqFWJPh@m{rtyR;@x%hU
z>bTA@iYU8ux!2j{8k29eZ{Ap{{S7-=Ma1qGbx$=fD4OQ;slLN~vZKHZ3T#Iw3x~M;
z%Km{@s-2@`P+*((os53BPJO*j{g)+oShHmWse?MT*tHRd7rxR_r3S`UPgPM!ha=bt
zjzY31<TG7}1ZXF~1CB!K@NLq*r=WjX7s6J}Pk3YNnf3=^85HtQU5ME6m$@E!j44%s
z|K|?>3`g!6<o-hE79aVDd7~q@AKzx}I}3fC&b>nCzH_hrnbHh$f2DJaT^Mm5wmTu;
zz}V`9B!YeE2zH{QkU13cFI|WPa1mg$qmV3o52KfP@&xgo)boT-P?#sBes*l%=+GD9
z{)(>iMqTG)-d1Z?&!^6>v6Fcsc5c#C+jSb)T4gq|dHdE8>m)~+^C`0holF|>_u&7y
z$v}hc#|?FUr=2fH_%~h1O}daLzDu`e&0-2Uq6-nb2m)=<g<xw>8VY&G5$j|}Aq5n&
zUl($Dk5Z-^0iJSn*#E=Wd4NY%eO>>gP^E+#Iun}o-US&tNbemdKoIOGC<roxR4ex3
zZ%qIJQ4|}}gb7U)6r|gOq9TI5pdx<1b?=#QGYq~T51VB6*=y}}_9^$=X+!`YwBTDv
zu-yuoX@z{;>R+cK(-CsO3Q^o;@Vsb+5bFpDz;)Qe)y^Yi5<(8?ps13Fpa=xK0GL~m
z5a6R0Je&3%R>&+XWY_uc93hJka>NQzTrqffzksvh5bMq$xIXl7wYNgrS0UhLlpM1{
zL{J=pS3N@ffIp^PQ)mtGFW4zGBAoVolCQ)--V$J2U!!$R>u`nVlP)jPKf!#88=jP|
z&hs+m3()$BjoiTRFI%1&;X9w_M##@C<ma||E6uOu&eStXL+fX@l~<X~Ypuh#J)cyo
zgpWJcZ0p~H|BZ0v)k&8JLmhd2A2*(qmpEcKrFhXBpn}{0ZKq9zS6f^5jZL2ZhT8D@
zzQrD+JqN5#@ORq6XTLwuxdEEa4bV>F2uV@goz(3uc$^y`f{ku~^c=+w57`J?c^z0M
zaRc<W6(WM!5Nx$VbOYoAyUSuv(EhjQyxN=d>N~Vi)D*Q<9nRtft0Fru@&sD(HUJN}
zqUX#R;>@|n11=7Dy9MjKYQVnh!B+BM=Yf6CgG~Xu(}VSa-K$doG$}uz?AELK(i;w}
zCpq5l!;j1Hhv@Cw0CmGN8@m$o^Hk9L9_&0@tq!!E<#_)JZ5lT5&H?`pcxJ5eW0_#r
zTI`>+PX()C3%bJ=G&SeF>fG@3D3fGfvqBU&7ot5@2*K{1SBnJms)uYoWST!6u-fI5
z%o|pS@bkdG=MkdkvtGB@{3w_Pwx|hz@gmohRpfrt=2qJRu={N85Zqm`K(hq1&VyfI
zb8E^BC-(-MTlj_GKd`wqWg=kLTWm?%&wHj!|Ky~g*+gRjDyT*uLw67`kDb?b{Cv1;
zqX%5sGwh2P_P0FP5?~Kotfov3Pc&}!V5@kr_krE&V7W0<;*x-mdccOnTP*k^+CO2%
zaiuYH?23DE@jz#*E69q=B2HH!#SNwRAHu_wP^`1ng%FbI;TmGASB3V*K~~%{8a0PR
zFbsl^A<!JsJj?{V)PncW{;?Humlbkm&S*!-Sx%eJSs{uW1ka~d2(gY3KU~W_T!Rr3
z)EttM+6Y-`g@|A<1fP3^1OTtF;LWsuWQFj2b_lp>zvUiB$Vr4eZ-pqXKRm~+5Mmu6
zIgNQC-{lIXC}@zUBtC?^V1=k{00dupgcyXZwnEm>{-G5z&kFhD)9Q|pV+dJeg($8M
zJYQKM#JWS%m|L0`J!AttLdqfJWh+DkeIfY9BP5evykxO=(tg+qnQw)(x*^p$AS^-1
zLsp35I>Pg<6*50)l^Dn#^pN%S2sz8;@DVFS1f3xG&LhMJ_F;>?oAx7C$O0?mhQBkN
zimXD&V^)aby1)}WW0er>mW1c`AN7#+^9V6iBy5F<peqDFc!XqvecWOnqy4BAvd{|o
za@)I(ko5@ppDQGoqQLGD{bYp@?9NummUzhedxR83$dgux@IAo);t>)6`-H`2(SFPd
zS!9K5{N*u6$nK)!d4qtfBt?N&LG+syLa-x5uXsG=AsgTkl7f(Dtq|dRf&arJ#0U17
zaGzI`su!I@m<}voLi<+u&R3Ia2Wbph;>v*EvnublD*xL4kE602<(f;JHW|g$A<5rX
zB(aXjNJHKO;Nj}#5xJiEGoLo?F~Zjc{}1>I(M=`?@H`8yL%{oE!&kn>1+q4Y7uv+~
z*9Z8oO?;0{90I$*gYE7~93k=DHnH#xz$dWbMDu2XT|~Re@&SBrwzjv7{9cKBuY<28
zRVYy}!Q5;4TfnhzZ1}viwMN@L_*E478^N6nH=T6e0<Y&7{P5rJ!S?WI+l{vSXj4&y
zZwy}FUJ%t50K3>?JJP=24$gh<-~>(nKYw;crw2kFutF5r6rzGw2*K_Yf^5oZ_XMJt
zo*p6l5ppwa5~8+d;0t?%_`u$3v3+R&z!4HI`34v7;pD!BHWfi_Ex;DBx$n2RBMlPF
zG!OnNPwr#no@#Ro-x9n&f*dVE4%pji*8m-bZ-X5m-A-tLmc8K)&~27~1RMu=I`9n!
zXcF3H;FmV}Tf<$#YJ0$H3&B6#gYD(fmV>sLw5cw_w*jwD)<m^sg1y6HGiX0(2Z)y}
zLeK|Ip~7pN0h*4GJFO4}UI9@VD@3nvI5P#ZSst?99w8?Y60|~uZwEfrBP0NJw#81N
z{g69AU#w$*7LhyM=2qL4V9VRw58B)Ye87Y6<H`LaxyRew!gl~)(UaQ;b{y>*pikl3
zWCv&jzuOg5KA)+bU^2oFy~z_a6Kw3av~P{Bws69F?sg)neCFLY|3GtgbgZ4`5}9Rr
zZQnNQEmYgX*>5IYUfz#;vUUHBd#6S50a8f&M#}IaKFzeGWEE#}Qyb(vCyo^Edak2Q
z1FW`z-1SuV^es@77x6825ADathCf@+#9G3c>jwNBbW`NNo0`-Fr{mH?m~L`%F5?A!
z4|b%jsZOlZ*|Tr5d4<0R{N*;UPOPQ~FW_73QQ8w)T==xkV4YY8&{%-vYD^o>ehiq$
z?unJe{XO8So>S+8oH_@2u%*D(vsmZET9}vgJy@Ry`zY8U4we(E5|;+tzysF1fd*Uf
z7qs6-kC`y$VS6#@y2`!RC|H)4^odjT6n7uJZv>C3N33(N;X}w!57$Cly$-Zb3Uc5d
zL8FFE1ouPG1Olx~KNbQ!+=5Tje!CU&h!wKw9`{wANeCHbg(z+@Jk6{SVjUr&LVOtB
z!?g%5=fo-{OAs=~3K7AB5VY_J$pJjtg8!s_x)t)M6;l7+geyD4vl}5-TOo>j7@k&E
z2(iu`kiP&QhIhGwDGIt9GGd(1@9#!PM=L~ak3i7I3ekkh1njp$@}+Q@@=U1V+%<Qm
ztpL?jTQ!FD3a~tO=94c!AAa{>(?$o~*;s*qB}|I09&j1J?JZanDl;FSe)oW@c_!3T
zV7q&;Wx;mvU?X6=SuCGrGc#<59~(gV^j#YIfE#<ev)R_>b5%VF)|^mqu^t?AZ3P}D
z#2agp`6RrDxH+%kZ`KF!*baK(*TfV@_2J>)Hz!qVd6kW=z`^5R-tq{odBSSY2PZYj
zPv_%9@5J%DUvXx5xbIs@O@qBXdG3gIfaXc)?MGWwZwd5vzo6Gd5)({c54Hu{mhyzL
zC0MzFNu~+yQLQy7UQh5^M2h(z#v2rinZ^X+)P1b^He7T|QjNyk-8I9Tk8aBQ0J7M<
zO;dc#9o=}Ea=dFJKfh}d@V{wqNlGo+oMwvR5S<Lq-;&h0Jw&ZML=QovHQ3TaR30LY
z5&bl6@RzU*jT(f1-jdX;+}g6&wYC&G*DZlUp;v_4Z%s;V-<BpGIiJQKFv_j`v!2Q)
z(O83N)SkGK{4R=jsDxBp79NtFR4sp~C-)Vz!yC3HRczStBfn{9DZeIk;f298YZ<@}
zw2{xRQ`~*S!ikZjrX}Yb_M0nRdY=v^dxwX+MUonofGW*~>rC*#u<(LNQuRti`*5p{
z688e#*RW|=s)u(H(V`m7rAeaZx&z&&nvBIDScYQu!f9#}SH_C@AY3^+sd}BrF<wZ=
zO?1kS=zo%GskoQpqx;TRg6yLE#lzX#lImCb&~Iwdq8{mv;vRY{U8&gc@I#TL)L`}@
zzo|n=RPKw&WsD7@WNPD&>a2rK`J{7*)yc(O7kE>TIO$Xs4)k;5c<jV6I}MqgkIngc
zR-!Jk60ZYK`vBKq&94Sl!1oYTK!65Pqh3EeU|UkfqK#-~EAgxONO;D!q-JF@5AljO
zZuMDr_S@-m>F`5ald6TUizF2)nN812(5{|qoEHrXpWl{Lt;un^n_{#52wg?8Nh<WF
zluaZ5v8!BPweVZp^@0HDN?MpkZ8b0zu^d(gf3!;VsFHlD(g<iYnlpK<DwQTqRnkE0
zr%H<JjVaDsB|USMO02r}3bAhv=YDs4Qk}Lxu4~;k3qYdZL{!w{%(Svr)Fw!tLsWU%
zwJVp$kH8f9=M4-m*pXCaGF%R>9BqU+ZQl&d-OS8%=6^-nw1EkCk~VQw$Hwt^JVw9F
zN^Pd&c~)iX!8R*+oZ#pE9cZZlmK-TF!%ZsZOAYV8y-0H9%J}H0{~*{7{awB*!00{2
z8Q}rDlFF1$G+77zrV@ebv7Rd4I6PczXHv5uoK<lX?kxY5QSQ`QMSgtz3AK8je{><x
zChWuWBYmi1F)*1#=CY=?eow$pef(cBf{WWS_W0^xqtT+@C-HMKruDQMUhP7r@Yzo7
zUI-_wEK(}NfR8Sd=c%;@Rns^Le{=%WX9CEl2{0BeO@J9ZR&1ihXX9G|z6!poSU&vJ
zIYs^j;aB!2@xAi9l1dLtHfdBKnO2R6T2C<|Le$2t<FRK%OmrkVf++w-RsWEfj#OQk
zcGxvMcB|ewpP56|^Ao5!sU@<HIVtH4FiH2$$p~|@5V0DecZhv!IKPCay`9wHx@1$3
z2G!+lKr88&kKfa<%kWFN{HIYaUuW!k9>-IjfS|%)r2KPYI-^{eF4)aHc9jS7o8t%k
zrYM0L%rgvTn8B<@P<`xb0^<y(^P$x)CeaiDN4qn@ucXJ-!L-LFfJ^2lliT^u0zXdy
z7bjLdKAgbHp~p37>l+^OPEx(J1gc!jHV>frS(<C985*X585~Z}PAb*$MW#`OJGk#G
z>Rq}8P5L3f`3bnn_8yQ`luhh4u^;hkI>>*IGaYz8WIh@ob4S@(#|6zV1ggFY{GR#>
z+!ec>$MNc$P1$}1qxybDOjqhFOkZpok8OPmnKS}_v+C}p;pq&wbC(nNt0j<s7zFZB
zMsu48uzdU;6`JU5RE13fe1Cvb-A57A5P)#qu<1Nj-Gyn6g@OloCEo^X&UYYK^WBf#
zV1uKB<YSQjB$gH1SgZOUx~~;|Epb}CtYT;2{Tq4RpEwQX7)EO;qg9tSjTVJ-K0DHn
z-}__X%soj3+el6pmgschGSiU(`x^$j=X}4-Yg~8p)Bcxb)#myftc}9&?ct3EdErG&
z8t{KCd>Wzafb+wgvvB!V0dEp6v6stDUU*TH0Q_G<HC-M;<1-A1^w-1A<JXAFKY{^~
zZwRJal=uTu%*^?~Z;pdYr8nusjb+Mu1}-RV5giG02-2u2s1XAp@}AgS9(zYEz@U9e
ztOi0WGKrzBPu#7<c?TkAJ#QW$ju4;XMouJ%KdQx7_*LFH3CxWR6iA9r;MAziA>suJ
z)C`|SZ2~_hI%}+J-eMIu$Js`{v0V29+XR)ger>7bdkLx*mTD3|uZ>5QVzQz7)>7#`
z6mLO=YID+3$@dCWtt{2`{Jb$9RSA<0)pwTaIbd6%q95jzrIPP;sM=VnoB4T*LlrX{
zN}4Q|(-}*pBHj{k3wRozX2>acD=|Z~kvGJCz+aX@l>fk0u8)aAPk3`XKK3Vb6F-aa
z6BTATmh(PeFmO8Q5%_{8Oyjc<Hlm?1_5p<DAe4U;O!AG!LOhN?<w}|C{eH6_92deA
zU4{aAr<@W;Bte6&5nX|~zF^Tf&f?}EWSW{HYs}P?2sq7sJoZjaKU4Dnv8v3|v}Mwf
zCj8zMJ8U~5{}?(VUjUoK<J^t}aY#*~vlWy!S^NFwkWH=)sR_vi9E#n~V{h_IuA4_~
z@@2GTkz7+)JKYhRT>h&`E?+vffXBJXc^tfuyo~8c@(*otEv}{{7jOjj0FS-N10+9Y
zlRr<}Qj%-Zx5PfS$>mQcxqMm(3wi8KPVS&dbb4OaWbgBvj{s^6XAn4XV4iKmj#o;Y
z;VKcwSJHfHJ0h-TbVMS?V2|<mLPw~-pPqbTd-5`k%jk*LTx;xe+Y|Z6(-Zk_!WQv3
zrY9!R=}4+crz4-)<k}~jlU%rQ*yB9Dkerne+|PR{Z1Oi~TuySWQVty^Vw20CL2~(S
z#_rZ*EFZr|XLF*HyqxJs^6fVHd}3OVTsUZ*mF7(zB6rbk1XrNsIzu0F4g_cx^K;mR
zb&}8Qw5$s31nU*FPsTL8#j1EsmH@0fY^;umor%@)P}|otF|j^k-zK&=ZG8UJR7Pfd
zEr~0!T{Q3`MGdbFb~m&&=E8|QKaH#OmJH%JTKX0DI&s3z$EK3(HhyN=q%VQhGtVXa
zzu!heGuef9yUBe#L1wCuyaWE=9-COsO&1cI7fJT6O{TWZv<a{PQ?lv&)Ou4jUMAVr
zhk1_qjVVsD0Lk8SlNnv0s*=o4Tt{rL<&=NjV3vfO3$Z&Pi|K(=;>ddmW}i#u<9QzA
zgRB$r?^`1IwjfTvMV2Q<oWm1>=M^y8>2!|vGgO^*c$xxe3J&!odK@y0)mnmeS^u22
zWA0?hZ(g$ua(02C3!tgkDogMjzc<Hn_VNA>8>>@gM`F9;za7&e^(M<C_6=f-)AloM
zY4orgzjw!8$Ip)x`EL!<O@2CG{j!Add6Py>1~pElO@LL{EFP<rFYr@|)-pbo?%>Rk
zzja%Q^S#R(K-`4Bzlf<u!XEfH*ktm}_2hbrT<d8IDw!JR(I~+4n3Ad9Yxt>TuaIod
zL!21DU*JrJ2zNP~++^k#Vycr&2Oln<W+Q%a%6}W2@-gepJU!;8rvs{L&;%fR%O&&u
zO;8QUdJ(wU63Mp!BKZ*F5XFddhDWzvTdk<mU~0nCoA^H17W`!qCI2B5$){z?rx@I;
z{45>>Xe`1{F09T~-Z15}0BS@3info!2}k&*<LHh>SVYgH<s&?=ub9aMrb#JSSEh+>
zA{Z?5Dvwpr=P){NLQIoo#PVy3?ir2_b!Wp9IHq#5zLd72@Tlz=ZBPaEo-*?qj&+!l
zzY0&6(L9Ma_+;)g4r%-R<~hKcfBKHKS`>pF*!jB8<@j}TD1Wf6>zhs6J@6qq_D<iq
zM@Tk_W~IYN*?uMPatOJ=IbT7sf(8w_{L>k!LHH467V|jHkekTv1oONr&UcQO+He6g
zt1ORvi*U%d&hq4rxQ0#Bpg9(T=mkJ3kxuv@Q6io0=U}h%*i#|*Qs8Izr82AC<mNYG
z>X00m^HKdZR)G9>kbM~b4Vbv29SPdi=Lqm4;6<0r_YXmJAsY_hWlJRAJrK#a(GunM
zgvZg27<gW?qJ9Tc4;~#_MhauO<e#P8RnE7tM{q<{c}0bIhuni!)Olj+LpDnBmPo$)
z5GCIh%M(vjXcuo<0i*e^!)ysAP(Cf2ytBPjoE1<yj<1Y)3^EOs$UdRw%vp067U6Nc
zq3Za)-#qGe#QZ@_13EHB<3AP)+uo>QHocK=E4GBk@v7pdH;=nyzJG~n2-(%d|IhNs
z_W(TdMJ!Lesss&0O8}`SIy-zuPXYkuV%a>7*ON4Q@}$M+3Y!Bao%ng!wo72r_xjBf
zu3&SPm_`U5r}aM`(9>468bYX+ZyWXmkK^?|vXghnxMV(dJ<}Mn3B*5ZdE|Q#9{ILg
zo_M_v?F{mE8LR3~k~M*dzxtr5nPbW1zY8+?c4ALmLX;m-AuB2Y0Zk#ghWNRbN4|&Q
zk#CpfnXd8oT$h|>s5g_|1xS~6o!Lt}Gc|UimNoN<(Xc7z3x;44T=!smc^r4x7=n4O
zKvMxR%@7#Wnekd|Ar1UUQNwmr!r@%#)^cTCK$~_%we6ry@$<1~2~t_#=cktJYm8n`
z6r5Z7h<jn>l5@A4&{qX)a}r)h+#;I`U(oFFBzv7?_u6Cv?4?bB1(=d)F6_6-UMJb(
z%(!zE-6KPYWcRqqOhvFQNOnE(H(>W$PWgAiDc?dYGm-ko^uQT{9FAi5xn#bofLcO!
zBY?%0NWS+Wl5deibdflRClj6rz*L~Jx@jpBJCx*)=;eickmTKn%9$x=-0>Cg1mk-X
zG52GKcpPthv-kMT%@(7Zl&`_uO#B1b;Y(nGS%7W@q)RJrPd2US=@fpyMTTGFF8_LZ
zE8pGNO2{-6o};|eTR**>=5qOXtFmbg*R8}&wLJ1g;F0fMmnRr|Om~P3M7LW}l>oJY
zXexkd*lm_f{@swtCsA4j@rE)0*9<Gl2c|7t5<lJH3Gz|`&ixR{DODoU@1BX~i1I-+
z(~9Cf%;pM+bXZo>JFF=AH^L>~ec0+tR4TGJ!Q3e>#^1-Q`KBFYGXTu8MDlHeNWR6E
zNTi+{8b?$JqM#L}|2U^TJa-T`6Ps<h<bM|~`6NnnBU&lv`p>#`$Z{Y{cgcKdfUbmW
z7Ht7bB;Nsu<dY~7#j8{%JmbMsWbWv!*Rr4U(E;-ym^XQ>y+S9d=Gy=L1dk)QAltak
za6ffLIgRTHEX2>%*b7)>GjA#)ju4;XMrl9hkIuIf@M{8pioXM*wX24(Yw(vxg!~KJ
zy1pmyzXCVnI2#f<nFl1#O{7;2CIZaInpL8dfSY78KJ`e)r&jbx$FfJTO+5DWh+T`f
zAlX=*4O$a>7jchc+hbw_llLT;>uj(tzikMfOW?EEu9#pyvDeyI&4aeYvH?2BZ{06?
zMFhBkx`9}lQ<~C=%La%09_Nd1ug6z~PRLh;p`1^^|FEUt_hH{yo^=#UA52_Rcb~JB
z=aBm*%hw6)_@VA4;tjCvININ6dDS+7HtpC?V$0$C0Q=VE4S#bysc4XK=MF5v46vY<
zfQlhTTk=!ba|9p2JYw7x2&TWqbOSSyMB06y##Ru25VMJ#@6A*meXe1U#k2*JqBE!V
zi)XN706vP7D2M6|1f=2Cc5nri=DOxg5#1|$D{Y7!ViR0Nf=P(dHnJ4c6D}WPc||ej
zB7g>4(3OBn(t9n<W!NVGKEq;rp9yA|6{Sl;J4ESn@Dg@}$6isH+&vDpv0A0=iPdF5
zEB$CNCfE=>!iwpOm}}|5Vgi?ApA!2ymbV8CA7X}sDNalx7(c5{%ljd06n=iB$o~i(
z1MVCpGP$~sq|LcWSL35J3a|>xAxfK)?z+^I_2f7@kEemIaBtH6Bpl-=^zqewrXvX-
zCT_G%Cf_novgIVZnl@d()V7>9#XqmzLCJ38!S6QN29k9dMH%1x%AM_0WUQOaBmwS3
zGA;E-v5uBh{>LGeZw>b4#T{@aZ3r%2xm2Zmd|8L-oWb}%2B5Pgl5+(_^63srL^0wV
zB0of(tSCBXx*+Ot;=@>1%O(F3xa89gdm^4F@|zrP(Yjbwe0t7wg=`7_?v_ZtRS?Oy
z&Jx8Fl?hKbFodKu1G9LTJKox5UuR$FNn2HhKy44wroATkCjXp7neL2>@H=fz4<p>_
z^#Gttfwt^Fx;pQ?GG7qqy%dBt@wF_(5#m$aXwRkKD*X8nBY(YsyL0!!M~alr_`<^&
zfB5v4F0&qmy=jy}H7-G+b}b3iD=7cQ2+ToXKO5hQ_=kzthQ(wx-w_+zAwThbZG3m)
zpCJBeU7ZBbvp2B`0sxv=z!d-<1)z;;6Luy}0z*76!a2*3zL;(d<FmwOV$JYt_RIeS
zLn$8<-F&6>Kb>DZOU)^NfU#`qlKF}P>JC{JaV;#7e5)anZ=;Cdi8m*F@H7Xbs`di&
z6rET~ygKm%m|$$Lee|N01#|%LIDlmYZ^nKE5L?y=_pB{#d_Ur!Azqu6;`KybEPoF1
zZHU)q#oNb%rUyNHjt48S)-<S0G?=I8oqTU$UqckHN|}hg!X@+Z#$eMEu9d{MwLJ2@
z2#<VQEKlAlG5*bI$V63pK>V6g4bb0%E?iP?W+AtuO_yr5ouf^=WH$C9!hXZFZ)m4v
zC_)|X;J$TfE9be>$c_Zl!SdE6X3)?JyseoGSL(bdVl@t?kuA|7ehE@N|DbWU#E#G$
z2-Co*e7tAUTt$zgr!rpIb*d--GxS;~wXN9q@WiWLfKCUiyJSATd&u;H>_q_8ERlRK
zK_p+q66GzQTRk5{HNmJO)xn&nLfVFRV7fZ$sRU1joQ{O(NR2amiM@KlJ1R|YlI!yM
zGIqHYAb%zT<lBb*bcvqiaF<@oc7!+0n?7)@CBBa3k#8+L@@=;~Zbuk@l~-%AIhF}g
zZQGN2fQB+8x(=u(Ye}x>A7Y0jlb+PGm>OX8<f8Wbx3LYx>nZTq-uuDSm0<E4{&y48
z7sapf`|DT(t6Ki0kjb|b(-Ze`#}U!>MaCA)+uKb)xO5h5WO?L!6(0F^S)S-HcJ=n&
z2T?;n8pry8hSU3X0MwI>B-iuhvAqw_lM><Mr+E501!yG}&^Vy$`(J!H@m^kXSh?QE
zejb+sUzXIu_ht&cN!oWXy<V<+Fg@j=XAPpbWwb^3pm0eyZ;=00k?kTTaUdp<iFxef
zaT%Lf_`W1w5AhyPV%^^ADG+bs5Q$5JQ>|*!mN=N9)HK?SZKhD{8W#<_p;)OF)PVMp
z6j-~rFmHj;<sr7fJ}_miz=^c?N1%4^y;!s|dJ4oB<R7m>KMKnO)R0V~ZAVOHBCOhI
zd~?Whc&ynVL3$h^aXFh<_<<zWuKpetO{|{RvU!!?EW`BS?TWUSYD{MCX&3(h+d?lj
zuVWWPWE<b_V=)bBA5CSpSj<*1IyS~uCI?Jqi@98vlKdpnhIatl0Z98-Y@!gLN*2_J
z_OT?2Sj<i^+A3lbWrC>+MwJ{$`@I7$-jwzI*SVz&Q95raGkIJE0w0q_1S=?|E+2=n
zXesG|(VU3lmeCd<ae@_Fo3?^<a2x)E*lxs@#FyJaZ6zAFoa_V+DDKQ`OhAAhZnvO!
z0O_a}+e1U5Br9+*?W+*D1I!UDDp13sig^Y4!6n*U!8$Y+MxpkqL)g0{(~H5m3h_x`
z2!Z)5s0r=k5U71gm_1<h8f~n=Ofbn{Gz3Fwe>i3c{5ukYxyqCz5HkdZ#|3N;MX;J4
zYF|5wMSG}wUmYdAJ@nDT{NO5(csOn0n8ZGK-Br}fS(hO;D}O;qwKEm62?czSgj(mv
zJPBLl*Nw0@VT7I*45$B?RK3t;z!}Sw>CPK_AIWKw94w40jUzcAb9rxff7Ic$JvIZM
zZeWWN!w(;3#!TcN{y<}j;MX-`77hKmM!bu!I6l=>zTKV;q9tuP^dP!HXvd*P!90!-
z0mUFF%d{53V7e&M53y)Xb%yj~Q6hGb5N~LAN)#e4KzDzFBigB2bV_?~M=<KtOnz6V
zek4XO5ovvzU#@n0^b1Z6%<VY;YH=Zn)cRx88y^4cNmP?Wdx_J%h7ygiiGCx7hfejz
z+UhYW&9B0;#+=gE0IYqmiv?3cN97+L@YSBu|Bhd!pH2J#O8-9bf8tZ=<=Y!m`pg*d
zs&o-B&K|3=3E(&jfl4oep_E>vA7Rnbcg5cg6ObRy=Iy84e_JsjoC6WFkG8+882LW%
zh>59W(0Q+rFGii=@jq6K2!<m@q#rwp-HUg3{5>(R7$0KJSuuCfHV85M!Tf8v<QoZ>
ze4jX6yhNmvz*Sf;%uALN;k%gt=A)!Enc@NWsC^ut$~+S16ETZ7!s3puJ#88}F^@w$
z&H?YE%EG@CM9l{Teu_oQ+?R%aSd?fPZHAtGX%mm8?b(>bnK6lSh>zkW#L)hQO)UIc
z^6IJ<{EUC1iTl%_t4S2NjJ60>_zGaO3KatGN;zT4#uEQ3WCw};+`}~pzczj^S0-HF
zfYS;K(6(X#>n|E0m7y5bnLPg5CKBLv5*;Eg$CGFner<<Qo@L@8(YN68Q)YGOtA6gK
z@mt!o+~|yRY0c6#Ho<Q~9V32o(gLLVYXC<GI*m2U190PyZ{<y(HeQ?mH^gh>{{d?g
zCqA9{Q#M{=PZF<v`)Bf-c5wg+0L}oQIpsduOO49?$b|E8gI_t>ou<7SJyiS4?c81R
zIDk$(uE<Xf&17~OJ-hN2Md63J{#b`AH|rNF^aJ>y2p%QyO$fBszrcFLCveugZ!7O7
zwTYi1ZYww~%P+Cs|A7l_;Y+J5ZaHypgZqT}XHEd@^B+J1a6dqerIyUMl<re7zhMJ-
zd|@o@&}{LWgEn3>{S@(<<Y%!V7sUrnC=0*=3sA|v1E87wH>R<TO^{CfVH>YWd7Ahf
zV$WkE;>4#Be~5URQ*<jI{UnQiKN_z52cO+KVw*LRE$v8iSPcO&lb$ZrrV2mLG2$N*
zSKW^I)t&edGyboztHE7^X|#2+o{W{m4~O${wE&}i%wjdz*MQYPe~r<F;D3T6jQGbE
ztP#Hn@RtC;!O}&BT`F-V*pDn$qkcPB4fsh+cV#Y?MG#MQ2=J$XiFMY%eWTqCcq^5O
zepJYC4#~`)WIiFThONv8z1^y4wckjRYpuY-JdS|X=SP$IJ%stF%;$Q}L|~QX0AQ8n
z2Tb7W@`8;YQJ>j(RpTJ>s)VMa;;)a1k1&0+ZM-UQh<Np0vrF+eU>#LHtIqfVL;x^;
zjWr+kaE?$<H1{>mH({z)e=tq(YkFpfJN}teuKDT1W554xtl18_INmpi8l1rj;P>ya
zZTPkIuEAfXp^MxWzW-0|w71b*4&zD?(Pz6-Pd+yG6#whMP9Qa@jHt@Ac4@vRZYqzf
z^Ha&UQ_09Lyc&IS{uSb2L?Zh9+78=^2Z?>3PMoG~6u-ymMEJ|U*@`Ndw_WAFb%YH;
z`57R)tOWVWB0;{NuxUK5&QB}Bk?*FiVBP^kQ_2=%^d~=79;=_6WOmb5i!QxI+ix7P
z)tv-@x9KP}0~6s6cXlvELan@g+5=bvqnYvnebOw^Cc`2!H)9tK4DG_Zb**Spzi2V`
z#E`M(U3eHG-<x2D($OD@3BHH35~^RunHn7*OW^jbq9ZQvU{x}ypZzu_r_9(mZIw;x
zvB$=I^u`#o7w($$S_xCBq7wd!&Ej!&e$we1otwG~y{%~W0aB?V1pQ8_RQ4p4Oy{wu
zQh7@MJ{VPK2{DJL(9dAT@cTs-3LpPFsZ6kvc?Ez<v7Vq|l!BRL)>^gll|!w3e_(g=
zxH>;k8zZ)odCiL5PRt*ORqv7!8#AmPu@%j$dTLH;wUUpbR;zeCzo%nUu-x%<)mAod
zShX8y9FE%G0laS2%2yt>^8JO)=5cj?UaYpVdDE)hMa)^$s(1NNs}pMc(pEC-v`<NG
zCP5#e_AH<Y{C<&In>3rB8c%ojRWYfDhKxBfZH(DqJFyY$2s-g6G3)VZ^1X_$Lfpw$
z#RS!pytA*WNqzsf(fiAci8K3rCiUGnM(;g5+H8WnCKE{sD^MXN{0Cda<LdmpxI$IT
zMlhOu?-27R6{4b7nf#P!K92Y$s%+i@qzY-L{geu6m(AdJU2AT^V#c$o*$hClZxf9p
zDaAPeTdi98Dxy|C?P_=PxH`XHthTDzV%6>?<}cK$cLl)Q%j5WBtC+0l>{~@p4r+CR
zn8@!JtNrO;P6?S->t;emq4i(FmszdyRYI$LiR8V9$JP1$Vy!;2)N0*F%-?8L-wI+6
z@HoEKs^+;fNso0%E)!f1Rs}vx?B&DVo%0TUKhD)?9#oI;yZ6eplZ|xZ3@&jAm$%N6
zHouAJ!=T6C^qZAd#{n?s&>`K0u!ndYUxzPfR#*Wa5>yuf+K)o~{vQI&g~ZiN!g{}X
z-g0~jt^pj{Cg$?H-fen`PFRkh)K@oYkgT#K#{o8mM5}kf{}+ikYM732ykI%L0@n-<
zO>bTEXJ1MPCnZmBYMOLNR$G#90k(uh({Ay9k>ug1W#+)K#&Vni*9HzvhllxnevFV{
z^u)oqdW2kVvLJcUB{4q%YzK+PvM}~o9HRI<wM{lWFT<m)=r?f3`nz-3=8Jxyv9>d@
zqaw&bNiUD%t7TsDkbRt&O!FRG%{aIkaeURyLuX34CnzcVEJ_wmPf9Ko%&R5MJV@Tl
z*}fDydXQU)%Sc#=tmW}NjXi@si)la4!j@voFzsIXD6lwFu>_7G1vc#*^$46iOex1d
z(Ypx!{_vTk8%x#Zmwy2+W+PFeBG_|0UVwiQwu0ZiD{~j?X5TVipQ!n3tK1|To*VTT
zNopdzKr){ZYVGnAg{vD=VI>?A_I5ZuIXN}>D51p&)&DepswK}85>^b;ex~E>Q&`Tr
z1QRBXP1Y!GbcP#KoVdsFr&#=V-2cm$f+bTUB$(c$yoaC9TfV3HJp|wXT)v<~I0iz2
zN?=Q9C}9a(HgJ6<@k`KsfLHVL0e-3vyW!e@Ew?hQZ*QI3|5cBTd=d`TQvaj)!_-oE
zJ!$#^*az`HjJdT06`jWKK=$53PC6U&w|O^wf4<~O!KV(7c;>eenFTV-jC=x0P3T%s
z_8;h`2jPybGY9K9lDxh|KL0G4^6h~=g{L@q!kNk>@qygZ{4|kdVxESZ5QqDpR?G_?
ztighlIO{$|V;$<D#OG;LKT2b|16YeG!CuH~QnYsq&v)x!1wHG*>N9mw>`QsExt);U
zXD(p6nM4mrM_Nxp=Ly+p59R=(;3+;tK1RV+=tmLuIJP2uya0=9>{OoX%Xhf-ySS1<
z38B=bEk-UMGIB+M71KtBfNOGH4ty+gLH#R(&Bf0MIfYro)y4MJc|KXG^#Sv{hyj`+
z?4F?)iWkVocO24Im)g-5G<#_jDc?`#+*ZAgM%|Ez^nD7b|F1WDXG)t~fe*JB@#v5d
zj~94++K71&X+r6LZC^tF#KE_t86Cx!;C}*(U7Y*ie0K`>%KtpXps?#FgxCTeEHh#u
zWHnLM0<tO;SVZM8&2XKTC1^k3teZF*KC#a;qU;1<wOsXIz<<Yb6|OwC0bF5lTCR~-
zcvm-Zs;J_w&LAd8kiH0M5e-_eYWSY@Dqls+yIw`2W1a}LUIP%`>mdw4C`={n9{gIa
z;%m=xm9H`;LC=8I0W6cBE!3+I=+)l4dE?6DJKHd>Zbim-9eO_`oJdRpLFb0M1XUo}
z9Bxu5x#EBWz_KY|?T70*2ap6mkJtLmeZWAad`RwRhq*(qk=V@8FQ$z~e|{)e&?Y(`
z_7-}!gi91oE|r$>qTf8=X7>FI#h+xZs;q>pfM!+rm9ohdYbskh*`}Ow8UG-3IgARe
zjzumoX0P^}n+a3p@tVyUZqhWuZpEJvo^qf_)ycQ;YiYD8q`^RrQZ&~@=T;m;S^VrX
zW*fgxBd(0?!;yYAj#$MNAqI$9h6P^c|7s+*B-!-;!v9W3su8z2{9fVYis776$%Sgr
zi3B=vJ7FrzNy3)W35C_hc0lQtC3yS=zqyUDXeTz&3H7rEHUocDc|(-TC*|3|rJSYV
zD4$N8l&e;UQLeZVnBu(3Lo0d9w$1e=Jn0#HII1RrwXi#AC<}=ie!t4~HOH@~x<`?0
z8_9I17$scJ<IL5(ZyTISq5r1-7fNwCn9bpTF7uS)_{)AXi!hZ!vnP{MD6BrV`$8$Q
zDMirA!)rED3RSBPHXDDm6pbi_d@99G0#%BcwiM~asT8XCAxfdRhM3}_%9jzFgYp2e
zQofwnZ-$%vrY?b-!=DyOt{6PXA}BS7XTqNC-!_Z*M8EdGx~3zuc)ZoPi~Mh(QLuX0
zIQ-?vE&uKPU0)ykd&r%EM%>Pm{+jKH4qrW!{k-2~fJ<dzy+puM^o<|E+(7UVC7wVK
zI47t$^@GpZ2j}JE|AeHiD2YXoLYu_2!QLbB`<N0RqC*WkF@&=(YbyZ>OwlX~J`rqm
z?rq3$$D{!-GMdRWL@VAHRSh6i#dUY7iqC*V%VrXBs<;OBAQe|!D@<|Tiu+#Rt=%@+
z=kSapnMS4&b}fF*J^6oT?ltOyb2t{Ae8))ku7_|rkM+Mi+yGERxR&Inn0$@FXhT^?
z^LqCyBp6B0xx2neV+60aI`)!p9XbSSg588)BPjn2Mo_-KnC`dYk6;6n#R%SLWo$;q
z8DumCBN-btPMaMa?epqrXy#D-0pN7Zd7XUExFa7k@b%qU6P{8$xm1QIE=-<ACLN0Y
zP-yH$QIfHj<bK#;(tF0<!6h)}9g#c8%H56JACcR_%GHzKk-Sx-CBk@^fj~4(4v^-}
z47Z2PXdHrH6_bA^6_alucIc87Yi!bxFxX1yf`olYXbVOa+d+<9yu?2~JE=tQFmv{?
zxy*ub^E`{NiJ8O3KMaEMB+ki?ptiiL0fJ80+mKX*WD-+UPZAg8_apG|!;F2ne!Ca=
zsn5F(1+ThoM0jcHCgFpyBj9b_g3c>vDtH2gOF;DqK&rwqK<lZ33fB@Fj$c)f|4ynP
z-(XC)-0|m4Q<J^QZ$?=W5pZP@A>0t`s3U?!h(C{rEI=czh)-!8pW!aH)_}&~*CLYt
zM;4KMow4W|+5^{qh@*>gIgbOZq0!)|n6rBfWdUpF=#TAdOfbXa2xzY0=lPE~3D$z^
zPs0JmtsQ|Hx81bA>rNd;&9mT|nRG^LtkpalZEMi1;l2Xvh+l=1{~8J>-)QV(D5HfF
zp9Ue(Zz6j4%JG$czR<;$F&Y_%k#Qv$$#_o-Zl9y0NnRZ-Oh<Hdwzx^)j)C)A97iQ{
zFrNIDCJk68U|Oerk$In8r%_ojAghTpRC#^P;`myau5gjf;rd~y^tv1QEH?5kr+7D}
zW~CDSNTcd86#JUK>Ddb0det;k;7OiIUmwWb$>%UHx<jnVIgg;1GTfnQ54IbA)mZ+?
z)L6bT*cWkY>^xu7%A~LKo1OqAW-!2?5hLI*?3=t|E^IA`=wU_N1LoxncVIez>19R8
ze;p#^yBa$YD}qUH5}lE5Z91;>o2#sdkpO>1gn*;4la~;YhKN2^#DiegX3zuTJ7T@9
z2>EY7gnZ+$uRJ2O>3F7F8*_Yx-}D2R%9sTJJ{))aTbjOBsA7IcsL01+KVCv;_Da8L
z0;V9BA8le4yEz+JC-Vt_LWnqmX}1d9&zsr_#O+gHU?NWbz-^l0*K(Z7eEyjE+!g2M
za2f~cnCaivBruPg+VBwJpAp_&;qZP0)hF>*%@yW2+g=L`yARlLU_CADGhjImtWMs=
z*3QfU*4)CD06PJ!mm}^B;+!I9flhQry1hBhsI?-T+NG$E_oci2Yl%;DdN;la8SZ|j
zvq2mD+WqB!mfc^z0G2~%;_Uvp-*V8}<qMj}$@B_@Dp)t{3L45mDF01N4f!Tuzd#s&
zBf8RLt?-++;IuPL1ovp%nb9`RE)cvTj<cO<51AI6$j&l_BqAOA6A_-(pS$4FR{Bjl
zw@2nFFt28~L)rsO2iqa}r_dq!CSt#;L;A=Ae&@}PEBKk{3_}N#PLHm%BCaRkFGL79
z9{cMOA`({mO_~+46wGTG?gY6CjL(XY|5ilEcP$pwbA0j3>NnZX`Au~z;ue7C5Fua&
z_V*=3WFx$q6|oA;>xj^F>y6d4BIKWj2z)_vGY!9AQbb3ShKL#fHMY|MJ`s0p+dEZ^
z9?PJNaQKenYhW(79TM01p=@$eb`6%u;)pl4bLdemK#Wd`ZYWYY9z-t{i?~$Ov4m<X
zN1JXmCSghN<;QUA{W75{4^fH5ZE~*^9pSC5yDUvKopUPy)){v)mYf%yJJAvV)dQnr
zf)1L)I40<jc^!6)Dgw(rCHPkGzItM{Z1b@^cV@W9i2=kmz^@Jk)lh1P+oLsjcJtI*
zeHts1Ky9UI6Ce{iPJ-KrDNGh6`jB3g>crC)lP`0>9A{~QX=I5DfvxD_-3xC+%d56Z
zE^pBJAMr#y9l|1(m|ZaR=rm4tmp$ly>@AD;)dSWX(D^ov$-|GB6MEw~zNU`vFtIBz
z`Vi5<B?T^3{Xjm(zY=BFLHQ=E7y>+F=7RhtG>1u83XsaKbLw!)t~2Tlm|lrdHMp;H
z-*WsM@4~kUbPTNmzmA|&u;Q`&y2vJwAb|vBY=Q`&5hT$0^A_y$m;{+5C~aePJn2TP
zjwVyFQ#SUf6%!yf)yC@d(4APF9B#u-6YG4;IvVUJxGcfi&-XJ%E7{L=?>8O$fZ$8*
z=lWoBd2q!^dYmK~8Sb+lBf*u!Ulu<3f4zzqD)G(2>{&SIya#9(<le)u{{O;?R<({B
zJy5Nq#tiKHm>%X3TY*?|r|9tdFPl_!d4^b?m1)yht4-fDqeJaXEQO({kWBsM|CkAP
zurD`LXD+PItmS@F39u&JJc7#ENtbsrwTd&rTF3FVF;%S)aU~)|C%@T2{;A|B$rW;8
z(jlM<AWgaepq^Ag=NMu1I+Z3}bm$^fA;Bil$u5}$I@Qg=Dqn&>9sVFQ#yR-t`dz4-
zd+q)Ma__k(@DR`36NlTUxb%r+<`3K^5kL7-<gb_R`tsqE=!rC3g=k$l@50W-i1sh%
z>5XK9H9k5D=0~-Tf%C8`mry-t8DA}I6X?p=iv+qV&d1KjB=Av{{KRSqb)Kkr(GWU^
z>?`ApSBE%0zbRzJimV`FCy;a@rWYwHV7Q%k6s}NpSk@hv`AtD9R#(v8h|M5=5vK9e
z{L3A~oMj2-GMhk$g~BAzLE#=u&+^Ahkg&{e3fly_qV*wxE@t;)`INxJAA!H9jn&0!
zB(XtVsW{e}0ydZ{m9vXw5?sUv>*6$uU|pCNV+CVk1H=}$!gatXf^Z!!m^P+1vGIh@
zS?V{%bdyz_g!!2~yE5FX*!6&Zf=m}6`O9*lkdL7>g&>Q$06Ag<Tpxb4n8IMHSPc7_
zgNeDeI2a$8Urq(xU8?|~w-KQI{6_3&OD2Cg$mHW{YD!*0K!nTVZx(YI7@x(gw3v7T
z0$_gC^<PUxSGCcUSQnv(u@WTWuk%J*-SPV<@gKydGLv<Ds&dgxHWBW0%<sf#H7TYT
z!=TgEqnKWEi8l<v>@2_e(~8j5Y78QDrFsl26(b^t>-$+^HDm?p<?amk0(C2~f8*DX
z$zOrq%eNXU4M)5o^U?diH2wk=0aVR`Ua+8eCCho1_tskhUm^A_1n8<a4LgS)T`BS(
z9DLzGvz~*;zgC>u4$-Fg7qK#0e^S@h_Vg3Oo93S63C)`V&WR?Igy${s8Ngq9h&Mxg
z+!Cwpge6|%5W5GOc9!>3c>i9_-PDO!odZn(-W;&bM;^Zhyd}fECf^SDOG_&M5lH2G
z3Dc{(F=CyWtpAnx3yb+4%-0t4vc;&99w81U@(gc{2NTpyudemQySdl-RFZi2e&2%$
z<^+N4tUkq!n+O|!;5OgjCtr&EUDI9P*Z4F)zM)}jhI`4rgWzxRYpCTvhCun&>il2+
z61@tb@T3L(0O+Izy#i2}c)jw0`3{U$l&<%EnasM@hp~!a`0H%X&G{K&GM|dB|6}xE
zB5HLqxf44BiB3WCzsxB}z9+GAFhCyroM!G_?P;4_*Z6)UpAF`JSk;*1nI!*#SZDlC
z5*#6UkiZc3Jq;?A{2$S4`Ci9-ki;ARoTqtvy2bnm=9I;}VKMP46#%mzOi-oLfuTQY
zby#=`ON&t(K<z=PrRU!go1Nia&*u_*z$TXe6B5g}0jqu~u_QQbF+b@uOs6eqlLf_7
zn+fO;Ac@sEVE|(15zla&8Zlxs+1HO+v8RaLhFF~q7GOtgV);KK?*hHNvW144mr&~`
z@rM@lGobGs5W{6L@x-cx$E;YL69ytyX9S99>cogOLDYV1)qY3t_6+xMa5upp*~Iea
zkXXLAvAUNK8(@ZhYB9fn`O#u_TTDE$K_8$`tXds92BB65j#XH_7`2(1yy4u6Jx%P6
z4ELaMAF-d=#PWYZV)@>|>R&=^<`cZPJi7n?3g~AGde?&DsSN;%SOG;@njauQhnEMi
z?f99WDe_n6TB3V^HQ0W(>1|e=+G^0Ic>2QI8W?{i8f<&Y^Jt;(CbKi8FLL&2KZCZ@
z5|;p6&qI6&;vJS)ZS^fNBkH_gLQ8e1<^3F9-T#;7{=YeXO@_C@I_tj}-~;qTM=N$)
zv&)joUkg(CboyxsS&Ud`FAtIN9g8UmrlG~?^dn4+5C`K2vm1=F{&i*>%(&~k_A1uM
zs{R<EQtRgevqx(Ay*LRDB36f#N3eHoV)<*6SU#PUnqNX}4t0OeV)$H4&@{Fn9hC%%
zr#1v=FCeXdoot36R;QYESj!l(ne1%)tk@Jp9YU-QGmm5M+r;wMCGX=}|2n0#x`bLk
ziC?jx(tw&eAf64jn0R6}v({PxzajP=1n2^^1bYoXMNE<Z7bd*E=F6F-5hl~CHjx^C
zrBMK#QaW340o&6YO{RG~_4?=zXOc!3o;NJ<KVXv(8J+z*;C<clsx6r|@#>Hw-k^Fg
z&JrJoxB}1T3~b!MnWZ6!-vq32{~hqV8Sd4LRcO{(Qu%*Jtb978=sOfNNj+knIpl+D
zgT<T&lOG{qf<~tlfntO>pd2>%^?-Cp(WSUdH}^8!g?R5FC5HvQi8vin6gPe%3it!H
z*@&NfDe?!>UEf>y)Tux8{LdbG#g=WDH)b;py7c}5;w-(Ak5h{2afx1q5V+N1{smKz
zUJ0W^iZJmy<p;CH3OE}Cv=;&R3>7k2mh3G4zYrjwuKm3(Awa-Pi!t=DFam_pwO^Qc
z0zxQU28IHp1Zx7SP9ja1H?Ubun58sUp$clNK^qg=tjF{o$RXHu7=QIKtkBM0B&u1;
zJuM^VIZNCE?98DTc)Rda%yP@Cwpp~X2L;VWh>OBD47<S+e+luS2T}j0yIn?5yb`d+
z>`yB59u?E&l$#>60>6e#{&R?xPX~~`aK)R58lUGaCLurLUxbngqyvaR@k$n9f~>OY
z>!7|CiL@ne#u|eej(Mu$?wTZe!D8BgnN5$iCBKC=A$|mQgPwomuc628G6A&Of*Js-
zO(JclTd}4Dj>2LSg%G#KV%mcVkx08~Hr9-Iy)^5oth=(f?7s*msFMCg>E5TLy5>KR
zy-b5jD*t%|$hQR>08zY>1`zO)#Uvu27!?&JVlnY5>I3tTuK${7O#sy;k+$M(SZflE
z#bSFJLG^>SryarEMNhRAZ^zmYe>E1{(@ZdrfT5o$Wx$joPjs?RPYx!Thb^HROVg+=
zcL#O_Wa-!pjKAgtl|4U~<JrEs&TRLweLZH0D}%ibF;QYav;9%atG4TD)0QjV_K=Oo
zW@66sKM=bU2Ewofl?7B5G1^jhVpkG80rQA)$C0~>$1SD`m>Wr?9aWeP#AjemBDVt$
zM*HsnEGC!=s2qv3%k9EC0=OnlqD&Gkv7oA8ZYGg-Ibk{xKPgV42$(0qXpa5OT-wJR
z)A`~x><RqLv7lcK`ZjSnb-Eo1QM^eOVjQ1#LH;BJq*#njx5C^8jpst<Rus%rR&+Tq
z73ipT(>+)>Fq2jPi%QD%@mUM<0lI~rYC9FEJD_W^i%LpQp8=zuo})wi>8TDUZ(?(-
zX!#QnEuT)tW8iVVW;wtw{53gPb1cI6ge)i-0VOR+Cu4!)^)v)#E*Rh`wZN?8xgSl&
zC&b5Oyo<J~tTeT)qD|ZRUMzS$gbOh}{Z5-ZsUVvVKufbc&PtSR1HtcMJ&C;ui>+EF
zRhwr8v;_0AN5BpQEVKgDwwAVyu7IGq5wd$?1Q^C`ffbMjU@Akf$qKlN*qgD~A&4O0
zZkwbonAIfF_WV9Jh4|P`1{ktMy8l;M+5&hLfc9nqZUGQ0Dg@wOE2;wVRj7(~#r;@s
z;-_J;Rq-L}9xJLLm>0>Rt$ZJLt3+x4_m<vAlKX9vY5;Dd=h{Y<qz|#TVX-~;ljJ^|
z<VrBFlSF&Jl1wE&wj+LWEVl8%rT|_7pl$a9>~;Vbm5KxpSU@M@*AcHRSMk&1BnS~d
z<vaIj_pZdR9+10(I`d0ACopq@`RA_F3l4VzqVQ%2d<=~U)(vvE&O;>B3rFQS=sgC*
z<CrBf%OEr)zSSn~4({cc#M=3y@mgt_{|n}IL`Zy#O)P@-B-U1X*poO6!~bFu=WxKE
zW^p~iZ3L&C@rVca1ehmdaFHOusQ|UqDgyK|sk93o!1@t92aBClelWLNOm#5RnIqZ-
zh0(jg?!sc{NC3=j7E=jKHS%Z^3LfO2fdJ;mNtDAq(+mr$0q71AX%iA=5b+D*B+{+U
zbTE3!r5BiWF+-5U5CPNJYpB_ODY<tA<S<4vA@ETV5p1TE+Im06qGP`lhGkfkNMr9K
z@f|kt5O81e47rzAdrCCk8GA<v`A@h?Oa#ZB5R@gc2*#3FTjWts;^i=`z@o%T93t^7
zE4UB1E#S0Me&)fg1hWd`uai%>2-OV+t>R$-zm8EHh^b5{=CO~*v#ns^10>ew^`R&6
z3uJr|^9l}-INjzAjsW;wOkzL8QKc%CFYlNcKaT@8vGC(atj*>VPvSKYyd0C5`)xBG
zoR13k0~Z;@_-ki7?g6d^^a{q`+)`x%9A_1e0{DH5-~fW6J(S>3tYC&H!wMFD0)n-9
zeXjfe=%jj`jBj93B4=!Q=3s(NJOJSKnAxml6&0$)ISBPW)@r!M_E`9BB-S$hBHCfj
zpJpA1^;i_B#D0h-THqjnI{<3geHjIU+W=r=3@!)3lfeOTcFr^Xcm@Q_57-i7b=W$A
zWnl?^{-eu;$NXl}cS(;3pdIgf0NU+-#-54;nDeOLTxSz#r~85UBgFoOJsl@LjrePc
z*M2aO_;W2TRCp7wYdXs(TJn~t%A{n`#{@X-H+&ba(mJ_X@Op^SxZ|Sprde-yX+}ue
z9>)xP+xh(_VtD9N%!G;j!ylc5Zp6>=%y$zauHkrg48a5NX}QZckqXOq0*g+GZ1}ce
zR!`9R7MTbo?hjBsngsBlygf1jhA!FxbtvR{tPZJt5cp_En$i(1*`2l{Gl@}01`-qP
zNQTo9CM$7f2n~#5ih5Wy-R<3AeCpj41WlrM9}zeVpL*x`=$(9DVbR|0BFWnqdgpX5
zK<9>f_>032#EihFn`LK9{*f`)c6=6l4uaT8mHmj{3<sbJU5lXeF%>eNil{<9&tp|c
z?W4e_*`aPig|xQ>-zAyGdIT{l(MW=#C7S4z$W=QAzp8O7$tP2dPXUj{r)tP|E!B|k
zYb;ugJ@D<tqC>Zt9Db^CH8@-Cpz)=<3C7}t$FKeVC#s>n{%>qG1hLh~q8c5ES2eCD
z{@<8tgh&!is%m6<9y<$w$A0i>Y?jW<Z%jEpC)qxdsTvSF)#yx6v>Mk;#K|AROeg%R
z#x#;&OEq$EcEzV^$ag)}kndY8T8;g1e-KlRoZzE;H#b0A4pTSXP0$@5JkFl}Gu6;`
zeja-*PBqe~MmHO;9q||9wHqd|MmNQYPawXhjSmh2@GAiAg2@0j#{pzL>^D6CR6|gZ
z_<il%HT?~k9mH3pB4a6&K9Zp45WixD^22=IVUY9fQkitJm&Gpxe-yk{&B+A*N2uNL
z37n;q^GJfZ$^sVy{2ZVr*mu}_|AF&^>l4my$A^-814l}VbhjGierwj(R$tEDBb@J2
zO3t5T`Vm*%_Al6p7h6ej3OhoY53%T18hym?B_V{v`7R|NQ>m}Ts&5Oy(l2uwJF3r$
z{uekvVVc<fg#mv<|1@R3#}52Q{~{0bu4zxl?jP-LSz8gcmd-Szt%lPX_LsxVZS_Rg
ztIr_)81sI^Py`W8Z4XrB#q>Z0K7$=2_P;vd2iw9XQ}2(EOFjS1=K4f8W8P#2KywRF
zvZDZ$NGo09a$@2GjASb-P8A9xPSyDd`}F@17XsU&C18!0#{Ww;IaT3D>@C0=+t0B{
z%w6wi;lBxgaAk72;6pWfKGLP9X#+zgW>6*!eHyx((Me`duWxzD=7&a}FFESuY*e&)
zzGkJfNxlC>kHyJ7<Fs`#sSD@zxO;gI)0!ML8L(C4sMp$682lBPUm=fa>43=>YWEXQ
z)Am`IC+QV{YXDY#j!{q5Mr&X7{1WrlKZlvq)|O;0@n2JtpNank+d{0TB*DBjvArTL
z!s{HSy;b}?B=u1&f<LgYA@K<Ie7{a-(+*56gsuhi4^MKdg9Q+J63h(7fKAZs$juW}
zqE4oRRjcXsEowEje#f=~Vu6|C*hK@FOCpXg=1MDeH5?5PEBs&BH(=w7?PAh&A5jan
zuK_xbTJ^3FYQF<?v05eSY<yPiy<kowR+sZXv3J1yUt+mVz|qyD&h64;{)8TUbvk64
zc*`K`e_`TzO4>Efr0Ql;?>x~xnA{^yTX&N>V|n+PJGz@{wjwXU-H?h%<UiO?7b+5T
zzGti)rNr+jt7$8;4$!|;L?tXtMZUjeMY@<8wjx^WKTr`Z^mEwzV6?k@fyEy__h?4q
zZsu~U_C-h<p;iQj#6JU$KYrazEh}~d82xXW>fL2vegzYM__~@pR_r5Seh#8mm)w7`
zgMj{9ZL~_=O>HZ74J3^bD|`}(f4QXC?xvm<yBSOZV%51|5&rq(l4`q|x>l{${cniX
znoq<IfzjUhEf&AD&Y^_L_AsfpcIiHCLU+>ut_sYx6>tr5=USpU89v^buUp8wq4hMW
zHzjwUQlq=6Z#()Dm?m^ox$|THkT?FQ^e~NVN4J7Wq@yZKQS2`;@kgb5&@{9ieG<@F
zI;wL@KI|}{|Er@EpN{r4C9K$&;b@9j;R|BtFDbUCNpZwpZT#B+C81WmD~A1hNwqyp
zDOYUc-C#NlcQ?4x#8<;kGqR7c(RnX6r@{lea!#3ZpWl>p@)`e5pvk0@zT#LS8)N+9
z^fYB`y1igJkxo0*5C4@830YX8-$Zzs(PWXg^mn{y#6{A1*IHLdw5WfB#D6Kx5L_VX
zZL%RrwL%Udqz4>YYiIwzIf8x6aY)Kq61}qA3lc4Xzag3U|0SfaNx0u{%3F?4;OGm7
zrkD1@|Kwn^F^Nt|`k6FH%2|>eNCrTnQBQ*8XGSmXD1yIGk^ZJ592H#-a{`XRaA+t~
zuzYcNa=%IGR34%MCLN**5a~j25~9QbPG_8Naa6(tXMPWGHv1^n_4SYA>t`x^xbj`#
z3V+!>xkR751byNV^fr||1O+b;IK6ih_i}OJjy>2szT?o6e=%q8@4t1veIz-gtK2}7
z+ObBL&Rx26EzosBm#R=+&TLcvZsg8MMJj<szZ|J31uKRX$5OBoSV>HGO{FpZI`v3Z
zJwkkdG*G%*6*z0_$<Uu6eUeQ>ge9@EM1N1<kJyTswaCbZo?gJ)`{y6M@FhrNf9;w8
zSxp4~1y{4yE>9`AHh@oskH7jN%FxN{=;;4G^L)*U<`v*#fX@Ry&E2SgrLl6L3e)@)
z7P^;@0}xl7-xYUVh8t6cxJ3LZ7XK5^h{;zLOJ?kA60fgC`k9~QE#K4p?t?F%_>^CS
z*Fz{!s(>_<u!LP2xW01uCFob;EAaDoe)2<=3Qe5G(}$~9yS)lg-aaR~6o7;BnnDG5
zqXpF!zC2bD&{_O{VXAJ>yVWrlJm0r7(By{%S4w$E{%A!{d7$@~P2dd^g=o{9S6lCa
zt{CB+F=oyuGKVBkzCD<xUb6N5fao%1SdiBY4?15G<j%{1rXZws=($MGl2D2(U}a#c
zjEN9%ZW2w@%RHFBJeVq%z(`o(TnzO;=K)oX0eO`d1w=_xN+B+blB$&F5OW@jov2LS
z&@;Dld+^qZ3*Um&eO{NI%e(X{&}&DRB5-LgRR}_MLx$VSiuANN4fs<k;D2wZr{uhz
zSd6y1R7Y*^)28@Jm=7T}u$mZu^}U~=`3o5Pf4;tuyw$wUZI*YwqQDh9I{)N$#Q5L|
zE~h6Vsf^VUnz2a4Vtd344js6c_Z}8czcA5SOz8Y0WHk|08?xpsJ@Hh*G+8glMEMKY
z!Bk^c*K?kwhhNt<Exr1H&#?3atcukJ6<vCP1$=KmNmMSy-I(FV_=x)rzm}f-zq0h?
zOT)ZNFPYBNv3y#3dROePxqR30>?wo-Rm1+EK}%1By;yqkRmUVKKSXu;S&*On&{7N)
zp2hP}g)%OD!%_aJ{@>x?Vr{;HqYl*-z6Mq=+@~+!aP%9nH)k?>#TK^CA}qnryPfCr
zi3DIgOxX+U&0+4y>!s}7jQm;JX!M_=Z8L46tA*7^M}u%$-{ewh9YcQex0~7bD`EA?
zTvJ&IDU9MG;W>knOEn<7dnF)iI$8g+%zpye0<$1>utvGe!E~7aAxs0nj$yvcaFbt7
z*g5>A5hj0Ab^-Y`4I5(mo)$znAGQ6LI2OFqfe-t+arJ0dToGc(?!1~An#+d|h^2nM
zBRFe~F!@bw0_%kv^h>UoM}F2kzd25<3iC0s8<DTj##mE$-NFRZgMO1kSXBOr47Z1M
z3HuU%RDKiW%P0AbG0A6xbL4+PoaC!YA0c0H4Kc-e<?9{VU)fxT3i6`8zT-&!tdE_b
zp)4e7DA~*PHOH^#AN3ta%^0O79>V1$_Xqh3esHSC+a#|}K^qWTFFdAyOgZLIj;{$-
zIW&jXQx1i;z*<7<l%sSo<Tu}1g+~xFCByAWBce`Pf%2C|pnUDIsKB}iY#t-fhrsVF
zpC-W<@M!>=V5jg$tJ{p~$~Q#iZHB45tOBRJX9!ezHHyb5uL8$niu0D&H!H!MCRWu5
z6Prpkni5wp+<pL`>;IjV)p|PAvud@~%1ZQ`2zN^+E95snSdAO#+oxz0tQqz_{&J+2
zKiJ>(Nm^S{XYi9B&b4+0KNB6k!6tpS-+Ta0_hVV$uBUIr1#<(bQ$&d`1`jcFLVmL!
zFwe>Q1-_B~O30O1I{+M{9iHa=)W;wW6080$qrVmCZ!6-ormK*|hxOyJdkqgiFeo{{
zir_iw4KWA6mnYXAp7`C@mH{Nb3)TV-H01eM#~)<GWnQRqU+!1c90ng<I>$5YbOL_}
ze{^u0Q-GHE7~7y}1+gkP10q%Ms3q7+XFr2L9cYam!LQ28KZnZ8Hw^0lK7QqgnglBU
zAvhIpCAga?o^S)P_TZ8Y4pl(a^UdZ%2?T3qX+a#NO)5KK66u{;yeMcsCRUYxnb^uy
zx(#vl!ZQZ(br#X0COR`<m^sd#@{tYSM7WReD-;fGEe55wv)Sg3g$o;QvVnaHO!eG}
z{LeGoENy{)f?ws3e=g;aZ#dQk%J}6NVY2S@o6o^%BCY{<vpW%^^DumLP;$``(yrO-
zPRiltGXSce1E3^IGy>~RBJbpj+{vdEh*e2i(Z_#>10qg+tPfd3w~uF;GbLxzG_1L2
z(Ty|-^fdxpecgk$9QvvfwZpdKS6}6yPhaH=U|lcS*HI>&zHYP9UjsJ<>EMFS*4ZOQ
zdkF123D(3v$izR&#Me&N2Ww4ioQdyzo=KnT-eE<bCCOKa*23(7y^Wu~q{x5k5cm4L
zhWtD`nq+p-SeayMoJykrFJV`b>^4w+t-wF2)`IE0Xtt-O`=N+we2&2qp94EHmzaAI
zcz0V~wav1;FI!${On<$ZTBG(C#8=_@;=23YH^@)r^9z7g{*HhT(-S|yj@Y~SQ|XEP
z%k7joOWcK7GSa+fMJc8#R}x^(8|&`lG_serqR^===%Z1jW3YjU8HDjyU*#P-H=Alq
z+1AD(_VdwT#&93ta)74*?#|c?I}y7IP;{P0#Uwh*ZM2y)%WvKXr_SsLcT0x5iaTrk
zU!kEaglZ_pRSTyXhefYiy_u_>A&)Ni<&fyZZEL{+X{?R0K{t0(^b^NJ=j+{HP+UfW
zd5t)&fzGUfMAms%xU~iv!`n0=eAl4ll1(Hy3rlnak1@yDr(U(Ve&F(f>uzx^z_oJL
zP7G|U$p-d@g^dDM5LizOYXj_x+$HD9cD2a@_Bt??r#oFbLV2XJ8}=rCZ4dIVh&$Hq
zSHVY1PL>E4U$YLJ##p#&46(S<u~GR<L+=>7lZLT9&KPa5+OI+TWoYjOpk8>+$mEKX
z^HwNe=Ad~!;pND2J2`qX6_jHVHk|6lp9<+Ffi#<Jnp;Uzj5K|f224l9caB%tai$~b
zHd>f&xk>=jRa3C8z`Dn)?09pW3Huf>RkjZ*k5XBUV-IXIepN{R8dONW3D{^TJ&Pc>
zK-nnY3PwG?6HG69EX-tV1Q>c8bXHDuDBQ_3!5p9IH(RWTK>&{-LbKv3EDOJeSN_#;
zhxh6{obK=jOr{ktTrI?lD?^{mnQ#g9S*TxT)z1ag2lWD7kBxNI=Sk#>&oFZkztoB!
z0q#S@Yg*_@k>{)k`D-JhcM#__SjHtpq$6Uv6>&G9euxm!jo8>rikN6RB4Q;tAdPi7
zXvT8arJxTO?{aUGIX=OxAdr)+Pl1yrGVuJNZ033V+6V4s`jlq+42D3{rzefQ;!U4x
zOd8W?6)>VwM7MODOZ5UiO{uF9^ihU8fpv~sjlUAH^1l>!G{@yBvOAjNqdvZE-_;tS
zjAjjFnx7)8OLdjtTI_n%#GRiM=|wP_pNqi^pt!=^gayFFUy+kc0=So*4l#W)0Der5
zw6+FdFIf@t*GGhW*I_qYLPQp14_Oh9ff<AdVQ$6JFDYWO$-cvH9t=7vc&!ZJ*ND)m
za1izgeqEg9zm1Eid~2~z5XM}bor9x3U;nVh`~zmT#k^uM)2VWQu=cv^U?R8j&c?^A
zfa?H#f&fjCA=sn%H5BqUqzdxgfZYg5yrD>^3SlrBiYLJgp$fv>j$H>P{!m=Y7Yohs
zo5ula5^Vrj9U$@11EJ<Ib7OEE-_SU|5$1nz0e83>A)NfqDd0g)0ZVMBl&3LmL$!U}
zER|%u15@I2oB~!);{Ce$-pM@$q|v)4Em(*Drhs+upMp)#i_P6Uk2AhcSd31{&B5q&
zd<!-siXp%Agq*VqxFlr(dK!?H+FyXa$#74)BLO{yUvonK>5Q*@uVOtRi#sP6-)Al6
zJeZKhykRk#6Tyon#Vve9!V35s&`AX7PJc8u$CAl^2W0ZC!+KvrKnSjo#rzBAE{l27
zVi3R^0swnwr615-K%94-)7sd6?m=xTpusklgPJpgLw6*YyR6s)B)Ao^x(UA;n~z^-
zEcsJ7W63uii=MIiFtOU`gN5EhmVY`Qj{vC2sFQxJxRY^|Gf&PU;=<-NI*u>hEQCv|
zNL(!${{RAR!{*TuXTHUbe;VVz0FZjGbIjHBUT2s)urV>c577I&iPc=$#8y3(-s==O
z9$SRJJRI_G<xWDr+1OAx;Lg2;4w57H=%}xGz1Jq!5g|Zw9S>$<<6@HgNq&!w)fG9N
zSgmng4=2XN8e;Fau{uLfBv$9=IXXVfx4|l>Zh>=V@G%J+Jc8hH1nUAj7n>AA93ghG
zjn#?l8e(-Cn~yDs;SCY{0I?ccon>;Iixu_HJB}0LjAKR|-$XOzq&rL^n?zzAiWXvv
zV-g2Qe6x+!g>5{sx~Sca-4wkSiq2?1!MECAU4SMKtc%aR*p!&w`-r{8#_D`CnOL1|
z?#CXC5n+g(W@ELbXArCHJh+&Dro|BFunMNyV4Yd6C0OT`hh#Ct5n^vAR%O*C`0Kc5
zfJt#GI9a}6O!==hx7oxZyN<-VmOp|miAfwH@eCWQ{rnbUwUs}H&9t#><<6YSBzC%u
z)%JZWvD&)B+W%+81P2J7X@j+IPbFA;_W!UuV`2@lci326`>!Wf*Zn6IA0yaD?486G
zN7ON5zs-PK%Vioiiw68DYG}qbiOYG}>KHfFLE5zMtF1Y0iqFFCB7ye(Oq=K*w)j&E
zS>r3-aPFrwZ|8#tmNy4%XAkcu@TSui6tNn+SmLFYcsj&OE%AAXH(t&C@!XI@9Dq0g
zSeuk~wrTWQyV}#(LW_x(K8M)xHdg0?8;I4J;92YmJ@=-)Kf3n)1dk(FCECu;H7
zvu6;TfnPhC{BN_P$+rxfOG59CMvmMKP0x)?uzVlVa0)){q%*N=@K+|kUkxqky`0Oj
zMG(f6)tN*FaT6U7Uy}!>o5eh5F)<Z$Fp=AMpT8CG5t#1~pxyLNY?8Q`e||NzhD^>C
z*gcmJ5Q6MFE8q(-JuGIW#l#cf2XieL!W-*akk-w;4%|%+9-81E{G+E0Lf7!+@C52M
zLV=Sf;^YrkNpn4Z;#1`BIL`ImgipitG0dmwl_qisyAi+Ul>BWGDBmjV-uRuO|0o<_
zL0<y8%7UJ^pm@EC@Dx;kE8rxUpAf*s#N34q!jG;L`3J#;%UMY|%=m#e&;R4>Jm90M
zp0|HCB%wp7p|hcbbWi~m79@y(Aib)g3JM6Q2qNrKq)Ug;goI`R?0^L^AU14>y+KeE
zu}~E(2=Di~_iS>rZ2bQ|FCWKb=gc$n%$zyro^3Z8>%rJGt$!`0`LI)<r}(LcybSTC
zw~<}Q?f4AlzNdA38t`C`<6}7PVI6C1h~t>mWBsa^p0R!@r}N5VyFyLfTQ>x8+#9e;
z`3>OT$V6AV8AvbdRQl_2D&3RFBRGrJpdqT#*J6%=>0~imEhbteg8@K&EP)e%&O*Ss
zd;;okok`ydXEV`hg)D$TM75l5_mfONi#Y>kn8j#?2oq5WVS*-;`M=E)_!-bS2<S38
z8yR4oN#6%&(rJY($%Q}~zV5J?vtWi>j8=#+R}x_UQvls=349CaBm{JAo{O}$&ZKXN
zGwHS=ORY1{*6UOt#M7{V#ry!Kv&C$;m@B4$_$C18MoZu@pi>ag^?p9m#yXQefHUcy
zN0#MAfHQxx#rz27R*QMTVj>9yb>s>FYHJDn0_b-L=n%00xyd?{z8}t{(|TSGfv7z|
zHQQOtIWTuwjFz)75mVrPu{4t=yx9^s0_ZdZbTC<jw71TrZ-X=GUPM+{XOUB2c>d7Q
zVvd99W-%{WOwI~We^nr3EME$+1bzkd2LyE5S%TbRok>3cXVPgIra~ZE1=2{Slg0cF
zW~9Yv848o50=$Rg5J&;k#S-`p(0K^x)UyofY@JDeJI<t26RydP0R7a+V*UU#+G5m%
z!gvWdf9*~cNCDK)63~_5F9_%;wE}5^9+L#=r{fHjv!$M9=WA>OX>0~#vVRI$Ysu~B
zr*>2yJKqcGeCl=Lz0MiJPr_IK#AhkwI2Q0ckK^BQ+|)YO*nI2wY3o=SylEZt!H>{w
zgSfIin&CLkB#~=vB0jKxlSnGKmB=;Lr}Q)Nxl;T8GYmX>C0l1&{5WfFL2-Z<I3PZ{
zWHAv$984ywshK4Z1LhwH=<K)}xz75Mem1^V>$0im@au9TfU_1B6c1>TK-llFn5YC;
z0XJ9zCz<~X5YT~iEpol}C4CCMf>LTjS#Jq=u7FN9X%J{-LFWOD0VIqz6k)v0q{D*K
zOJG`B0(l^P5du0Ju0xWnFX`v@WdE0PC$h;B@HC7|fNcC0lo!wv2neGW5rm0WffO)R
zErC;z{u2T^aHb(utS{*Y^<@5~)P}Oz5{RsTA7|Aq=mMZ|5D-QiiZIbCkjZ{h4UC#Q
zAEdA9;jWq;C_*%MO~xu2Zo?U5ur7l-guINT<M3(Z5Ta$1hwk3+AUBG5wXu6(z;O-h
zI05YRUfGV@^9omO>sVtm7}Fs{emCOpS>%&!zfN_7X*jF}h{PJ|XkC*7qK?&DFyvv@
zQ93ZpXRg9d=R?`u<vAbLL63QY^!*39=firINPcwnNl@p8SCGdrd;zfu29=@1Bmj}R
z7E}z-OxWw-AkgCgb|8`LQ@}K^m;zuL5=iHR-N+Nz@5~OA6(@6y&(yb=;`y2X*#y!7
zL7*oA?1~b|5U7|16(mq&0_iIFDzXjx-BALi5h%f8N`RS599;#4*^d3IiW7eRAFe_M
zpb{2T2vAc3>0+}7c@98Elt7_-xRbfXlms)MK)Ton^E~!@6$r8Y<gO|(rN9K$g@plK
zLm*vLUPE5Qcpu_vLI>mrRMG}21!e((bWsuJCG1~EA_E1$B!baOC<Ep?;$ZIF#A-%t
zo}ejh9cu7;26gesK)T@U4df^y=6ohDRDtVlv-LII`}!~?Wv$~{V3$HB?6?yvp^Wvb
zv1N?u+OZeuiodsz?;XF)e;kgx4C35(<38sC5@HI<TW|%yEg_|A#6IL+KyM=+Qch)3
zz?8F?x?oliP}hjpk^8WJ2XO;B@!W<7Q&G?DH3fb^0Rrh-@CGskz=0@%{D3N0Oc5~6
zz-as4kGzcidr0K8rH%HP$`(@^%p%BX`xYkn3V`>K$a%{ERLO!G0$K@37o9f+!v0+(
zG9v>ypT$%I(*|<d+TTJ#*nfaTRF3mM4Z<I%ajMW1&}sr{Z+{z^3gE*ifil44v6!M@
zS`bKE_&dlR><=Q5RY)1-GcgvE2xbX!Uefhnpw|F=ghW;$1Sk%WUfpU2=l~y1_b#Il
zP3V1`7V$Ws$Kd_UuJCVmcempM#MBGXWp!hLQv^PZgdJ_9+f&FeuMOM+;D-@`Qz8Oo
zQu;8yDR{$pybUb;+XU85`ffNdanBGlcrGHaA#i>Ryj~dt)Xw;x2lzXfKO%6M^h-WV
zxFxvHA_%8N5X^8NXYc<Z0vEJ_<={O6YpZ<U6ZlVp{uL28MBoB8Z~)xbk%95+t&Bg%
zW9C1AqOc7t2OkhvTjwE9;J*oaF(Pmpy-~>G+JHL<PCMbp9^AiRE=Ax1;EIA{k`l`5
z`u`el41jUorFxH@s|bUY;Z>Hg48}4T`~dk5N4|LX4GS*U?j}O^XRNc@*uUlRbr4^t
ztuKwe&6svT`8tNLJopOg-H))d6r7#2fQkV20no1ZA@W0%;0F7%*1jJ02e8+UC;K05
z0Iz>P_J3fn)zlXI@4DL8|H4<lGz`3Rl@ak+Yxs8@_^FN@9HQjf#Xdot<wRHKC98Z$
z*pF6|AKV2?Gzr{(h-#NTsP%t5N@f`V&Raku0Pg|N&UX;`IZ9@k*#Bkit7HEr_S*G6
zLVk(jKXW9vXqUbE?`i-a0ML#nz^_pP7y|qQK&5Mk{qcw{4>9g7b%4rwAE)s6Z}6&|
z@SjjQZG6G6`Cqtn`6*k`h|*=i^k0CeogM5CUcc4ubpkmHQT}s|Ry?&cz5t*8ygOhn
zT6=9g=dstO^DA=xO8a0a{jPwyYysMEE&$MGa|*d|B_Nc9{UvL!ed15-wMU#q{)uA0
zcLX1~#=bNOw8wr7nQH}nityUHdzUKRX|@;=E}5DpMCO?QRq_r16DWCj+}UCyO6-q#
ztRDK_1{QuCfweL|a{{|@%jhfQWg`Ly2>hK5+zDW*h`?$#FA+79_py5A2OC)U`v|PY
z{lXJicSfs#WUYT?Y;bhU0y_g74^R#HjR&aPBvy(boN+gw_y&hMpdIutrbN5vAIRk>
zQ<Bb<{OH;T1KK+O2B2;8PsHE_Opc~%4d8?gpmqNb_FD7*AO)h>Ct?2+_WGDnU+m2_
z*;Drl4>h%J#R_s+M4^)Zk~;aJ*s8&w!6orjA`j|To!marUA4kDuhFgfe#3@wzijRv
z#{c-I-+%RBTY}Y8SGU+nj0=0pgB@wHO8X@GqD+W?5QN7M<%3&jKSHVgldgiN4amo&
zWhLR}ke!tK*f4$&yc%El3CyN^9!A0|z9xifA>sDf&4z5!n9h#@hO2M`nW%SaV;H`z
zi!wQcO}Gl9hQWw`oaM}?=*h%CgrE<Q$d_P0L8nBd^G76-;!TA3&TNDkDN{cNpW{H8
z$iahTBBw_@nOp^-x=0we8(fG)KC@(osREGs3e1=2)Ed%N_mEL11XZ@M6Dic(U)bOR
zJVbCg$@B!*Enpi&1Wy6?H8^_Nxdl+&L2i$H<<Zqcha2-PI^f!wgXSX5tzG#NQXDM~
zax8|u!vp3UkCRq7QQ4263s2egOqo_~L-65y&uGdHV-udT(Zc}oPuRWaHD$x~gm@xl
z`WA2oN|k9iS=aKENmG(Zc9B(j9eYiY?59vA`8bM%o2>~1nj&GGrYL})ec-}X3Iqqa
zjNU+5h4FDXJ9WL=wcp{a4i3r#^xhkWZ#jifnI2mjJNJgkxXWkuTl|CIFMwCCe2?h%
zj#2RLtzky+fvC4Ea2mkN05bs{LmK@DFa_Y77B?4M-W%M^kAu7VKX94D`6x9wxF_gU
zo|pua+F)vI{@~kIsqCBe=h}otUYq6R2mWftQ$|xLK96}9yQ;QI>)UXmBH#~*Zr`b!
zWscx?eO>=LelPC&aPKff*$-H(D)bmwRp&?KTHWsbzrZ2N`@RLMN>2k;g?>Vs{U3o-
zz`o~6ZovR|1-u5i3=;Z~v1-J3=KV{$SlQ^T(F(`c+l=%zy9^`~XCF8K6HpdkVXq8y
zbdbI7vaAY(?G5(NTH>0fU2Ql>G5!nE9NVm<^{gziWC6g>*kH=~Yl10bUE~z3Wkj$L
zJ@GvDD!ZofpT2G)KVz@Ot|<y2k#|$zt9qf}se?SB9kRi>B@4@kWbZbKZJIP~+r6oI
z4yP4pn58&<xW2nT<Od|o3y=TMq6`69(+);^2V<Ul$BZ<IEzdOxlxXVgD>=tTnZ#yG
zo3wboNzm*dNOcIVAxP_H9C~4W5=n+J4d;Lh5On0Pyf{*1uIJt{cbgY%l~m$ysFKR7
zE48Ym`;z9E5~{SzR%tx;m#EUOI+gzhK)3S$k1}~~S~Ak?w1n5<qz#0HKZCT(NjL}Q
zB``HeawC{Hh$=xJQ{O%aDEiG2?lv!4wrV(EKeyRV11trGrDJr%>nL;T2|UWYV#z*=
zj~gK?{O`z3U{zhcz~X!ZGJ7sZnU^iwEr8--tL*Z?wj-eEWJj7;E!inxVj-(T@;Rh5
znExOfblAF$H`*jN?cU^?dz+ZuI8)=Tz}f8V#`9!0I!aUBV-oA1YuvDe`^TMgHryDK
zSm*i1bw6%wUb7Whhwrw{n18XnfavC9o`%a2Y>%zT6M*tk5tWb^*G=bKD>BNY+lpuj
z=cOWAzUL9$zgU~sjmU7V|65TuvYoE4&t^}>JtobvU5}Gw=1&g(M!ErxPWB$N(URQ;
zCLd&#98b_q4=|C_r*{Q2c;n7&uxw`nDgav@kpDue0Lo(fpJd0F&6aE$KH5Q6_<xb^
zxsn|dG@C5jrvc@Mt+Hc#Hoc>;b;;gi9<yY%?h8X!YyKiq6^ypmn~~gBhTCgnO=7j~
zjceSS*w}2rSvi_*70%{`n=Q5VY;OOMvxml+M1P6KRhl$5PuN7$f!su*imhY$ZN#l=
zXOcb3Gm~S@<2KLdz!V@)6(%p3eqbVJ#m$rAY_*9l0F*$YI;mYo8Uy;j5_O%7Gf!Hw
z8}QK{vcks@`1V}Mjx*aGvUixk4nPHAtL*Y2cjU@;tl8$0bw2&myN|oUb;5otdrS}5
zECuiF^@6F-&f!L7gZPBJ6V3!)Bw8V&iC%tWP_EI&nWt>D*TD26nl`8|{}l}e=_C>p
zFc~~kHG2o~VS0Q7xokS`N8N@Kt?8aP`7AfeIh@>Yj^bpICGr+TQt+Xr*5`lrF`oCu
z2M5ej>qOtT8jKUI0Nt3j_y0uX0h5G}CDzA>_!x!{byv#&>4P0G#wp1J(;6qstrLAC
zV+2k#>4R}{*Z)Lhq8W&fWv&nNIX*_>LsMA*85+$G@twWApwBx$51OesO2v_`17G8)
zF3$_#V8V8oXYd3qif&>Q-2}73<Ew$kS8Ch&u?e^2^g2F@hx^SckB7z{59zRS+1%&i
zQq$+Nc@*N%5_p(*Xj&g}?&}a6(&=iFNj%%Q!SCG~oX>lHT!WRktjW1R35Iwgqbe0d
z!uO6DMa4!V_aI}CvB)^&UgSO`T#!Uj2yt_hVpSWh!DlT!z5D^Ek8#S8!$Qb-0KFjE
z2id}7?^-Mj*-n#r%iz;*v+o^KG*6?|II9l9-|%&FGuKaHu(gQ%fF)1_?AQlbIF(}E
zTRk{4!@NEUSU0dZ1NbD*<peB(OaK_ZfkoOt{vrc*JSQ}=dvK_0Qxv=P=o2jd0xyI~
zr)Rj{8(8!snTgilqxcBmZ=L)Ftiwq-6sQ<7m4RZ`VYhm&t~h!T8i1n*`FT4(NzN2a
z9lId5Wbo421|d)s)Lew#lhjiPJ-3~NbqdylkjQ#6`JS7_Ofu8)fs2Gf<bCE^mK+bZ
zBPr}S(->2iYph*2H@fheROk8}L)VCsl<)GB9~rS$mq77vEeV;l6wl3KCYgtETANhm
z^eh2IxFn+cyG};r53J{AF_X*;59W6l6LdgRasi3hEDz|sCy-9I-auaBGr>^Ngm}np
zr=qIl5XkV(e?%qeqGDut({$SRfdkn$Z25CZgTJ3^a53-2Lk;HOOx;r$XAd#i%CZzP
z8@-a0{=;6`B_GZPJ&&<ER7Yb68Iyf!Bm|Ke$is*#RGj*UepzIUe_W;J2T<e*`9g_5
zEvf(WbM=4C`|F|lbKOi#2^>8`CUTO9%)|jtx%E6SA|s{(pC5&InQiG~?hRkGaDN<E
z{}Fsuhg5ldbzrT@Pp}OCQ+w(Ll44X6@DtPcP;sNOp2fyn@qo3~Dg!>nS`)4;G8-H!
z_dZSZ_(JSdE!j<`TCypJEI_ZdCjH;6HR;Ops&$UVHj-@SSch6{ejJWHk?n8_4gr}8
z$QblmYjW6^wI*FfM1*+#+RWuA@60(#l_II{sq&6Ole2FYvuoN__nxTiKCOKjC*3Kp
z98^N)0p`Op=04}Eg<VU7KXgA=%!`Y%?iw?-f541`12hv}1O8!ew*^+C_=Fa?pD_jl
z8av3C+$JIOas5c@)FrVc%I@tIF!y1`3Bf-XvzuU0S<zFsFX4_Fi9~&R2`>cveQk?$
zyv77f1}$ig_6?W^fT_&ec-c2@m<#g*9gn^YQKe746~E}NM-~!wD4T+&7{6u<KVuw!
zlTA7_CW1>OlV#vuaQBE{mLc{T3OoTLaE{SUo^)4|?j*(v6Ic$~Gp4|Gk;MdFf+%nq
z5<0wxO>S`E79!van9$yS0rMbOC4;%iKg=ytl{~!dHTgv9x~1HP{UHq4g90*W2D>W!
zQXP*AaNLWmCZb=)??LQJ*_wad#kH%3t?UZPIkFK6u&YnOR)v3u!uO)^Rk72w^9>*8
zud;T*uf2k$F6<r~SC1xe4tPFTCo%a0ai9tQ00)Q2SPp6-D{%5i>eywmC5Kbk6s(ln
z>(nJk`-)C=WH9>Dprx<d(bd&K&(jCzTWU*)w%kK|6F$;<@}&Yxdj(NDK)VK*d8xlG
zbNj_}0-a)#=#!x*@+?*O=Li!&%r&nGWC(85C(^H?Po&e!tE&hZy-$K>AnDu*PETf5
zgWKu$Nw`g%4`62Sf@zc{3mG5aVSv>m0(YQd3fcf!Lr_nTIL+y!%B{hS02A(w0wkac
z*G7h;FNrUHHP=F<cAl%;o=C-+dSVg->WRBC)I5Jd!TM6Lt3b?4ZM}jU_;_b@D#g0X
znr^`K7EJ5O6c;m@i`nj%jJ0;s@yriW4Vh6u%0lBQ+UrOQUepD0HL?;|dF+P6M6VL%
z<M*<(KR?XH%8zE<;AJFub*!4`8#-30HA0kyw_{V7`g=~~Dd%+Dw-|py{gg@*WF5xt
zmJ)2;GhjyJli1EuoQY2@!xqR=%ve-fhVF-;IIMNWR^@$>Dakv`O}Rd{x1v{nNWX^u
zkgf}|9v{*BW2zb0BM>y*0cwuZ0Cs|-fZdU`0He)udiQ|oW(lh=W<gjz(G__#g0P<D
z^~6@wd7K&yV2<?emgZ<KNzM1AmR=cKJjc8qr64`PXr=xN<})}6lZ-qD&Z%3;AS`-g
zrBb{Bix4YS)6f{{g<dOF`uZJRo!TzEQa2Jg9Z%uu(n<|=;|mPdAEu_@Fn6Y!@}Lj;
zaOYk_=St_?m}vt})VXBkbZ&nP)f<}bZz+poh_UR#!8T%>*_AIVV5_n`h3&04xCXm<
zskf|-Enhp_vN6tfKh5l=BDY!dI+!=WyqROZcn!NqIl8ppDE7%ant84|(@hev0l?_w
zhU)g_?c7dJgN^o8y4(5sjsFI|xquxW{j$4f7=!#{*xZg@CEi0R^Pv~ZYmwII)otj5
z=26;B%09?eoJViB8KyOJ766#YcC-bDFH=ihWx|&?0%q{V7;_^w>PXpiX7=T~2l6z&
zuk6UXdE+0)Z7kCvfc~W!RG{WavSlj$EQm_iAKB(1N(DXF*N07dw}5F2PV;jO1#e8j
zTY{OF`Vj@ME%S7E$2jvd)A%U#P1d{(=2v6h+A%-#D0f53X+Fyw?GZ5Tta)e5n_+&V
z%wd#<>kaP4l@m7G>;-l+FipoVBvWdbyGdLJv^{#%3DSSu(>>N~N0E=~d`HGAQcaD0
z!kFx5AR7rc9-AHf)FxA&HX8UQZ!Qeas}DKyK1dh7m|z{B0{pedaUmSvVjXMj8|(OC
z>v#eVcUi|3aJ&sgt!v#cG-Y<?OA2813`eJ)2F%KpZ4jIsP3h>u(?zW^_!zS3VoAzb
z7y5-2>3pDREhIZLre4z6t&Hhtbu01`<aQ$pToHm3LhOFeSIj!)wDwc(Mu61={g|ce
zn5CNlYL+$=`7vjfA|`i^Ne9-@!iE8B3G8MIdji;22X<S|$(?Ja0&8Na>I&Zts=B)O
zMqbWIHS1!#m#%MYLyaR;YeIEYs8rv2cU$mu={%D}h^sNzg{CPX#<M|bG4w@VA%uEB
zw3vJTP85IE09*CiUD#e?L1_WBLas%xr7!&)rcS!S$aDAzuYCN}_pGS-W*{r-8q0MG
zT-(C+7C@@zvjlxUTF*aXrjlZFU}cH7ldb7E7i{h1w<AvgjdH<uwlsbDvKctFSPSK9
zcbc4?Gc&TAf*I@)kxF#4&2>0a;pFTA3a6w8Aa6z#&QIZ5V5`E7!S*tR(^k+1xdFWj
zC;dDMC*2U_`K-e6+&0WG#@SmInDnjzbG;27062l*0^WhV6%jm>Ygj8Ad_1;hn7gsG
z#kQplF8w0}mu>{|LRN5M>$Qa~;N0K~%|L=D+2HK}P9(U11Ch5Qf@cuiZ-X;z3K2Y+
zU)mv6(T6XfS2GpT>Dj>J_|Wx}X>y+pRIv^oBA5>cT5#=Uj9#Uaej)6o8;R_}fv4v@
z<QJLNkgo<#J<ti<Ls5HRj(57l2jaP2rLKb+%$TW(GxdO+JxC=)Vkq(<B)p3!KqYF}
zM5bXILn7J&Iv}-eBGNA=5$WzhUUL#*{<R<F?19C`M<%sw@NNJn5nRAKk%JMz{RF?t
z2A_>>9)fFA=!Dd<!KGhHaOv(v_Bz3dP5g-9OSsoyz|^zB`v821-~!%-d=wEpgL6$?
zY)K+PCyB0u-BUyZOK1W3SO{t5cSh=4LeejXP*BPV$o?>+D+nz$Nf0UqQ1f~_z-dwE
z^%19%IZqZ-qv&Rs1bk^)@f9?a$xb9jAzwsf7b3e780&JPQ+5ybXq~XfAYWqZxtwI}
z(MKU$+y>AkXfFPB`5BA6YW;KhaZmSMS%0N4E=tzFVqAKdd#dh+aY^*Lic9|)7hCD(
zB3p1W9(&JK+}**s$R=9MX)uQ^X1>L6J@#IpT>?Qsr7c@M9hePUJrx*_Y_@E}&qdO1
z<zs}{mJ%s;^uyR{UEhajT{Hbz&)zdRNtCgEb*P(ztqyb(k!L)9oo|Gu;J7@-s=`|s
zPh|`3iD56K9Qx7(l)hm{S2qQH_+<YArF_y8Tqk=!!7JL}y0p$CxGt-ckPI8#PC?LQ
z5WE7$nq!@Ldojm4)=oyg!Z_L-w`PtjTf*;xDLl-*sP+R>2|XMVq+iMu=xCyY*`Kjc
zW=vD6v1N?);~OdF2|)V@q&fZ3vg6*Rq1#4L&fd+Oub*UgAWiG|W563djxXXkk9DlE
zO^#!#V*P5~f3kin<9Fk9dhCPo&bLp}f;f%=oCsr`mus>ny&ICd6FzL>Y;f6JONGJ*
zu4Np!Vi{BQG}eqUowY;A&jkGikv~6ytE58i$Y0I3@YomNWu-0w6U+-(^Zp*-BE#HE
z>}`PK(W_L_uOK7owj%G~Frrd!_mfC|i}?i17K_<tF%ea8Fs6eqXz~Hl^CcZNTD5Zz
z8gCH9`+O<$I=(G{9lrds8v{}OqdKMldhLGFk00#n3Zqj}??a?0wz_nuAcfE?Dd|^2
zFh%qK3<K{HFj`g_H}mOz3;Gn$;|_=el*L5LDgdU4CGY{51PJKzJqS5teM!F>UxP-X
zd>(l}7XrF@+i45>4A7GTVZYO2q7lew&j<U#XuH;tu~xXJQj2ZiJ+bh$&TXv)QQB>x
zISX7ZC3p*3N~g`K$nVhfv{crnn@%hJVKF*vT@Pjm_S29*a$@W~;OM`pfPM$0nfee=
zF$$%t`!M7Jdd-ydYsp!<UC5za>f|Tq^A__tm~9sGlEq}#i5X`VI{lY<6QAR^1P%f!
z4gp=^ha-PlXVS03nRG8BpX5dWU;kLl7hs;Um{%+&i-4~9Sp?egIekDXwT_7`DVvUn
z(~<K8idO2qow?b$CH)bY5|Gx(;BMriB_Mqo1f+Wv`OHH)sDqR4&8%k~X%M(<L0<xT
z#)9@(P!?+s=@c-Rz^J`+I=m{}UijRCo!eK7qV&~b>4I739oKM)If^q?K+XbGKx<+q
zV)90>fR74ff>GZb0#lL-==3le`Q9cY{RWbeZXfb>Rt2cPw*r2$`ObpA0`!~(y>3C#
zDv+7XNA4|Y9V^>FT1U!Rh%Xn?dv6JtV_?b?-#N?7@8X_gW&`>Lb|Mu%m!xqjIF7M0
zJV$^eGSvBH9P%T2)Ctmm583ebErBb+3C6T)G<J+J+0Q_}#lI5C2RUc{leip=n#$$i
z(+8dFPzqP<pRD5xfPe8gE{Wq`tYeM+Y8^jp9c$k#fMX?41-}DAyuQ?<qI2Ce`288I
zrdMa+w!N8u9f0Q{F}aj#Dy8}jkg|UV(6piM8Rvd5zoIXVGwB<5<gHY63y|<}WCu-k
zlx(>eI9HHO{0wyRnN!x^Zw0A;BIVQp>jC5>11i~XIQ)Xlq+5ez;zXH6p7PRg_L0S$
z0h4JlYb{1)_Ph+^?lT?vY(AJorfoqGQ2nS&W~uoYBb7?0^L8|q4(5*_3At3NHI+I9
zMwL1VrVNScg!drwsU<1>SCEu$GZOqRm&`)2{KSIJ0{YH^wg3_)T4n(-pIZXIgQ)@m
zo$?++KC`~0{}Er(aefXuf6XY|jcOX2(J51iuK&`4{seT=f^>jBW=VS*&A|k~d;vyJ
zCv<vzq>FoUTm<F^Fy2*_c?+M>KM{1xwTBLwPGjjT8$`asiC*)N{uHlyNVg369w#`D
ze3Qa?%_G*tm~U-x9loXzJOpM5@~aKb!OPug3I2_2wf=Qk4PvZI>T={lgmDVSN33IA
zPD9x0Qo0KHGs4!7?O|)HYsge=b^Ul0`8UEgMDqssVoX8_1<AB~4|fCEfnq-A*bK(>
z1!ax(U`(4wDzXzt`a?Yx>^iT?I`6QXSF!pOb=Ns0UblYBgWcLI+iwr{kA2oJV?ncx
zL2Vu@kr#1X5T})_<7znW8)Aw6UCBLo!}txrnqOTXrjebl4QWUoiwT!FgJ#@sZM7{=
z$5vbNMkGGVmip_;=g0UhjEnO79~f61#*;4$=OAyQ*PbB#Pc)%)+7t5PBwFKUK=K`n
z`3uZxi_xASjP{YN#^o8OL%;xf8<5Ifl#=zL+<JPs9yv@^N&@th+rgxOc^8bTq3v`A
z8EPke3@MbAq3b`uf&75=Um5>TQFX1P;0FG8gaD-gc*^cpg#ho{0NR8eCV)1e?MShR
z0BIae-@{fFe3Jk(SWViB7a|{`*J_gf0;@^79Z2E4%ztjnNiQLOU>%+(cr~U(>v%Ep
zEP73e^gmNx>9m`bhE%jENn?7Sv6#QXoVA!OI`IpXV@~XpqyT!}68IWWSqNw?FGZe1
zPofFZKaaEUW!KQ%J8U40y}+34^{ldtC3lsrTy>iJKyVhP{gVrw%dR2d3)b=XfM54G
zj>YjV>sVv^tz$i{lw&1O&pNJw<40EVO~-ZT9lsgO{Z6oYUZo8!xt+V2C1S7dAZNW1
zmDa*%UcwNcei@D>WBsEb=0)_H+iwV74!t(2704@=l=M3xCEZp;e-cYm;mOwNsSspd
zwwPmj#pg{6+Gar!nYfJz=v6=*7!yQn)nJ#@Zgj**D1`2Fu8`s9!Cz3V*BGlzk7(=*
z#<X!gjZ}nKV+1uH!?Ei>g(nPqtg{M?9Pv2&250HknZ`01(`F@Sm2uQG%bB~>LikF<
zUe)@Rs+6Z%+NxJ08_}zArGJscSL?!}r=0p5S<%Xs$+FsDK|cU`TOjQ9lv9{!)k*=g
z8I1Ol5)kggUZM@_Iiv`fDoA)Q39@&)^8#p-1yu#~OfQ~l0eAr^3ZN<yxj&?l>0@A&
z>0!uKAX8mv)*)LgY3W~vv~<rS<?-W5G-y}`?i@ql2@CoW&;bk5b6bI;Wts_r$H8cx
z6CnK+Q==X8MWh;-?0L3H^0aKLC47V!6^AkZx_G1`Puk!b*iCThb|L<pjI#&_2)^B7
zj)Qs6V)P_am}rCzm~ED@AHv@SVXQ4!jB5ajY^F3AKW!Oj5~LC())j0M@{|oO{T_l#
z_cD@{8)1AcwwRy5d;lgW&?^=cl`)`2fYeNhFz!b)ZN`2#Qk+1wk#IAGdnp6VQj4hx
z<~e#v=f>9%{WZEei0!4I;TrGEe+r-_fYeM!VON<<btQWoS#DWNp8;#>^h~xojy%nj
zH8mMLjaz0hC%_!E7(I~{COc8)pY76J0f15g5kKJ{KsS=9Hk2L6?ASbJ1!Jm$#@aHb
zJv{?C9?u6xaM}*h0kbCC@03xzLfpxHwJ?q6hpVjLJXB*1)d>6L@1byKa_d-wYZ=s@
zF2^TuTpy?HvmHA`GjO=ZVlG0a9b~lS?nQnB(=aC)x4K|fTTCo*))7ZLZcw0;fEq;!
z6aw@pAWg}41o97aul-MfS&Lpfne_W8uXK89T@ydirX)ah*IUfbU=F#MpwUxnfwJc$
zdrAzTb%6BZ!ga8|rfYcLwmrjgLhXE<vME19k&o%Aso+aPRQPG#-EI3VV(K|!SzZ4-
z6I_4vB<yG-j{I$zM1Y#_e_`965^I~@kDR7j*I@6NZ>Pk5sx{qW;&7HmiM34&a|Vq5
zC{g5827-X50n!du63{p72im9KK$-!F+`%#-_OK0d31S@}rfv02<Sap2L?M<2W`>PX
zjTpy>p>16;g3SR$ZaM~1vjFJ8pfg*?cJ6sie>2g0UI;YznVHz($1l5a!zdm9SWsq;
zB^i%?1C`J=E|TXUc^!UiC4yQmZdb!{wgp`#vrc5D?e`tzFF-d$%j`y<nFmJa0uk6f
z*bP1xoqFa5bk{}njCNGJOo^-gpKP-gEIl4Sf&(>;9L%6;v^~FzxJ|=bNRa7=Fzhtn
zMt){OX1+}zF9Vy%TGs+4@HYvx%+5MH=^&tm79?+-$y$5(0puS*fhbuUFbgauADAr!
z(iSevMKG<S1oDF!`=i@uWxyP!2egF;-{*gA07N!g3ezzLfL4GHz^xF|miqy6i6CvG
z5X)%84a_YjKbXfMrY%>P%V3hPB*s0U0FATEY61EYX1Y*_nEr-QWOIffc0cxhh+T#K
zXH1WF%R@+bdisMKfP|-YBhwS20{4MaU(^G4lD>%5{D&JbC87^P5s&>me!vEngD(iI
z-ScBl;9&&4Ga_&bfyaYWC1b#KqmtSwKSW}K7<WP<H<J{qGSPwx0D6*AYNr$^4*Oe>
z$ju}KW`fPI0hn`6hS>=QNC0)#ZmGS*xd#aE9y&|VU@*yMC<kAWp?0rNkZ@^-6L=)z
zEp31d9|TvL3|oPl2~L~KK@V;en9)d>z(!)4HoW6A*)nd#$oUAy+PT7knqr^9<FJEQ
zc<kr#Lzb}c-w;@v%%`5fV+lGgf^Zs@3WDQ;zwg!9O2H}op2TJc%yjI;+n)TQzt(gE
zKPRHQpWg>!!$ZGp?j6Se_@|971w9u?e^25+%#+@?F&K<aeI{Ky&;D}__JJ#NaBqYj
z?!z>yIS2=;rX0*ADLMTN3D3fK2t9x>>`dowIVP#0;ItWvWXr)W!6BAR6O8|fC~yX`
zax4PlD6)IT&*M9B;K#ol9EOZGw9h?cCP3stBs^0a37*-S8>U;_)!;J0X@C04gPR2A
zA>mkr&MFiz1K<eDxHG^wYQ_&IOHFwugu;(iP7_g1jSmMOp3V-P+?`tsP189Eh^F&y
zY<TEQXYfAHeBQ-?=Ce0ZW-_1eVKoY!rbD{<WFp<yNO(FzIGl=viD^30NMs~9^+9U}
z+Iadvy_}Lw+`SQU2t~MrPO~$g_t?O4Fq^>I6u<BUo=(sip1>RBn82gKh5Mk@V7Cvt
zS~Az-D^IuV>2fZDnFR9&Pl<RO;_<CG@KXgj_zp7K*1z?Tc^D$Ikgy+-2|%X1#We?a
z44n3`!yeph&CQ$$pdVm2%lKA+@ezGsAcWfK3-y8D^H_bN@t)wreQ<LpcR!j>9Q8q8
zOw<QGunF?mX@vVdebAc$^+8{v%%TrI!l)NI^?`H?$V9qtkZ>P7g2M$!n3(#&kVs!}
z;pu2U*qx3(=(r~88WuGFc5yFYKjLgC4ls6BL^?lx0sb^lAYl4ifUZ54u-A3x1TrFu
zeQWIdS$pl|m$BC_{u^@774|{i{cpuboGm~*so^~d?V_iV`>p`+K~?MrU|)p{hGJjz
z26uxxg8fA7%j4p9e%B}Yb!4o?d72+)!!zBT+q9$v_{_77*Cc=g9|5(goI@r@3D}x|
zw*$~}>V|z@Z$G3ie>JvpP}8pFZ;e@a+S_+>7t9J=YTAK2(;mQvht8~z_ssf@=r!y8
z2{D^l{}Q7%=rrrnEn?QC`yL6;dMdtG>ZMG^vv%W55=;gdp7tSw-Dz)&PSdVUF@~JA
zA)ZI3M9KMROFq|(y^1^v`$USYZ6_0%hJDT=vx`z?=VmscR@{i)0`CV{8KC;~D6;Sh
zU^dQxyV(+Swh0Joh5v=jjY4oQ1lwEt=CuhF3qVWzBBDP$7um-H0d)K^b;=vDrGp&+
zQm_QIPxJn~qRfNZwoQr+E7P3w5_bXNW6s!BwQcrV8(y2k_jkxL{I5sUV#9fy#_z>l
zAI^PlGMIr*7OU#L1y+?ihOE(a{TJ8|xQhj=f*%4_^?pFs{vUx2*v_8h77lQq@V^GR
zgCx|5G3P!v%KY0-ZZ=x^n{b?NGt!wX0}0`*A%H4WU^4bKnMyU^Psl24bM`>?REApm
zOcN)N+dGGeqs+AfD9#2Q<h*xdNn`FCTYxe?0zlbnK?<-j!rqVl)!3`#N~A<T&hFSA
zM;2kL>Di17Pu=@zY>CwUZ^f1hp7K>blT_I>1y+HXp9$<$dw++knEBC%$!Xy0p1asA
zb2^AI=f({zI7+hIB>u9u(r-sAMHyRR5`P$2>G;%2=2~J@r)CR?(Yu*TEBGtio`h3X
zIIA`55_Io5+UBQ+J$JELX08E@jZ^T?nRyla7i0_A|LU4dYSP@+WCVdrQxk2`KO>9P
za#=+|;r+6A9h)IPoD7>;_6u>+2ljGs3fY>Iyytcn%guFQs#(&Rom|3NxSG7DkP*7I
zPPnboXpa^EY5-V4kiq~|oy*8}j3@F_721!KL0n?X%ndd>EvT|&r*=AtECIu!GLIoz
zj8T&#KH04>*W2tC;iNCw$-!A<8{n!0vs2><yu!4yWLJSH0$EkV$4TQEFww~_H!UsM
zv0y4dHmIxF8DuG-|BJ2LY^f&kXi}x`TURnkI8)Qk!C6Lj(|SmzMrp{ECh_aN6~8@N
zQDZ@qvy)eu#LuTz{4%|w@!N_l#(6&~B9ea~yK=2as;O!#vKCCyPHqKbsK^VsRwTH>
zRIw#-t_qYytNV9kIUwz8k0ZIxpEG?cO?69l34!}VR`@@W7r|DII(;imHIZfh1M2}L
zz*gDm3#%^!iavj-rluu35lj`x>N<G=Sq<iYBfHAfuw<9wBRBxIa_|rGQm$-Qnc89t
z*$se-K~~wtLiW{M$*wfDELm;R)gh~;_c!t=80{O+YW?R{TDRF&o5YWjDt_F$qPYrZ
z<tWsAoV^~k*;Yqswlya4{i78=^i?!<Y}U)b-$v3Ra!D_CzMgB=t4%$d^=3fDJGoVg
zBkPP@vtDKD+N`I7sYTYh@c)af%g#DGY2rJqbMAI)OaseyIZkeetsLlC#Qt2#t}*p3
z*~h_@fb1nO@nGJ_mF#L0EatLxzKwTJA9uHV4#4(!&P{A_OL5}ex$1F!m_37+dA1B}
zmJX+wfHttT+!ZZg5-fqO5GV-&5zdReohyMgK~usKcn;7w2xte}@!teYcHl?NKmr%H
zKJ*dS`|+VQ{|Y`%<(`D~!N;qa88a0prL2>eaWVlXT8exACnxf;-lXHBr1hb9WGCT6
zD`YP|8X!@t8KlJ7*`%4hI7zfl_Tgj-PSkL3{+E*+KGMxme3Z66^b+t?e5eoJ!AI{1
zBEd#XlP)gItB1%2<7*x;WnCxc08VD$M01%Bc{hrq=zcbuB>a@YkFFCR;HMYQ`91Mc
zFC2_5d`mbu94O@muYtYHrknB}Cw+4|u{LQgF171Ne26!v%XOxl2R<Mw{Lv_3*E+iT
z^!%E;c_sRX^3I(fVnaS?ZZe7MedW@V%5BWMv30qM_^-+Ss2rDcXJ1pH@=IV>mPPb2
zMf(K#6!{GK9QgwI68Q=V7bTI&@P)Y9#-20fE8w&ikH5g_^E^G5zXHfN*zZxXkS!4l
zv;k!ApU%E=tYgvaJ9q5<sC;FdRfl0F&hBXDIx2{>*({Q8ErEuBn@nStzqu;MjXNrV
z)r~ujfj!OBOyLS4hr#Lmv=#|m%bz>Ij^~Qzm|jGkGt@OIj3MV>=gu8+@GmdrNLK{$
z-X&)r*&MMB9|e+mjnCu>I|Mfuf5H?+V$c_}gi`9cx&-thv>!jgOn!NjU*d4oh^%@{
zVLQ0fy~&YJ<=wisTpUhR-cvZ=NqOa<7;+Ts+h7kMk>#ZqJon7mWMc7wi-gPg_~lyn
zS<QBxYX-#TF?ktO<7+U$>q{ad2hJQbeK9mqSobbt3}wVxU1LZW2iGX(RnI+hHko*Q
z*2bND{z^a*E{=Q+>F*KwYs`ztSwYmP`8}YM9?-EIplpnY<?~=pdoVvlV7$Z&f}x@b
ze`)?-qNFP2d!+RN5?RV7Wcc264p_J4YkmOM&TVTp4=nfC)N)VceImVF0i3Cs44?R%
zH`L9t1eq2_k2XR2k9uX5oIi`6#}s0$4%G>2@DPInltg}n$j`_xh)UFy;BSxQCF7~x
zvV$)lST=QP*;RR0rI#)0W@6&;vztWZqZINh_=6;KC`v+C<MWqgeA?ijOO3Oa-qO}(
zi{Pv}tcu_)g{3D)r4e=OZ<f$Csf9m_O$?sFR4c7Gre|1bGD}2GVisO$etP3Hb}EkS
z=2INmltIp+Pq27%wW}+O-n-B~f%_@zPYW%B1#;H)H`;Od2o41*hy1~SR+<9cS<ls#
zM=wI3;pj9!zu>2e$_@~!y_AFA+s(3Xkuz*;>EH38C&gchEX5SQ0&*s`*ypk3yEX$>
zeHJgz-};qvrtk&0IZC5|xd04Q!gIim_jV_GHHu6o`aEL{297Xxk}<ifgq(%WxzsbC
z$CgMMST|t)awGfS#cU3dD=I1`hhd!Qe8IOK(VZ)wqV8=NFn?Ol2Z8*GXF-yXKf<0%
zR=6r){=rK1XZJNthq<vUWA%4x(igE+NBqlhN$6>~IV)d7`g-Z_I0=8f^a8(M#IBT0
z<GU`dT~%yl#}YEU>uWY58Eos9v89Us-4L5eE`IFJq;CI$n~->O7)TD6FjaN-V)`>V
z$g~FXS2%|f!PZpgC}!a#nvjIjtb$~s53_Fpd+Egfyk$>tBlh28C-y4UQP|6_HzGSP
z`%K=*`Oe1LO?NLCYI+%^Sq=Gtfzmk9z^xr!U1RimEwO(L6=i<@^f=r^a6M~022SPJ
zPVnh4st)E%>bGA;l%qA}IF6~xp+@?Na>%p}@-J?kW+@q@8b4ZwnzP1KLj|jeoIoG0
z#&uLfI#uIu(BYfJs2V?Er)sF^nN&k|brIQls}X?wFW8FwWNaURd|WLI&!oQk72lHQ
zBLtzRdl(bi@mce9xWoiZ1}_|$bU6NOIj*Nz_24L2ZR9ufWr!{P)4bv+T}MRsj0t}m
z8Nbf=Ph5XnOj4bI`4wCuOKcXn_sEU3f+lQ;y%YEpM!0f}9-w?oigPoA$61Fb86$^)
z986$Lfv-hk2%HDOq2V)>p_@nW+~T=yS>LVm)eV@Bz=qqe5y@yuuF_jy2N?)g{CZej
z#egb)$yR(KPF3-bu~Wr0uScl3?5;*+=dE~%iXXDUUWDkvq3+Do!}e43YCq{a(SG&X
zqU?o)`!9iXFMAwrB6tcfD}Mq|Q#gm<4>E=I!DwqaN!d=j_YP1Y(=m^6X7DjHm83tn
zJT?=rK1mDK0Qn5PW>ETPnL+6~A$fD1!N-k{8T``HSPqTJ&}axoG|uw9N%OnI<A$6(
zo-jw*z`g*dv&kdG`@o&~h>3s9nY`wyUB8VjF*Mv0ZYocjy}0-q7n*yyc#ydl<y(*f
zIp;q6OK`OV<||7#9bgE$O)TB>l<Lo%RYQ}(i<9PCAZnAXglRy<lwl*}8}zD}^v_c<
z>25{x=UTC?W-1hpSPB)Pa0e8cfl<XS67Ul5e^2ioTP(;IDnnn*;u}cc6l{?_h1<+g
zN`4pzWeNNetP-zdv%o<Bxrh_J`0Tx3)Ajg4W%dj^o^`*bz2H^1r4WCJx`}vKq+oX4
zxaB--wgB`6cxwsP8GuxUX8|>&3MyPv<aP9_g7iD6f^^-GBDq#!yGg>|K1*U1xG9hj
zt~*l5A;BWFD>sXT56~Ny#7;nqSVUTE*C6}RYY|D`p`)v7jXu1F^00>Da2#Hgn@A*s
zHS`uZD(37S>sY|rIXWY;4Ke1YQ-!87=53HSG1h{sP3;CTZ!Iv;ycwLwq%J<jvp#0c
zv*4aGd+P+uJC<{AxK*ccn(pSv+vrs|>DN;@>G~lFgbr_+@~26Nq5f>Frgg1=dDqgo
z8X7~PaXlE(h>L+pUWZ4coIIW}>F_vUajn4(2iMZ#@<YF1H2Tk)sleU`rgd5qn!D{f
z4by@hw6-}@l~ear6x}oCJ$xDKt2c8_WM?B!V<Z3YNB;V(x>AAqFsOR;K+02(N{IU5
z2D*984s@+P%G$|~Qj-sGtmeE8Yz=Cxsksh$7QJdLeL6Lku0N89zbk9(ycqMG*;_MU
zo&hK_^#JyRjDS6n3OUJSHD@{`p0^|hgQ*D#P0S6*bC!hk8zCXxZAjTj5;VPuac25?
zlMaa;mP8YP10W$_U!-C#B&I^*1xsQCm|Bp~EVn{-SrXE3hJ<t}NI4G)r+v3%k$Azh
zhQv;QiOgAZfcv7(|Fh;LOH?+uLsZTOAk}jrngr1o!7$uVo7muXlseY5F~!j2hXA{l
zdmpaCKJyBO9Etoge1MGfk74sNdM(FWn5Vnx=Nk!lCe?S0+e_ta{~cyvoq&1On)k(g
zH0H@NXW0~|2}?TdUppskr)dprw}lM_HWt`TPLw%Bi7?+~j<P|$X3fW9ejnyHJLd0W
z9<evPXj)^w2Xp#5LAf0o;^vi(PQ9GY{}vDB!wR@)gQTG^OP!>@+``q}j=mH%i?Zuy
zeF1Kx>n~{TVRmX$Kov@dj13HEaY+9dEg{_?q!tdZTpT8YXVk_=M{EYC9ibJt{ZTt&
zhaoAGb4Lj7jH29SHsOp~1>`J+))9$2kou7Dtop2lHWdPoxfz-J!PFrkO=UY|i%m%S
zCrC)TAxLc{6l9*gtYVyn_OjVqBVeAeB$5FRf`otrkp{Vt7zl~SEs05Bu7ZRbqdl_K
zl8}BYB%~XT1ncBl)>lkYjevR5lIRF<2qXj?jObDntrAI)*ltNo1ydIiYPSx^HcLYK
z?T|nhG@}`~DpwM_&D81v^E5!sZC8K?qR#D$P8Gw)G4~3vC5rBG^OQ|UzJ~Q?a}%?n
z$hEm-bhJjmECxjHB<P8RzKIdOL~P>{vB)~qScow_!5D@#!{2p?rk6KSLuEJtOz8e@
z))gWhze|G_tn=+Oz&hLBi8Rj%&T6!QfR=#K@jwU6^<CV<<p?l^!SEyNbP%fHGt05n
zvOSFL4NQfO5nYgF=#@}V15J9l8Er>Xi;PkktVjTjHDyeIbx3IfjK`*x4aA$+p{_Ws
z{L<sj_s2q2d}f7p{2kcqJbvrqca`<4vFlyGe7M3oo`B=l)^PyGOL=MX>0$jH#{nF#
z1gsv=x%LK{M`g`9-&>>TwmZ6<YyN4o247m<TZ=INGbp<b$fJ-OA>f&_tg;`a30DJB
z*>yfmqwG4H-i;Kuq;)jap)5ezAGHB={G5q@9X;<s+D7`<#nm~>5@4+jptEN>0d(#h
zgOrR2pf94Vx3)T#EWlPrl5t37Ys-<u{jhv>z8GO`b!u3MtxgR0AxYQ<`Kj}ZW1NQZ
zRE)KsKT(t?S=YJG`Nm@uk8y7MxxRrJ0tdvrOpql*-4{IifSZoKH2$RT%nFe1LByVc
zgQg;D=^w6m-a~LoHNNa%S?idw2-Z4cJb+Y<$S?ruhp{Dgf)1^BQtt5b+(gsOU`%tZ
zvEhvAFgp=R#!r`g)L)InhrvQOrLei&btr3n`k0bgfYo$5sWr7tm$R9ki_&1vN721t
zW?CZhbr(c*;(JK-@9HqgB9h&7)dFS?AT^!NFpH^z&M%XZ8WC0SQ-#?!fKGHH382%Q
z{CCU6|6cqD=K*58p|0Ou+i_3=P#1;wyrcWL&&5vX9obD_&Fdd+V;(_I1rwwnJH*w^
zM<>#{dM<(V9rz3(jUq^=RpS-;g&1pobOam?>l6&9A>DIfeY9%8EU*D|Wn4;tf!I$+
znnVN$(MMyCyHlw1#I;vUp>w!?Daw4k97Xqv8G|#Cm9sIB)xqRpq<=1C(;<5=7*#`8
z&}ER-)pI6twa9AyhnHUfw&QF79TvtBKnI1{$beh|3?#t)Hh`{Z%L$;1*&L)rL^b^Q
zzt7t0VzmL=psrN&kn626S1R`-{un=CjdgL_h_NnAk03Wh*k)Aaiwl;x4jA`AT!)K=
z$Zfe0KUyVVCLZU>RZ%K%1W-$wN>{PFkO?@`1xWg9xKK!!hP1@lc<enFAa|Eb;`;~|
z^Syjo%tniexVAW$5SRy#+uJ&10%`>T?dKzr$<~?l&2T2&Cgi4E2>5aKki{GW6R?=g
z788v?hM(^i1OcfzbXD6#iFFZLgtUt&aflL6!8VblrsGqKD>~U^@J!eY#6}G&oAFG8
zPFG8iyK<R^U{aNUNwFk!wb~2`U8$BL?ITD8ATbzQP1#Xw15`{GsIkZ(^qMm1TadkU
zk0UqdQn3)(57qn&^aG$a7W9M#MJrhV&=5;t516YVpsU`!$erlnl_34Op4kVQGF$_O
zS#lctoiW)ziFDBZKOWRQHd8(e9-7KC`6oLbbPgVdfFrEqHv#8?q%JPokqS5-ZXIhZ
zmN7Zr>Nuu}u^DUq@?C?_r<1rUZfWEA&8X}%cY`ge_5T{+Ysf^`<ol7ktW)XF!&bU&
zNJm>r4_T*^Q*buQV%`E14-sLuTTBEI2jd4b5=;=E4Rx*W-Hu8C=!C+%_h%I4%LN!}
z@0Z;~i0U8Pgc*%qd%yJahPb+}=+qw>5V;mxU9u-2x1!fnOaCWHN%xe_|DAKmDx(r#
z2(X~H0p%krfu2T$iI!CeOgAuEQM%r*=;B`Mmx}-xe&|x~R)A*i8D9U-LU}2t7M)Bc
zAw6)SQ;_sUuXA;)(BFa+oJYQvnf0u;w+*gqd@8{o0+T9#5y1_?dtvL$|6YunQ7T>6
zryzY9P^qN<n^H;l9MTgf;|U%){{gb@V=?c5DL|!!dER28R;nUzBY_F3R5~!Mgslz>
ztC3z2Y%@51_Y+%!@58n^WJB0ZMQ*c!rN0PS>2@J~av`gmRSmG114WqsLa-I+WebYN
zHlqSx0sthkIw!1xtj-8)k-iaR4P<Y(WM9X&1!Q$Ln1Qsmfu+9$*%`Y2?q;B0E^Jc>
z9I&8w0TqF)9PP1~Xk`6hZnR`|PFM|Doe|QJ+at)P1i5guv26Ead>w3cIGBYb+rZNE
z&Y+`v1N|MjkPVghnYI@59+(7+dDCK|kqrg_-DKJ7(6I)#I&f@3QX<&umP_p{**CDg
z9<n-U%tdatfu)Zjuyk)B19KspQO;-Dhxh;Y0Ts8Pw=F0d+Yq3RmcWl-ZiavkFY}QO
z=&4hJ^f%%wyiGS`o4&=8(^y-^WWNp>r2Suk=GaWz`b?<lM!I`uA!naXqnSEc$3Fw^
z=yBW>$6c&rjdik)*E^2gr8>|0O~!A1p8f1jeAn@tg5S<yo%Mela0fEc(P{zG$U2q2
z9Zsc7Lk8n4f~>QXWil-dE#?<6oh?SEA7RFmpofTqNdwaajI;iAW?Rd=>%8_1GQ_fO
zZMVuaE*6c&7Dqo}+Yz!lq%20Rwt=N@PhjbEQW}v9*#Lo?TFkFn|F>F@j!FVWW1Gok
z_*y_(|5FL{C}ef2c>%dQf@~%m%r%zm32Zw-b}4qtkmfe9^c{%1OzU5#l##ixO(SqK
z3;GREHwVP)coq|lYzmm`EP)Tew1$8#Kr4_I=+&gsm%x`kY402+{4~=IHjoBOGAMvf
zDHANY<u=oHG*fRHVbMpEoM!S9@Ota`Gq6?V*!r!8-&WSI#;RGrI;6;Na6E>o)-hjr
z4|VUu^Zt5oJB~A%_m+S)?;iqgLngYKtwfTnQ|U|NR5~3}9=wvQ({Let`7P#iFg09E
z&^e_D6hXuR1prkA#Bf7hif?Pj-cOK;D7=T1P!XT0f}IX2vU?Ds`lmxmb@V!<NWW!>
ztE+}iNgdSdKR05mtKJ%<CIh<k9)w{T7)r+}#Z1m6t4uDvH7w=}Ftsd3hZJF=C6xxI
zwk7aU5KvnP=t}k|Qp-B~2z@!6NvCUnC>H`LIJ?SXz64XpVsz~nCMp3|NF6W~AR%}O
z(A|8jLJhM6S;hzS>M_Wdssb7t$(Xjvok#$ObCEQJhq^~isOVsV{;}EV7sGVbwT_EW
z-`61%_B((^Y+(IrY(Hb{ltJ?%p4#AWKC;0&<_<rhR-Ji6VabPXhm&P}3yuLi8dBO3
zUqXh0c?9v0a(X9)EQ^8BJRc&FWU8Ew{RSify{7q)=Dz|=q|~WpI*zo(dX}2w%#X7Y
z7W5U)>ROBrEy9ck<5@)xCX;hOaZ9>5EZzj8E%_B>7?=gf2A#ioto!x49Z#TA78DO?
z44G;>eHFPA+eJuZrfCE!c|6ZjO<8F$Zxcwn>1)Vv>=z@R$~tijm_*_PRi2N@s2%0e
zHUBZBw9QodN;s2Fr=*!UidLNv&dOTM*I?>fj7~|yjE_>C0GKkm{)=@1Sd1f6ZN(YL
zC_qb*$V@XitdzIaDF@~NfwUFxMMh)442jG%157zEa7Z`_x&FinH|3&OzR_FJI@I6*
z2DRnxL&o521@aWaL+(1wU+<Yk*On{j++#4nVWfg}d>-uckO@2PL$_D9el_+2W7=}%
zcO3p!BG1UL_J15ZbCQC?N)~hq(Cv`Xmijt!FUG474;gnJ!T2oZ0+^iy((^uH?!$gH
z;so-<@q^*pcTVMlrvar9NW0vAWITYiQ37R@<zvSd^e31X38Y<4m<O<bG)kaMFfm}%
zW1oPzi5}DW;#njPJxeec(10IjQtEWO5J%CPERA`LbwPn|z%;ZNoo<C0PmstZm;xrR
zC4C0WAQIJX`VKM)%sQR_t|%$2<1OefKra(W+o?bg0a}k-QBpAZ!6?&D!L%n+9Zq&2
z`7CMatKdsIos5^@XM*NGa<j=~ehOO9w}2X3kWR(|Mawh|OaU;sOSlZ?6tDfL8TG-?
zV>Dx7#wxSYG<KRXZRZD&U^)(WBYOE=(iAb2tq=e$&9eZ8P&Vz3?;%sL-Hb$5Eu###
zJhudjQicm20zW~Zh$W!0KN-`u4}qZBh_lxs2>2mz#scyHI14~~{kzB}Y$FMWDF10o
z_-_dBrefNfMOZuR79?`YG9i3U&;L~xP2ArE(cY{ej{%4zl}3=WHpn@G45KRA6+b|x
zV*ex(S(N}m{;-MUCC2Z>&{qCF@;LUp#m-y$06~7YLH;4g9x~B3svy&_-HJqJl0uLR
zHb^2tE)qn0zk)o0ePkjj#5iy5g9QPc2cT{DLu4C(E6PiNzbv3U_Ls2NmMi=1Q39l4
z|0ni(=cE$$1$fIA)2xjyK(pG%?j0>svK3jo+CMn(Gc|JXV|RD!JOoN_9O;`Q2MGKw
z5_Zh9HsdGo-!`xxT+xWY{)jjkWrJBvoWPls=3g6F4t^rAw$hJ1f!`zO2N8h{fiGHI
z6>vX;)6V#b2lpYEgAuq){Uy@N0JYS9XTFEiRN4g(A~P`Fi9|M43Ybe4a}i7iJ)&Ju
zn1`|7g+%s92+UE7`2);d#L*@c{D}X}0`PK_K>B-cnHKafpnU|=CM3*k>|coz$Peaw
zFq->nRHAgm6c{QDJlqRG<456L0or`N!-3{r4t}MS+Iqi4!gK#Qfxke)jx_fH0{>tG
z*9SL{uQPf5YESV#4mokzf2C}Sq@g&5gVOktgJlHP7Wt_s@RtPr8VNg6;4}gsx44Gj
zR)EtU^0^214VZ5uaG}yZ^JCC5t^u%IM5!`pDtv@XWiojj4lKd}2%iA&=U3rR5?DLl
zH=e+m1U-s`{fKZTfq$}bgN*@>h$vMGj>7;aPy~?_9{*x97Jemxwb^{-3H%*Fk40qc
zC-Bdfa4m4B^^Gv?Y)3r69{~Lr!8ilpZ<cXWfFmOahaxJI7D<>X`qdH^el>)(d3~$%
zSGcK85cKB=#%ZOv2fPhj2Vm8RDGK2jKUq^`A|CsBe9~qt{8<8PnP!F)=KN`XA?R-r
z8K;o(M;2HY;O_vn?2d+k;7)=$6@fG04uXrPC$w|M@jj7u&EJssvE`YtLpGhCzJLh@
zlS&25#}=R+FCKt)yVJ-=Q2^;B1Llym*G`ugd+l=PkV8@ITVwwz_Sz3xVW01s?A7xM
zF9SHsC*0MHnG8x6evE-L{z>Y;adB0HpW`QqlOMU8-ksc)`3Es|zm|#U{^a-1u;HO!
zHj_Ao>7NclU!o_xzdiq3#w-1w!S@Svr74tjYbmgF-yz|SIFG*zh@}&BZkduvOMDGb
z3AF~8KW9QF#0-S_E1xvaW96gqufT^B=|v*iD*m>KOu|Nqe2Yyuk%yc_=v5Bx<{Jj|
zf`&5eKE%!L2s&lgf&VQhyDxBf7@e|{ZXMZ4_dOEM?jM3&%+Aie9}LOu_4x0Jf5pr|
z7f*a=N6yETYd1cRe1(I^o;q40U|t8H3f)NX0udGRdn%#|1w4;cA&u_?AFfbODx|F?
zc$r|D>%JJNL~mdeF41H=*J8ULy{gfP|E-`JUju#%ovI;S8r6{Q7!t0AUQhBN;i=n=
zpA@R`COBK|pcyvAEyp`3wZgRP=BFCkasNWTk5Y}jRO4;zRgGlq3r18!TTD2ps*&M&
z?DRj6-vyt<Ug>mx56bZ^!D3;nYT($Z#sQ4N)tKU`#{1}r>+j6}QmMvau<xN$HKf}>
zHKaR^gsTyU@A!ynm>^6(1Zc})#td<*@c}wjLtA<Qs-fNdBJxv|YP6;r&suwJhy}6N
zW_TGn9mPHg`)90uP#a+(0NMm&U~wi2U~h3g@(iE~g!*9L;0AY1{{ZGM?8{Rv@BOAa
z@~j?}`SO_r=YCTeC4J^Oi%$W6EqJY(A7f2F^KWzq&eAaecUa(XfUN+k!A>AClrl<S
z2baRl1z$)_FA$d)+yxve3F6(Nzk8!2?yT?dtJ?C>W*ibth4?&XCw5hBen;ByT`@xZ
zgcKx95hQ%0qoVv?91K1T$2SvT0$;LN<#rq_d6}P)Liy>h{{n}|??s#67{E7?pIYV@
zB=3LZmsy+-OM4PqG{D`mUV~63$-K;1H76PNm!tHyGSTI#1WprdHU%)w5C9Ud+6+|W
zQ)Hk5|B4jG_P-KH0lV7<Q}*8xOPTA+rC7yui+k^lj{E>#v%!?{1%fHtQ%LduBUl=<
zy2l0+(eDW+lG-H{tW-p-5ca`z0Gcn&|5g3mDx3sxmbTN}mP8_N3~~$oaor|f8lQW;
zb7vw}Fok;6Bra)~f7z}1O&UJ6^-RR)ANAeMy0gVh?E0|h7D}(iJ3D#jRA1q^^1jZg
z6${gn8-Z76cBTR!)r>QT=pf~wLJK+w{#Z{a;b%Jh-g%yzBfV-i*!)$yW8|;8X`L(o
zM8sQbL;jm>`U9}9Px>4|g60f>bN`*c=gvBN%qB~C8cs$-SolAX@;M3Tz&r+~21(8V
zQwpL=FpmADVt!gO`aNe}HCrrOwNfM4s*Qe^K$gX1N+VGw%M<uD^MoZk9Uu2VR`@@W
z3SqLu*WW_Q0ealBoe!uqY?WPJ*!lrQC%eaNwPc5YX$o0gx-TG?{tvRL4U5K=to<Zt
z_0U70|IF?o533B@I-`olmDk^<bcWnk+-DNUkI6fMJAG}p+0MYt7_t?Ce~=pCY_CW*
z!)&w3E(DWEvZ_u#lC7F+vabcr(>7Zzylcr;i|%hE4<PLlWsv`#n!V;JOZH)WjD@W5
zmynuZJ&o_F!Ctf2A<N(6UIM5LY?WPp*wzLVeQGkyB1?88m==)LmGfUDADI7*>^`&9
zlAVc<aY5M1fseqoa%H>EEOFSHz;ZxkA*<{PfT^1+*}Z1DC95TU17x+FHTC(yXiKbw
z+>l#o-F|r8B#ueQJFa0~vkYhDXe+%3@lUv|Quh{%D-qE``%U7=gYl!epL(iIbQX|%
z$y2dok;cT0J}a-A6*kY6V9JrF>QWGC04DmR><gMzHqrY41xQrKjXX#JK;cAl%aiuc
z;1Ft7TC%h8aUW!bk4Kv1N_M|l<B+9=)&MFGTV+=WX__nB*Uf5|tn(S(S$*8SD+&AF
ztnO+typ@uNk3aFVky|jspH2<M%w^X&H?3JZx|o0nzhROJ1<a!&kV4YyAW#7UVq6%x
zHdg}s&00%fGnly$&~{YqzX+s0TY_0$Z(;iU*dxHSu<HQ3KX=|Ho9|m@Abi(ZAKUS<
z2p?K*_5L>>L6UykOvTAm>qPGnFU5(LK|`FZ`JafqW76>vvOad;V+B6cVpspCkJNo7
z<BA8TTb#aZwF;ajyBWBe|5NOD%~4`cbA6as@v#;kny(^Ai~rltXcObi{(Ht(IA9*e
zk#_pMI0|)jmY2H4sk+<N?>RetPTjjvbO+1~kFV)rU#WXb#g(`vC;aUw@VCq?4}4}A
zo`cGpE-p2xbX*Bv*VL;^^Af>KP&MgZC9sJnHmX8VBz(W2HdN$Bq%D$+v_oz}+9Nk3
zm^uk3^3xaMhB&un#?8TJEgrv%&)0bFDSrt_N9=2`tZE_lE-vk|)>t>z*yb@fY|%6O
zrYHR~;%4KlIs`w)+5OF2N5yca8;5kV1iGhIOyuk}57Q!;evau`o*K%mIC2YSIuNf#
z`~~@FK6aQG*{vMv+UQBzBj^(>KF`&hyU>+Hyf;0m1NY9>-=p}*D8Sz)aQy`xhpTWX
zP$^^~1I4Vv(e+$iY4jpg7e`(AS)ZS%Ob_xeJrj~+VzY00(&3rd1^7@gzk}>PiYa^|
z!co;U1lt&Ki^<ui5r%Tc-}Cm|Ug&)@_L%!b=`2q2P((1!r)Wm<;e|1bsf9JxhnMTZ
zcxOgQFvh%}rF$2g+B(+idO|vlk5B(G=CNi6boO$4{(W;VPHU67oW>GRjLRS$VBHN-
z5Wt?h3%zf~c`)%FO!r(cB6hzAGccbA)FT4qWquzZN}BL1<ldm9DrHkja!pn#vn$z)
z3^#_@ovKxBejs&mS$5$EfP*yW_}}WGZeC?cXgqq<3DSSuE4#>JcsFk%V|A#9#y(+8
z_T`YCgzk&<LwIO<6K>!lKE1eYM)v)Ln%<h{nlX8r<!!bo&xB)nMmz2B7mnVByPT9q
z`s1J({98oHp$|@ntY=etr*d{{HEx(^5~Qj_%16_Wr*`sF0a1Gnu!Q=iE-uH?dJt1B
ztvF22v9x4X5xEVs@X|8zJ~J6Rm>Jov8S2_p!tNpTs2qGeOG~=Sh<9mS5B1xvKP|0L
z98d9Ff4v=tYjG%$ADO~{mX-qD&(e}E2@#=|IJ$$M0e+$~{mB2W6&!0nK9`+;$y<E`
zkEKp2A6NS76nxPS4HNKnBWD-+Ndey$d>dpSzk81Zddk^9^_}u@Rj(fGLj8ia16svG
zI{<5+D>SJBZ8QiduW<wf{l1F}tqOb)(R4~wlmRg34pjlID?k4m>cJfIU<T*JIDjG8
z;=*~^Glm$N7S1*1PV|17-3dI5&sZvGHAJsv4a3n8WT?lXyl7<)1DKe4qC#Bxm=VWP
z<15Bhi5Y$@wGKZA-ObQdR8amCn!E0X<6;ED-eT_J=Uvz^?w3t)82{rRX_-;ziSF<$
zJ=E2WL<ir7vPnc&J#N=i72`VlQX6)RKluOmzfGwZE5%hPZ8$*dpI9im{(Vt4HK}9#
zk^eux|0|vDGaJNhX%Jt;Hzd_}L;PjmwW+?A{CG3f*NPuKQ+)w`+_}rwIzDgdqHDe=
zY}yX+g&ugTO8sp$VzcNKSTQF)f04v|3|<cULP_<jd==vv>=~c0=;^ETnS{X_oz>{B
zzavMh#1>)H7drCN>_#V-dmV%IP1}{fKP-x_j>%{8uEB?ye#_1;y;frgU$!KJzR;O>
zrr+{I@3@HYCz6Vqk30K9eFxoD^N8OI6}feMKIr4lay+8u#|e?-uJ`3L_1lPg>7=!h
zqeUi6DaxoXwDF(Bq`BKt2X@Jm=+4-`J>m-%d2fFyQ)#g;^!jZJ+Yjs#F-j}tV^p3O
zv_CaFlGSyK@|lZMG&;G}t*s(Qr<E(js4tXv+LTrY|Mhb27hhmkfji?b<te(2yLdb~
z+!xyVRL`cX3P;qW$l$*D7?uAXO@A&P!R@hpqvB(VcKW`ssgmjoz0!4Y$3871Da4e|
z$EX}E>Uih=$kEaL@|nYXG&=f@*e3BgC#1*=n~E{&3*8uV&m9lG;SGLIe4!$hN}35F
zU#MvC`oQplk&Zu&&Bv%5Ul+)Xi5%^nKc88%gHiK-zqoCYIma|9#;7m!&a(IW?K$iX
ztJxZVdtq}^iZ3*_Rkems?TZ*K`UW4uyD3GUt2SJ{&s%YNs7ULE`OM8DG`gwjoW>E@
zB6(I8V$>HJRc~|C#lPnqEmWjNqAAnS7y9tT{OZ%oMo=!=hpsQvQR&XBes5n-*qxK(
z^A+}$+?9WB`~}~NHSw|aM)ry~6F$3m+1)?%|I3#bynmpT`#6L?6ufh&@s)d!Rjdj5
z(1Fe4uT`D>{+}GMzFm*xfIV@K@zt8a8qJ11urt<Nx_mhY?9>M<a=`BFXnf13vo5n?
z*E|<zmafT3^0jt5bHJ+h7mNj)4ZHWBI9@Hv3F|ldwH&ZrcNkxZsjS9qSi7LE<=%gn
zFF*U!!W^*QZxW1Unhks4?!0F4r#Yh?9sOGl*sk%$S9}s{FdJ4gE#4#-`uB2nyvvur
zjQu1BEaoobYc_#p>46=OGsU0ES;?cvcISYNyWRNujbbro!^Ss_GZ!N!;Kc=7a=>ot
zZhTKKVWnlmHa!_@c0YM32gyY}mgj&y*+DSYU^eVnpDUwHuCO%+Y(Y=s`)U>oEF0FY
zYn&N(Th5ZaQ|rkbu#ygpm6r`G!;5`7Ze;(veED3J^*LaRyFjuB%PI?2w?}i{-+3fw
zB`4OIl>@f?M#0K-Ol>$n{*zs?^W!hp4pv{dBl*+LeUb~UNl$+HnT5$eZ~iH{#fgQ<
zpKkppx!>Ru$?qQ@o&4a|`N=cdKahND+iA&DetIc+@PM_+X71|bMw137&wg-o^5P*c
zCnvmDxLwMNdy=2~^y}nrD+iMcU09wx?T-b?NpDR}?s#}w^7V7(B|rJ}>f~;TbCSok
zUz=R|)5nwd+|e_byrbIs<XPJfCvRvyI=M!tr;`V7dOrEn!9$X}jvA4Calx46=Wp1Q
zTqJf{@`#<QlB?&>NN(35E&1=2Ta))b{Y-LV`#kL?k4jCxH*Q7pp;K#;_l`Z3ynV>L
z<a=8Lleg8_pWJMB|KvY2zf6AUszCBrBko8(_Q1O2C$<NZ-#`0g^2x#9Cns&&b!lgO
zj4!xzN3C{^eE%kI99+NMUF!<`KjPj5Jc{CN`|ZgD$YcgcAZ!vKKmfr2K?npALTBm8
zLKYH0wtzt(Y+;cd3Cs}oMG!&T1tkK4A_5{B1=L0nl`ZVZq9Q_A42ueaig13_sc8Sd
z^S$pm=lWicT*fC=&wbx@S9NuFRZk~zcN`kxPN;QQd+Xz-?&1#B-4#-=X-AtiaaXBO
z!|l`E&s|}8HTT5As_qs2?d}o3eWB&dXy=|=;URbP<rUo{M%>osF4(1o_Weckj1F{P
zpZcx#-Q8e!tE&&Wuhw?B9s8=fx7Q4Cj~y52e)o-f?uIRY@MzH`54oRMRNX!2)mrYw
z{lC?GzD{<ph`z7oebU^0b7)idrM6AoDNAd*+mCMUzSkk#-TW~>_qq9Zv{zfSbf4VX
z)V-`)BlpyrRo$9z4R^-E8ty8ulxQElTgiQWNjtag$qMclL$7IR1MX;le%#Rg*mytp
zS8W2_Ep9s83GJJ@ziL)=|AUG?71r&l>gzfB`ZcXV>gU>rE5Fh9%|5Mtp7o~|`OR7F
z&2P_Yt19f(g1`7es}p}xJK+6HtGVJEZBMgLH2+fvwez{3X|1k*tQ~2xL7U;ZswJjH
zx-)NI*WPUSp?0gsPOak2ue7)AihKLAkF=+oozuE5xvi}W`Ce;&Zlg9o(C*GWWOonk
z__;?r-qO(4PN?L5?nz(wSBvYrpE$Bl3mv{z^BH(u`(xv~+JKk$YB{a<YO4aSY9qdX
zPxBAku65pbNQ;i!sXe%PT}z7oTzfwMfcABTH??{{AJP(+f1y=g@|!j_?wZ!&mE+pe
zIp1pS9j|Gx-ThwcwZ+Fh`=^6iGr!%McgUAo-l?MdpNjeL=k*8Ls>&C&wZ|H{-!H1<
zULMoX{oK9tTK(UuxGxsp)b12q(blB=spa=k+%ra1aqoLdao<Y6s0DhfxL+Fergo>2
z!yP=bf_qd<b@#7Y1$WICXKK$q;&k^^9qx#HU-!_re%Ffcu|p~Lckk)&qZa(_RqaNA
z!`(8sqPun2XCCcz!Y`VuvEtT}?rSZsINkHYgWS&z|3(YS^LJmacvTzlZLqt-VV7H<
zthfvAozYs{zN!W7`c7MM`l8ml{sZlu`<2}x$9~p!O}L?r`mU1u<#orj&|e$7Z`Y{f
zUUJ{zK41GAEu-yiZOiAC-48wqa(AkzxKHOC)-Jz#TdTjN=>G1C6>Xl;yE<q+Vn%Ad
z=YzD>V-hvn#vu34wt<?_Jypwf7HEYPCTl0`>6-denzp=jpf+VdC+&3fv)ZmdHfyWi
zoT2TRzd@V6I82*5xSRHUn>L#7<MXtvv)NjD$M>{*CBroLg=j6I<ty5?Po`;?cGT7C
zMfK25=eE?Q9XhTB<QI6fdsUlg;}&aLaq}hG^vQ#??OT7)CLe5}4cfn28|XJvyW09m
zZM1)FZEW*_T1wxJT94-%Yt4>CY0qYK)eesvr@isnc+J@6)jF02YyQJt)w;gdPaF2~
zV(p8mleK0iR%)|8pP-#inW9ZD_G(2rL$x8ReY8;>;<d1XA(}Em_xYmYeb1t<#xphT
z%9H<6^!!^5%XU5gTF=W2@P669Is%@5@z{%wd;ZPIhXK#OI{Tko<oS0`zbAdgd0%vI
zU&Ttkp2<yolws~9<6nxNf2(2HuIFFtd6@yvzc=)z0nfj9?1cgTsLsDS0{p7Kf7O8J
z-#z^d+vIuoj#lKWQqjGmelAy$-w%O**la<%@5R6ej-cWEB7~u;x}wNc5?ISt)cAtm
zpEf<@O5hK={gPk3qL8bBc4{yCUGvcct_R*N@+<YLZqw`D47}*uGR{aT^1bfos^aOL
zXr!D{ys>98jJ<P-bJC5ykNW&`@f{wDi+>OQVeTFErT6^uJ!XrUI9YD@#;(?ky-EK+
zI63ue+3vOUzP|8Jzo~%>Zty1oi@F+)0mK*G#sWR)zTXYof+F7senB?hjbxq+cHD6+
zXkzoK?s&tY2iqKXhA)Wl@v7^R496L&C#M;XG|K-()bo8Di#_GaYf}uzn;0$C495dv
zdHp{XYy47nyyn>*Q$1ah4aci=H!ay%P(&O{tdEJAn8~(#)l2Edg3A9~)IGkAnVun-
zXiwuk6O08hG`=p;aNPUn8vS28Wm9#pU$sSnIXuSfl9Oqin{D@Y`90If(Sse;F4fGr
zNztBVoJ;lbcA1%F<a|iEsb0sw+NJ0H903)!bT@K-(Chg*2I=8`4nMRU^L<kV-Q{rH
zscEXEbu-TGL_MaLaV~_Iq1UNg?Nac^sQRO}owytIsiar&^WXJ}dT^C$7ksy3r^wgk
zs9N2&QV(k2xDmM0>7zuiNl}9v^IJ$B*VJG=D8zBAayH+6O2(F~27j#A3lVNYgqwzr
zMS&}K@@o<fy497>*_CLWs0TN4-1GHGfF0b}aiN03ciy53p^hpx+sdM#CXQP+-^lJp
zzam#N$30uniY%j_%kJ&GD$_U<;N$K5WH;lC9^Bkf>iIC!=;x#+j!b8pw{u!|;|v|_
zTwecA#hKzWFkb4O*Mdo$g3}bm+qoIGN{Oe_jed_~Hyl%g|9LaI=Ml$D->PD`e8U{~
zD)?SZQr|A}ebm9V<_C>$gWEbTR1BU(ZJ5p5etCxab_K=T{`aox+b4(vlhl?)!JQnH
zeEPJkN`7#<`ad4^{AkBKPs<n|Z~I{h>f1MIc59sa_7&nk(cJ&88>fHL3pzWNxSA&$
zy_mYmLy3BDjN?L$xk*MZKOZIePq%UM0h#7qjb5YmdNE=O$A~H1g(-Y;8-I3pr)Ko}
zgZLoD=v5S~Iv&_;CyRpP9G7f$PQC?GLBUJDCb<5@0F?FM1aK$yV9jx<$evv7&r0v7
zJF44kql$tu91naQqc-q~$^}imRutUBalz(0PUTzQ$C&oMGu-0a-VR?M*Yd9FwSxd(
zAr=Mob(Gp%-DA~jS$1#y!6fyX?#g!Ds}jI>2LR2CQ%8O1qu0-NJfVkYiyrjnk}xI3
zIH|h^aNeEP#CMN7Zh0O_Fiy_%QCd`?%Z+#gEFso7*~_l9r~tNv`l)HgNq$`MpNhIC
z$1&T}>vu~FCvlWb%ZqUkWvTzCoZPKeR4~x-yxlh|$>3NH;aEDV@tI`&(Nv?F?i%WN
z;HkEV;*Z@8{@{=|zOcLTY)j(TlpkaC|Eai#&$-3F^|$qf!??znja^7Hsuj`b?kwZk
zCooYv-Kh4`KWL)7b1px<V7Oz6XK^><+2agGCkA01?U$GTsrY-@(oR|DGRHz+I>Ir*
z!`~qD#&2c>ej(n$(GKFGZbr3s|35ey`iEXHk|jemH;whqG^*VtZo<S+;zH)cHDY5T
zA0Yo@k^4?To@0T>Y~;r*qgn$R`-!<Qow@K&H@?cxJQhk26LXYfYR93=d0lZ^k}-6O
zk2mxEWaH9syEk(l<rJGYGcVn^bb<UMDaNI4l(%*@h8`nUPBJd((W4#B0*6)wzA)7o
z%Bn9j5xPEMv}18}<uv2cC)DJV=}fGeYFv5=Vm%PA!pQ4tT>6rDhK7D3ZUgr!xNbD4
zM~`uYx{iWqK&BE(dfpg^K5%F$0wp-Phf_z2F|-E+pJZd`3}Ev!jG;}*e@_E<iCqw?
zK>U(&cVa&Hq3}9V{+4(HS(W-t^r#BuW1^YCV;!IL85)e-JY=ho9!PT?Nxx34N4Xw;
z6DhyQ0F)BrXm%R)jgT#*JPw&JvDFjzFH+u2zt&UUCFJv;<Z5|6$w>c{Uvu|evT<;t
zU5WRn{0rq?DaJuwkckgTH^iTOjKAO2NN-QQPW^V`Gs#B!hu}FB2faQ@{Lk=@6O}CE
z;7#yblZ=CQ@{7nnPi&lOr1OSLd^qK2!F>dNwU45E#yJ+W3q-66mBD}xAbvgFNasEg
zKc}m4a5m+xsm8$|$`dieD^>9ux*G@e=<$x=P$R=gp9Ql3S%b)dOn-vr<|2QOGE?E8
zo<E+BjwSOlHLsB2g)U({eFs_#G3rAblW402QIwaEzYe|*N|mU$QMThL3A};VEb5og
z@+Hb^Xt^b24ew9Fk0bvS<uma31LE=Tke|aT=9$2G9>M5#pt4n(k^Tkb0oePD*pJ5M
zLw-R1Cb2i=HNpfP=3S%sV$8DWjsF$=*Od2Do<;cu8hHrZ9m>BG8(`u+>T@UuQ?7?;
z-YbnCfi8UAGtu#(Z_iY9ogO{O(ZJF4mdzXUyC@Ex<XGitTF=KDQzJuN*PXa8Q(gBO
z>AR_F(~fp;%=$ES-DbrbGXnTC#P8G8rYFH|%~G4zC-%rtn=T-Jnx(GuCI1b)0?N(N
z+d}L{c?0#;h<TL%pq|$uVn%jT*F8=-@t$5V+3~9Hymah@PH}VzY|5fHW*M^2k{%3P
zpD@L-B5VN$9)&m)1EC;3q#O*QQo7po8ugDOu$~x#VZGf{$Fbn1sYa*UHgBvW+1S3B
zjr@TOqtkZE2K9RAQ;yJfy{V~2O-6U4)7!+QWVk-Wwn#HN4S`XRZge_Axir<-&bld<
z^Qx1c|CG40z}!Yn7c%TAVvna8+c}YAk7gL#A0jpaNjGCBQ{G5^TQ{TAEXt8WAz_-M
znQgnCH_fq%;L3M|hR#8cOIqwH98BYJhc9|B6NkY6nRti1-Xq_!*|Ys&V7D>$8rAQR
z9*_KvOk;a>>Wi?$>O8h5&3(Z$i<!v10B#R*7pUI??=Iy*L^p9H_$A={l8o&U=rm0?
zw);^(8y+{t*h|<gq`nU2hbh1DwO%mYF~jqDrjd1y7d!(~jjU}%-*n?hbK>R<Bde#6
z5?LwJIKo*H`A)i##S3<kdEj^{I5LKOC@~_<II@Y~%=d1Vk;P>uas=@vJUzoWvY7f`
zx*1ths9%<99N{hL$WY3>TpD?vyvxpnp21mF7g0{R$O^>skT)T^8_-LLtwrPnjd6{O
z3{Eq$j*=fo{dw|QcjL%vVi@^V@EgDzLw+lrm`=F>d`rsHF#izczVIr7H|R9CEm1zw
zN}N?5t}jPK_pWs_j?AQ`)!1wX;z4&KD}gu)#c9MC6kh_jotEcO{(=0j#D?T~Gc)oc
zP8ySU(cPaZw@)hT**x%jz&(e<+vq<*PeaK6#PEhtKlf+7z{}xifu3l09>en;xFP7y
zLoWd5EPo@T;qQgl6TRb<$HI>xeocM`_>=VLd&*Vtkqj;iANkaC;mzU~n?)Mvy^h`%
z`f;CfI(!w}e(HJ2be?CXqsTMUfA+9C1}8z(hImyKv4M<>h(*Cy{VUlr&HP(&W%=M(
zRpquEl^Lg+^*d$#!?HX+zO25H)S+zZZj$wRM18`K{zVRxu0zw?mHKxmD<+%SQvaoa
zW)>4>oQ8-Vbj?527a3I_e9gbeX6V7!{hJXIuKO1gf^PU{ly&88X{lJU0|Y1khW}<z
zVK@CleUAeu>T%PbO`M_k_}PEAZy+^#@Gt(+K@<4Sk8x<Iss?9g)Ku*2dB6A{^Iafh
z5^ni_(a`vb0T{<XGzL%uLL;I(RyA4>>xnLe-S!XmJt3&6w`s;GO8wRU3(;E8UD}@`
zl%nqfSuVcIzU#l%)s^qW_`fFTuHXC<e0z#qkKg>c-8b;@Mn#jaN4vjvR@J8gHt3?N
zd#GPPyWddXwTo)(qP|lMzwX)YjXD5<=Oa-Iz;pSCs!r7NfA`-Od@)8<?@%)mW^b7D
zqgA69WroGjQ~&TU9%#%$;9X#KM1*K3zK&ooqCd18Xmv647PL5U`=~!kK9`uqfSe@X
z2idlipMrmgnAce~MiEz;M(+8qYHmyiwhg}>ka-drjaZL<jVIPbzCQg~PIG#~pZ=x6
zqZx50HMunKG0a*tkcOZ~AX4x9?+ktqje694&B1F6;x#f2i4R1Rp%45!a0Id+_+N<L
z08n^8N8AJ8Bm^<57hz^J0#zCNsg&<?G^T<}#!OwBZUp|heCKT6oqSW|YjgVAe2<gU
zT|UkRVtV;Fn>94<qma+x;Bj5lB@|Ym#9=c|QQl1p`UD@R4%%gR2K&}SNe>g>y-TA-
zsdi^oF;TL8onJ6-LMu33w$x{6;&((s==pO*hDa61d=HT=YShCF{ac7nr@tRkZjDkM
zY%{iMKjKQvTq8ft9QumRrK0gl&enwJO3tvLIgD&e@)ywOeaonCM3?d_IXC(a7oNiW
zFcEx^%5WNeim?(sdJ&QsyC5KX4~H|<_c@d1aL)EjN-=WJu^;*^!N_IN9zT`xX|^gC
z6O58IlnYagl4azdN;FDX#K-@UWR#5X@y0*X)hPLn^84T(A&#M*=d$tf$wuxg#1=_L
zZU{QViFk;w48AtFj^IA0UUwV0TWwz5<L{h3yyOmuJ|NkT#K(0tazCPlS7E#h;|Osr
z`9?73k^fmUa_>+-KscrTCglw9FC`ketH@s_hEk4_dN1JcD)>9#^gO3i_mrHcxSD3z
zEX0Qc>jll3YLt9H<Pl8nePkOZ8zm0%`3ww8u=v*)2##btn~9Ro;I~0;2laQ5A3=F}
zno$x!KAAG-Y<xrPo=49Yd^+VnDT{%4(V-W(oHIQowID5`QPy?wUm@O&c#%fME>#?b
zt;B31KNJwZfzdySSOKH^0cC%1cPLM$6M`Q|qa`%*Xqu5bjxrmB5<aPjzXgt+Qv7Mk
ze^CD#xGlst?&t-ToijXB>AW~99WdXO`UU8RP<{?xEd0-D*N5_2j+Pj#hNAPtT*_Rk
z;#1HSo&KEqPidzYyv~$g#dj>_<!+;dvogLh$K+?q-;?)J_7Hy&N52YFuw(%(sI<(r
zO7y&hwN(5;5XUL+riY8kcOyPT*`8sPTqIUcH|J0M33xA4{(#|qo*2hrSpxng>LV$C
z&4^Sb`l8#K`cfhb+4$Gc-$Z@wN_s(6=ktTNqhCl_V?=_9b?8wXxSim*bj6P&FP<BO
zkY7g+s?x(b)Qb@^h!4~LZt`E!(`Doj(avhhJjyKL32uB>eCYWB&Uv1a1Z1B=ZXvWm
zzz&h#hujau8PM-i??sNwSbQeU@d;G?ee$fX<Jp|&_5>Hgz_K!qzeDqzi0{(fl{9;T
z{4w&UrQNO6voVNILWi4We5sFK5GZD!ctF#DsePE_B0TP={4tHx26T+Zk5Yacb7E|N
zK}1Y_o(Pn%TZwOqINJ#G6sbaa1+zyyjypgf#h5n*Ux!$PIq|q-CglXmi)m*LaU{=1
zsxkYxw&WI(|D5_9VNQ0=L%VDo;(5MU!u~#f6tNP#YYYy{_W1SqNP#z*`Z45RrhW|d
zSKzZ@j}OGpUhq5Uw0ItP0{oBEe~X_$cyALw_hI%`cg}8JBCaK^aZ&*CI8M(}ejTMS
z%B;|HYtq6T7<yQc^JK#k8$zFIM#&K3Eeu>C{T>Z%CXS;cMWHpE-?=<PIC_ztyQxF?
z&?>TbraFWN8<Dptzo&R3_Yt`yMV53^FP0FK$Uh+8o-+HJ$OYZiivx%QyQ@PEkU!8v
zy*SFp8|lhYhj5pOY(hP=AhIM&y?BbU1HC-(aXr-`<EigfsuwhJ&Ky33b2st<;->)}
ze@wl&nB<I1^&+Dl`8JL41S~Qcu|&!p!Sw(afe5$x$jdZFCnJyaP={~^MIIx+99#<;
z<BW{VhObk9gtDI3*tya(nVP3z%u7{=Oa<{L_IRKYS%>%$aXhg*j0<$LDL52{jG<f|
zh3AMdIJ*f>@Yt3`o<WaKt0Pb2JdM~1M|=PoS(V6Pi2M*fH`U0G%Rj9Qbx!r9Bf?Xi
z$li40IHb3*9fg^yh!sQHL*wi^Be&6LN8&6Rn?ijFIANv;(Ngfq<UgT45#IYmFL46+
z+cbWHawz%v#8y4ki!I<~QeG%7X-zo2-b`2Rzxa3)Pj*#jCD^@*nHlP=Q8sVl>QvP}
znz%4UwR7MSf1*5)zpeFtl4=hC_k5~4E0O%`#A4zr=u99#FkPK>p7<5`-PAkaJHTgw
zdjp;I=-(i}AxWJzN6hS|&e@JxJO)mD8-(s^=4{<&)=mJ;)6`j9=o5D&tM&^BT!P3}
zHgOk54^sXR0Zxs?J2b54HDmEJOZR=)8JgNUOSRVp*fc}6e@lE3!WamPh&pi_&dyNp
zrThW{2WdA*I7P^h`Vln5XXS}KiTZ?xol8Bl;-R$w_BiQxk*!Pm0`U99Y&y)cB=MWB
zs{H}D&B%@<b|LRYwgSy@bQ6n+fs}utei`|y=#8a*67{~+uc5!MQXZG0&f>D3s8Qyr
zeBxVny`VXlpuP<J0NPv5cnJNL*c(ZC6@CQ&H}XGFR>;3V`B4U*yMN*_+T+YlY>93|
z@()qJg>pB_2PuC>`wFopdfCEHmOATQVl+Ii7>P&FpC)EY3uem^Dt{%JKqHkPEhfpQ
z{fVzJ2n&fZh;vU#+)Vuu%DxQBYD@$ttFw+$zXyIX^(WF)yC^pS$IT?M0iEptt~+LU
zu94{A;IiUP{1tqE;)~?F+cEKobD8Hy$de(LQu#Yzf67-8Nro&&v_Cydg>;+|j;1Wc
zmVzHm4_P24cA|bd@e^>|91?F(ZiUXDbV5w=4n&>;B`&8vjo7^Gq*admC|?Z;b58Kg
z;sTXe0)pF0VpVK@ObcUikPqVo!^0VwSW3Au`8bA!k9QJJQ=d)@h9~kpyQ#B2BtM8@
z@KA0<%VLgSW)9t=9E;-u^mzJZo>Vu9dE`C6>jf>HuO-iVn?f;_y!4Q$BAN<mGmY(_
zkww%mpnfZ3--z;IhC!T>d6-=XZwTgAfPV#3dR{B%3g2@ub>G&`4vtyA5STf$^aP5W
zS^2G5{uE%d6U<*wX5E_jEH=fpV-~<T>VHR>r`w5Li99X(t9#|${6CDkCma*ACezX~
z`Xu^#5l7*ax6^lVvRtO+b2z9)-y+HXAGY{^<BOBAjdO0;gfundJvKp)rK!bY6V-+?
zm*l94>1sxAHajlr-yt@jthamA*)C`=h#er7rK%ZXhr{H|(DNVV>QY=pk*h=Tcf{Nb
zHA97XFhec=h4KPu(`l$7_4}y54DJSTA$e{@QLmBr5Syf{#ctxK<Oh>~g%}Ml1>VEN
z7r?EgzJ#(JT$ubo@Cu*1v~^B(6mMp$b{3HCYUgYhwukS&1b&TvJOz9>@?Rk{3i?s%
z>mlEV`p$xp*Uq`#R;&j_IGe>5bBT;frq}EZqOQ_MPPC|Tlv!Ixxl!RR8}%7wE-_JC
z$#ZuUCX25VI}-K$2v(lO+&rT;lU@TX4BE$(2UF%?N8QH6pOkk}&k{ZA8ugi!uTbtm
z{1|g$BmOdSt%#3M=KdK~A6^pWso(?AIY6H4Mbs`LTahSnP`N%t<yF)R+H<ih_Mz|s
zURyCX7a587DStqji%k@#YH?>oxjYtkA<iPdpZd=zvoek1vYc^&#;#D_K>0S}Rf!sn
zJHb6no>M+*J$l=P9{B>|qv&wki*i=b3pzL#WEOJ@M{$aqjn=2pCrNGt%vw3>4B+YH
z6Ns#-qntGMI(e>389!iJOuxgFIYBc%E#npI*^$n*(JfGTj2cd9Q#m;q`!M`LK12Ng
z@?4psW}w`c7RFQmGv)cTphtIfHgFVQqrqOl^uZmSD?>K|5YHlZ(KPP{MNP-5n9z?B
zgFwDXy&l@h8QN|g$ag`sN1A<zIA@C6Qf`biYj|@utfS0*JEJLzOmSvKC+EE3#Vk;w
zI^g&b(wqSqUm*NGa-T!vd4F+VA`6fVF&3LCucbVLqrZ~qNl}YgFh}j9nGuv*zz;!(
zYib6&-KdRp<qhH|!f0NUQ*T(@i1Zxf#0eRX4C|aI9k(Ni;zZO#qn)Ke;~>67gJGF!
zaRdU-!O%lHJI{LRbyu&Pw|TYI-PPgah}$yM;mzz`Z9%3we754%>QX<}$E)?vRIl77
zF6pKYe~J3vvee<+n6yrm=fW#RhxM!W+GFaKD9X#=$AiC2ektX-J=NhHEbUg7dWBoR
zwxgSR#b4aAW0=J$6b!&$ko=V7q@L=P@x%?3xd&-+nD~kE`;=pd{V1;>9(qh2&Q?l0
z44;XrwSdP-t+l0b_I(;l-{G67e-CqCQO=|jufsb|JqKQ!i&?frx~B^!P9g3Oco!hn
zUfO9uJXF$d(-`XrjZcBDaI4nt5eE@P4?id0kus;QmIjZxp$&!iHRUacbFj5J<XN0(
zCos#6SW5->9eQ6eGGZj=qtALm_o&=ChED?VQ7`oh=e70%W5y0pYXf68_TR-J7a8sQ
zbanV{%I)bEcTtUv&~V05izDx%+>`n)v~UB5y(zaOuamD!{EhOjL|!V>`jHpI`W=z^
zuU-8`FNk$6vxVqEan7)?f^@Zz1ybCCR5j!Tu@+@63vpaf3oj5SWvGRRh=-`x^WvPk
zBZNh7+-cytE1m~?-vQLa;+@Uvgz#hYala?4A+vqFao=`T3%OjyrKbVRi+8SV_g0o#
z$fY{&=`1xQnHYg$b&y;mLO2w0YeD{q!n4Gg#9e}!kl+jrs-3JBa>m3pK!GzPZV8_B
z`~>GJIuWcnFA;p*;GRoW3paurPyQQ}*Td7J-Ok;?%V>)=YFt9P8u9}QzG!pNjH@7&
zf)kwyzB|NsgA;iw_ysC@P?EFM_q6D<Z?dyPU`PU_{g7Gi#Z3{$Qj>YY7^3GVJ7)(^
zO;HOw(=M0ZLS}B<eiXRZ#r4eq7@Wc&g$m8+6zA@CEHn$}gPD`Uc*Ds8!(oZ5iTIB+
z!gi&Q(>yMkMtE8o_pTV;c3pY$8p4?y*OMYAMck`I=2_gM-PMq-#7@W+<#%<i@M*`p
zrH%LUk=e7U#v0aWu}`KMjZ=vSC|{+%eulATAs@ZHlV+^hM~nxrC!{le(?I5ej0JIt
zc%8DIpYB}Ra7`bG)e#s4VQ9LsW;o?Tlp7HFuy>6fmf>vY8<GKGaE5bbu###t<_;XY
zoE+QpSU%cn{1D~&G^z)8b2jL>hRGRwkG6XgKL-{8jE@V~Oa%4_^>wIko@K1bp`1>W
z3gyj2eL^>uh#Q1!SEjRB^EGw(Ja7+*O$hvfz@reJq6<uw*e{4&bYtD3-Rw*b1y8r=
zRHk!%pEc}lV{br=gUDxgjk!n1E<|7-aVUl-QO^q$jd?5=%bOUD8_<x0{A%)F6I)P!
z61g8K-$!Q#<+-MOmUCX<ni7gtfN|T3eO<I4)Sc(Z9x+DI-8o;@crn%rz&QAP!n-Ds
z_=HI3cX!T9UQ>f&Gm0<5=kv@pEJ0$2bvM>jBK}OjUZH*m`LBrdukm}-^MTu%!SMJ%
ze@$!Zf1s?V_HYIV@ieXRI1qcYjK*AvVtHd>jh^3wHO`t#6#1NY%_{nLjB3%JM-b-r
zAA5!R%H(;P*7#5A#er-O&X3618q2#|YuItbCNMtph`)e;n);tPKKH;KA@YDJb{-vI
zGaEY<{T$*3`1jCX_&_h{>6}?-%^tw50RIO0Ex@AzO%#I}{FpN=^cCdWP;(0!SGU-m
z$V5>dM?E*!Se{?4(eoa2uJBB!=6y6G(GnxEkFtx%ho_B)gG{9?4%{}BY$zS0t>)x=
zU|isuf#by^@!@=97PhhHXlXCy4R{eFx(oa*>R+WCPWb}3-o&RejmGH;SNLAe1)e5|
zuo8@2hOO_2{(yE6ONnAyJWi}Z<K+kAe=5$pC0|1C?acSA8ANyP<DeCGE92-9%Fohr
zLmUK9W=S0TW>=%}_J6?1&rZFd4^IoSvW#Kh`6%j=ZpJX)7*R)N8J7cXidwguarr9c
zWtqmX;dVvk&9`A^sQ;4ueqs&EtBFk>GcFG$9;N&=@m^2kaw5Eh?#AVHl=&e3@}HEm
zGL6fd(0Q2puc&_%-8U6Q_w;ox_FV3ZgMjYFu+bo1MX5E2X_TKLswjMgLRVrx@(DeR
z%O6r-7X^OSP<;n{8~8TjPT~OSXH)++_?IYW({c&;>O@|!QR8|V!}yGyw~)(zeBuYE
zzOWxVo69yxcY7I^d8^eKn&5fN7`9v(XJFoi{uRbQKyE9tA5s54$V}p+*gQoXO!KKI
z3?r@qe*zo&gly;1K9?IqJ4KNf7Ss*29t%8(ar_-P@4^pjM*VGM9nenG+>6vVB|nh*
zc*b`o_~+pBtGv{1G}o5;B5-<kf9F~cSAYZieY_EalZ*o_G$NKI8wa9^zhxL{JSU21
zlwllT!w_++yOEYnY?@*m=nI}V)Y5`%-iYH#M%pXXABOiUy!X&yVII*L+<MBH)F)Fe
zM2{0L;%VwT!S9YPS6baOJZP~e?FcnSSL48MAa<u42bL3Oz{nsvDKp0-c<7zRtHTkW
z!F-W2?~0}IST^Eds*xsm1E}W`5b-)VE+P@LiCm*1`l8Ddv<TiVIKYxD;u;PvQ64Pc
z34H!Hec|IgCrw*Oa#pgD#+o>SYf~Cm=7`IX8c@HAZt`hh#78t*gEAMuv^Zii_)63d
zWEeYu4^J}=aK(-o3!WQFL=<^$G!a4YZi_OFx2EyOFzXLL6_cCEk7gg1!wI#8`YzOq
zVdvHu5d;2r@Xd&`h<vJ&_7?aZ4C{FG7Ng6uIbs}sKB3Gf4{7z_JwbjjWj^Oh(}=nB
z;~e!G{(d69NI8PYi#};*gdR9QaX1FDjONA21LGilj0uiWgoES|%07^}sz-1iPwP$O
zZMy^PWg=YEZvr<N6E9FM#VnWYi2jt{BKAh_9pVm-#Ea;}V(vQlUf@^^M?_ODpnfTM
z){44kl$aC2)NmP3bAud-13pxW;QnF`*H_q&hItug2=(pAv(AbrrTeTtBQ8@G!*reD
z*+f1c2Tv0}CEt<!5uD7ToJ0@UrA4%5SdSCu@Ox&&o1)up>I(-sr`pnV-!VbWGSe1=
z{S!_cu{*uxllh1VwDBcv++~zl{6uUZe}#Mup7=mGVh+9QK{=Dyl`?05IYsu-mYzQ*
zXr3o+BIzA;r2}-{SxRFiAJGdL9-Kz-lHP$c<oO8YKt&=i)f^Z?eNW;xVm<1w6TimB
zcI0c(Of#YwM1}e@%sd`LMD(PKJmQI%P1jEm2cr8jdUNjU1w)u|q3~{j`yHG(!I>#(
zV<_K3w-&gM;nk-8BY2sVyMh0Rn2rAT#BumN1fEZd(pVNp1jAzw5wVi~*x{{#&t5BH
zGTnbb<j#GdC-E%2B=M{@ml;PV4(x!u4UtI5Z8^?tMa`>HI`z}2=X0Nkd(?|*{1A;y
zqW(ujdJwC?=flhhH^+M#aVL$7d5}*0n)oV>9whR@=7Ez8&O!JY#6!ZI4HHA1Gksl%
z=)U8FLVedW#(LQJpkrY_@*T_R2-XV+&LhC*y$3c>?!sssqaks*n}~s(2<c%Hf{r~u
z!dbUXE)V!)Z>FmyTtQ<qQ`Fp(+?(%G-bMLJS2dR_ENe}*gezz)A4uftsUujtj?7R?
z^t=(Qe|dUdqWg|?M(gUNpxS!xkxnCF5BNy%JS!>TNqlVU3^iA*&3IUzdmG&+(Qi+=
zHF^g5JG9FsEB3cEHMbf0{@rOOJI}ekPOeydPXOKy*d)?xh>wXTgQo;FXkM~}^hwlj
zP~C@C8310@Q%m-t!XB$+HRXV0;Dbl8qpD1e9yFRIZ!VYHScT?!{u}!Z)uSNr<f=sP
zL0X?SHRw4#V2rb?4Ig8i+gv;YDftA*bb6!*k9FSYnEMFDoeYpzv}{8`EYd@X^{`||
z*_V7c@M*-M#OB1W7yv!tNoQ+)e11@2o03&<I>70MhDNl3c@nPJ-K4?ULmWjpnD{-t
z*Au1(h3R9*IhX7989_Dl+T)#%=<(va-s7FMeR<SYqE8s_TpP5GK3_)kB%;E=5Ycd$
zC+L!1eWJ65@1Cvz!@NPqQWs_!w|U_%r8v{LeV-WE-I&OGg(<m|?LJD%>y#@J*Jl|M
zLv2dR+HS_gE5t?JjobXFPRc9bc+oFq3^+Y?5|6<qMpHb|%ebvin8cE&7~rc2ur8Qb
zi12kHf7>JF6b)~tyc%KN^h=qG0KaK9r6Yoq5&ofvaXXL58-x>csJ96n$}`CG4r59U
zbSe_3Q~wM)!@voAAL^sP59?)2oGjK|lX=iFaW5_U&_X=OR<wK=WIGrQC?6$;QC>`b
z9%WW~6Ppk#P!30F3@yf!-wy5q@iQEEr+yncKZ1J`2d61Fqh;P9Oo{Gk+~!v-rZfhh
zK)Wthpi`W4Jv-8j+jlAN>So*)J^dQpv6S<{%>v&LoypYG&g}-&SEHS#l%43kL!Obl
zokIKT@#P}6hBu117~Ff{FW~32D8v7b@*v91;jOqX*Wy!II`Cd<ICt%s%1Oo+W^PO|
z<qYD+bYly<ikRn8jBp+y#1v&1Th{OrF`M@-JfVx>n!LqM`D4m_${BN={9WSvlq;b3
zDLM`!yPhqhiJo*Ld<U^Dy7j@0pni$Ce9a765X8MKoFzrftPCT(De(o8dT>Eda6)pL
z5&kvsHOP;rx$elUB7T#E9Q8lI3?Y7&Vr*e|7{eWH%XSpHg5y>c^AXMI`O|nby5$+t
zk@T`L?%51)8A<vLU~DpD*x!ZMA|Ff~0zDcdODXq9{uKG`$gn(!>5Cnnro~jH{te<X
z@GK!?`0<Hw&fS<A!g$`SppDrqB*M9+#zb|+DA7raTy0}&g5Xg`%!e4}o)Ys1aWTvS
zjMoO2fzk}>XVSq<#2d6!i}*8qc7HLvg1beZFr80Dw)CgS`>^4G$YxPJfU*yUa*^pn
zJU}@#*$9s#|9}o~D~(A7$D`$#w#mkpN#K%c<`$7BDqD_G|1i8LaD9oqwh_Z_dkZVp
z81{+b`_N&{z2!sdmD_s33~s?q=w2xN+vIDK??$XfWGx<koqB$%E1VAnVot#80q-Pv
zAL`xc`hcrMo?CtlKi(1E8eB5<JP?lIWuz^?k?%=4o^lC3AEF$NZx*33ZPBYL)?0If
zW_r3{miuzdDa83$B*qW2n5(R8x0FzS4wFqN@1v2GlwW5s(!qT~<2=fbc^>}9#KG|S
z)Fq}pgTZYu=57`yiDzka1^g$#wWJ&a{}6nBl3~jXu@0CQv|#XOh)*C{g`S@Q+ywD9
zl<Ok40`O(TdA?xI?IzSCwndx}lbI;2=VJIthVaYaBEg9fd6PJld=R=<iFcX0dfxn?
zl?j8W=>p<)y2maf<|0ZwDT<j$i*M7y_uY(eaSogUSDX5o#8S!%Ej&)SA9y`=K~RIh
zE#0W*c|o{{&yd#h3%EKCq=uVB%xhQ>2c<Dx;vJosd=5r)1fC_IPb@*`Q}W`p-9p3T
zs2A4~F<D~BuSGG5$Pzd_9)5KU@{BvC6GI}d360>hWQyrUN5okq;zqHOpT(1k^N@}L
zK8~0;Q7)6@`A$q<X2MY-2Wv|V_2s?&pNeyxVinC#8qenHcoWAw(~e;=zoimAd6OQA
zt6DGz!-2UO^k5_PYzf1~S^Pg`Lh9(AInK3x4)epZDv!Z3J#2^HOE)s765FL1hX+y5
zgUZ7%vORt|)i_+A?ej_UXUO|z7>D~%KM$UGd{K~OWQuL~H{h32&dM+{_58Wcr2`Ki
zqR7Ld!`wAhHgB0v5ziqb9s(o*7f(B0LxxSK8rs#!EJo&O<X$5t!Aql_$Et_L(~W*<
z#$n!<SJ%Vm{d#pHc|C6)kH3;&@*aiS3&lt>{Jh5DPeJl#|KXR2H<OIRP04d1Jlu@(
zR-9A@$4>3AM!83tk;&awy_8~PRt3lFT$#HmzlF0O$TK+)^P8g8Qp%#9z4u`S9B=Ap
z-WB)#`OcZa7fC84Pa$%NBzwqAenY>?bv#oKUf{eCy%gDw)O4n%3v#?Wm?><9z^Fm=
z!r<nsUZtZ!jDH{OvFuRoL_K<;^N#JX-fodIEO-b_i33#^A15Gk1I+vh0{vs)7dcn<
zP?0=MhHX)16=FvuPQrMevGq_MPZKG`ImyQ1--zu|V2h`Y2JfZGMJSJ?Jdd*I6jvI?
zda?5c7P6n_V}e7;G?lu<8RnZ|HoC+)uleCmLGp|`lV2;Kj)nOx3OpmutWMm8gJ+3X
zI2L-?Qr5+XqktCxyGY~`mU)r%5)A2iOPwo%#?r}jYDR%zsiZc<LQ&8&&eAG>{YvKW
z=T|vv`%HOQ`~u2<_!l(~eg1*ZDO+UG=?6aR8`u^W6*RE-3aBxkv)L<7Y5t6X_#Z!H
zH@sQV_15;j4eIQ%^LxTYCI7q?%i`lcijpIK{p=%-WE;QjpW3=b&pmL~^l_*O6Gx4j
znlZMGUM0(3Q-9pYRWl%d%;-S_C#deYRufve<61nbx6HDS&|CSqTz|pa>PxfiHS|qc
z{NCe<KCT9#dBaB!88~iG-te)4=T2&sJ6w@E@k!{!C$)IA$Ypm`wbf{mF>%7!i4(f#
z4IeZ_B{{xbi?*%w$9!E`Eahxo9}c`P4*3<Q5BT%we2-;5kPyESoL`k~M!UYNhrPO9
z>g(d8wF<7n7VZ4W@vjw_9j=657V>>Q$6q{E3a`N5<sgnlm0udjL2Igi-P7*!+>KO}
z!W8~+Qxbo75u#|d{2$Lx|D((WNm<Dk|CxW_d@Kv^54fCc9Lk^OWO#GSi9~_3mLKJi
zl!9qQu86O}d-w~yfAHtA?m~WsWM`o&|J4L&E$_#bhp+zR&-3+0O`B3U$fm3qz|RCy
z9}2$w-vYk3mWOcsYx4Cn0M2M-O14eu&DpY;hn;7|Ur)VkA)n`aYk5~C{)jPOM<F+Z
z9QnLW@#Fds$x#}>kj}w)`M-92Z!LevxWB{K9{{?*RX?#QIh}nJzmEK^Vz~ZdsB<KH
z3}mxHmJNrZOy`S^Drtgz-loi-ROZx4Z~6<e{Pzl6Yk4s9Ac8M351!C>^|B{bjsr@`
z{5Mjs+1p-U&+&5&vwf$((%W9k_Llx$Z+oI`wf?6leXPeiT(xYc^p<_>A^KE@tAn23
z$L`Q~QCoqwltBHW!_`3lppX6OfIzOHLRJpqs-n!k_B%E`r$5v~Id+$RE}Q@HU9kAh
zR}Tqv`Ra=Y|3@)+usuNEl5G#ty@TzMdQPw_M+gMz=K`7Ah7-n@bzA}Zp(^|@q$b7k
z|0U>Sv+aIGAyr*@v{~J?Soc!a0|rCh65vYGLn^y!>(2!6llNvL-pZ~ZJs=R7b3v|b
zsb7V<|2O($`(v)WPeJ0lcA_PhK6aS!nqv<ay$jHD2HR<KFh0zlly&0&LnFauoo56>
zMI+IAc(BW*@6184eDqvpUF;?Xm^wfGW<CCA<S^*8*;@$I7XS0pUDa@8_U=E%v9=yw
zk8v!l=~CtBc?a01=!MxFi9<u-nSyj8^gj<<*k3(%>05#rI9hk<g#mm=!{&(A7F=I_
zOI;it8YuLt0QrvrE<0>GhfOsPuR~1PN@7~;mFv4!`8=7SZ>sNV#BbI*T;El_S|Qu`
z3D@n_nPVYhp4Il~zt(rvuUtb>ygRrVpS#sCvZmg!forLU2YX6=APx8m;j1BEbz~H$
z3>2jnG%;_*by3u*e1#mbDNo5J9D#7SFo=Jmx#`q%zM4&+n^IOMPNsb7@vI8WFT>4x
zw{n0&J-#3)mHFbww-v7120^K{t()`xu};MUk@G0PMML7B5UY);2Q>P@g9jbz=$TJ(
zPgxw|>QNz`4u$HK8@cLwfQY-?e|Q)Ey-jS&AnCm_k`GDmLiP#iUG(XDyo=LU_#Gm>
zi~4-Li+cKEUY><@IZoV)_zOad_6!?*-R?05K{Rk4A<?jCV5k%l^&W&oeKYwbgoNq3
zc(e|<FMoo<rYc@rQ&q`@PvZgP&3{6ZpTGCgu(J10#)5KyuU#2_-DR(ict9Y)j6u48
zBUgP7K=aQ>oMQxrAw<tYM82+N1V&Pi5wrdWj0;nukrC3kz<V(+>Ra%|BOUX5p^b6V
zXe>rYNu!;lQAW#0si(imk!d+=Jb$taEO!bq`@96TkY_hzB&Q>sM^eg0xSsw~BUg3L
zY$&b&rYI_NJQKL+f`A9hF7P*Alx`N8CnlE3G!SE??BA~hD5?rZ{8iQN;II%N_@mHo
zBlSasC5b!7wjfI!Ofl#JIOuOdB?OV47Ln12wEbIzzo%_edRR=>lP1MP?<Gyv;LoJ|
z#KJ(FT@A~|Liu5Ijn0kA4V!lii2QDU<*dW5JO&YPfFR~P&aDfgql~OH76S4XBk*3_
z_pvlIf&<EkDWT=!^wLY&m#su^aOjx21JUh3J(n4!iO}>&U?GD2q+mr1+?39n3K0Li
zAAB2)b7en9`%0q%*B?Rd<EBxt!l<I6)!(8u{BLv0J$3@w(vaYL1|SdxNjg$;AU_U(
z-h>E1eGf_giACE0DSC&)bwe8D9P^6&q~J@Y!JQb41t&&Q;QFH9nY<WDaXt!z3T>!m
zS{@TL%;NAC4&zCRArk`q5a<G@++YagIP$`v$TyG%#lBiA6Aidb{dH;ZVQFyDyTzu#
zjTlS-Ck$Q=gxre;Vj&BMfK7*m#Wp3;A~w`wuoQ!Ca>Af!s5kt0c;yBSxXI*&L6HxU
z21PzeYJNcdRbf!%noEa6)%r2w80X=zD?s5;0K3wFM!hfyw(J<wNQy;lkj3DB45pD2
z1_eJAej>bbgE??h$qR!X0ca>4+5u%q(cRSlD2K3xG+5!CK+|A82GhX_g94XM1IciO
zL9nL5kb^d*jYVs)#o#dv^7u{|^q2w}2#C#7xkD9lTL8kLXrPfaNLhJQiXNiAR2t-3
z=_RFfuK9;Jzm>%pi~uJL3S1`oY19jY;<_p(;kg4grQLN;*;QYFhFBb)#bJjsho(Rl
z0vV9X4JsIHFO8gqdrcZ)H|GVWJW}1y9I2HU=_ri|Tn}_I$qOT5=hs+ttPm%W7O`B5
zkqa1!l179;PXxNdDK}!k?IgLq$)A@-xC?lJDNAh*(@3eGUFj^12wYoqdXo1DC*qbN
zh9n2EXp2}si;=|`=^~8?fp!S=f>Un93pYk`2b2Fn8fhhsRLSaU8o7s&hSCVQ$CUO6
z^+8A&0bDkD5DT%0Wm}A_z(`|hL^RX^em{#56>cLvu(hjkg(kohz0D+h16!z`*2>k`
z(~KG%2ZRQ2{pEm)ySmuSiO3#KLCgFS7ZY)F=FVtd_jCB3$)tqrwJA-}5ED~qn8%NL
zfo)DbXHdXcOkI$@50}0FEjv{7eguEd*seSbP|QpL>`03PA&cGvE}NPAZOS7SvHq6c
zucY@a$bmQco!}3GSAG~2df$?~q92%SZ{P{zThZUzcx@>AI|TTD(_f{KKDL(4=VS4r
z>`NQj7ki`O=8+7hE3LqZt_WN-R)(T2x&l^o#a}6;FM+0?Q&aI1ApO)AV<-I#|9|k4
zgP-c~#Fz_Ss0RZ7L_5`}7h^8kA7B}#wG2}Ya$=Z7t~1_7GTZ+eCj(zWQd9t%NKtW+
zYe~`h)R)N4KPo%Fxm7c>^I>$pCOFY~f$Ks8qogUYWv3Hu)UjwiK7~f;g;*sh8*x~h
zq;Mz%R0PIQUw$CHkn55cy%zbV(qJVp4@uD#)L)VYSsr;wDHDHgYZ{EkU_EfcpuokU
z|D@0m!zR{J93CZvMuIJ3IhG;ZhQS8pgh3$?kHC01<px!_^~no^BHv6J6svce)GVa_
zvNYHZgC?Z7-3R7HIu(ZsK;cjTH5y<IXc`1ub}H<%DRzt4K#Reh81x}03<|y*K5hMF
zP{CjY^1`6VKkVW7i+^H;?JGq$lDHxrvf%TI{4Y~N%puIiU?p(Epui>3z*O>L1jXV|
zj9?)KD_XRkuo&EpK?gZuQ1Fu<X9`Y(R6c|TWIq7Hp#U^5^QZWNaZ)eN+!oCEX7WrC
zXYGG?=Bl6D6n}WY0=nqmkG9vUATCUm^{==Z)^JJ1a1@)DUkh|wxT|jS_M+!<Cz=C8
z-C<LzN_czVkCdOPCcGYS#mg@tt0E!MKw3&je-lzuKng#!DL+X_9FW%Rro&y0dWxJ%
z?j|{L@}9-;Y>olLV4fPgS^fV)PK=w#?c~Zln_T&@g>1DcKTB>!_O*{P<}SU7$KJS3
zX=S@|n-tgY0Fi2(qbPHsR@Mi4?6s;DZm}u1EX=p2=nFmehdi!8yK+aiC}^S3=0g)L
zvN-kzuu2#1ynGpG{3<o7fo+u{_fzB|*&zN#j*~P%<mPa~ETmqXwBl)i0{5<kJDB;-
z`QkAj6NJ+3KP==ikQYmG2(fUpP5IqI9#Td&xevqr)50AKcZr2-z`ZBA^)R+u3>o=o
zvrqbJul=F8Xb#2$Fk<WkCN_r$qF^eQ;glGGm3yF#JWn{c$)?=5Xda)U-$$p${#thB
zd+NpE6FPC|Jgc{yX|Lr`v2jjPnvwck+K4S1TIHpSYdVX~!b+_CAgu^o0un2z7goRu
zD~d|<=Pg<%<eArn+&wH@lokYE%i%9ci$fp|sLEF%<O`DA4!|eo6#9N9Qz)Cca7nf;
zQcdB$0Hv~CuYfKn8*NI7g?UIe+X+ZM&0dks3TiW`eD+j6jg%a?mnByv-^Ma^qM15B
zk`q%$<mPe&SHdlyI_Eanl&j_YWdX>w9R4s4|1}HwNyx8CatP52xzy6|PzyI4?hOlf
z9Na<+w-D}i$@Qzj70O1Bxg?4$iI<?%dgOC+V|CWwt{es^E|-2am^)l9SCbc)OR!}(
z9K;U&C8j7($<YfXKbyVsQF3D52>vqoYpEB<tO;K#+#?=I{*4h3&;ErGw=|L-A@|4_
zIWCO|+|vlH11F4tExWP2X;Y3_w5D2&<YVNdG$Qy*;J*P+81V=v3QkT)@>Kv2<P9WI
z8o4^+PT5`<Bd4Shfm?{s2I#^F*s>c4M!vRaO|cl6fRQuOh~O`JoJ++<h~-0KKt3(W
zX8_!nMv|nF%A2;7?RhcsjWi-~^DweW8Ubq>QPDbU(VA>Al82G+Od}p8KnTo7V2i~_
z4&-ko`7QFlOC!nB$jR7y<~=_TBfF##ft!ZTR%rxm*`ayErWh8j@fIVg9-Mq;I#B|I
zKt2N7EJhTJd@9LB<o}RHQlycVFI6$`?fDq_TpAI$8R)z(jespXH0y23Zj06gjCjOt
zK+M)282LgP5dt0rKA`@uLj!q_B!5Q!o;1=`8o5-lscB>}M!u3p1a2le+tCqo1gvQ!
z2dyv5v^)U<G|@67*%;X;jfh+U0y`~6ypZ=wBgN$Zltxmek!eRdnnqS)<bX6HaI?|*
zSQ-Iq8VOnN;aQ(WY?8&uV2l(?BSK&f0*1wiijjko+m-y!(ny*#QvdM)(?}`Hu6Lvn
zfg6O*r_xB8M>;vT&Zca!h>f%uabe_LX+#JNMqszah=P%=lG}^?FVaZ5G;;9Q-lmaz
z7<o?`5x87*YV46t0GCY?jBL}>qS#y(f#vWA^kuEn;eT1OeF+@;Uz^JceBYWcFF(@o
zzQs$Pr5{1`<3n;xg#gk1Pz3hE`D=>N^arxd^Z+gmZOfNwfW$iSziQN=(H*kUK?vWH
z0|4C%O*zoSX%2vkDX|@#9|i<&I0g=o7jqMA+1$j$N2V5+1_2sn=|M0(*hy0KK;%Xs
zPz<rW2VTgZNF!s&-<C$YNhABG%`uGxW8`CLMBwt!IV_EUEt{Jj#9p(AjkXvG#Ymwv
zA_PVuaMWT%g}a8lIDAPQzBY0shk*I-MlxirO<675^h`teSJ{J1*@MFcd1ep7=)vm%
z#ZU^|7z~_{JpgO=;N0u{<yt9bVk{j9qXX;7iIEh!vGBjPbU;~W^C;^ic^-f}(nyvx
z^3Kt+O-(dL-jqfJZX80Vr4g`Ya~dOWShU7kjI_hZThfT&kB5KOG~y|{O&gFmNb+(3
zccqc;(#ZYI!RCmhV&rXUMBpYO^sO`kwrp~twb7#Wq{T=yMm9?$f<H-IfzO#v%8Ym+
zZ<6Fy0DhB3dPpNRJ7kzfvN5tu8WFfD=$w~Ez?Mxew4SkOjk6d@z{qlPeZ;X50#gyV
zU@@XXeine3Ts@dv5pr^sKY04DPE+qHoAMkyMSo|py+Mr?QX&V5j^*=Huh+#@*W8fh
zEVn7TgyaH=l91tlLzHK2%JUME2c&bkk-s6vGMn<Egp32yMMCPB5Rcfpd4aqDL~PxP
zYjT}uT+8o@`&mrBOim1i$Q>KNLt5)lEVL;vN$zjt+cJ(W@;&7-2^gB%Ig+xAt@A73
z#9l<;n&<GSKx~x64gj$EnTIjormVDxZIlN^EX9La#jlb^gg^@fZc;C<JmTYyb8ugk
z+zPdLoSE|XE>2}BK9d|v?*NgT$BOu8IF)GM+{T4K_FBlpCjRwVn9`0Bm}Mc4hkVN<
zv&knK^K43ig*)8Bjf6YL!kqy3R}1%?2l8x54#3$SIed@F;WHjCHuq2WI5_jbiQyBt
z#2n5;gv989HTO>nT5~O0C*|mMAs<%Tt}GxY4x`{F<*=lqzMdF9m9HGg^CkHq0AC0r
z`mUw+>YnT%yRt~OEmA2tEPkMfwtMjv!&lB+o3hYC|Ju@aHf=vG+ZOz;Ijn2pls9d_
zT`alH$Qz<*^1aK(z@wb}>CMjZRP1C5ER{wCFbpBZR%QgSlE7EUY@4#gB6e6B5mRU}
zySXQClnsL5x6EOl!!MsgUbth)i}Ro@`3~k3Vv5ug;>@A(B*h#O$@v^gU&xfpo&?OY
zDdQ~U5teBX%`}*3;ZB5GQF24z7Z&guf`vQM!c{$xCtJvqAp2R!2INVS{0GJY=`lAY
z@d(~aP?EphTeh2X+Lfu`#0^W}3@&Q^=rASB$MubgDq2%4S_{$glwS&b1MJE)l41@C
zfn5l=5D>#BHc>f{pOWNK@&VFFUumSE-UZXhevC|)Mg;C7bgD=rV9m?3f>wT+mM1`f
z7Rcim3g8~sXOA=@avviQU@;N`d4@Fd4f(3l2p=kVp)0+|v@-WDJ25g)8WFhd=v0$N
zz?w#knJkcVEMoI5Ms8tbkTfC$b|4UBF>-FEO?g6c50bAUjbuwB?HdP}MmAz(h%_Q_
zMd*n4v&5mv_DCl=hz+)g&9fLO#mG=;L<oF<Ky8Z=FWg+oeU-ewG{Ozbi@f6h_!nin
zIgAXKMg;CHbUY79C*tlVCW(UBFpJnMi;?{p87YkjfsF_RTa1Lj9U-|J$UCJG?kZj(
z9og%!X(S&bqsokU0t9dqLiME)z@`ze*QVrI#AaKJ9K*;MX+-ch!w<0-DTF&(a^ENK
zl16};Mr`}4nrn&0PTs7|EOQbdfbW24B#rQ`X(Wd^(#;|^$719xM!HKQg1;3~sKtmE
zZkFVJO1`o*k|T{|doC-cla&lfPiaH|w;|M28Ubt?2|=ufMQpCc$OVk_l12poJ@^k>
zjHqxQliV2cKGKM1pmfr&@>TQLl(M|)BaH~)a)eq)BVbJ<=lI>{y)9zXEJhY%q@Oe*
z_$%OtS&S$c=_|P@wYWvwrIRP5kr{<=m`3hlq`x!*P<bAqR?-Mq(?}skvi}xS@-0SI
zVB~RWMDTU^;T9tX+yOV_2g;8D@RddeNh8l>959WzI5e@+h`_yw(4*1_*s?i-m@173
z%yf&9l^BVaMnvu<`0dJ#aIsLlkmDrz2>=zOk-^f)rOGYMITDN!O&Srnl?b($M!=TM
z5wsG(Pz;DOKT{Adz$%GUaU~C!!WuDFt`W;OOT2#nAA4PK&uh(BNWM*RTWrm+^l2@9
zN+u`n7Q)IaSm{*WC%7s$lFGQAfYAUd%0AJkm%P$re%S}zVf3jhKylv^xK#*6%RUK%
z;*xZ3IxkO1BLd^G7}<!CG-*WS3gO3Cj3_ghZ>a!8?`P2ac-i~m2#c-af7SbQ(`-tH
zZ1gFFE6EPz$_`Xq-O4;D(R3gkoalhSt;RsC>;PE#$^boRV-fROda#Wigp(6PDFoIa
z5O3*$0k^H>&L!_BjSQ7W`j0Bx7o}q4QE5crUPnhm$9!b~aM?UXECP@ijoFNbCP$-u
zZ}VS`Mj@lo&SGMwr2{+ZKnHT71ES$|G@SHz2Rw?GM%zpBvj7~j2g76!>R%5xhccTU
zbOa}cQsCY|C`Aq>*s}SJR-{F%z+z-KMxvw<!G9Bes*R^|<!cxfawh<y_Y3KLqU^nx
z2mjq%2$^D2I>S@+KVM>J7cC{8K_Xd74AI-9$(`M~Je$%*LY@cGwS30?4JiZ?BOxn+
z@arK=1N|+K5jLfvgscXVQSQ}*m^-^1AR$2Pbmvq2&z=1D&h8lN%EsixK@qu9ZmT{1
zJ}An$LA-V$xuxXWF^*zc&S~K#D4%>d#N641v9oIePHasCu5}Ju5}VxF0XBDbIZxP>
zP>a~x@}P)ic^J$3X3~fd2uGl|G$M9(Ubsyq_b&NPDSz+ml30~DC&v{jK;#y%IPMGg
zubrJS(55_WA?I0^)(MQjBNp;x$k`^DOB)U6z-?jSj<Rr*;I_1Ir@$Rx;d(ug!z9`M
z5RdNU@Qskew|D%z=FV;>yQ|jV#PA7R8okd!NbIh_n!Bq)Dq2~zPRY^hLVhv3t3Yz%
zFbaM;d<IPMFon&Xoq~}7Nv;gwD`{k;G;%ooHPc8iKdn?<8WFf|2o07-z?N+>F;dN<
zb=qQNCq`;WBk(;+CIYz#lpD!`93;sP(ZDWgBu^TN*}Ti#<^@-?E48E%fqN94VbTa#
zbDL-Mw<$F(T1TXjPUKgz&8s7B+hRzBKwAVj5&qieg>ay1OL9~4om2jvP^nC)y5z)!
z61jyOS5C#hwt0n2kcTYX(Uv)p%ABZI&h<c^3VF07hu}<w9Bd(vv5>n#Zct8!`xM-<
z7H$sQ`jWege0_S%Wl+4M;w30O+uSVsWQafFV*}?A*GK`p#5HalLX3v_LW_C7>_>$~
zOqatK3MP+jo}HW+J;A>We*$oE7{xYEoFYDwyNvup(#U9OWb%j#;#%!d@^DfCpfDnU
zXAqhsjQ}<;)`b}HwTLaZ7)ixQC22(P&%)<2`<D>|ZbivGM81JEGR8FGVV|<Z&s?7E
z#E3&05x}bmJtd6*F1zg^=4TOm!D3_rMx4@!;Qt6e-(tiI*I#m9AzxP*p?zhn9FkW)
zykQ#2$4F&qL;$}<h-Hv@Xuz6A6vSK>u@x30Jup&L8WH^O;Cn4bLf}@BT+ep^LgbJ<
zDUH-K%-5xr{TTU48WFf_2o*>pU`-=luA?`ln83VfF_MpwU!)O{yAGd+Cx0E9!oEDb
z{#laW1Q0BZjFU!ulrZy#z8E97r4fPq9w8nFnIi($G@|z59S@7va~31n7`Y>j2>yBa
z^Gzcj`A`D#uabNnKtpMSv&)OV@<*+f=Ak)@k>8{dfxC&&LTLo7Y2+MtrMnibmn=qR
zW8@EMMDTwS53d(nhD5>0?~?o;fO^u%1ZgDcwPU7{l^FR`8WFe)=q!;&z?w!1F>=qM
z^}NN%V2nH<Cl+-9Lf|3-&sdBYknc-!8}ikp5w1mE^p*Cr%=1?%Wv%tSG$L@Hp~K^5
z^U#1bjpU$p&Z4#GZzCSXg_9ph3L`>bHv%gxM!b;EOLAxO)uoY1(#ZBv^UOJN4<i?)
z5rNx-&hyd;Skp)dS{E!@i>+_w1!3fpG$I7PK;Q+75fyTYBqx&(l15l8dC^xkC6|4U
z>Eh5_kwygWD|B9xM!=TMkzTwJBD6g8T7aJZdq|XEjQl8#h}>QTR$7cG7`ZBq^dw(H
z8X=fQ7JgawF<vl6u1O;Tw;!EXr4g{E5l<mbN-bhbEJi{xazh#s0tXN%v=}kqUYFc~
z<ZDVJQ>BsDUAN518itX>(ulwnqqDlqh{t@hPTXvA5IbZMTWT>9hLNMvh!8l0z*>tD
zFWe)NJBoZQY2+zsWc`hH<{XK}$Z=^z;Ep`roo$9362N7X1hHclv1cqs+F|6RG$I6!
zV&n~r5f$zUJ+?2O3ZDYT%`o5%*4&wL&0W4z-T(!r`NfLPJXu<cG3d!lA(FQVUU$hG
zPL5@(c>m;o%L{>bMp~YVt=dz#v5MRNG&ulgIy^9sL@EPt8lbq@3gB^Eye<0=wrp;)
zEdRzLw#?H1X!?JaoH$tp{{(#A?fUDAuF(JQOfH`a&jC<J9*KNuWY7H{O(WSD`BoYc
zxUUg<M;ZZZ8Y#rcE@?zyp0yZBz=$D@h}<do{8qugj2Irsp8*huU>if(Qx0YM2KB!h
zN+pvQNhEJOyk3$ghX21i1pGpE<#Twz0yLlFOZ5w*?Ll=|A1b@0nt@s$sWwEP-cRoG
z3f(s43klf+q+j_l{~IE<-+LrvKal?AeK8^CJC`8|Hsvb`IRfPI4E}DotC6|A3yI@3
z5^`dD_g*lcYgx8;XIW3~BWK<Q$oVpkJ1yfVehhlA<bFp!f@2`IchlwZ+;uS5+}@?K
zy*mI->`zQw4iDVRw|9We?VUG@w@55vo8$rQL_V7BU9mJG1ll8DNF!o<R|xl@<o-xr
zwXDXwu^NAeoLG&E+#+tppThlXdv~rAZ;e>UV=XJ{9*n@d7V<R6yG=3+BO3O?-D=@J
zY2o&U`<{iH4|k7+TNng+n<W2=v%O;Y^uup)<IQIyRYXdRm`HV}!(YM`JGvQs#qecx
z<Sh{k{fr#2F62ttNPS37967=7k%K&Zu>e>33Zdx_Bv*ViwND;7PuY?4C{1Q>HFtDp
zF|tD%5x~a~+Aoa&HjRW}WV=P|ti?z$f35c;X+-dQ!9Qp*;)T0Yax0Vn)HI@>-)OJq
z$!8PyiELZsBH<pAZF^}ufv<DzZOX?M{!!Vsm^B;O#Jwil7W|GmT-0UTV%De~Y)YZz
z)*&BnnKeCpx%;jmDW;7`F6QVRgDg(k`g|D?yyRgakFy;4Y>xcv7VdPoCnPrnZVueF
z7VdZpHwW%|lgmX?G&}?H*A}uD@;XW0M!pq27FY9`@}#@-O=I&Sx{<e@-vlSF<^oqM
zhj%*AVM++Bc@b66dc&f%UXETU`BD#8(YHv7IV1#XBXAZ0F^9w^E(G!hNuEu<g)~wi
zjdU1Q%e=4VVdQOTMBtq0d@GHBHH{eUc&EdnwFWJZ4@u!<KSnl7BSOH1z&VSNbC5Sl
z@@M2*OCz(Sk<K44G)H77MwUq<0{0L)=cN&_rV%e%&y;ELRvn->EJJb&Bg>@`k*kZq
z1&fhF$j?e6&yasa8ksGPjQXR{G?I^z=cEyVtAb96Gy>K%64H)G$}22lYb{2OVMLck
zgg{jUE?bPK7<pcDkB|?SM&?K(*Jfs$M)qUmMQKFf>Y;O08kr+bZ?X0&Y|E=17O^)i
zM($zcWobkR)JLGyV#I*^lH{%=A0~~=l}3K}bgyY-F-BhbzZg3Y_^7I;jh_txM2ew?
z&PM4S1gXjm(os~JbQ7faE=_hLRf-^jQ3Om76tNfVXrkB)Dq=5*q9R~#sG#5fx%X^x
zv+VMI{v0-$GtbO3bLzdjdt-^nt~`E1KUgLhyF}7>KV+-V*)AWEPa$%<B_anEaPX6l
zNEqyPi_NFKl_j#$64|i6zf0s(h-7#~LP;_V;^-Gk1Y?(o-k1sdobB-uNo1>9YKaJ+
z0{%B2ks#RXEOs~T%`A~smdI1TZgz=mEz6sR%RMGZGOUE7KP(Z9T_QQH_~?hv*_}Qj
zhpEU)OGNm};7|C7<bhpbv2|%b*Altj68Ssy!YP-@aZY2`TOu<2^(sF1VToYu5((pM
zmCxC2J|b%&vc?h-{x|R^eMGXruC~~gw4Y~*gjQQ7clT=NUBp3Toh2f}KXCM~C4#L>
z#KGBGpR=t#BAXzx-Vzc1Pw<9CCbFQVfxW?EJJ;fnT-P#LV~PA%YQI~NG7OTL$GxQ$
zL-QAo5^$uY6<cqN;A{pq#aKOMv&~0jD@10~rj=D~e}hl-5eb5wb=<yb*b9StmdIL5
zq{LspxI}6~WUeJ5yOTKLL76TQY`rnkk`IPhBC^@;BeD}B*H|KII|ZHxe|Z%N;UEw2
zJPRI(L48YPoh6bs(fjgmQ;00EL}d3Lj?T73u=U0WzUKRU-R>iDFGLnuBEp+A=6{(8
z6MoMEywHNvFlb<j++c|u>*x*E&JelQ5|Ld3j>=gg*m`3GUyFUdvV24yg-FOFQawoy
zis2y1M<fmKlFU})xQI)~j<J*UAxG-;$l0A3x)P`Wgc=lF#Bq@GKHA$nVA3q^b8uHj
z4)p&6=YYG~a-IUMhLMZ7^|k<|&c4qb5|>bb(HLsmmtpZVroJrzwr&CR4mE$H&>fqy
z$@UfC8456lHjPcSl>ndY%RdNqyo=?0G9QCRwkI}NBFjJWCdmkhjI%^!cNUJSSt8iF
zL~>dL%tT8>HamPo_CsWXC8D-c;A{AZ<b?oF#z0+gm{N|lr5uU#Gj)N}n9q+`+);4j
zEl!v-;ld39W-2($l6vYQZtKJ{RVz=io_@m9MC)m6W?+)Nfa4Eao9Wi%4@@Ro6aG3)
z#3ZXGA0x3QCo!2C$)nIDr#jEyuqFxR`JixwvTNdgcP_6QpC7?Q>&@;u+`;r+#~tQO
zIe<3xsoH8XGW8Uy$if$_#OFsW_FdX9pc~3$=0CxO$P_N!2GPX$O;S@QdV%M`x)&lh
zQi%>oRs}vW;)6e6dr@cbOV|#tw4sIX48EZatuuHI*ufV2Iqg0A6uyr#L}&1!H0lhl
z#&sNp8v~A<!SzY2AwKXN-#K~=IS=<?LtuH}t&0s3IIBFL5b<H>`mhtgj&!k%aRm+o
zZter;03Kn%-_ibt3Wn2-b`Esk?Oi=}=1(SEg`KJ=yP;{E#qgucIBeakr@SOSA>#A(
znXO(I+Aj%l`IJhddQJ|8;h;4R)N_smB8~<BO8c9Z$W4~W)2qEV=C(rQVoOAJBk<G4
z62aCb;y|RU&)4Vpa?jvhFjx$c?v{ugjKskOJ|bCwyIHUaa;Mi4*<^_{T@$$IV*Gr{
zHC<0jM0OY9r@bYDt$R(UPxAHfeDNd{jNY>)(SUk}i@M&Hh}ycO@ougqq5+i#xR)hT
zhW6gR0X2eK0exxHcv9O9bZcj@k@1vQhL3>wu=D1IywOO)W(tGiQXe=2@I@AkFOvrN
z5+C>)-+-C{_A(##I<Srp8wT6YVv}+9mQ8r`K+2~F>FB$rtmr0;oTM*XVXQGB;}6sL
zI;E|^&6xOY!=(bIrO)y8tO|T2_ZfXF7Eb5Wq_HVC>*3T5H)S(VJyR^XV-CpH7Ma45
zGnr;u_0>g}ksjHjVVuW9*V<yo@ONZ4Wp-x2>1C>9j@ehN3Rf^C(~?TG@ew!>$qZ;`
zy0RT@kwiX8<0Bn~iI6d$=<s1zi#)hY<QC+3Nj}LzdnAJmP~<7z?(CAytrPesjFa{l
z!PU_dg)Z#t^|z|o(+1m$nZCrT{DP8q0;Us~;*>_1Zy{X{eMfY9m9Fd(@3I^6=EBVM
z8D)~ob*A|o8>U&G%oQ`r)E}AiHXqaQxjKp~mF*&*t8%!~geDJ@f}TCVrTQ)tPdeo+
zs;znL$7>s2p=P#amVd5Tr76Q-8XL3`+=m1aL5TCvX*0l^N#Gw5*U#r#y`I-`*Bq}?
zxBEI3jH$}lsD~XUMm~>>_k5Pkykb_F8f9<Cyf;^6OKNp!pOyL0%reQnGY&YWuJ!yq
zu!{?ai+O_~(ttJ`Ii>lh7j3G6*z_l#0SI+70VF%K{H!unE4}5I`d<8?p1gF7_~kOM
zpIN5HL}HsP89OGNPtZozadpi4;8<yj_Mm9T7L)qAV;X`{-|9(0zfvw0xrq%Y-PeRW
zC-Xj14f(^R5U#NecM`K6gcI+I$dz%zWspfTFxa@cU&C^wyminR2u7im_YOM*gZ!+a
zsk?PekMkrf0hQZ)fXa)Td1i1?pb5Un!#QkDM=<(>oHU@C+nl8PlALg3E(6<@*KWw=
z{CowX!Q@XgK%(W{L_@|=Z+0V7fvY0J0Tk&mbn1*;GEK6Xsyg!^tjch7Gef}mGBpm_
zRkVbk!1gZ)Yur_^gr$2Q!j&?wpG_y+L2(*#W8|mD+IP&tEWt538o23%tPDhb{yhBF
zGLb6rb7iCwdVWrle&JLvNBnqCP2=1ld@?qKb!`UB@}>goo0-CZtwLKFY7f4(8Azj?
z)<CWzmo!_noe)d9nnFHzu!GtH)FB&E)iNKtrp$RGtKtKg^D!J+O!8cG$7~ev(bh$T
zQ-YflkHKMl)&QsRsiz$MLuoKI)zR^*`{bjKC{2~jk8Ej77R?DRJkjL8=9pTTswA%v
zb{!?D!q3S_P4p^>^owJbB+=BtMrq4t@GK8k7wlL#__B^Xd~R|$0N>hxlxH5GR^+K*
zJ&-YR^DJ&MUU5tSoW{3?*j$E-hF5K51UUb22{T;G@yv2J@}5O8FeKEHelw%Tc@t?U
z_7joF$v*)behtc|afpBTPd^t&ue$0XJ)s5O#7jg+v4X$D{Tzx_9lMg~uup0aFa3uv
zcNhnCAT2>72|5Dgj`0BJVZcXb%p@$+_*sM^3G8BwXmbgp*US#T95AKuL9!ZLCS#&E
zu5@@w2baj}PDDg<Nc=2pBMGOEyV_d;_!C}k##%-srF}+MQx#3Rk=6$PuFculpmM9y
z#8!phO<D9BS$f>nr;My%X0tior1Ooe5+?su$CR<ApJMt2ruBepLFv!yK*YbG+`;cz
zNP;^uOPXBp=lE!jfu=fpf~7f@pOs2EkLI~9&!(2AmdKxNaBW=A61-97M|0tMCnc@E
zkW*w?|Kf0#$tK=UK#J3mdP30Eba*M;{EAK;{wuXlqQmQ;`vsl)RJ!NJc)H%`BYn!4
zcFXDOQ<FtM{(ihzIk$Ft=2R}6nq{t=SEfciIQeofWisIMo29P@JnhG{WoD@b9(|wb
z*(M#+KRt@(E$G|;#USodkU!9C0!zOn<^(Qb{=!BRST;judAOcnXH(`hgmSpU%Pv=;
za5@w51Rzb|ZvdT7vI^D<nF=8GLRH!X!2JVG_0_|Y%28j9@4wOUYf!qE$@6@48Y2^U
zor3ts$er9|^JQPgq>}w99H@hShD>>~KNrwRbm}1K_R&Go^+h5b#8`3%QfXc_R(2sd
zbkKj6pdOY~9)cy&|7!_K_u3E`UVu^zr|~)iDaz1Ku&haeV7_OL$zX$dfS)~R%F
z;<H0$%A(VHx10$8|H{I%Fs%$+J@Tc6OZPVLF6cyQ9<OWwXP|VB$)+Q|29H_N7p-2~
zQjEb~j9;ij=IhgVG%`;iPAq>I3$#g^J;UbkL*}3ti`5x)n62(x+oZxpPKx^Z8}!vE
z`f*&(y~4X=a|!y%5L<*SMy^HF>vwCF<G-^{bnUz+GjJCV5&aIh7CCJt*RcrIrTJ21
z9FRm_cOvd0#~YhRu%*w<d)Th0&+GAXedHK=Y?7o8#~kq`-TG<5xg(lx7onp%I%ygR
zIBb^ZtPe(Ef~O%Wx(Gye?No~RJ;5{!j)Q4C$-`WZ%{1&|&w?bA|FUC_Tk?N_s{r{1
zU>fR__ahCexf(Xl$XxOYmqb5t4f+Fm0z1;*%!q3UU>%Y{phmndvjp#jZ%vlACMDW;
zj1bMvb<8`K=tl58Au0_0W2XCvMk<ovs3PS}Iz-=ARaE8xJz5c_jrq9=a?lc$eiwxB
zW9~tIgY3JOtlI9RP4+h+BHNVLm6q&%kbV6|K8Etc3im9O;0CT>GM;tJA$$-w_;&^7
zdnE!3_#R5FW)sNb?a5x3L|8NYa<p83V1xgS%`F5MZat#ss7F<*pexg<(EG=W-9#6=
zPuNOkj%mDogs<|%cb}?tkV$qiN=-5<Y!)N&=Y2(!!ua?Q2O1x@K&CfkZw976G7}$>
z@exTN!OgysNu}(cfD;*pf(bQ(jehtTrAm@OSu0bK=IA~{r*R?OZnBl`CPdjrmD<fV
z!3|Q`q*CHfeL)%%1Bm&;r)z?as)o$xHt;`WQH8)QFw8~Ag+<adbnnLOinC)(vobMs
zWlavgl!W|mAqn~4gxmn5Mq2NEqr0Hb@^c*X0zj2_JHWn_w<Vwf$Si=d%Ui{yP~PWl
zLRtZnNvIW=7cB|t_CP|q&BzV9p%uTB$tFN4_gjZ2AyO5GtpUDl9ZI(qhtl1Qtar1D
zom2)%y#!84?F4rzNrlb_G!W6_@?&RJ)da}uRZAj~45~q*4VYIf3F+>IgmjsRNW@R7
zn#tJbnAbgjq2MW)RL5akfUjAH(rw3~bhjWI&X83qS-lBPS?vLL8Chuu9fZt}n^bj^
z|B_?g(3@Hs^~K4c1|-@6I$%jicRwVgyA=_M_*vC3sbuw*msRjzh}6VidvFh-tHfXq
zqx&#=Ff;ygbd%sg8&tq7f(p1ra)!)vpL5J3;FS4;;I1I^4qyf&3*u&8)8vx*!<J~M
z1X<RCX-7bNEeYu!goJck5s`?Wc`Xwl^G7|0rW)AV`0E6&Gx9ik)|VvdU!PvMl&@te
ze@yE?mj;d+-=I-|Ymq4g)UL(f$a9y<`&r8`y^7uJ^0zuvN1w*$mY%d3)B;<FfES{F
z!UmJ>pfA`4f;~-}VyW#t+GM|6^IyTViDcPe4-o9RjVwZYXS%_H1bfN``;RgQ33d_o
z`_R=SvlZw*j_y~tThEEHAXRjAcYt?yP_O@a01eKxN-4k&O0NLhkR6bUyRkgynCHN$
z{~rT4fd20S=1OECxY*OVw#ladpS1~vN>Yv#GI9WATN2Ve3<>GBBO(#M|Ld6CXB?Ad
z9sUWCbMco7?hbUxxRLIKF`jNX`p7ByTCM-ObS#ZhC%j#VJ`A_RI@1lC^KjNxZI+yL
zJ0U0C4&)B-@hX(N&oMi}sY1^{Vh|O&7)%<n2HfZb>d#i_K6y?3k>KuNL6iNWWA3sf
z&LUu4NOY5(B_Z9TkdRJ&DH8E3lwwk;(A{8l{?y%<<rBTf%?%;G*$Ft&I@0LQ;{duF
zV}4H3EsXbK9yE&=9Z2qgD{bzvL4;dDkRJFOiQE?zL!bQHgKbERbZxd2<DM9%BXg|r
zPJVU?aX;@q>sXg$E3xfG+Zg2jD92%J@5Qzneh*WnsT>4*V>cU_fo;@5eUwh>?(^ol
znFRBI4Wd)>GJ^D>jf0W7E{aeNLGHIf^zhBG1nG<2GK9YtSr{ZU^<8qj5mPP-TI#~#
z<x#wih1n9!O7R*A@9=@#4cMU$%C4DuhW|7yZ$i&5VXC9A$4I!8pZg&j(bpnR>EB{W
zk!}T|P2IPAh1|#3sgOA@@UR@uS+F*s`Z&7`{U+;3y7zG;-Ad~yYH4yiB#fU-Fbv%!
zk=^JWrp(Rgv?S?@;lqMaLniK8;|`IeBIBLxa&#K^&jC+Y$=r%FjSe|mMQ2<NA%?u+
z@1PeMu4#0}E!Y+#u&w|up5?9bTd<wL<=kx{8q45FjooOZv)r!%K8d4e5CwmbMYr1s
zx@F*zf;*Ri7ag<3Vs*yt0k%K3Bxar}iY+|2?RU&JFsgzMZUJU|==AKW#K^d63|)1z
z)e?}iUJw{S@HxnHaR}r>;C74AA*;_U*0_NHu0e9*fU*H?w;-KiE+LT4FAI^}IG7AD
z8DJP!N!tAr$@KI@Yoeq7@5E8z7^rQ+Hkg3@2&N+gW6``ACs;bcmRgLqn#;jxo5?_4
zii1f3a~+s+nn@{)(gs8r!p}pI<uqu(NdE~V9v|)%-X?bHWwdEpsqItRWY1P%?!`#6
z;Z<I>UOvQ_>AjkpF*|N@H|j8J+DZjv3fBYMka)we3$3uhr2NVk>=uGuZ-c4rFl_=X
zM-)tR_;nlXVS?Sv`f{>H`;L^{I0Ree1v9^aZA38bZH!^F2EEol>94(l``uD*M0Vq>
zU;)g`Kg8O!+WI&b+s63NVm%63XL(8gIlQD>fjoeVs3Lf4G)ux-&sVTMm?rqT3j6ie
zk96PQM>;Jn@<V<he}b-~EF9ecNJm!fnI&S5tDu?ry*I06(|?w?r>?^B+c@^k>Vlb>
z{(@s>fYCx)12Rn^n69xo8pUiH)a7b;8<x_oK_2v2`VWFG$si<WdCr1$z%;|z)!5Iq
zex&;lKhmwWeqt@8c_B_4b1bX#05!*vwzaXyHP)H*c{r0!q#lA)tS)dMHP4c20Hy`L
z#$ms}`jPGkexwsA`3XghF*iGdqxqIpO+frHTz4Cs0BDhQCjGlOlWqg@NSsP#k>Em0
zDhQ?(zF5r7wVt1lBjv|9lJZ9DC|0S`aJ1NxDg|a4ju;wxV!a8W*KC%4FM9-B+}V7B
z3AThbmL5lKkI^Rkb;v%9nyUXl<OeNWk1)eNV20K1<lV3dIJ}9M>zH&eXs|pE787&|
zwrMt?bT9e>o+RMaHlW&GqD}TU*no|A{n*BPlz81j%-Q!J|GHq$NqyciqrHIUY#b~n
zp!S<-$XM%G`X_KK-FoC%H<KcX6pZ=|5*Xw83swMkE&eFB8E<`sq<jTuQr={pMX@Y!
zmh!w~##wT8!L`O2P%{&mh+Z>D`Y&-N-3H`o$i*70IdtcQ@4fqP!G?g&#~IVuOty}s
zJBlOeG%n;Q)?f|eXc8D5uyo+d9pxSN9>%^DxVw=(Ncd4cu7(}zpzNB{^;&=26V?<O
z7z0UaxYizKrlZqhQ5xL}$Z6M|i%dhW0VVxYq%Ykj<as<d<<&Qy3Mv<%8?NyD1@#u+
z68z1>_HvC}Ig|1=oJqOaI*Zp0`8XS3$&~@O5@*`o=Og{CKk1*rpL91PFT^1i3NXsE
za~w0sva19zghPs823lv*y@507GOe?C<WeDbr6pGu+$wx61hWVkZ2d|9EdJ1i-1{s0
z<FFICbjS_0<SGLU<8U#~hFE9Py@fOBZbjrP9=R0A4F^|@uI>Qt*!0YP`P_uw%CsGZ
zzBWA}{c*;fcKq9r+wu1r@-3oW|1l^yS8+wt_<+0XJNx-VQXW534(#>t<A04TGs1F|
zJ_nA{Z9!hfmv5+qwDuH?mHg)%bCu^XSOs7P-omtxvd*M?8)wpOMdU1A!nrw)Nwwt8
z0k;}oIx}5|IM&}e6`}P!4yD|Nyn;g?yCUQ=Aa}7PmyE;faJUpqSL;l=ckw0NcIzu1
zxfIBC2dD4#x720S)1$mgs7E1C3h>>Crf$w&K1_!p>Y(gesE+){m4fMsUftOn{T*~0
zHfAp}pDyh|TWu0m+fLe+qrV-=#^?>?2Si<(c_g1duBua&zRcF!qAmpW11Oz(vXIBH
zehc~5q8JF?ZkfX;?7(RK5W6(He)P3|E=6}IepX_ijXVzi01}D*xaTKivYzAPc7U|#
zBmr7OZFQo#9=Q~~)(PqNQ(x(JAn)KPUSH+!bIc{4zhG5xOYyfF%w^V>bnoFyx;r#B
z<*X6E_;#p*QJG4|_XDVUrULwljMiYg6L}Zhdq{L;f+W<+Vy*zQ8_ZhlcOeI{KZF!i
zL4E3W0ic$E)TxaC9fBctZl@Y9=gz0C4kXm}K5g2|cO%O&`Uv?0p)1T2%#bduJY#pX
zbKfjVa7>QgD9wJ>F|F|t#78T9%&O&W&mlQk1^AOf2VV9JKz-b&tqt55(1!$GkG)3d
zYV1Ene1TnDf*bHWlTN@kU{$&9V1J<s8^GO(ybtbkgvy27p%@udd3+zVodpd7v<HxO
z!Ml(T0DOTIln$LLmjmVkFxm-q^nY`dck0()`z=Mg$Z9&}Jrpn<utOb`T}zc-{jd4d
zo(7F%H9TQQawl{_*tbSEn{wTRot&-1!)J&sx35~2O-C^5rY2xMrdrw|75ql*^O4i4
z6>`f}*Q8Lc3vIyj0nZ_zc0_@10{CUjfXSw_4cHXSrv%ilcMq}&`>$dJbmP@C+4~&R
z1#Bn?+ctpb5>UIGz&8Ut95Y}wbCC_$49w>QyaoGvkxcBrjvKJPNhe^6O|UK4dBkJm
z^8RSStpLA?8L);4+JMag<q>cT!5%<v!~VOt0UMYU0-gu9Cf%ytt={DYJDGV>`$&1?
z1l9jq3$>l;RjU$>?uw}2H89=ZS?$innWkq)oc+rAjMy%jolMVqw58xiZNJf`E%ZKw
zCc|Gfb8ofs;`$2n<>|WE2eH2(1mGG<v6V(;wGFRFk<$lPEz<x{9Y8I>d`Ts?VgE3)
z3;ScpX@kq{>xL%(8OPMO0o#Gq_b9eoZ0Ih4$72SpV;TWc#(e<&<I8wAX*Hxx8LRCi
zZMWl48S|M@!*@Q4WSm9DO|1Pz*#Cij7Pj}R|4U*VT_SzIyfFavoi>u(%e-!ibdUB7
zfA?s~Ez*7b-pmH<2mW6IW_tnMH((11(*Nq;AOV{KP<dMdI81qWpnn9po9g_6#4c|m
zlTLYCfDM9e4|W0Ny~ASf0rx9%dUvLn<|0UZrz@ZnFx&~~L4+?$oK}&Xr}<-PHlzO7
z>(1^jYtM)2Pb+yI_DR^Qf?9qX79IUYE3GOPij-8#=;<uaH5F~p9ys|Ml3KqXLilR=
zX(Yo0tzhj3W1j#?E#0!`n**nj%z8QyGL-?)mNX22{-av%2iF>Ml&NGhxQVtVlVg>!
zLJZR9#n4qY$@n^#YRlJRE+Vz@y`lN9_eE;KpgK|;iBusAgMeegDNi}3iUnr^UIJMA
zzyTzJtphF?1DpG#W2)ICZUfsoItk+E=tD7)pqzG~BVBR~T|HCP5|OhoM6^G=gG7j&
z2Z_2dh-5$Em>QPI?O@wkA|Vb8z8$PujG*;n=xUhimWZ5P2NCTX2Yp29L83tnBB>Cm
zWwCcur2fkQYXdpt12+WNC<ZtHxTeiwB7t{LD!R~UX)W#g@usyF9{=xJSL%RIW*i9L
zXBzd_s>NN2f)X|*a5E$V+(BC!BfGW@JQ?79QGvC7oZf{EV(56^))J7POG0FSKLK=j
zkMv=4g6os5egXyp2B1dpT>w{r(|+-R57!DzYa~J>i?#p*IQwzO6tm=~f_>0OKBUzz
z2j`Js`aLh|_b|UFSk`jTkKkIUv^z#fw<YieNW_n-miH7@v4N+9+v^LgI9Zzi5w?XR
zzhMkr9g}DY$kF8x(4wNnD?*?h!8=3|$bvvgaH`rp;8uZq7|h27IQqXMpiW4HNEVF&
z%9j6_V@d$7&f@d{;2x6;Z3@XYD!>)$QcNiuRJhdyeFUGM76j#uX5w{GOqOMx*-aLr
zpcw=`3ova-3jv=$%v%wkV!G=qKzyLgsfO(wR>#uTZUJ`sF9CTvv(+9xb$2#R=aIBr
zv>fk^Z-r@r-&CY466vXDaIM1-2WS(N(A#A#elhq~;59s<V9t+?H)#~QjKwUe$Zhl1
zfHXXCZraBIg`eb430hECxq?xTK8tjYg9(BumwD~|WzGxyoSDr_=qEf>@g|I!G|sE$
z#h`m4WAW6B*I%(IM`lZj(q@UbHvEE4>wt6z#(26M>?7kT%+K-@Dbqe*nbej;%D?MY
zUL^QLf~!W|Z19Qvtl+=d;L8crmf%mL|I-GS?j3?lmy1M#XYjN9_=1}>QvU;7B;Ew#
zJ);e*yA3!JkOKZ?1FppR1q6h-Ibj1zcaVV6J&!~JUdPY!;}2*E_>V8(cmk?#df0dx
zhrM_{f%zLF249dLO)CiLKb9+V$`ZMrpzR>?9J-U1h;)Y_BHar}gve5URscVdyeIe!
zFF0nX`|-KO3q1e-qEq}N?dc=u@B`qxkh;i`;yiT@1Acb*{LKU0ZPopAPrU&xg7Kxi
z>a(k*uqU%HFJ{VzT`p2}^2k2V;<eju1FtT65$RKi5BVLqWle+aR~E<)XLbR69?(lj
z-$LMNxSYp%ik-!2vAPf33t(PGE{TgP$=dN)z<ix~?L%dfLx;g*nzRZ`<U_pr`CF@3
zq1M_$QTZ@UX2P~o9aONNDA+gH)w2bgJCw^H^sgZOaM~YHu?Fxu5Gjvdy@$PVm<-=(
zG6<|{T?<&1dKJ0se}Ho+*by681zQeQ#d;07{C{ABV2@esJ-Vg04zMcrI&#JT02?ZP
z)K{7Ox!xk0LuDe1tp@U(5<DdjyZW{=)yI2Pd4m{3i7^OKWzHtP#)$jn%S3(-BZkr(
zw_sJK24Gc1r#68HN5K59@f9YcAFaKrQ4@Pr;*hn!GRi)WBz~~=A&r|_0F?d57BD0V
zki&3!2LS!oQsd$11RaEVvlu(|^Dsnxd<7X5N6**|r+mp3d!O4nRpNJMYK}fMoc!dj
z;ZMIf+`Nr17FY93MLw$9o@xFjzr2MUq(Rpoj}qkB?w+$3tg}nEebRA%X75K>z%|Aw
zat<$UJWSkiCmol8=3VOs|K{x}o=w>zHmBnfz=^xxj&#f+AEo^sr4sHW+n@RKBkY?I
zE~H<Ci<ff~ll)-i;SaYRZr;OBkkKGcfA;j!{Rqre0Mih~evwpje&xNQBg;EQ{Hksr
z_^^Nbu<1N=!i+8i7b(K~KF}#2=xXf8#K5QkANer)<dFJAB*$VuE(V79jm?LEG%|Fh
z^F1R&S2^cm9A{*>-?a03uZj5tthU*GU^_4n-Uj<IIt_xo=tlP_T2-%yn-o@B^GC`A
z-aJzYUqB+I;|bg5Gn<hD9Hj~h@HsLbg1)jBVDu*3r#{U0KFm}w(bWz`7=7Ub{p15p
z0~EX5P0Z&$%&$JobTF~Y-NfXAQMI)&{4|07*Q!t-fY!|6NYq4h%k2%urY3pM-eLD1
z8fJ2EqOw1Wla7@AU3|QNPGx@#-NYDW*ZuS83iB+!)0I45*F5iYHa&*3Vo}aKg3V0w
zopXon-aITupl0Spd<L08BK;ZF66r70|8vPYc3(C%`+cCi2nftPFwuSKb!AiYvJdl>
z4|5He=&p48Um3sT1AXHI%?A{_FPoZI!Kg2FMf@u>=ODxyV}GWu6iKc&%`0H(!cg!z
zz@6yB_W-|+QhoS1x>YgCo&Yeq4|$e+5uMi+%}sJv<zYM84l}RW#1!Xk5>uQ!WC29e
z_+Fz6X65otp=RbyALyVDv=C72vNtnt_%QGLFkvv!W%s6hGjky11AXKJWdMp@_GacS
zFe<zD%0DQ(_Q@vL>*O#3i8X_~vNtymfz`~(0o$3Xhu#PLAWD_}36vkRY(|%uC+MH1
zONgN>hVBuYkOI6wLh_%FTnDijgFKX@uX{Y~1HI$}tpXIg+|A8iALdmb=6W!(%iY{O
z>chO@V(5RdTMa07xtp8E0jb<~k<nk2TQ?xuV1K4^dxNlrN#1&B==R@-n#b^=vghLC
zLdyOjKAuEZllVzbqTJj4^s;mDdRp1}!ux5u1awcxnf3K_3}5`A1Q(})t|N^%9vXX}
zS$qM7HXV<3OOt%_+@ZH@9%`P(X^<65tRJIXV$HG0Y#{p>eVHS|Px&xU`Y<<wiP4pz
z9A&i62YSW_+6X9iU$!vM_%P4;FgJmT-IXoOb6{vrdXa?mUkP1I*5ZejiTq7f8XJ)9
zej3}4^9~MQ)3yo37Nj^ozr+cL7yb#i^OrFjtj35=qbC_7I*DEgM(6R7NZctI;_E(H
zfHgRt2YeBO;}gJlpi_T7h3@_s{mFZ$F~-5^I{x~N*@3ShiOK)nR7(E8LN-Gz6uTc=
znq5B7y*|)3Kr#A}Zft3G`Y;dpFx$bz?nd5{yvqlA*ax~DQ0#teY3>B0a_eT(f0X-E
zvT?9KQ@Opt-O40yY&&$*prPh&e3YaAgD>Eu3uXTdC%aLq>`$Y+v3IN~+3K{i^WOJq
zx&(Ch_<Uu?@Ws<?OmW7ad(~Jtn7ngf(Rtd`?7>-(Bt>>BRTA05$S$&u-G!~peLl?X
zKFpn9B3&3#AkBR(2+HJMA83aUbQhr5eb~x8;KS_lVRnOw-G!~p{a_j}{+yS|r~}jO
zb4YxH+zrS#-F&N66LX^na$W<|5zH6Ze~avii)m^$fKiuf|G#6b*QJHk4VfIBMvCL4
zP$`|a;}zAlFgIBOZ{n;I1oFTgK^}@rpqbfdF>izE3`YCScgVxS=w^<Bzl0Qyf#znj
z1sw!*As{WgN0CS325Mn8fvHZWn&;cbMrUerm@ua$%J^$#GHtN;0bWF~uLyJuc|2~g
zmgZ)Q`3OuGFdB@<ktgC}TA5qH(3}*;&fcrx?^p55Ei`C8e~Pmn<9S;h-ECNY&lT))
zetw#tBNyi{v-*x(z)jsigZ5dpB{<5gwaI_bF}GPidHCs#pKoY?Aif`yrS;!)aJ~sV
z<d`kifj%F22@bx+!Gk!+i%kRv5quky0)Cst9|3<Ec#Vxm&xF_fPjEA6Yf^Er)jH6(
zE&Ag?17$A`_Wn--7npSLw_E&A;0J-%n0f3B_)r!G2`+(lCIbiCtpj~7L=OqjICv5V
zC;umb_9h#AhKDyNzz+wnG4Va}RBRt1_tsSw1_>^S4ki~TVVr2rp2W$CN#00)8JjG`
zJGpdlM{-eJ`xv@*W~tBDKd1R}=l=yUfNf*w+L-HbrYR<3|DNVd1M&PAxYlMlxN}*)
zg_||atH=)|Qz}G0dMkE6@*?sQ@-p%Y@+$Hg@;Xu+V`a<zdlTUFQF3YbA<J+|Q&LrQ
zkz1eoc_sRWyp~`=xu4xpfqq2Zz<4FE*CTK8vu0GP9KXVpI&cSfJnH?{t{^~0y&)@b
z6ohp-9IfD(Bqu*1JpsOj17TmyOixU#%w^>D7<2Q<setiCeLcgUF*tzX*}QH=^7yj|
ztFYs=V`S&B?aJmCWDWXrEWTwQPxmYOL@Lyr*GmbyhS#^PKWt4Hf2-wB(?||eaVXGl
z$T}LhJ?;|vsEeoj9lZ#x<Mkb0*Ym0@Uc=Yh^ygRYuW4UUff<#rT#FBtvKGGIpp?S@
zfxHX$MqY11RLW3fb$T5~Z&KuuPnGDvEyi$CdGBqeL=5j`g-;9za`c7Q<S~N=kDEJq
zLdgl62d^iHMtd`Y9I?0i@Z<Hh(V*(+lJx5me7ZOJEK--(%2W&8fFsDbjw)ZqpuovJ
z$P`YuGmjRl*obN<NEO<Y#h=J~06sueg|{GFk9GL9vzL2Su)$~euvLB7Lq)Nn0>)xD
z+Xt%d1HE4q<lZ~goK6HY3k>B>Y5>Utlv`!q46#fkGGaya07OU4VT~_-s6ACSUR6V#
zO!B0?gQpxCJgwxk-v`gdnFd;OoONYfDBBar9P}!;^sm@*<FJ6dzEf<26zCFsI?C-X
zLW$_E!517|U+qG(3QL~GTMP4OtU<{Ys6A~8{ulBgh)<C(5C!6%UO4$G?uEaY)Io#K
z%ciqQ9=3PTh(m)$l^pf^pani(9eut&!pCRUm;3!Q-5`_`l{A4bI?wk3JNtki1Nz(o
zEeT&&bT*5?!Q0Jj?$`pN?tOD_3GQqbVx!s72E&JRFHhP1jeLdUTY0?=@%P(1Wcu1n
z4hyGlESzRTHV?WMM?osp8AmHwXyoG`L{s%JB7*ON&7RI7bjtzX;N-g^V72Zzl~@G2
z^!0!z0elTGvhd{W;Z`4p8V?RZE@nK)<`l97{W-1!R&N#_>HbCh3(q!w|Hk^m)}-Mt
z-RG|x{{Gb>^q(4O(88lYd0jl6`a^_nr~O-Avw2lk2g&LU7UZ52uXX!A5N2&}u)P1(
z_+TcQvnuoc2{S?X0CEKE9sIfrxt#eOS$E#UPbe1#4L3GvpCx;Tx7P1=%xD~eNy>vv
z-`U;(-9r1@4A8N(Re)$&g7l|N0TPk#031b>sYqr={Oxwk7>`)+FZ@-7Sb`;W7mJ|#
zA)zQ%Cd-0TAT!<u{1@=$A_KTMay(AJ+?|dYX9FGuQ<;FpY(On+_aTMx$o@SPSxA8F
zb74Et2K*6lKLQF|68Rxcz;rMZz*MwVS<HA)MR-oBIR>UQzEzPONZK7dhuPXHp+zB#
zmCZTG_tD0;^X(Oz$tM_BAd?c<sXrgU*+WQSCPfp;hSXG>h`R9-5)qaszM5ZriS+mP
zRSKXfmYuR+2svfW^J>kHQRK47WV$7H54I&B$KyjGm%07zGF3u{7+e)I_!`#jmv_v9
zXo90n=|U4A#x!u6Rc|teZebP($0JnD73fhp(vP3y=^CT|o!FxpxZ;Xm-zTUqGWmBo
z=5laQNZJc-<5br!RA30+4LkrN{J2JoSk$!9X;9B+Q1_>;41wifF>NA~jQk1U1fsy)
zlngiT$_>gTr3)S~lu9y#z%uKCkHer8MK~LrittXEDq}@93rTR3?_x3-n**^|@&}1^
zE6K~gG;$?+RF3o$NnW~U$lr03cTDOI#|+M#`Y!Je4h3F?%OR}I)N$VE`-xvBl4mt^
zC;52@HvAftO>0KJ{!<x-qgVZ&CiEt+eo^J?VuoR(63J!>B@(@A$SIsrBJUGQ)X4jo
z98)QqTt@;T*Wgh=-O05qfD!1}Y#iyn7~|=xpyytT`%J)B@gd@oX|s#pv$6u_DsY-&
z8qEv(cq5r-TAN>8Te~Okz*v9KY!t?kYD}XVnqEI+sA`PFMAb<3*-Xbqqx?5(Gtt^O
z*r;<~AZQsnmxq6=b4ML4Q>F1CY_gC9w{ufX{vCY%#o8ah{v7P9d-mH;x9@6FvF~c_
z`(ppYc;3eL>_5jo^Z^5I^HK^kdYNwqU2IaobqA-y9-)XmC@hb*Hr>#vu+n`-VWs0q
z_yvXi2Ck}szp&2Mfaz&DTnUGt;ZPe4ON@7w^*SK`8Ura`(%nqp4#)JsL8RC-D5QGD
zaClX*$MU0!-5VQzb%);hcIe5h7kVY~4WxULNJW%=(dqBdJ5qi^CQ`OUB17a2fa)oK
zCMKIN$av1oN8q;LOubbZxfDGrNBYT3E$Lby0Z7JPSGt>wP_|<(0jTc3n$rA6X@Y>X
z`o0AL9h%}-riaPj;h4)TemeL+!JljK?}B~rjQE}=l#7FY*1;ki{EdUU)`7a=qcb|_
zWwODyviOzYPl2y*@w%P+*%|S@LnZ?UEv<u_aG>w=Hna}D;Me>!I_P5p;Liuo^5cBV
z0NHj0ml5FGApcQCeYR*Zq7Q|fg|818@VuB&8=H3CL;o{|5=?6xaDnPB&DS$6wJf(r
z{=z{b##(Z!BhF6#jykp)Oo!>`Ui5!e1w*G<A>Bt~JY8q>k+F0LtQ`5rlEYYPr=y>a
z{J8*cj=3nNn&M#PfD^mHXATz)u1?)ksJqx)fG;hc@)c(B)BtRSBoWxRzZ9&U`Ha0#
z2SCbPEADDC*J^tK@^4h;X*+moH@0MMzQFc2GS|9O1L=ewl_ULBM!s|xAqfPIJ@WgS
z{4B?GJj$n(#Q0@^*TGo8wn+InjI&{Up(Qa0%mzqkF{_1iMi1vC>4)Qs?sj+H8SK1W
zXj2@uji62T?U4$J^nWtJRANSYxa4h|L7qq}*m~0mc#+L+Ip8bFPT-4?V!rGmU6J6f
z(wCSFvP-c<hl9HjqFV9lAVKtur6lQ>g>0Wk9MGP_A{(QBdzkZV76OhU3pwkERELN!
ziGu!3f&96a{2DMrAumi1q_`#TUYSLLd+aYYsj#nS*{1{C2>T$Q6r`>tA$=Mor0asz
zh(jV5Uk$)$fxDgQU5x471bZ!TU+HB2HKmWg!u2z`WKkanbWsu*Q-7h?w~f%j!MTxt
zhLf3y^hVB#+bx%w0Aw2ad^K7~|AR42P-<k4$JtF}t0nY2q%nF;Pw7|1?5dtIx~f+U
zU2oG2UmAq+HJZxFe`<(-QsR^@9kxvYVee$}&q%Tq=n|w<+$=9Qxwku}g-vKGxJ@La
zCA}Wf+>(%f3?xEQc0<mMLm~wd<t>SAfUbgsF#V9T<C3_-q(UOel9&x{GbFSxG(aj^
z66ix_94w^liJTXQL_W?cSQ2;OEFBWU^hZj^CDGpmz*V*+7J$1M653lDA(hbABz}?_
zZiv~pecXOL#JDJ2W-p4-xBX1A%}Bl`kdOlOK^n(NC>vr`ln~?3xf{^cWF*i)q)gn5
z2AJ&ae2E1hGa*SAFiQ!R06|@2EM}#vYKf?A8Ev|-xfE%F&lZRlx{q1tYTw8SXnlu{
zAvZIJFS+k@Obx)A1tGwhluGloDN@}!m3|UVrMpaRak5JxyINo{cOKxMu_PzVAf!&w
z<U(#~6WnEUpvm9HcS-<ic{~U(37{AsLJGUUiJ|Lm>R6J(t$?JiYOX-eFPeGb>PRJP
zALZ$XIyij6!C^B82c2YxAc=rt930%Q_~<cR0UJzby;TI$Id1^cCQh(y663~&8|wpN
zts|DsnnN}JOPnFtGJ+KYs4ZIi`Wm)q?dgM&is1Oa@Nlx7uZCc&E-XWkophlNG3O)2
z(QC{||Lb&b%-n-rC@Zm;Nu;qB0o3>#jRIVYl*4-ij5^vtpD=JLc4Y^gSlWF8P!0i0
zTE|IX|M5BAiQf{|ui8#pze}v&Civ}~$ox|VpW=AaT;8H9y}^6BA&yG{=DgNYXS%OP
zdFQ#Z7?;AHGo0HM`eW{AVaE&z%B~I5N&gYUlt!;TL;Ba*Go(8kohqsG##$=bmhZfn
zVYF1S8NQDZgOBfI^1Ta;Rm!qB*hMLIDr$$6L9c2_{|8w~w;ZX2kEm+7zS3^xs}&Yg
z9?ZWMvr?E4AX|z^CA@D^-@<n*EP>;&-3kGnSUMm-qgP)_|MiT*zD#8P|3n)z#ZlWg
zw8?%BQk6K3NTw^Vno*xo@piY-|GTFbo_11ibIh;S`ENM*-RJx)od05-tL+c#e6Drg
z6sO&+^Urbq&mKyA_+j6JhX<LIOvn6=4^{Phd~91n|LX+P3Hi-BSHlsUOE(Xxc{=lg
zIh?=6F@IXz@8C{Y+yaY>BIR3a2Alkw9rK6gzt|k0UwTtwo!Ku${=%u&1?iVFSEWlw
z&Vfw4bwSq=C&1~vpp$Tgn2T3mxBQV4m&RWXJ9<&2tGm?vgEJi%<ZLyI%ta6wf%Jxi
zZ!IsVMh1(_-(Zx`4`8;Fkd9^!atgi1h4e?Ml5`7^Q0+66H=FXFw7@?A{%wJa016ks
zyjPlR%KM)s^CQ^XA(INOEAp@Pi9TeG<5bGUNZm7#Nr6nh1^$V{lNP5HRJizLhM087
z<bl<!(J_89)%12v>OY5({S}QWwHl|;sPp|uqz{g-Ks05(U{oLM$CCPehl1;aTuS~W
zSe5)IuvuiO(^PllYfD!8A0R7T7^&}1lKAc=GJRtL4qtS;)iH-H@C3krEil6Z<7YY4
zq>$yeHW8ior;&)x`d1-+izZSqW<u#?@QuyjXF%CxprcqX<U315`kx>o-E~Mqe+E$r
zWRt)Vi~9?l9>XNuQj3d9z`y?wGx?hxa}2Pikk0j`>=Y`PF7C{@EXHiPJcjNHa};0d
zR{5GiHFSM28W~bh4eI{|BqQZdr5fJ@Qh)veXb08MIj%2q+-4>HFH}RiWk|C#R3kt&
zezds1!6i@);Z|6j``0vl%cgII8g5dl#t$|Fo&RT%fzJM8kf8+`6ipz71l|FoT~24f
zb?kCF|4l$zg5iJnBY@iEf;)KY>1`W8XZtw>(7ApbGAvGj009nyQH6d5a|adDS@3e?
zUG!Aak^VO-B;6{c6~3Z2pGd{|YZsZ0Ib?zV08FGr0$y){Q3J+Tp%Er~lVjckt88`d
zyMb(V=9`AJjmkD{YruSft;UwFA<DR8E98A@*6n|HaP^NdkOsui4K(lLOoL0#=21Ca
zj7&nt#Hn2VR>yo~$?Dv=9<l?pdd))GMX}AhEnq&h0d##Zp8&cxn1YOr6Ci^CpID5J
z-3!6!$UOrY7Y7px0Qwk^`m-Fn-JSHO4wqLVpP|=DNBR?-TBPH|Wjf$2=1GS`%cmAo
z0gN8aCd?X(iE+>gnINDq0O_o!WB3=Nyd(Gp1Sy4&4-w81CJis2W2dv8?Ap?G`VZeG
z7roAU(%)jwdO6moj@uvMb0~)GqkLwqrT#Bir!u_2=k$5&RQg5M>5J%8sdLcXMWu9h
z9FFWquS!Y(H<glZEz%`UrSdWZ=4D}s=jgU}0;LjYodv}xmB|A15+IFsoe4HDfOH<X
z2DyYl{9iD)a~MCbTCyjx-3?i7Xrqu<&}%1@K2)6PrmgTEqzpUZ>ojT(tFZ)a0_XtQ
zAMcuqvv}2Rn8#*Tdl_S4)Bb`-2c~m=e9byfB&o_i-%0p>)B09h70-9b=m06_jc_{0
zI?u;>^{(h!HY?a_Q?@$h4IHTaryzGX8EI!rLk?KS(w~K6>2!7+guf`#k%cE5fb3g3
z|0&zzFsf>SIy?#(MW_In4e%iXYQXAzcyN?=HtYq)KVb9d%LlR3fR$Z4-`IKty$0-o
z5DgRT;CdLPvip}T_K=-+s;iN`=tWZcvmq&+&ThlckX`N;$2@9rC5Tbo;ucw4Bsur~
zUp_(I70}_Gd9r)l68VpS_drA&&RFCz>rnbKIFwGuwozvwk_wS0Ev_U)YFeBYK5(J9
zWln*}6PAenVA{P9(Z)0$dD=RZz8nsv3nOV~AQFJcQx<m?xY{m`KGD%iz(`3ny7T=N
z0l3c+sRZyrh-f>Vh&+Q{Q%U+_OyS5(+RfGPbF^tqP+KBxvcDF&iddT7y7bfiXMKV;
zd~70)(^t7yzj<7IK5HFU1zg(a_+A`mTgPfEV;wKCj<qaj$T6>9;aB&0Dsi7j`+XR{
zSztBB0$}eWk&t$*$;cfvXppHPiPA`?v)*_-#hXI8w>oBr#gzgVv^bshgo{_vbSk>b
zBN8$R0Ply1cDAX=PU}$m3OJNb$E=BGAd&)+yDaW(aOYZ_j#<LRBa(V+$T4?%OiX2f
z4?#qShv~@O=+%GHm!QnjEk~}#VL|^<O1jUzWDnoUL+dT38klkxv(jRsy2i!ih1mV>
zu>^_%dH@32uV*5A(5p+NuS7=D={Pm%43)^pbj*DgR~lShi_>XJxadk0-2cnI#WD9<
zB2@rB3=thnW+M-vS0$uBi>##6IbcG(N|4n37E>KelEvs8AdIgPA&pYE6b_&p0ckqb
z1XK^^nofI>N^Bk*XsZJWwbiFho6-%)#W<XetV1+)4>QpJT}q`6EOqye1V>lcBTXum
zxd|UZeALCqDl(d@>u)*f4tQ>%18+5S&x3=^MjN;mpoRq2#<Cvif&Dzh7udyl0Y{kt
z0XKuybV~;N2o=><Js;VGUKLH&`hPa%kx~bSnK+C!xD3_Fw4fSbDp-up3&QwHT2Rp(
z&I>n#QO5PaTu(%8ZX(|k%r%I+{vU=R1I?h%;q+&21*B5f2Gp30wb^VyE(NdviJkFP
zCYKA)TaFgHNdfDFT}?o3IKuV=w-B*i6p0ri;Fab!8?X+brUcZcauaeHfJJcwrkM-^
zZUL(?(-dq)suCG953*8jqpdb%)K-Z$Ex$J+o$wh(GPVBlsyCs+FZSiytX@U$&L$TN
zTdm{HfbZc<7je88$G2O@YP**<Ex#L)3vqTGaw{Ug-{AMX4UCh&yd^jbzuUp8LJh#K
z2~i?#h06AF5Q~wh5>cTcCIgVVBnYTE71D0E85w~6wMgtPNjIrfC~N~Z1iO}a+VF%O
z2rjfFX279lsSTI{s3ifl7u<{t#(rtsfTK--fY;f8jlixWp!Ne{uLQTOXgqz&T!U_y
zS#ATK3#c^#wdON`%?JR?;|9Fiq;r+L46OROCD<zTvzF;ihy!jVl8NxY`(C4V!W`0H
z>5}C0AB_Rs084Ea3OEwW70Bt`Fx;%LfgCXR)9+f_nVBXP`&Edq-`zzmi$K@g#LffL
zhQzdlZ$r|tzg~xc*nKy~<Zg1zDwib>u>`yxl3K?F9u00aayrQoW{oA;2253|p{1I=
z$xOz64dSnc`^?!eELU4lH$V@Pnbztp$P@r;k?72VVAfeodoa}rqy>!Y4l^D58>0fL
z>;}wQi|GaC;pv?Jw1P1!%nSe<V+6{hBi94cb^8iHeJ01sdSr}B42q!}Wp2RNxeN{Y
zTB~P{Y5ji+(PuUEg@-+mxCe=pHVfhbW>Nk|$ILwD5m^oRvY12$7vKt4)T?6Xt~4|7
zbuRH8IlEz+w=O^9BXTb!?vFtv10u5p273e84PdpzKI_9i0PevU*whV<nPrpEsu*f?
zdEwM}l1y{x3vliil=Qn+tVksaGfFdaaiV3n8BU7vu}`gzTabI%s^`#_0y(uM(xxSs
zv!{6k$B!YUu`7Y#KV;JA-)pS13jvh&IeQRi^Q<$qm7z_`teicHqbG_w<4vT1S%AI9
z%0}$3jM+s)3f6)riSI53r)QXM=Ho~umZOc7SW9#c5*a6Z3H}rk!Df+>J5JU+W|1Y`
z3Tz2TYcaYVc^dm76(GLby3n$|8Nl$EtcMl^D(r^QF?7SswU)SiZGt$n&OGlU{tP6Z
zv&2J(iJygtc{a=!TksaZHv`tXz267U0hnvSerzh(kWFGc*wHbQxH_7!_W$8U2OM;l
zSQ2t{3na8Gzw9Hi9|AANAQ6B>I@lDtEfs7zO0VTvrGE+g=<yQV#NL}`S!=m$5;YsN
zGJ-3Z4O;c|``3tOhiZGZ4cHd^Sp?Kln}xiJVUfPZJcmIv+S+%=J}Cr1%d7ye#bB5R
zV5|jn0B|<;T4J-2*JA_-V?V|wq$T#{%L)!6E@7>vkpZi5q_am3SNKJgo^FI0Zv*$n
zQ3V2PnO5KfF^FancpUbcF*~r|Lbqw7dKHPxn76^bgG2~qY5uzd?*_+Aw5&$}eu8_m
zzD2@aBmBQ>p8J2-{F(qhnc%|rC%D##9lqf25%>@i!R;V>NBuw91|9`)UsT{=R3P<V
z#DV%>-80Ds7JeXsHQ4U(1%98PA4UaEV-=e!oYwy|fP(>Q4D9p)KLYe|6wm=U1t8mL
z(r9q|qX?`2{RGu5(=9>S4<U}aaF>k}(vJ5TK|YTP6eQ3zfagJQD!?}5y>i|O@B?g9
z!F5F<!Jbh$33i3WOb62zj4H7k`6M<bWb)V@F1MhWfGz-}D(pc%jSVtcU<O#sY%uM>
zD5HCkFXCX*!1M<L&!mM*-@UF_k_KjG^Y)1qLW9ub#{F3Pym8zF$4?%9dXnH+0EhW`
zAU5TU3C_c?50`RxVRI!qjXCL#jq!B%qmK;uBN`%LBSqTB??H+**cWf^I3|HAly3ua
zC&&#r)CQc7^S%VU8=E0Epmg68P`U?v0lxz)KmLFYi4XS$oI`>t?w2;+NNs}3aF`9a
z5a*W=@E(jt+JMsiNI>Zx^aVUhKsoXU3=(jJFW_tfs%O5k0jt>rufm37Lr{;pG40)*
z(7gsVqb!ld1icg@_W@3|M5Ox}BGNtNBXSHM3g9Oa=2YZ>W4&=dI=FD5=l^1Kb?G)8
z0rw7I5mKK$jC_rQ=w}grYfKp5=8G-^+-DJHF)_MY2ReQ2!-2Zs5#*cLc%y#T5v{w$
zX=i>BoGSS!Z^D^x&j3v0dcB(kYD0VzpfcZ!9619nhl6cTaE#$3-4Ka<5m!I=uzFQs
z-5g5KWYAZ_!y=k@{m>3|c|F1Dt2c)AY^iweid&_}2>cy^zegeuA^d@#%Y)H~$KE|H
z^|Dx1X&YEo=yBv|G*%ZuesCJ4>TAI&)h@u4$vlA^iv~wwbEsAyi&eGm1FK3si5!oE
z4MkW70bdGOB~xj>oa-HIb4X}eiDKpw+UnbC)Mm|5C3NZVD~^9e+y@V0n~UTyPA;?d
zsz4p=mAy{svj3^jKI8&|WZlmKlywk*GS<;efS(Hiu6-D?t*pH=O~GDS>Yyb1U!v^u
z7!NJ6SEZF)0t=1E>%qU~K8)3;zaaxiKl1H87L;&b{<-nM3Eu+;$C~8jV|uMz*2|oa
zL!Gwo!eOYlx3#k-xR{MUw&=8<V8qDLI?6P3zx5x{akBArn)(rC0?O7tr^^eS7TECp
zz#`6AZV52XB+sndb9OiH6S-QLB0f2pn6~%~(sd6&G^3ZY=17SAl?d@PG8Q`)k#Ogq
z+2}4^S%i5TADE;Iz<&ddJg-CxP+`EarkxM7(ues6%$UM>!gx19l=%fd&}tv(B%sLi
zO8kLTi4I`Yr@D+RK9Tn%$Z8+*5STb!YARumNsg-4{upn9sSkJKAT>t06EKSIyJAr~
zuM5VT<jMPcOg-Ggbh3#&h|l#sJwmS&@f4(d)%Gez|GP{&`am0fpnn0yF7r5Zp%1g!
zhe?ESRGHn3l}u+J<`ys^l}qf31BzYdai$9(m05H7EXu5T{2cNy<g}vwjzrBIUzx|7
zi@>YOcY_~Yw8~!p2lxwj(Pd7Ep(}<i#b)pj&aNW^5qSeCfrHb^>sEKX3HmV0eV8(O
z)KBd4jyLD|K&yP9vT@5h-kj^htnp#W#j9?JjOqbWd3Cj2n)2q5O)j$Uf0Wl7coR(W
z_+>pN?(AXe;zwoPgP%LP7c58xQ<HZfPOCB(0jDby&^7QmyE}%nVo^Ac%tVu%Ubn~9
z-FloBD8SzstZ%FHFa*D3ij!o>yh)~&C{OIZn_wFGKwo<x7gHHb?5>+&8u~Cte3&X=
zip~uBpZGb-xQP#R%m+#a6nkb&Fpa^e>$GW=qwBO`y?{JV*J-={<BT(7B7bNY2kNtX
zaIn4m>3x<!*@$0c^C%Xj^GZL-B#%7Y{i?(srm0Qj5q!>N5Q@-SNL3s~SDF6T0yNRI
z@PX$0K-B=nF7rgw+=p4@!_)y&w9JtzPc-Fym?eA(uYg?;Q0y{KG)aI|X5E=7PnmUB
z=0)U%|5uqOnTp_5<$J+*qiYFeD{A|nOIe>X!uE7sG3+YX3?9YV1hN*914s&9qsxn$
z>P|A1eVEBUOar|M6}!BXOeG&^nh(?vOziS927H*AK1?Gpv8v0Xt|pl(fK(nG5cZDs
z%A>8eaI1`U=$LGhuUyuB=+5q^RgN1ArW!sQaOij(pUyb%!0|5SXbzDr_6+cn_J4nn
zs({qbI@496pLL#l1$mi%)(Q5{Gft<;riM-AKJa6U^mC+3Cr6RxQA5RG3#o!B=1r$5
zCV4=&?t{j3H`Q?(B#FH^9ZpF_=xwA0pxFIA+0^o3M)@!;!4&Ck#-F<Ysbn>MpmZPT
zd_b}Ld$OtH!;JA^+JK4G-%7SN7*$#KszS+>SvRU)Lk|AG$~?scaG>#WKMqRt_D`os
zGE<6F6xXb<r|XKDDdcZ6cnoLV$v{NjLE6SG?-Wzahw0_Rbc!x-L3O8?1Rthv0fzXU
z<CYi3K2W~`5N~$IE$<YQ2u9V>`R~@Tv1XHZrgUdmr2qNUh+DFv({6$(iL(X_zNc`O
z$~4kZ?I6+>0={YFn?>VH35(eWrYjhoj^0Bqj)Af3|9Df%f^q;|3`pDZA*5Rj5b7Kq
zCeE^%YNOd4GgEK#Dw>qHs#;y-P0-1vv?cHY&bmQBJJ$P1uebyznzJqDB{1E=Xc_(h
z=^Ym{$&|I2SHbicNBuRIKScV(1x+?(0I^UdX^^gqnduaBj&=M7Ha#JuLHrTYFRtGy
zrkwVF=A!#)x>4i#95H}a0Om6lzi0INAF_9xOSHch(Ji~{x6h(2!BIC%HG%6L^RxAH
zkl^X~(HQ9--;c>M9-jQCnG_uSWF6=upJQ;K0n-Bq(_#}TaG*b4oC^L|i~j`tc<>rK
zJ^wd8L<TcVIu3rZ4)j6RNjOm7_r<}~|4Cq`$pHVm#eW5UDtL{9Oa3R`ebn46lZ}Jl
ztOI>(Y6cE8CN9OnMgNn)Y?BNAPY-X7fu9Xteg83X*_nK3{wKKoILGAU<PV%^@%RxZ
z36s6WgMK$z2)=Uo@Q=Gr6xGd+p_^s?^7$$r)7MPzLoh|1&4}S_x;f!<c2-Ph(_-MJ
zx;h4OxWszyRsZmv`;@4}aDYVSn&f(8LR}jy>)NPfqn%y<A%NaNQRZ{FRj<;0f|Md}
z-Kif!8HXankm1M(WF#^QxeDQb_qm4Lls17d-)HYOu-o7GY{>6_;q%c6p1)7=nU3*~
z=zr$?QRG=zKfrX`3clvI{DZ=W?saX`?G%oJkPF~_E1yM>lh2T^AvW3u$6@#gm1%eD
z=S7}{brNh@u*G$|yswA*9PHKL+&|T^6(;&<#D5rao88E;ANPJ`^ab)S`g5#<3;KAv
zJoJfFraAU{2;Z-~j*&xNu{BvM118_+?=t)elaJ(~=ccDiDD-U?Px&PcV)Q%UvAq7t
zD=&JqV0aw&0w3D5r|{`ut?FL<6;4#zB*?x?X@&m^83*Vu^#36Kr;KSadjr8FM}7Y#
zc#0QF3gY0L%NXr6-h)g?EM~r@QL{j8ho^W%<bYpY2MIV3IqBZCx<sq{8N%DIp=<Wt
z=^JeGOmdU`U7H>5+M;BO#ETE(zY!G(>P@I5f{XcKBn__<5IO$^N0lq{p3(RrzW$tR
zzAXUr)}RkNu_(3(*>8N9N<Pdao&S9Si)8a1AWEE6owN^8VwLt3#Qsy()Gl(Pl8k+&
zoo9}K(>O{2cRBs8#J)z3p$C>EeXChs#X~Cj2<~<trBNkU<9=ImIp$Ye@_BAmq$@1B
zr(^JimosZho%+W2<iBf7a*h3|wGOA&DOo46>-YHQT~|l`TN7M_zR~sn6tbF(%s}|x
z9m$cPd{ECE_W|1!0?ky6XU4%Lfcw#hyTFH=24+?Pj{euwDa_~^^8-MQ>;|xSoU+5x
ze2dHm6yUWO;xGSiWdH6O&IRYB7tWAsJ5%4mPmqjT;Ab0ahI|}BG|R3*_}~0a{P4hQ
zxIF59(l^Kg=54@b0iO>zl|@6q?~wVRG{?1O<Xsmq2eDIM%kB#LS~f?KchR3?@f}z*
zq&tTA*No!)9$H`>Vr;@VJmhoOABO@RN8UrPMMHsp=;G<VM=wGpd0ohBDPDOof8gru
z#ay138e91Myl<CY^Z`D~)BYAdx>8y2A@c*EMWB*-t%mp?i18<m9_PeUqufgO#1n9M
zf?U`!@8d`#`5=x4%=SjIK7sQd1NkG`DnL|i18I~0pOMAHxE4{OC6R0-;Dq@4KMp_i
zm<9g<)E8zy0uZUvyp~0xNSQ1b%*RGD_}-BXXsWU~5m4A)5KZmV1CqdE$9!z#9S;$(
z7Xg0)pn&J_S{{iPFqMFx*?>QT9YR21e?v0j1k4XP=2I{gZB?#iJg6dnBZHOzDor*j
z$qqyxK>flRE2A*R%H|{zj<%i8_mXTTpFyTOiTsM)qRf;UWs^gH;w=kNujNzZT+91Q
zOb5eTrhg&J<M2)alVhnUS3Sy4d7eV9i=q-<#J8C&mF?JGpeJAcj-iTLgV$P!zo;ij
zr~Q10EUw>CBoo|2*nE>p(C2ZaNtHwX&oBq%=MUsX^lS)@^wTGKy7CF^-NcAI><7R0
zqtNcra)Alpa6dqOY+*OR&nJ76Bw~o)9Hx;waUwd8%tDh(&%F#5m?P|sT&#;oF@mlp
zC?`|*0jBsiS#;b>*fPO_d#FH%5WVpy#;QOaUQ>{$3YaW_sB@UV=2gJT`XvJGBWqz#
zAg`cT*3!=;Yw0RLU|r0t<!h12hIr_8%lv+T8(}VBae!;0m^(0k4O@**jg2Mrv&MzN
zb_KTn;h94}zlkjeI`<<Bi^qB6cqw*~$Dye!-Z;kdmB=Re0l^25%|AFi01*F<bia@B
zbV(R<$H#p#UI%RCC=zfN9I_S!%v-*I3kayWu*?RmhK&L~<g;0b&8YyF04uH0NNe;U
zM(Ur(V9}ZG2>?|0oQK1DNUZKja0ljMlfK9?k68Tk;Jbi7OZdz;YcknRh%1udnqO;D
zF@M;aUxayW%+I#rnh>g4R5;FT;R-CZ&XsL^^I?fe!Ovdnrw@Ko@KeV6X@QSco}bMB
zYLzV&!ikRr*Y<!P1+0$vgrX0mBmM>OI68be()~q8NLL4^k&fue@2Z`@BZ73qW0uHZ
zz;z)~&JsBv`?h>b;8>lqWur)hP5wg1JZVXMM!-Rk_z%DnmV|WwKtj5DJ`(--UA^ol
zku@h^o(2~gev9a74bu(SY4|b9`QvH)@DxV81lgsSiTuNV_%r)FBIXT(J_`|zfCS_j
z^qOSS&t{TIR~OlcWBKF7o#`B*OoH3R876gsW1a)25jC2MG@v3C69}du)sASL5dV-W
z+CA5q+=Y&L);gGmgT^?hOfcau1l#3|_@yQre740e1m6sNa>zR93b@-D9V|2HILNXN
zR^Xr|4ysvv53s$@h+l3}z~5o<8^NCsK2*ax=nMGLGdfsd@+sjC9FVa4F`3rP03c=;
zI<1BORb~C7o3!92B0JI3rAgA)nBjK4d(rYJ7cF<uR*U$K8f(%hz!+pL-W%ZTc3!n&
z|IJieF`rK&y<X4##)ZBbbEkD&tQxC<&+!vDzT0!`sI9K`JKp+jg1>C*_aFTBnNR<J
zIeDG?ZMvL!d@T#GGSM?iUnLVQz$K7-tW)W0<5ao{$okXCx*z9H!`B{*Nd!~hVl?W7
zDIlW34igs?1azNAAovkg84Q7v0PaOccRAAi$<&js5fYjI4t=00<)3<C=KllWbdmBX
zxGyIcofSZGIK6mxG)_RE2I48Nb*Il?MRZHd{W!yqBWE`;aW&;?A$Qtx%7->qeFx}9
zFA4K5w$G7-7T!|G2K1V?($8huq6?XZG~9%f*z<O!$(`?*n{4o>0B$0<oCT4)qJrm;
z^+p@K7q*WQT+4WAWHWjVM(NjZg@cOEp;P}b$KXvgmVucX*U~7!xyWt;+>7wP9=07m
ziNiTFpD+9(^8$x;)XQo<*mh(kY*S<tph!}YY9_dg(JGTcQa6jJ`oAB*y*SoJQ5LzC
z2KAftTc>)#BM!VS@*ZznR2MF?>Unkx%q=!C`MQC`<i9@hfK5!tZuc?bZvQ)cU5Jcs
zvl%S`)PalyYJqHyo6+?qos709Bm4~n_ZSIj!#M}pW=TlD0TR+RMjk9q{e9*21|cN!
zakkZxSO%yQBm}wu$&5>4waLB4F}H)OMpr)1KfA7=j9}U$H-d|PBF`z}haqh4slAqG
zd}_f-FxzpUfueKkZ3P2GyIV1nVKY(NR@!u)ZH_#I^T!eX*Mrx?dstfM7k{;2oMbE}
zE3oQToeV3HkZez={|l$yB3D_oU$2XyTWXeCB67AJB06HWLY^#2q@bed5V#JEx@Q=e
z$LStzxuHt@v)npU!$zD**BW^WXR*2`o9<Z#Mss;Jm<!29m<y3DU}BH@H70}ZS!qe6
zfqO!ab<!4E1zBN9s9_T%q-%pb9fw3JB(Aq4Hh}2@31L!^+v1W~Yf>Sx%90pYmHIzP
zDUtzIMb^-uJ{pI^oiV50vKZa5Jce$CS#1-NubW9o{yQMK{)7tpU!xvo>ug3h14<<$
zfi6b2#?5G*$)4w!wKk(E;GQBOoeQcX>n#cCZ-In#osj1(iPPqPHY9GaB(?&&7!m??
zN4CW!af1m!V&)NVfXxE;G$eGksEN!#uhmuhEs&6|3-Us|`JWDn*?=ko+5xCLBn0Y-
zY>!J~y~&@;mzFGv0Q3JfNNAI)jm$;Qfy0qL$aXB<44wb(#$nWf!`sPb1k4-@Dh{Zj
z1<eE~Ou-(`l==s$j^s?|J4#?Mci!cny(~aFs`o)|26Ng1r2gN7=vjgDEb|0_ufseD
zC<R$yNl1S#B&3^#+!Kd*7!va>rUaPA7Bkyo{KP{M<_@4mmcSR3{0#_b2Rsj1h+ZqA
z^lfJrp5v;rB3?@y3z(y}3uu$QZn577xke;*fLH$4ZP@V9l^kmB+*`0Jrp$FhX0dht
zt@_#Lye7^=*16hxSm*t$^QJg`$2#Za5`O9`x?yS9b)MjEvl~qAOvfz2ho-cy6)H21
zMCJtIt-9`Qhm0ttjz%kE%;{Ao9cLQJa+bwN)<s8e<RD&s>wm$ro6CSq14A!3AM?+?
zE4-_wOTgR$W|7YS{)bMVf|<sH9_M&9K&{t#1bdV2(7xRO8I4{gm%cq!k?u0&VH`%4
zynsml9LJ2cxFg_tSzKt41xA(A1tz#<ywPM(#xa1Exz27o$Xv&^zQ}uVGS49M@nDqs
zYy5K`nK#0|Ke9`i^Gf2Og2!z~GT%?;;{Yo2e1g42<~l4iK_=P^r0+-u(hWu)JwxW{
zWIn;-z600C;;wXY%)j`VZ#1c7KG~A_5pWJ<bhc}TOhT_&A^jknqH?cpf(+cLHlW%D
z(<Xb(ibD`pyw7>%e-j8Xa@&mLy|%D7as%uGsg!Ptb^a#~#`>Js#rbsWTy5j5bIl4l
zSO4c(=Lz`MO^P06+7vt-CmY|>zyoz}%!M{{oYo0OSH4dG)Fs!yh*q;OXZb5IR#*H4
zBA4u2VAoRi=&?!C8Sw-@l+9m=H?1^-u8*NxZ3bBK@;8KcR9$Bpy<g#>NNG62y3VLb
z0M`92@C3jKWGxaJyuw9gU4TmmHwYXM_lDff5Y;JZ0P>8REd3wJ)*;)07%ST^K^!1k
zor2CsuC&3W?@Vy%h9LWFaNjCd7<{&423y=waF<xzP>XZNo%Ub;cdxyaZGt=HZZZJ|
z-cU>CE5L6<MyH{+3T&N9e-Tcl8-_f01~Ms-8E$dM!1c4Z&~OWkKMOXQj2Vs@X36{v
z_<6`&0InS}(mIuX7*3_rlspWXc-@)7lpNvVLe5_RCt0B8q=51IGZ)}h0MDZPb-q6~
z$~)UX2Bs9%i>x6z6Zo4O*x@cHyN-0T{$og)RP>l9Nk4HB4N^KN)$fNP^A6eRoYoP!
z7(LzMNbf*Wx)I2JOEOl!XU}p>SBpCi?sAJ8X>mm=&HQ%<OoH3*n@uXE?rzC^4ftKi
zbOP8J>1Lfu-xa6QjY3{J1DOD1dRpA~;QCwKRTjq|k1w)~`pDdD0@EGS!;<+0@C%U9
z8UG@rH+pTWI{*L8w1Pv_{9hA8w>DZA%Hh_y>17!z$OsrJfTq{4c#S)VW;>>@#r+L#
zip6Pq2^Vh+r80*4*rdME`TrnU=``vfms*G4;IJDGrAtFzIYWta>9R{K?gwxKEiT>S
z;#WA6w=AYP<}%CXSHSxrqbr!MNI&aT`cXKQPV@Y?GgKvYwqsgZ+&|!^S)8W1aM4vM
zc&tx=vb3~BegpU-M0DZN4LRRBls*lI(rFg_eg-1B47}DBcM{wTi_<I+&JywNWUx~e
zEStGx(AE<99pFn4(Zxp(qz!s)I=V196?4$t5Iw*cU+YafOGN%gLqxtbbN<yg8sZJG
z6f(HL0#5;)Wr3PH{}yK82J|P853Ykv;16&wlYlN<dLiwtKk3Kdub0k$8espOp_KUy
zuuc~EFTgnhg3$mIF1k-6yJiMNI)Ve@9#}J}m=3GDJbxdYw))V&{G+`-ZC1csh_RMd
zUAg=@%DZZL1H)3ZN0wGS%dInZ_z23blW%G5f*$iE>31*kR@RFw0i7Fffq>48W08+N
z0-?g~7Xm4kfR4(iAfRLN2T=sFt`6{rhCBkY>+B<N9(o<g&OtwyDgk#N3)PTQ)48<O
zp%QAFN1KjglXU&}HUYjs&MM)pVtisDykj9Z7x%UHJs0X0lh2l14=0!=De#*%!@B5#
zI8H{ly%y8fb<EO@VLrnD?x!H!Zs7>YJuuV2l1T~ynhzVDfF>g!V*NGZBja0=Z#CJF
zs1I11RY|b#F}QSey8>y1UfE0EWJckE^cn7oH>3@2j@p{iCVSm&4}D26T@aQbdku(0
zvVSz2RdK;=_pW%5+wD!P<I;f7_c_kNapV7^?Y!fosM^1MCZR(}03~!b^d?<JQJL7N
z3J4ZZBvI*zC>B&?6A;A?qDBEVq1kv8QB*9LSP&5uQLL0i5kyh3SLD6EGiQ^@W)^?{
zynMXf<eYQe?bMmwomj=H>uD8_L$UPgs=Bt-%f}~jMqR|k>w|^<v+KCoebm>y?Wh5#
z3xpr(Z(SUG4j3G8`C}c^2_sZR)b7MUe|Mpr=4mRQW4og>Wy(%9%C?c6VSvMtI@VE@
zcg9hbjaUEgt(XD-^oxgS7ODkwjD;ouiiwNPV_cYeFcDJS0OnzGu0yK2_EVTW2>+Fh
z2kaMeZ_?BSY7SHf=sL!Oj!=`4?X=#HgpY@XCY$kaq&3(OHXnmJMTy-3_Z`B2Wd=Qa
zcbg-uy=Yya85q=ddnxiYt$#zp4K6Yn7)-YY8^IP}P+P9peQ>`c{1<0%fvIl|>Vj}4
z_Oy{r(e>XqwEhbTH@Mgou=+K$1{(t}!Ju|CVDl~9-w6N38NA0dum+ES$-$sDtjmxe
zX#5Z2+av$_gp%fFl6M4q&}3t<u{GEPcqs<8YY9F8co5;gQVzV3d*h~&HFzY<EDUOc
zxf1zNjhO|Z0Y=S3CWB*EQ){p(@G=Z)lM(zAumfDt;C-fvHCP|!Mhs@r{%Yh88Yd+(
z$e-^l;mpz8+KV;=UXDR+CW8M2P7XDgXPN<NYE_5%h^eKG<YL6392Kt0yE02uHWjIa
zp18Hen<WwMtnqh!{P+J-fO=U-x7GxOO|4>XP$dItg=-Ge8;0iI`u+wBQ4)2{a6)w_
zP^Z=HDkPtPQqb7~(E?Y6g{aANY(CtU_Y>*=2=7u?23BM>fIlWO?NDQpO1_t_%Dbaf
zWxDlNB|&8QI9b&)wPB98%rwhX2n_os3#bZ^BI^S5%w=Vf<q%nQ>J*vko~2I9>ov#{
zB1^@QHa0RoSe7&RB0j@)%4_}-JB>ejVcC|jC*jO^xar7J*gA00Kw7-M$p@-wp&mfb
zVNfgHwa7A{y5$XejGpsY14h>a*TPJk6gFV*4;jSs{PSOI3i?y&0h5BV7DOy%i>GLx
z=EgCicRcm!<_<W~5y2mC8s_urxO`Y_l51*NM{a<<EYy+trCek6zi)J5pfFZ;w@I~*
zNZCU;qIG<6z>!Wk(K*DCY#ga=*<9FsSS`s91=ucdU85mz2JnwIiaEenheoj|#Nh=Y
zWsA)L>xgtM!4WOh`PLEdu`WGuqGyOBMH5}~vvuSa;Oj#jxhKTvgCS-2nxCvA(zOgn
zy!C%cASFlR#IYfcIOAROtA+0XUIDD-cv*n$1$SHsTQJTwzrbn+wS{$=1zMc0LpJLC
z9|5W`3#K^ccMBa0^a8U$i_`VUJB+#vn8392W<d_jZ<gr@b25gsR?I*)!?X&qlNAN}
z(?Y$0Uc`_ViyM%4fm(+ca$){}(R7;+Gb^-P9t!D#`(i7g_lPGge9^yA)Pk%?(W9hQ
zo6aLhY^u39kcr?+xv7>t&NY9*YSD_o)|=)pS_^6SEXVDG)McPobw^OArFbTCGP+Jh
zYSNCQU)1wNkJA0eMszR0O`#DzT<S#GQ993yNFS+Pi^5_SqC++y>3N)hw5dH72<UW-
z_bm;G{?7;0;*mMlHUHX(x&k*2MDzj?mDq?>*MvGP!HTFK$_A80<foQ^ifuqzdOHs(
z-F&=yt+LvTs_OpFT7J+HYIi5U0Sdmf23w(^J_faJ>w0kz#sVqy+818xm@jPmp0sa9
zdo9^&e@2Ld#kBv5_EGgwJAeiNni4t4nIQ&p0CwAmG$S?+_9Kd=UZX!4K$iVIR<A+3
z*E-k>K7u`F6Ysxc@T?FI^D(%`0-XRF0ca}aBEv!)bZNiOwm*UPEoiS<r1rx@^k>oj
z>#yw-Ed~Q@;LRaBD*Z0O<KXX+xZev=@GS<@$)Na=3^t92Sx9X5oQL5NNKDTXdO8C4
z4V<R$P+k9Rjx$_LgBY+DASdqke18AV8XgLN0fyDjvjc`lV)TMI!v=1@<%YpsC{F3W
zF#wJNx-brmOm@r<fV%Z^HsFVG4!dy<X7T&?){xp~VMyIDCtxQV8y8if|6`FZ(Ja8w
z`+zM;X%=9waeg{)0z4le_$)CN5A!lo2(!g97r`6{qfE?2Mkio$U_S7eamKv_=y)Jy
z;Z`Io5j4>;Tdkq7Fuh?E(e22^2^be<8;r@zol&WI^h18E&#up+X=aUOoddlDH1_zg
z@%;QTZE7=0CSc-Rj-ofy<|E29%v4r&j9+#q<*{KlgjGa(V!3<;{aM8LX~15t9v`Np
zjIjnM&`1V9u?8oh{5%ZaLYvR6L6ud<pvvwF7#xa0=?NNiG5A@)V9r=SpJS~(@9|+W
zxWgK}6y+l@cpHkhTZ1aAi9wak3m7~LgHjYU7$Ld40tTZPRA-E{22te&xRW*%N4&>}
zoum^t|IlZOtRt7vawLx2L90URh{{rML}l}lSgMBs%0SQ&{WX`@;VLsf)U`A0t%%ns
zYel!|7`ts4lTCegH!=YQI!c^DYpqe$fvRzO=fw<P(cOD|Sk5HJylEBa2)P3V>VgHx
z#Bknwd{{BgykR*Vw7!B<ChyT=JLxcB;W+-(hy`^T_zqB!--}E>441_Xn)PtRlBWA+
zNom~mArSriwk=l%?u<P?j4!@;ksi9{&t9a5tEptOfrhPZs@9C~vvePZFU9cXh_W;c
z3s>-S9T0tahW4hIEWKk{W$6`IWhf7s63^<e9fV!rjTTm_-UKF1=6>X|csP!Y@D|f%
z%PL!&VU?)|kf{mSXw2&zQoRXSDN|~`yvjd36%x@sWaB;R+SqKgV3|=Sbp3M;il-sU
zMh)5)A^8OHu5GUjw4}YF*QJKqUs=}Pe|(q$c-jJrx)p#T)<ImrRb_x@pcrRY+4hRG
zHSHCpPKj!NbzJ)##=|qTSEdzQBNiGtzeqRRj-s*pbULE`97+Y##?)X;{IG?C_9aUK
zj}OZ?>4QG#dFIbO&1w{C!(52M@B8^oP0Kv{=vT+Tfewa5p^kIXM_uWYe4JRB4qLH?
zNffRLDE*~OX=$6}7-%`x01OnE^nMF^4tS<#h@k@WEIK1}-4Yxf-d|C3xq{q4z@?85
ztA*#2_wxbB+P4NCAC_;P55VUJ;F-X&2VuaLI;hB=3os)Cj0XE9WVkZfuuSanVfkil
z02&p5a)4qF!U!5tCSHJ1pXyq;=>-2e_(3A;29uym0}om#FfYXpTZ>R|?`frdSQ<?d
zsg<|EzT=OWlq>VQV2MdTxkt}ahxIfs+CY|~vsYg}qJ~3HAh|e|RX%z?Y~c-%0`p1$
z>K%Y)0fnczz`Pt_P6{w{VdB#44MYX=YJkaviN?IU1t>ht1!f(P(yX~0A<de{4<p@S
zw4z;$#Lb*QnwOXYc+Hf>@VA!Fa_sS8OUk8`@l=>(DwZWvmTx0ihO))X5IM3ExfLfW
zq}R*t60<bGJRD%=RU!Z3>0M%$1fV4WXg-F*)4Rkh4=~FE%-t}d*^Ls>G9aaQ4iU8^
zz4=5_fE)vKxb*r1Z>dQ?c6N{Bmi91@e4V^NX?_qr3z<Rj1D9KD70OtDDi$VDwgRVO
z%JM^$CC72TGs{eR=byWGt=FT1A;&B;kJ;=j$H70y4naoE|A_SdgnhTvJQ{%h_92gX
z04BWamYOF5%)bF<Ax!zMqyPQEw$wZxfE*%L7s$CqK%sp{2L1=5uG6N~hOX0wwG=rX
zM%(puNaC5X%&d%}Kxuvm1+&Xf4eQvl3aLycmBGICQ@`A#cX+0Irw_WDr)(fA(0LTm
z%b_Qc#W)i`L$v=Ca@Wc{8GyP5pa+4%)4a^g4=_Cf%u<;0X^v%inVA=0j_F7L$Glqx
z6rSc~W&x1WtQ&3ZNwaRUEk}C8{CAp{o4eta<%i*?m&>y4f5$8@mr{RBR4hxT-Muz~
zM^M(0B~*?)C5Lb(KD~Z+mz#S6Oy>aegto-+^e#7f0jOI5S_u=L-sR@L0Mj$T{0}B9
zyF5N@xp@Fc>Cpk<?J;HhGY`!12B9(e<+9@sxD_V7)v)eurgbk@r-=DBl(`?BZ8>y2
zj?R8#{R78pva}jU0?$1PK0YkpECNzL>rB^)e%5*JQREaLonWs=5>KZUW}yuvAO7mo
zE3SY^I2N2a`ucida$KPw-6JMFQm=c99^K7@D2))wqbO}gO6AZh<Qbd^@9!06ae&DP
zFwer2>u$!Mmtv*t;Q-Vs06h;B-rp<CLjk63fO!EXtiKg(E{w9Q+mX>Oq*?bOA4mEF
z{dbxl;VCKpcqu@^4X0O3vtu5KOS87~ie-G|E^21m2p&UO9U_n;Pa|t_COo~5m{|el
zhye2{Ou6)yX7>>@C%~kaGUUGyC_KH7m>UC7qf&^g9GLL*K4NZ$(QMNB@3zp{<ewjP
zm^~ua|7;yz%Dh7Bwf5_qP}Y{g_dk^NAL}0^pCLQzaUd{_0>`ZabBkqG!3==W>F8PH
ztq{hp|4YnV3q1og5J)@U8e~HV<gaQ=&22VPZ8ZN2jnv<$$_M4Is<tfl_^=h`R_nlX
zC>w+W+OeKPHYIdmnYqI<YhliS(K7rzvN<8M+}v)NmtoG-<HIzUUqIeX2(2)80kImU
zX^_qdjr0+7r&ataZ3g3vH;B=@HKE=|%;nL2-dyxPI(qdumPdd!0N=ArolTyDj}J@X
z675r_>!CN#>XpwJNpejg%aJL#$Td@~p7j{N7CjmxZztAc@{P|E|6`^I1=FkoeQsq2
z3N&Cgq96?kaimm%z8g>ie}(1Wg^$8(?7Z{ee6&!XP<JkN%~e)`KB+Ye1?v0vP*C|l
z9aw2H;IFj&NAPptH4e7?C(ruN*fp6bxW+2b2dU<wKx5*46jc3B2c9(9@K^i1*$ICe
z{95=Ikq-{1L-Rk$>&K@|E=sONi58D9P*Q7>zqHctUb=a~*{8g9qI}ttA!YwF(*wFv
zLv^k6zieAx*%Kkk9yiwql+_JY_E-q_s8?2-f$PrUo>As5?-@CjavTC#Wzv7@9_{kW
zSzUgs`r8#<uERhJRysv~HFxKgx|fjH<HNR-#vRB`WEWD5lptRqUm}$acSlJPH>Hh}
z!{fv1wCr{RI@|I4essP#-q-gsI=`a*xwIXj>+j&BfKu^v&<!-+wxjIvVX4X8W}qm7
zbHAWyHJ>Yyl2?%FSlwfdr@>x+4YQ?LCI6v0UUzZCfK7q@1NP*BKDQ2bFI?&4!}7EF
z%Rw~MT}m}PpL@S*^eU1~d8$=#!T`Ulkn-T8fX*kHucZ*4w#G$YRzTkc=o3?fTueDP
zL_LQxe(mEIzD9*S8ik^LypHBoDXo)t`JI)Hi2nOimy5uZ%w*nk`GjPOcpdqM)>!lY
zv~Mt%^m^c_R;$byc->vpdp3=Es}9ura>~?7$>tL3H0)L9+AleZ_c{~Lc`s>IY$yeu
z_2ie;CGmY{Git8c7#LnpoAf{K?()~_F8@^hXKR;n7-)}f8EArWIsOK+8|U{U3bh`3
zhOo+CpPv<Yl+!9Rb|0S!jKR$U@OS0mQg7w#gn+?FfcZX-*^FTVEvy7H9)=93rQqb}
zWLTLUL*=;mVM@e-)IMz{0czwN0XT}DR%CA?6De29Ro?N6vXq}!nePKpr<AL1-DE!l
z(i=;8l6l(8ipn-?nc@{M{lX(i)wlDEiCod_xJ}ggGkP^2!@IllH>*4EueyJ0=gAo0
zU2#_iI$>N6tw#<J)ep!oh|-R(Qlrc+b6J4vY`Lhw9|3+%044!W34mP#27iM2EsiTo
z`)V^3P6N9Uj=n<LW$P{EcbEw{$IWS;a!wv&cZQE7`uB4fNDChE2a1-PJy&!df}#i!
zw?NS|tP)bP0nxnr8&S0NVIP}CpLT0lZj~4Yt5u>6>?y1g;@(F7fvbUi?}1@C7w~t0
zXsEtc!wczaHQI=rO?j$S(1%q*W$z%tRpL^jIp`_$SBVJvh6nVGMxU5X$hnkjm5`l(
zqO1}s+>FR0J|1TN<@Iu26;(P>eaU!OQ|pGZ2ZlX!a>sK}qDze;lw3h-#lMRjg5~1{
z-XBmO!tb%nHbBo--g!9xpLyO9$8q@(#QCm?jzEz{@_H1VKg}P>3sL+DBl$e)>f)*D
zE}%{Zwj!n~gBBqx-rHFPyh>rw2-lqNI~JJ_GZx3*vrh5RV#5;~N<YA?;2P5ZIZxfq
zO8?if$LCX!<eEZy*kq4%%}5lfJDy>v7n43I+JcOtT;CN|`OOpkvVD{%<IqLCTE&5n
zc|L10(02hGKBX;!d)%&s4K@Gy-J60kToGPl9N^Kw%97w`m;u=F{;=WgWa&ccYT=OT
zo~2GsejrD1s460zjY$8_br`-lrS$80Md-~u5}1CBY{9jwNZ0#-hpObQo0?KTN}rj0
zIY(+sXzbEf-LZ|f7t$SS`#Dk_BVpZ9Fv2ybz|@0L7v?f})qNi$Nia3hT^(71xEw`K
zrfma$R3HybU}`9e-AFYWSE3P7_@C)Khre269e)tT6L9=P+G+6Vx+k|{Kl_2O&@q(F
zsj%wWwHPX)YsGFu`ckg0RrxIw=^7ROiKO5_c;7x}a*6VEKy^U@;B$0=xG#|^aDguH
zyILckZ1%SS>>_~a1n?2<6~JtqpQ8ZS8iUiPtSd0s&%%!ZPX_+P!Z!onk|6pdZ*V_v
ziq7WCOz`T;-SAm-rHp@pq~UO^E00v)6d>yV%yalA6OhR(Sipx1&fv$`hYRM8^}Dnt
z>;!a-0jbB&gJ{FA5yq|QLo)S`-_2mkHRQg>_VxI_ok9hE8-Cvr=X-Dn=HmC6u=HHi
zyh^6Opz9R)r${#{)O9MqjajL(e~>yj65ew!7-zU^x>{$};LK$>vjaxX+=V0a<D6l+
z@H!#MjhaG6k10Vx4-~+rJ%ZsUOQ&^kRzF{ER-?mXyTfV7h<kw{BZm(n={OuVWD17y
zRV5grk8IR8sj`UvGk~5HvbDP#DXT;7RdxvBZkP92)+ey2u8GaI<#d62mSc{EQ#W+O
z`!k7rmpCF{K>K@<;OgC&<zpnlEgjv<TPIs2d1L8CQ$z-iv5HPa(NGla@f79VIgKR*
z(56gLzhnw%e;n<#Bxo6##gd>Uqy(uEaB>Mw>gu!?Z8c7RrtL+HQ;n5hkt1ju8>hix
zT5^_adc$aTzo}2$eo2<pr#q43sn8gq!rZ|aQJKT&tdFwrG4ir0K;emS>Z<?2y+~Jy
z`xj{d7uHopFekv&;SZQa`s4X<vhoc83+v3QDrDsqlS@|mSg*HY<P)4$cN8NhQLY;l
zy(!-_$zRQtReq&|mDLwb25pEVP3)Y@{aN+|^_|!qBGggrs_rK0(0PjKN$VDP&)L_s
z<h3%dP3YrM^l|lbdzEexxo5bhB?>f7&L)s^31q(?$T|Y)P`-QDnQQ`SVcTcZekAR`
z_uKzV`?2NQziKjR-<tOI?dOzk9_tT<rud|#WCDI^DQQI;e)X0TCZG3@;7wa=>r!lu
z!qx%b)}2>ZNOGacz*ZaE{#x2+(f%i|JrfsYtB|-c9~intrev6F+QVz;YT2C4(AARp
z3sM&!WAkeXeiaX4V{b=eZD-mP77~dzgx`=<8V7f?V#aw#+G@=HMBCl;zIMH@kPeiq
zXH|a6)Uux4z@^nu)NvMdRd*_NYTt&&wb0*|p>H}`Lp)$JCwslFJgRz6HOu02MV+kT
z-+%`O6mLRt7pquxgRJ6?R&ht1Jj&{AhTfIac|60hIsVtT(AybSb8M29sB4&H#dwtr
z+!!b}$ztO-$y-HUGr4EErjB*!Gq`&LW4J40IE^}rUDe&^k708(IyHnl;ZRpzHH4ev
z(9+qgLhoLEoa~z<&%o=ZpucPCp+qVEfQnB^v6iX5NL|Y5&a_O*^^kBWiaF>{u$R0t
zt^U&KmpyBAa^iPa`sU)4bRC0J-EoF_6%9_T(v3fZ33UXZQgk`swPZ*jSvO4*X6Ox*
zO@`9p>M#lB;O)f>-oN0qV6VobXOZ%Qm;TSk+axaoZyINqYwBB%KgaO>*5hNX$8l-&
z9QN7uCY$Uvge5cHN6tFW@CRZ8%CsGALSHr+{ThcFQ>NW$lk#70j9<uJRJt9l#8XZ0
znmAV-S&kUCqecM(T`)j1)08sMv}e%u5(_W-y;JWz#zYh9v?#0YW$JVU{{!iP?&FXs
z-kJylVtNjv_?f1qYg)z#zL5m1vG5`AXLzo~`CFtJ<+^n4OZhjUW8syM@v$zX>{af6
zqYOQ+jND4PWuO<~^u{{|Na*+|8srj+KR&L3yPokOj(|-DT=)!n%VaP<lC8b%a0@S|
z{{<dr4JPj24JL#BO#)U%wgG=mMzqR*C&N~$%5NtKl^u`H6AlwZE<se%%`uJg8vt)0
z3V~GsTPKX-ZBsJTHC3&_9dL`S!QR$j;vhDff_|>4VuSbyHoBcCwD<ggRJTf1aVJVu
zb^=OIJWLc0QB<=*%$7oe5LXqhO~N4FF+~JXQ#Y1i+@1XIA#3nNYcO#Xn@lEA)UZ)}
z47-CMw5k1wq)@JfY6Rs6LI-DIU?9@}Orb)q&`m8Hy%f&F87VsnXZju{`pki@Nwv}6
z0+&nl;%dOPO&I-VQ$X~!We@xAF2H<iu#YvEIQn-@Nnh9e2&{ql3G7ay*B1RVa=-?m
z^1IQgG8diw5(E)-xN0dHz?X__6n6p6CJKSI0NW*u;ysf~6hB#m#c)fkL477%1``Ld
z#S{?4uj>EUq2?wIH991;2h^cv6w&EW^9u&}HG)o)NxJk;tKRRFvv_$+ZCASz{zh3n
z0@gYB#sU5rxEf$H^u$)uC9sjfe8q>hwYC5MjOSmGLLEAONB*Ef2L_d&&c3d)lTp$E
zC2fHN=LPRF+vStje_Q5vm@_SNie);&1kM5;lf}ow|ANuOF1)q*!3qAloC0(_hII0Y
z5GH|`d`><GX{;qz*W3}hN6U8kt@gRVsE!TlH$&RJVE#c@E$m8HbRC<5uH9>5co>GY
zDM-1DQ*Ru`hr?CvAs`L7Pbu6*wzRGOjr>bFMXt*4q5oCZ4_(92lW6%a>F1gfI2OLB
zdoSP|A`*Bc;4y&V>-YO6V~A^tt-%twW!7MSYcTPE+iEfxa9_eI@>gLC3H%>@{xuc&
z>;#c#Fv7n8(g9VMpo{1xU4ClRHG~NsPz&kPuWVag11+Ylu7B#%HWJs?&~`U%8>;`;
z(6Mh0<&;9>Ml^0j<Lmi(266*ZjkIU5Me}0*Y5rj8Q!)2S0sJfQo8dV;cwfcr70#DF
z)NT))#*!}ZqAfxP)5{@)>9vruH_SeiDWy{O8Y$J4;t|MboCq6BMT1@QH4I}Z@~$pD
zFJdqy0enlLjzkoiBggn<Z7Gjkps!&-YHf(k<K+Zgd<Ng!fumX4B)FwGK*GG!&s+`z
z-&yBWw~RWSu<IjP=(z;Z!wyXgQrjYr2D|<|j&|ONFTu<G7S;iD1+WgE4Up_o*kem)
zNde3cFiNcsB@Jz6%b$|pDxXE{7jh4~e4$tNj``j?AZ3r@fKE4!k#Pwe$i;#8Eu-_q
z<1ji)G(*NGV4~STTP&pgdnJ(e?g%m=0p!4Z025of=dg5ZnH@}fE!_-CZ|UAj8-A54
zb25?YpO)@zl&k-@sQ)jf|EtsDLkiXZ6q<DUUu9j9SpPo<DFea&cLzD<W4Oxxj5Pgc
z_y#_rOlJUX$xTALVYAIRUeQKg6>{&|d<v-1EbvJ(p?$msG9^JKiq3S+Coozcw46S~
zoYzX)9ytdlxQrKa-S@fGuU&Q(`fKs|Gt&yWEP;Un415NprS~tO17u5kL~UdT<-DY+
zd>EUKM*b7X7DnoJ>a>ZeZa8&nudCj(u%iuWnsqMPlF@%@E~9_jb>7u!4qooEivI;Z
zKcM(S6z{Z(Rd<0^tgBurZj0iptm0Pay&#9K9NyHsns(7!1glJG`F)s7vGPVuC*YA5
ziqH?*@p8^|Od*XG^)EEuL)2Pw>maXN2UOk<2UK<%a=vvSetXUt=$O|m^9Rfj%be~p
z^uN*(zdswGH?0G|0`0{Ct>en~8&;Xh2cS%41CR?6I1m}Ymv}7mC(K!v87L+S6y`v2
zf4;#3q;zR%%_m)2R=Xn?#bwKot+#2b`(N5kn}v?}_ss^{XmG2|Y6hQn+h}M0cLNIj
z<*@vUmdW6I2T0xU1<==ILCa@4vXOFL(p0{jZ8$dgda_DwrjBA)bt|Y-d)-SLgPl(3
zQqW9U{l$oA&c>VBy}rMzbm=YN+JBQ({tXJ&1eEte`FmEm>YlU8b(cfR+oLqsDsRI8
zyyJe}4SVRfQr#H@^e(&t`VxNMO!{AIdPC%Ct5_9}qF80RQ!y5WabB0M`I$Jo%5vYr
zy<j=rFAx{!P$^dg_Y7P$?041vpFG;%`$qv)$2<0WZ@<nu*)gkWsAW{`P65zAs?8e8
zweqQatX=t@rA*Q7#+h&EF6|dhkmo6<$W{5{IH@vSWk)9-Cc2VAu6fRa`vG6Hpstq1
zC5kSa=+?q<aOtYUNp*tLGa@dG53UdQ|6ZV>qEo|D1JS)ixuR3~EF0a6lqtHeaOPX0
z)6Ui$d4+OCr}C9JsWM%DPdZF+1qAo<KE7-$@H_tZvITV+Eih4Z4$-}89oYl7A4jys
zWgzRUBPxFqeJaxx^yEa2L^)n(;7Nf6zXx1rL0t(8Oyo#5T)uT=FWh%HqRpx$vefER
z`6~3OOc%3L;~a@H2TMm~_CVJxvEUDYg%<2%!9<Rfz%93qd=2+Kj%ZVBjV!bJRK6O0
zDsz!5d`F_e#h}!Y5-vy|vETu~*DR<DOM!_TDZ-Hza5PJ+%ec9irB(~m8srl;ug9os
zhJDptLR}ku|MXN^cS7lv$b3Y5c3T#Y0WWhq?YvZP_sZwI{wS~p-<yJoP>0iY$P<+7
z7_RbdL;bQrlwXXlkYl_ztt;Wy<I<-A|2Gl-h(aA2Iw1d}LVLH$&%sNT=>qg}6bAQh
zwr>Aok_*t4mN^7-p=ERdDyA(q0^72`!xGR_FgkyvwZQY2E-Opbjil;HDwQhLy-c0<
z_`XPI6kd(ohbU9+$kd1&*7(1g`TIPI=ff&f|KQ9|1g0(iC}f^>Qsw92BxO-=@4g0Q
zVUgvt4KJ{e!I3N<GTOVvv<-<Y0&}-@U>}BlzyWP?U66Yzr^r?Lvy7=O>VI9W&O}+F
zHQwp(ntLqxBj6hz$j6Z_muN6%Gl=u7Bb#x4436kP+YPyoa*dhIQ7U#YXjFIxaw(1^
z8Z@VxB=dk(co0W^Mxo9FJ&^k;*O*cHNV23d9ZF`PFD$bzhmu7wQ9%w)Tx21gN(8kH
zNv#1|2t<>#B$zkJL~MGUz}EaAb<NSIy7km)&p#7sg}xl*VMP0Wd&bPnOF0P*>J{~F
zI48L}et%$!NcCcrL{L%#CC}oW_I4@h20W`wfxlLG$M)CF!`5&z%mxf=+a8Scr2T9p
zVA$h)dmozI{!!OFWDV8?UV}mH-GYw=yfM_^dXsAnR)X1xL2ac&kdtVCQ$mB=OeO*5
zSbMc#pHu#|oeFjVZw@uM!OXS>D+6u9pmw*RNG9!XNoepRQ`FBjv#h}s*ypjQ4NmMS
zaM4>s4Q@1ZtidWk?_yB9*jdP_w7)H(!H-Qg25+<mYs0>PL2YDWPlvm`yuH}x-#3|?
zt--25TQI2IYB<s#;Esd_KQRslZ?XncVb@|%8<yCCaCeqBsFM?8?Ok(=XD@2pYCu~t
zs9otCWDvkz2@QT~iu$@{t~Hnj`yvLlF^N4BZeFOtE#@|uW|*lDGnUz=C4HFI-ys0^
zAc4)tTce9P*WYTPHbCoGhP9@jjSQvzy+~jg_V)sqJ79G6t6i?!kkSp!dxDBKGP`Vh
zzeimPZU;bTnr>aRoaWEzg@$#XIeiy(8F;O_R@7;?JQrDo@zqF8+SNeTA~~!NcUp%V
z(SAHpo<`gAkm<C&ABhW+ht2eJ%;kIh)N}wUBucICBamwW9zf!wjKEBVp-Eaxn2rIb
z3vqfHb#hvDov72sC8wW8*K<gj)2#pbczp$cR<7y*A2JrSCtM(i#tV_~1>`f6$ym4w
zHUe7*_9e!G_64yc;1(fv#S1K0Tg{a)8Vk*|{!b)LTB}DQ*8x0;#HEQ3`0`gQEYun1
zHL|3o`h4Vi+CPNEXDJKjY8V~EbvQ{K&ETf}0@_xmyL62aeViX6CpqR?8tNFXhNqDg
z{iEJYr<@4VRQ}iuzwCOeRJ+=dDAmTbIG|KI3r}#&byla^ogUD6gVjlS)U2~gXHZDH
zw8{MM)=6P$`yeEhuZNU<Xrkz9L3*WY<rIJaxEd#QFG&|7>u~l}#Jm5;Ze;QmElYFB
zX|9=R9hnMzSEwWFLyT?<DcfMOts~O)B#vlbnI3SYFp3wid7gONVyPp!IFe=ID}h%5
zYmb-_fM17uBZSR7)ioE}Ag06K7aGJzp2JZ^&zV7eUC3S*62-^nBI}5ht;P{;9$L6!
z>3j<(HiS4*aEfcjSVv~SE~@BA6r-EG6^SFaw}@!ft2kvoF_%~;r07|k(1tQA;KbWF
z@J^T$nXVZJo56TUhi$`pFbr-KvZ*SgGJX{+KHV{6t=BmK4_EXWr{68NjAn&8{i&H?
zotB>Ga9W$vjYuqWo7MkYLcGq*bj^6{bz9)hc&&9`UcXQK@~asA$6TLeoxYLwOGBOB
z5?J~C<vLmuHnx6iC7ftImXa6nSi9QINX+A{7~dA+aIWi`OJQ{j_hi_MV726C1=x?^
zK1O0TmoSV>A?coMJ=dnuBW}mi{t(=;wD#LeG=I|}^2^}Wzm4JBktuCB7b2fwEWUdq
z*qdV8cWgrccLmU9Bj9s@_|&=pLoLu81086urFt~7J;Z=}nq!98fV5P<H@Ngl$#YoC
za_Ioqs>r}+Kz&wLSX#E#46}y2pr|{Bwcg8cafqW?7(R>kTKVSE{zbY#8__LDtousf
zzCdCQMDu0L<t7CEceeHVRN(Cs%lb)syJkhK-}Ji%T<~|LbvV5GU;HH)*I>;K82<{x
zdyp9C^?wAf&$Wh62P}y*tn|kK8CH6Ozboo<tYPtEF|4k<C}4OmM)#FBYzq0?o+I{f
z|3@D80~`;i)Lt9_zXAF-4$J{O4}&e?hQXaM&L8EMz<o+vodtEai#ho+PUjHB`Ib2w
z=0q4}W~`RcuM<GU7#eAzbAe6*QdHxS{RtQYGs-d}VEVu)poz$LAxw(AGu&jn0LbJO
zy~3xKNAqLst(`l^(v`5^!|D_@hSopv^M$lYGX7gTeFIy2Cgrs$zX01~xjr2O<5H$1
ztE?poR5lrjjma070Mc(07WJN)l|{x*0c4=2(bilt!<#7PWXfXB-GOsssr<n@H^Dl0
znswqr435KzakM$rI-#=GIH9sjk(d)}aYA}(vLty6;wvc2C!2i(25-lpvVXuD9Ayoj
zZVhIkd^`px(5AmNsIs;gRN0h(!51+oJ+WaTdoCT(PluOSgtuW(UH79k*w7jrNSj#t
z`vuZJz&dgfMknCNB-k^oBPwf;BPyF3aO5Q%k^Yz?QF&6F>6k%)vGm_D!%zQU%39&5
zPFBAT^ZW5Kl>LkX-M7$DQ14mmeA$7^d|u3wRl~f;r{yzK&$PVGGza0;1y>-yl=1%G
zpRx+Pu~2x5W4c*T+kfTZek!K{{#phG4f|t=AiBcQEKT<tVvj-7&pm9p0=er-8sKP0
z&tcM8XKaJxh?3`nC!6jxY;7anHiCER&~`cUJBI&46s^t^fAe!4Uy1h^G{XQs+B&Vx
zGzF)X_A8O!4&$^Nbsf_);N9-4{426TlrO`fW2tLn9cnYqcSeWXe^L4mB8Rl27a@hD
z{1_X6Qoe%#l<upLKWG~kK$J9i5o9s?kF&7S{uQuNejV~>0yxGNlKNhjRrKG%D)Q@*
zgNJ9MCZ7!Swo%IKw&Q${b@)}3`b0!9%3{XH@w8X^)t}v22$Yd)kRsZvs}3RRrlDkq
zL2WwG*Hd=#)_dDq15Y>m+@v>*F3D*8T823hh3HDVga2K6nhzaip$D>G4?N!Nb59|4
zrH{?xjou1nNtB%s&^fJ4XDOE~PSZK2jMb@-z1XB5c~M6Ctc(gu9qRg^G{She7e_1g
z^`o9iq?O51mR`}<8OMu$<~_`>=LDW^_PIGJz*e<vRB#nw?KKuI^-z&z1fUuLgH>U)
zSHv@lJQ8410!%d+E#U#iza^wpv;<P0>gwre`czj=*CKCLP1vV_2SIE%E#TFKck;jS
zr};)=nWeW1lgOS72g|r6UB1xof*mHk$`z4ncSlTX8_9hr{rz;`p<JA*UOwHW8B_PQ
z3NU{Km>OYOrvD4ouWbX+!2pz&FwNUdn*d`-v$7@U>Qqd#Q3m*+fN2k-^+Z#=-8hcq
zn%}b!O*IEFIMs5P*X=BzdEFjBb6S@OYM&I>K8vZ`k@o6FP32y6qo(l<$a>mq5lV&$
zOiex#fNQ*&?Rm(WWjde$HtjC{H?m*FespLP-;YTlWyzEsg{}zE=Ar8wvMNVzLh9m#
zKU>H@(<RV<J4~kl^j!dI1Qb3?c9<>!=0JdH3==*}c9_m$u<v>Ia{y`r6gN$5|Lrh!
zfRr80|Jh^8rczl3%P&U$fZl1+50*4P<U}fDj;{vpG-)Vpiz5%BbjVoVAR$F}AsINb
zkXNO9JrXoh4@Uj0+XTJoXPs?xkd5K}9M{b|O<g~bXxcm!oY=2oKPTZ>d_VJv6t7HO
zR_4rhb0o?l1aLpP9z&NLxeaMS0DeEmck@njgvW48SkTG<)DkGXpGjlD&{F}X4NQ1H
z?=<xT%<2Hs7RI|IP?W(zs3;o(sY~*4ZYW)%c|8wl2Nd2V+f4%)b&2NoNpy*(^=#x_
zx<p%eI&5I-d0i4!8vQ}C%cTFF+x*Xh=B6=<(2+Ku|Gjlu#jfE)p%uD@4-!=<OQNh%
zK-cDqx}t^2aiH%^u}S|qv-z*t%_}H%OtEQ-QuX%(IJkqEqbP4jx)5P_fA2C)0!(o!
z!~E+C6yD#vO!ENrWhum+c9`(~-esBvm^}feJ4}UXjeRcz4v_jz_l`2@yBmpS4)Vca
z`mSsS6mzNVciG+WtuY!ubCSqfd|5JO-d94(Pp2Iw*+#GsWm`}tNA5s+BusCyNeVC@
z1el%)vx`G}_(-_=QHDMWK*uLcZ?UNqF!X7F=^d6{FT2I2Dok7KEXB@QWBongZUQ<%
zAIS-yPCHB$Aa#j$zEkNE?Rs;OPv{bzY8zlMema%SrV^9>T}iVaoaUxFdQg$Jfd8Ez
zKLgbN+y<%8H%>@d5@po_%Ch5?dC`{F<<F)s_@Y!nvu}%<Rj|YQ|AncEQgy>3JnY-w
zkMd3;J%tFvr&Eck5nu)cm{VcGXH$tu2{32)Ow`EGX+YuAsl?O@KtlqCPKT*5ooohD
zVbpcHG1hm2e^cyM?Ch@4b>v^uqWmG`3sV~f>a%<J-#H;^P9kgK${!~EvhuTOr}@za
zuoztv2|$j_NBSze+BT&qxc+m~&Kw9pmj<AI3Df(9`8mK$4KQaW%<dN^`cnX!7JvpP
zOz#)wR~SvF6+|?Q>7=c80Wt(8d^YVgzrZM?TC#5#QyO&ek%;~VQa*hw5;i+!t6hot
z-8%3X%7){B*8Y2ta}qkR%lu}Ul`v<+XhFMI9|<`(AyjPsw9r#P=KyK2-iMr*kSQ^L
zz%cl{Px{?C&Ob@DqWv5O-zywkUV@KZn08_Q<;=a2E7uj-z4D1P$u)(1T+tNucFo^b
z(OQhpN0IjbqY^1XalVQ1yG;rFU+`otO#|<pvXlo-PW1ml8{ujiolAronR&>C2_yW%
z{9_%{r}gf^F)gK;w4WOmVL8XXGR_Hny~OfGdP^`51sXhO|DP1>F&QZM*DBEalnde2
z_aoq6{!a(?noRf-%fGF+1Rg}ey(k#@e^T(Z$woo3RiHOJ^Wiln&WC^HKONX-a^b)9
zdGo&B5?+b|rTl(m^#4i8H>Ln3U!Vj$Z5#iqKgpj`U2M8}VH|zq&7SgQ`^uFu{?zbm
z^Ho4u!%$^=%Yi<(C#38vvpb-xNvN*fF^>B`j9OpWmtJ94W`y25JoBr@-qUp|<v2L_
ztx12WsPQW$jbE+$s?%gI23oKHDe~#u$yeqcKw^(i8%q|)A>)w=$V6lkG8wrP(VC*H
zr0f2#lf&ngK6|?99+bADVi!sa$NLJknq3C)DvgVf&HNslyWMc~s@ZHR15?W$jP%L;
zru)zn!MVNY+0fKq5f{PfQ9@I#@$RraW^r%plI<PKJsIh1;1u9*flnRCyQqMRk;?&f
zpKmczcszgjhIZU~H)=O=rr+j4+I>TLs^w1}z~H9rp}eBPlzP!CtO}}47An3AsF;L`
zhiLjRvY&Epka{j8yM9?d<#ORoaxjh8w|G?&dl1KP*dNCE54-bn`BrX|AJEZ``rmMG
z=Xhh_qox2QR|3Bc{0<UKCcPeb$k(^#dlb+-tqKXO&U<Ry$n(DLa7m?P^FDRx_3Cy`
z_8k$=+Ya7HN@BFgIhAd;WtB|SFYJlOn`ZN&?@`qP!)w1uFUn~0dS;V1tG=1tWD5q`
zlK~m8fpIy$1et>KS0hq>G<xdR;z}UtmB91UzBM20NnQ|RYX#VAqUGUo-fj&TtQ~-+
z$1!Emd~3GBv?B`!CrifrncYmp?<s1WjSW)*_A+Z{`hN2vpoUIWz{&KTB3p`lOgS%U
zDnIdxK*}p8n~$hd%2jtJH`J8@DK1O-{?f8CC;hTx$kc|(^#3)vX9b?0_MJ(8I-~J3
znT^*}U6bATQw;FQ1Xl)5!nhn-hFnJmu0>`dGK#WNqugjVp9DBJz+F#!WxR}Irk8p9
zo%uWfo*V#Y0OgbegM;Wh^BG(_97(~^*T?%#F2~VanD=q+10<OKV~PITd$^Yp{rlgt
zW#TEP@eUM4i1<hpy^To83PjUv4x(s#0Ux}Zsrl_G<yML9uqm(&;Z9?f5cddj6I^VS
z$Y+G^qMf>0?V^{`*J|`AvXhF`m;#mwl|6<8pP#mkNN%<YX=_{*76lYeL7$k%kwVI~
zX2?(t*Drg5ayj%ddgfN6|3BpiMRgoewO_zJi4DE{sI&mjct8Dhl<3s14}vZk&wNM0
zO5_%ld=9$<2|mz)H7uv~&5V~JpY_Uzb6lS3(bF}rp$9BY9}c{Dnm>>iYNg-KNPd&b
zx_GL}OQ@8ARmiOvxeZaIHIV|Vu%x?d-tZlZ+z4|Yj{T4Ja%vY2mBcyK8>gNc&KO$r
z(Q*0cNOHBmes6N4JzVn^iqsz~nz0>_J}G(<Sx>nh450FdC;DY?Q+@|VVqcF%ap2>u
zKbTDPy$z=i|J@1qnw^%hb~N`4E8{AQ>VrQ?o;L?f2Ji-8Y<TdO%m6L1`XJz9vh)sh
zEN-q8@1{;pK8@UkL-P?S<uhM7{U`8d%j%V--?=G4??%`<#zbBxTg%7u3|Ba^#dC-7
z;wvPs4^2MG9Qw|o?=}Ogd$e(mrF#UgMHXN*tb2;+o=q_IVAQ2^7|iO<HOTES>PpEH
z^?w%E7Vptm-?LI=1$eKhUPJDtaU~iOmG{|7m-E59Hj;TLet<|;(@y=qmp1$20#S<d
z;dv?Ky<YP)th#z7b`H*j)s@~e$SNw-*QzL>uT}OTl855(zW&h^5aly)>XLil_RuBb
z3XyqmfiCes-=&#ASFE-HY$bpij}pPN0Jl`j`~Izz`fZp#WvQp}8ryg=jTh7QIooza
zO8o(`=#sqg^OGs*?3!n3ude!-_T%X)=`KPR;!s#uWp{DS^Lz5vy`9qHtmj~3Z+qV}
z*6*%+;q-;lF=XvQeqKu(evP2g^r!dqPh<22>t_jepTN%-eLtVv$ODdoF%{tBOSDxl
ztR`vy(hG{|dE`aP)d?zJN++o7V`LF_!#m+;lX;YDUa__x#rDJ4*0r2Q#P8(tPsil_
zzA+^wT0U977zamQ2GmSlh{5%tGxaBLQi(15gx)XaRXFv!xUcDTIl2ugz-U;n=XP>0
zHtT?hI`Rt8Wi!c`0Lt{a43!;4idFVG!VMztk!VV_dLTAomXoy{-r&uL(`;@<#50Nb
z6&OZP=Xc2ViL3yhu>eoqUi0{T+9!FV<5yEcrj}Z+D_kzz8y?4ML4Xdnf{Orpypibh
zznLQVCGeWmS_p1pQfm=-6}hL(NBub&JY!NXr?H0O9vV+%C~8Q2ggiv!u%YO5a?LUr
z4UHE_$RU!Xu6!AJgmMiHl`m&#sH_;tM_2iw!I<<t{@rADbj=DtbyYs#H*}T2Pmm>m
zVO>=M^B9b>djgK%IG&+FdyS`LGF17nS^C2iL`l=5Hj2&|{e>vhJ?oGsDA%>W%2(U*
zrZkt0tzXUKw4tfj=5-v#p0@$q!v^p_>J*IlH>i`&Pmz^4_dldEZKHgWIA_rsl=gYQ
zbO$IRs+GWssvl9^OjH{I6;)L%R6lG~f0|r^ddl*{;ctcCWciw~DP?@r^P>FV`O9RZ
z;7Jt3mX`%AFB&+H;DDAF)_iYynNJ&j^_G_bc6lK|W}Y>kh4DKu{;oBiO5-|Z#<2}l
z-uT}p1LF&<g2^bDhk`9uL4Du`<rKv8|Cka6=H2kxp|sF1!P`Rk?Z^WJ5Zs~oa~6)d
z*BYCSu>~00YK=9*P?N(NJ7_X6b`QL!nHKSznPytRcOr{17F!CIkd+9Z@5!UBX3SSO
zrW@x`?VGP7_ff%1nkptu^#{@uG~CX`+5^;~+g06U>eRj^Vcds-wzPT9M%9P0q&K>n
zuYJ_}YI;7mWbe0n_rXpJ=q*O?BE5GY#j3p0DsE*JcSP|ER<VoX1v5EGj6L5|?4o!f
ziZ#bCguRP7uF?G!vK%Hh$78c4$y?O^H4X#&K{#cgC)}_4I<Z!@H<86u==w+%&xTI7
z-%1B-*%bW4D=WX`|6m?Qm!_U{t!L^<e+lvu&c&u4GtcO8LdIdIFTk^hY($p;%_ky3
zdyo|gBRXV?I=Cj+Msz&fZ!`S`=q<P$>x9ZT;DpM)L|zU!q4{5S+`x(1)`^Kg3vfcv
z*T@qIoiI)=PRxR1K}n;j>3V}de&+a&MP5bMR2<tt<&BhS4^vr}G0Z;|euu<1u_GAq
zn&+{Rt4%DzCU$ecKsO9%p5H_nXqwL4-v(B{{L0DZ7V6Xms@qSU&f<HKSJ3@BqSY^x
zuQTT~7|0E%Vegc#ewp1}GZ$Ec#0CC+roZ~V1AH6h8Y3!yA#{xV88Sxx3Mu>B+$sm@
ze^<&j;-L)eL*B%@a)Sg%9L7jV2iM#Iq)|T==3cr-%umQuFyW)#aU8hYZ9u2P{V~&D
zj&+F}y-Qx8P!*eSLWSQV>k~LpfU-NS6Ia9J;e?o<kyQzuNOFqWyXNw}epUy;{fQGS
z%Te<l;8ZH)geu;}36*`1yp_O-Y@C<|qfE?zc>pKG{Ej@G(1~QH04J`nPMihzm*&6L
z_V<yis8HXkVqIw8{u9!V2fecL^Z!S4r42~Bwh)l?A3)xX4=6VOOYrO(8__JFMMNa%
z4`g-1h$=ama97)i&V&1VroVrD0C%l*Lgia=LS?@o?**JFH~(`{Hr+Zg7wAEp5cDtd
zY(gh0JDKfVbG<dDOQP>Frpuw<kS%a!)2(c+$>`#m>-Mt#Yn#-`cq!YYPR0KqOM!yh
zWKKKB+<*b?aylQ*9pIl0|D^4sw5`buk!Y98YU`L8wAIN;mj*`>gywqrF8bL-J%{=1
zh#DQhV_iG^jeL^8<7~1s6G#s0e7FpUbv8VNJRaw81c$R}t2N|J+UlI4qt|Ch7UjI8
zseD`X^0KYgW!1WvN;Vf)l^;<jpfRa<5(Dkg^Et1IhR+@6<Z^6#eBhSSg)5`AYc8_N
zH^YAw(0wYp$5`E}+hcV{n^@(YP`bk^=hK-v+pg!mq!lN7%9Fh9vx?(nxaJa+G{nJ2
zn`2<8fBjJj9m`<jzX#37QFBmh){Wg;`V6db@XE>_oT$#j1iWL>4x$`Oh0X%1n8H~=
zWk;d-85GAIGQ5j>eJOc@Wxj#A+A>`&(-9+q^Ml6}(k<g*V&}@c_2-myR9b@(9V|xy
z>AH0ijrcV}l^Lix%0F#56Di002=Bt1%V%=;hz6HZrX5XX-NyK3)o~)Wp*2O3^v4G2
za`Y6o;X5>NmC@~OnceM<b{U0QgmmT7IdtUy9<m7i8&Y=2OhK1cRq6VP0bY!=4vu|^
zGqEWd<>P^+`+rGW*9-+x|EvdMcl1szUjPoFTmw_(pD=h-)*NN)P!=~ZOZ64Ca?LQy
zy$koX<uWYCn2QDnod+g)gRQDl6miX2)|t0}lW<0-m)*$OR;kKAL#fJIptSHX&gA0E
zaLc_1_l?J)w6z7}W<bE1YECZBoC}PdG#xlUAf0rpcd2zhQ|lb+lxo#|NZnU7OhWH&
z9QqciOj|zLozpjqt$yOx(kbgiSvp5pSm(ZvfOY1p4E$AjxGb%7(|Isjh*rQXXCczb
zrUuL#FuZWG@o#|@a%Fu!Z8fHJq-$-*RP1DI-%&7*Jd72>7Sj76Wl7FR>#UT0g0ngs
zR>k5E37jp!*-<bWQyXCPZoLjU-yj!g{nwyU#di8bg>6u_0cD8>RY^<Nj7FijEpYoS
z*Vb~0yRo{HNjF|-op~F!QWS4=+S!j}TBWMkiBgrdL+QrDIFpStuI1i```&WxEtlAt
z8ctDjm!|^(YdKxjoKx^n|0J9S{2E3>mebNl0-kK6*oaXrB04SofShKXQF$@WsH_9}
zHYErm>Ts|uYT=qwEw~l%fCW2RFmV($om`^mYn^!qR)3&aC!HUW)2&XGe}PVwbw=m=
zWzP85|LkU^qdOaC`djb=z@IGG#e#{QspVwiOh4FKWK<{Q&lsRO9oK?AfYHEIwDvgv
zXEH(uT1Y3_?La>R)du=86ynQwFauyTsyD&tYpFU~{feAnomcs8oL5;7bbU}3wYMI4
zLBkn`F38Mq%^(YY2>6Qyds;AY)G1C;GuI4;)q0@g*9z7H9lef(-G)(KO4kE@+UZQ%
z>OQ7U&e3*p3xYrQlSAfyC8rypQY&yLS<-R44%(gSVX5VUrHyO4T1JQ3Vi+B0>%k=X
zjNkt(jb;50)B`BCN6+Vmg3fs#6N$EHW{bB)cc%@%MyN7_lt%uc#vDz#w&>R}c*RW4
z_%!HAnYL(^^%&!qMR0=a5C6_-Gc?ISaF2Fd@y8?pl~$b*A0e$eA2tO1G(N2yI*;)U
zo4vHvpGng}_8@W`<xSC{@~IqBRMrh0AEBeI91q++@lGlEE%_>rg}w!vZlUfL3Om_E
zV0yz;reD!-2HxOj<9Nz+64r6PLr7O+zp`^8jnuDd^aXvS^L{$Uswe2zj5e-00Y;<$
zYnYm7(J5FXtq<jjR^?aVxXO-3K2x;(8W${IC(dxpNfz1<be)Bcu~1mFIWQS8>KdJH
z9;It^vT2I!iR+sD){co-XSdN-m)zQ#laZEt`8p|%r>WvsrX0h-`-nn6_VgA&+GJGs
z8+8J76yJ{lO~=|~hji;}V>#ze=RV>4*O%_=nH<Gi!%O!L_{wB8rh5>&TUp&ZqEuF)
zQp$A{m-6-~O|#1TqI}qSEHFoHC{<pB^0p|TX;&S?8;>T-u)8P#(>F@JeWJLLW7^P8
zvqbIAAaecFENM@<x-a?>4x~__14k94o%KxRe-O3Gbk6&(V$`KmwuGZqN6YPmt42WL
zGAtJtP$`$)+KrkHfEu;(aJ(J0M_KPaMwy<tr2|Gaq?0wG^1pCSWjgo$bQtFfaITBx
zcEQ!aIdMAn{iIu<iM(@2b7w$p&oAKIuR}>MP$%RI3?|y1Z=|Q|*Z{u6p#Eg8j%GEH
zG|Dw5RDL}JU58q4!0qOCXgw-5byPWnN&&jf`aQ-p3mVY5{A&0<eNNA%#^l^o%exy|
z$lcJoR`HLpv!&ST{TjVTTD_{f(dyM*R_TpuPByZN`=hw?KyDDGzvC&+Lh%v68YrIt
zr{b-)(i9|}a?K8v|3g=(Oh>nW4$~D5T~Xh1#c;K*;QTL8XE%X~x*{8}p>^a_z%(4u
z_MD0|unJZFFOH~8$F_rqaU{2;YZ`kT*Z(DewHXuwb#4=w$dL>jX#_}<t~#=1jK&$5
z-zgx8rBhFTU#=<bH1*W(OdQrfO}!?RYrw1g#;6_f%_&rxKf{?iG}g9S7imVh(yVfW
zlPc4BE*Wcai%{vbDrw;w$8ul5rCCnLIdQ?@qAW0_phH@?lJFe)9I!5qXs<p(pjD{y
zBowMlXSqs;aU>T<k}UV7^jS_vIdM1=4X$*hjuZe^wvK!ccqESKG*e$7<(efbpUG%e
zneG!P|8cX#+f5_fC#Y(fpI~mXjP4WY4urQW1!jN;6?3DYigjQ+P(9jdM{Iypr(9j4
z@=8RcGM(>g93~Ul8Lp{jx!rI_SgvQ(f}|oe6HW`))U=NL09YSKbO322j&fx}<vB#9
zG94_cB*;V|XM_~X{0uV}CMrk=3qf&fftLvvs1}e`tzUtT#B(h!+<P)Z*vV3<YmO7D
zt52PF$YYW1%!pJuf@qiINhLWWZ{{8MMl}OZkgFYaGDuBrltfT+5G5N4NjssG>_kbO
zG6nwlFs2Z9ogZN|V-CQilNIg#&5#3>D=R9WMT9ET0VGAv&_1w=5+GnPhl!sp^b63f
z7SaJkP*_&-VSa*9<iEi*Aad=Jy^y^)T~Eg198YrPc&bwpam}x=+E)srXgJ8m5&=98
zsSK28V|l|QnO^`Sfd69j9m3QWE2Cdy^oTfv%xnx+b$+*w{tnX!N3~=2M)tu(_1Eyi
zJxy~8@boue^?hX=y^YMo`hGtf%b(Pt(^cK=)M>Li9(f+c4bj^Z;a5GIIVbgE7Or2H
z`|k@fyZ-R?M$?W2e*LmC#dA^ow^gjVH>lHYbpo;$#f?#Vj8!}c#eL4?@Ok%p{)UC(
zzhIT2LwL4{3~8TNwBNwihqHsxjwQ~Y90#0(K-hBs0BS;tw3D5L{0!6tC_F=ToNO}m
zk2Ppeuo;8e%*6fz+q8l~*{kjxviAPe`L7uUwPR%>zXCN&Xt1tR(%d!wT7!Hq$-Ij}
zZCYY~gKZvau!d7&4gQ6_2==sxoQy>O0Ln;cu%1(Z!D4GL34`xpP#cNhKVe&h8m#Gj
z2}5;Pot)Z^_D@aewB^6jUD7?0JB(k@kU<qu!@*H%`KN92E6Q=!txox!3@)wlCnCjY
zZH<<|;PMu#d~T`khM~FpH((2*)CP4b@)t}?n2O!#_5Tr0Hc{>cjsPd4VGB`emlFIp
za4X>Q14sQ>%h>~?y<E;ebA#{rKI?cx%GNV@wbArJO3>2=J%Pb%8bygT2hYE@PzumI
zqSZR?B2@v}Bk|EjB7A<_GEHGNU`WgODM&TicZfHH9sP-rZ(wNd{zHaZk)fz|uKvis
zc-I!FVu}>$kxmIW8NLIK$S*X!Plj}JLhK>fcCZyQl;Z5SPS%0Bo4~XbpN7<=edp?|
zzZD{D?wB7elmWDn$g~unj?@C`f|N_4B7^xJMt3<!!#qAI%$eHWO$|1pz&;YK5>lAz
zypOULL@H%3Pw_XSi*Z2r^qaCA4#0_lNG#%fd~jUel4<IiE!L5XVV??hq^g4|t%_!C
zlppe0wPK1mSYSdaPjf!7PDs%@oX|QyCg8*%95^$?iJ~U1*$PXqbS;EEYP7$%bfYc*
zl}^9ndOXg38x1x6)Nn|YTK;MJeMq_1{VJ6A;Nw`d@$PoN!Jhvybs1zwbv>!mn%@^0
zjP_wjP1<rl!FzaCq=jQXqP=E`Y;3c$<WtJ%u4vjg3ZDs0Q#CJ5tnvO+xX2PV{Iv0^
z`Thw?)J@V+NQSh|k3(YJGz4ePMq=5`hf_CYH*(GAuzLSFtv+z?K!h8J@G~kEq3TYc
zPMeP+JO^bX;v#g3aEEPQgZ9VJUJF$}<XqauXC$W?4~w9^2J0m4|IdX+UngW>Rxg(k
ztsA>gqzp_#&ue5r8_#$omVpa!U=$Lc0f!9igpDf7M!+ZGwN@#4eIY=6l=*mFWWClR
zb?lJRz1{EUNZ25*8xlkfrw~1|Te~^=?}cHlbOVs=kaQI`b<FEF%F8hP22pCan}Eck
zya?{%kSH@7y5=?O^fcIY6*BFu|Mfz=tXVGAS~}mfPDs&uoX`$9G2p~l92ghkL`ehJ
zykR452I~^AmT3h%J_-<@>S7{(%X)ncz-tw}*7|Tnh|{$^r@e)Rkse`HTyLG0o((vy
z4Q*P$>F6XJm>lAD0bajty{-#<JYH+v9*A5@`}k4q;`9dVw3h7;2K(b9Hhr~3*Tkvd
zy1?(z(gIDRcdWq{C^#8|TJvRaYDku{Fu0NSy03dQ?I#kQrsyCfHZCrQy8?+BUV;Nz
z4CKw$(PNuOx#h-f(O84EG|>DE{;rW9n?7aB^O4RbYy4P@PsX@r&KUvYS7G?-IO7qV
ze$N`e9`3E^q_VkLraZ+7I#37{{Jogp-$jY$y_9UkxVGV|0>-bw@U=+H@q7Z)%9lyE
zK5ZR84mLUkSR?Vw0DK+b^>LnO@pp7r0qTu|-hjWvWzCIqGK=4zv4+%sDt6SBg9CPE
zYX8>^h()LVaWS+Suq8j94(N{a`*aB4m9))dS2`JqF)zbJ=%O{2=?9Yuqpmm$nVx{j
z!OpXLC;5g30G$k^EDT3(NC4$?OZ9ncXb{XPFpB6LBqsq=*w8W0!I(U!O6BI!wfq=+
zZ)4$DHeJwM(7wbphFIqC^9!`8O<035F>NN_1EbALlxeK1tPc%Ub{-NN>yad1Pb@F5
zpv#b#7X$Vl7)!#Hlv&o`1Z(gW8o@{K!3>Rx^iKgVTStas^hz8V2K%aYL}i&cqOuVI
zM>0?({Xs_}Bzv9bNYs0zzPX>smFU^lkx>{_<OSB?FqB_~!Lw<TZw;#K6b!0tWWZnx
z3`$SXU_MD-3Rjuos-VW%W5<%H=Y{V?9!{v|ZnRG5ttC0JoHns+oE4>zf1E<hGRoC0
zXXD7#ICn1WBb2FIRCXH9sqFlKb1hLM{W0gfhxO&)-3q{%cXwFt9;2)k%5>~Jk}n{s
ze=k68LV@lIoIxmhxD&scGaQq{i}`d_n)k52Vp9C5<#irv3a@S%h1^`mmp!Zx1>QsV
zB8=%LET|2(HK5XaAu_iN3>x-0{W-(OEvN0}C^$u)jogxui?RYYaLoSz2_{Xqgkn$m
z)6XkyxiT>CN*WO1nKsgvCq3A!6~CudNj6W>uC>k7uOs{{jmGe;7`_uxmWE;AE`F}#
z>oBx8`BB!TCoQZr9Rya2vXI;U1I{5+^DV1PRUYYQ=_1(M|AWmUTk~L}idxZqb(Mdp
zF2t*6iD&_pZEQpxSXLB`E;$yUcpegbsFxv#yKQ>~(UJBFKxbdIpC8xW#qoQ6$D^+L
zDh1EgPm_>4XneSo71CYzSYwL16UG#=4q!5Ncbu^Zk>=4}X;+{FCisC|Li;mx!&QBB
z52F5weZzGyecXE)U2*FPfv5Y`chYwhr|dGd&3!1;fjI|-{ZI4vwIP<dcIcer-@q8-
znPk@Y&PCLjSeX+n!@k!riNXg0N(YxIEd`T<ARJ3A2GX7MPxDhgFHG?^*Ybwao%_)l
zp&RGoXr+F}DC()m0|cB!z}~}p`1>z8Y2R=iI_<XJF)!u$q5xbq051f-2)Hs%z?M3w
z$QA~e8Ubbz%q10>XrUs1FaV_lpv6FAD?)|J#9|or>BU5L#svRTbS#k#g-Ot*fyY#&
zI}gL_VG(J$C^$7F-ASZY5e8G599QOdK?5g!-GQ2gX|>ElHjrD;ncg4gap-d7L7Y;$
z<1>c+bSF0eH3~ov0fnbI-N^|sO#{p_n2<D+<#cCufQeWpD({v9g{L{)nFXXYYhDkT
z;LqvFFz3N&fw~uon>m3rH*n@yURQ*};B_^4DKgsUqX`xGscq<_zqqRA%Uf!i8?7SE
z`EyXD`91|1gCebVc}SQezMe)-`l_^=tGm`TH(NcrZW=Km>R&WnhN21oCq<2&^p#s`
zJb9poxd}yDA-&CpMM#U6u4S|c-G>BM$Q%}}TWGH}V-|+*VUT-cD_U;)G^jA(l1#<2
zWXk5EEQ0Y{QTF=jocJ+(1+oIep#u%(wg9u<GEqURfWik_1LxKN^mf3|(=g!!t$}k#
zfY}saR>Q=Nv)IEr8#uQEDLu1^XiY3Vf$VS(ze0ALkg_Do?h5F7DMVLtDM$am!RW?L
zA@4?n7-;0&X`{If$2Or%qxTx*`Gl!%=v=<r?;bJl1(+A&GmU-ExFVSvfVKvpwFy(*
z(3uuswgs3ME2i4BqsXs-QKoOixoDmKrMp*YCXzxjksMOSj^SKo9k?B3gK$7oays%7
z4ulWHM$VOGOw<exKraJ@55z{!HD!=FE5H=Ngb&0<&eZ|t>;Ur`j6V>`e{cs=l-B|&
zJ#&cYxR8CpA?o;jAt|IRnX>8DfjiJO2?w;!T#LLO%MSL-IiQqYAAqI=pbZJL)7ZH#
zz+4_+-cFdE#?B2e(Flu(yt^_0Z7fYsdFK?-3?OAk2Z|@gl<jCo*nu27@hGchW&TcD
zzN~>0-OUGX+v31Hl$|%$Kbg!RJ@4VbLc3kAM{${=`p!%o5VHVg1dKMY8;~stnRF-H
zGWWrZgwdim6M0|fzi>khoGc*id|KZxnBcGNR{~AJkPfL&A&EDfCQkZ_sWl#*SHoOv
zGj<btI`og5ZP+j5J-rG8$_Xh;qU@r8vaa#U(3Kp=`MY0JCw)oR8q0>%sBlJP|7q%s
zL1~1P-ie2=G7c2wb;w6V7(R!aIF|&N*8<GPFyT|Ui8C(1yy-Jh&(J48;d8i&Gd2Ki
z2pIYlCT=saD_j$20*ty&x1&Z+@NY$3O+YhX%6A?4*Ieej9~fj!o$)BpD8Cs6-9y%Z
zB(fI21|-|E@}s<=Gsy;U7rLH1olA8bxgPlpXG$N|7hHt6Sav1`ptS+$bD;3_Hgzry
zFfRv~5}5GpHg%$t1JJ7h=nJ6m^fq-agV7XxfQZf?>+gn9BKi_0d^R<7robqp+9#eW
zTQ`Ca>+`qY^6B%J+VW*hoT1i%#VES~2ejejAo~(J(8w8LnTKIU!Dt!ItrX>*_=Hen
zXPAYS09^>ASuqRwHX+l*ISYmbD^2}<O<APn#@E?a@A4>(Mx$5*Vm9(aLd8v;;XsUP
z??aX=IB;sE+)DepNNa8|=y3zF3)r)`q)WP$H*9~N)hnL?p5&TB-G3{o%Qr`?o|V{+
zK87MKpC2SvWb#c+L378c=bCe@0)1p`CA^l>58)%>js*CKlL0@%^3UoGm#0vm!SnV1
zNkN8_iGuU20=>_@8eV<BAHL6jI?%$&hCko(FYAW;8WiNB;Jg2mf|gD$3PxH5dJFLd
zc#Vng;m`U{2U<A=@S}X*yrKSo2?gr=8<7M5Cnc?&B9vT!5>2TM=!i`67c%<YOSgZZ
zZS5^m<;z-?E7SD{1EZxgI-sm&sInI2K%dJ9DT_E426VLv)zv)41s~Se%q!&Yyys+d
z1G46XCV{7^wQ<s~98&e_sa2;}oj$K>CI(ut04eh8xRbBU%|T)h>-&=|{)POF{DT}s
z{zVQUdYV;bUX+!xhxILRtGg&|N5xi@t{?9!ya}aA0BxCG?Gb&oI5x$G<LGieLQwhm
zv&){sH9u1Q6!b)J?lbi4Y|7nIbli;GRE=9U*7(`54R7ItyO(qyQtmNaCj+Md?*u-5
zpbyRkt^~+`-oyHe>p12#+HvpKsNFT(`&FA;kW(p7wS2z;e%Y;g1+{9|i&nNOs5UvM
z=o?Tm9Tm5N--et{Irlj|7wWiv+3l3eg`>zp6<)jWswAF69QAL)t*MpDp1YNkR;@oe
zl*@g%x0_tTN6j55sS4Z;xJO(v>Gi<F`r0`CP(ZHJ{zO4C@7>Yd`(ScbrDQXZI*ohP
zeKOg1L_GJsy%Ru^X%ibs!H4zvh3De&(j10YtCoS`)z(SBZdKJ8TdK~iI`cr)0T^gc
z24tWT#^v~(h=cRh5k-C;dh+S1U#C72cvxQ>=ZpYbCBW8*mWRuEJ178E3qUpFn9VlA
zN<=sqh7_m$jFWpwu`+u!X*xD8vjiM`SYKP`Oh65tzW}eL-xb+iNH@xra+SBfqAcaZ
z`2a#!>XdTTy<k%=#bqgP>!n;}Wls8KBgj<tJo^8fl{W?+*4NHSpSY^Z<SkXERGo65
zN)HV1hNCM3Z7?o}<{>FWRST(uDD5aKHOi-aobCaxU4W}i`?}@0Xc?FUcys{l5CBtw
z>Xie75w&xA!f9li2+wdJ?Q(KHas<qAq`r4t`bQG|B@gpXQqQ-WmrarJ^Qs(+B6PUb
z_}?y;2`RZ7(KKs-DBAP$<~OQ*BuPDn##$C?(YP<mf*LMB8q$#e{AYO9;?M2SPJOF(
z*U`6Xa}Uys@>I(Y9N?GTo0nOKWucK(K{fjM+D<^l^{BWP{66G(%C#&=HSgMbWqFj#
zg+2t`nAc2R<@W^yJ@Gz1k@vyRWielPpz?|6=zxwo==f|1_w2MR+>eqbz`O`Fqkz-W
zj7uBiA~`i3a{^!$3;s5Nr?b-d0i-FMHE1prSI6fZlkBmTO;RB%x6^jys><iht9;JX
z%5IvemSp;%Pj4NkN6zOB>r2a&jJ8UuVK}A)pFAZpR2RvLYh#GIlBq<S4vtgB#Weq_
zrJ9rSy49_n-Yo-LjF_(J5svQoF29@O_i13KAQ}7|+f$@IJ1@5`{o-5g;6!{pB76+^
zLW>Us?H`5%imiq>En(4^c65vO$0gHw@{BrzD(_XEx3Fz;XK$SqYeRKjpQe>lqOGay
zNZtj$%}y@k4<QkJZiVPA%GUasc3L0g<fiiJl%c{T`XG=#o34(IQ>iRnv3{frJf@Oq
zA9dQInB;ki#}j=Uj3rE*1$hI?Y2$NV)+WI%!fCPpowbfmdsyO1d-c=2+E>+T6m7@v
zu_SCH3DTt`v{m3pM?^{J81QupHrr8lIA8rS)5&p?%979_;P>U|SgaW$MgQ%1C+8^O
zSTCGA#vjynz<L?w%`&eSG|OUHt`gcaM>&p(IaMjublRPF;Z?O-M7sns@eY~5J1-NR
zaZs7)hA0!A1DUvzD7sK~xJ=+rA%_d6P9XYg;2uQ3SQ~uvigU$wc{Sj)-FbDduGQky
zdI2Xl;iQzPySk!CPWD9PWL?^^R(7SUO)VBWwb8$n13#?G`=wdsUP&t(=4A}5vMaCa
zz$#n*zx{96O<gLl=#rYUqIZ=f=aM|<#43lJWAmJocv+w4^x@^~Jjdl_-1AOml|z+Q
z45;EfcREkMc;4wp>59JmIw-YDa*fWT_7O$3&(8QJFYS!f->N^<V<o>=YStky^^DZt
zYpgisL{8P)-@f8NUfR&qKT;1IUCpFjd?@PV+}v&AUrD8r*0^t2YPHlBm1<M#<m_5I
zx7+vkRw`fnNVVaq$*Cte)lBD;rJ#Ps;<PvgwWrTarPj$=`geN9tt<1g2PdcdDLi;q
zYR%f4-Z;`UoA2bjKI*Q1*@NS1*@>!AE6sQI`(bX-_?fAd*rHCmc4%)ttn}{QAM?`A
zN&O?Q+Jq{9@g{Hb#43k)1dsl$JUYcZKHkZB;>ojnK9Ewmy!*8;9#M^2=^x(no4P^k
zXQWm||Du`KcjkU=>&Jih^ruc4S<UQRr}k4uSL#tEPTRA~>QL+CoRW0e=<D7LY9rFr
z_RWqk({r4h)aVKB_?p33l;&-fs!=P=z1`xZV67kNH`5Ot%ZHn?vVZbCsh!!in(04*
z_GVenTe=5r-I!V{<^Eb!IysjfwY=y2UxJl#WKf%Q(_o;J^WMJOThDG7my*=;lB!WF
zM{a99ZA7p$apd_*6H_ZCImLO^C#N1tUGzzcIrRc3=bE=#bY9se=(8f~n_i7tDaz>F
z;<E~hE=#TCq%Oo*)kTV8_8HH88n2;!!y~A5a(;Y!_8DKD8}I18j8yZ%nNH4#izc?+
z6^Umby003w(mbK<2j#}3pU*!hukFJ9&FI8m{GRPSuNqIymA5za8PX?S+pI>lsC9Cx
z&YIb0%fVnQ!%}PH)fiXh4~@F<RsK!&mZ6%N$qbwRonQag4$DjZVL7FASPuF!#k9zC
za$Xubf8cp1#hXiNT#Z^eGjHIyH-fdk^Of9x8*okAruAQWX<5wRbE{V~i`FRE%_A$X
zj7wDQq#kvsb#mUmXY<JaSKOJ$$8`REe<qnkCMT9Kly+1|>`~DQAu&!exhCr*nIv{q
zir8Y`iZCNV1f|wfrM6gVtJIcS%AuB@YQ(PSqD6$-iqul%c^~xd&+mR-ujh~Z_V9YO
zpZRW|@AtaSb*^)r>zv8#`uRUbL%9)M%Ic!J>Rp3d2Lye&_dl%V_xJX~sxkWqeYW@C
z8~(hK(;wVy!M4%utlRk&vdTZuD_%LCkMi9nmG|Uhwx{;1klH_!Vw~dnAc#dqdRtH3
zN2ec`KG$piFD;#3@LlWYvJ1V-*{i2c|K?~$mUey>&RgJVXf*Yar|!<hzg`OK;P~aG
zu+;-~`g#+XtNsZaxz(V1T3Y&2*sUY;UJC0Kr_;~PVx0XG)@-APZuWwgi`;G7`cjyG
zCoPNt_)pl-zdZOh$ID?I2kd?+EdK+YzS4N+!+*kTY<e+l{_S6B=~u-wUJAPxrG+t3
z{u4H`Um4xZQ!i)xb-<06!tzJx^cBZ2a{mddyTnr`e4dp4ldiP%tl{KKVQzhO`jC&9
zHvcW`iifV^moN9^ua|bd6gH%bPT#RV!}_1F5luaGkN@KUd_8^TOJPwdI{k)OOq~CO
zEnj2M?OgNpr6NB`o%>SQnpiE28TFsAOYQ%Awz1~5FNIBSqtl<u=6v`k%;xaW4SE0N
zE;&?h%}ZgXXJMQu|AbY2$hz&HZsb#G>D^k3UkaO<NRg?`#s3u+nA*ai>-h1@JvpjD
z_Df-Nty);sxcs1Lo+sT_89wpkf3vm!&GU52H_Z2ZtlzqnnRu09mZyG|VR?C!3p^i}
zTe;EG;H*7ki#V0gUid6nDz@j%5Z6`|i<Vbsh*N9-5*@o=6$MuYh|z1Oi3!moMN(v@
z7+1Vabnm=S=%#!onvCf#CXZerX7<=FDtuqo*7fUMV&kdv;-?x;;d6hk$Sj#IY91Uf
z;(pE)VL4O9nhl?c<TrA}km!Y?%Be5Jt`FKc#g>4@B76PMVp;eAQ73+b=)QcDIMuy}
zaP;pZ9#0=6HnmzV${8|6pRMynZKEn|u}j3GPuGe)8}h^((eAdf{S`6PW1cvEYk}A^
z__$c#W2zY1(ka%}*(XAFb`lRRoD~@jWpS?02jbGmMPl`Orzp6yM%?UvQPf<Q|8y(s
zC+F5J^=wV_Peg9_cWiwZdD~j_^0P(0aZIe;RmWD?ypqi${)RYFx3;aEM`fE^ikHn}
zc17FpCFN~%-gme4eRxoG9oN)0$>U91{n?(jzI{u?q?|n>pyOTP9N=raIqGNe%LB9R
zoojE}u2nVKjEBqHwpXcO8#Kh%w&{!NwwjGDIz?E~o3;<9R<cc8^oDKP`#%e}A7X5C
z!k&us-Sus^d)2XBd9RKwc19IjvjO#OkDCYC>bLc>U6_1dENalmc4lK8+sul!Y@@1_
zw+TaKTjG?;wsM~riElQQvE7{B)TSToVVfUtL)g3A7f*K8w6z`TWjoZw*Vf>+(H7aP
zj_pw0{HI@gx_PYJQ{Lb_vEqiP5x-w-TlkYWJmH+!pZr9G{Pd$({qv7vvB!79eDI*C
zW<4VcT;GZ+bAA#B>h2caXOD>A(!Le%+}tIO*IpyWIj@N*dx*_Za#O6XxlP<l-6=e8
z9}?@`b++v@zY!nTy&&SImxz^qzl!=7)``i!?l#9!cU!L(`<>!cBULOPR>n4Wu)%g{
z+S|4dj~^BReU=NiZa2kW>o$ol3%(Ow8-FJjSGXqn{<=kY2W}Uw4j&a^5j#cc?VBPx
zY`>V7Q6PTsSS_mGIVz%N9~5P$KNO=PZiwcepAsK;{aG|KE)`23{3_bzx!ETCaYWSh
z+9zB+|0UAT=0Dx5jfW>UOU2@{m&NjvwQO7S%h+aHYTD*L{!P67u$=93;can0^Qy>=
zeIhbC=xpQqm$Mxnt+U-rxGa2K<!qnzUoGyJG1|=iJZ$|fm27_s4_lS_<Hg*DlC6zw
zv;}7vY`xY#5`~X-wvL6~wgb(77v`U@iCYznwnk~5w#I?qI>oujyTZp`XA{v+MT4u7
zEj!3$o7?9nVM_P56?<M2U4Ay(JdXL;l#x1H=Hv6CLCG~?+VhK;bMCTe{C27M>S<Y<
z-^n{-&#+sf|1V{13s#;K0e|}2N-9^gO@C^%{Z{oSk@#MT$lG7mR=V3{Yw4-8o$Go`
z6fY_fZ|CJd-RJ44cMjOoT%=n13BzwDv20+J(62Mu?&!M-H6>o8Ntt4a$4GJ7JweES
z*v0JY-NZ*-T8eXFv&5diHi*Tm$B6@z*NCyx0>!8vN#fTgO@!ftY?1t9XOYlii+Ehr
zTi7mziO5EuiyOPgh$}nZ64gUf#ksUbV$9J~qC!TdQ#>wTTMU^dL}C5uV(iH7V*AF6
zV&stqqWkyDL^rST;#%XuVt{v5F{pkw5!<naNS*61>K+djvl8RPu^~gm7vByQ>SmW{
zaosGudoL1kTRMr}3#N&KqehClrx%K>{lmn$*pI}>LYK(z+Dr6Y<|g_#w~D~bo<i4G
zaXaYw)HyXy{iKS!Zt!bD=j%%RBCqqcwfuPjylMPed%*b`kG<A#=j$AN6ySWVqkr&J
z=j%QF1{<{VKL7Dy&oTz*$l7kY-nMA<HKFr$C4Q0D`Py3kya4Cx>-uK}IA7zj*9vgH
z&cR0k&euBn)sfEEd-|#Rk=c(=c=BJF{KqG}e0=h~F8cnZ*P9f>W#3YxsgIs7-^ucv
ze4irUH}v`bm%N_n6~C*#7Zvv_Ue)vcuKBv7E%v(Mrc}7;`yk)zx>qH=^44wN%Z5e~
zYHYsYrk77SXZt8M_AK9UJfEn3Hwm1WpnmtR+pEK`*k;lG)%)w&O{OyAu~&xkSw;ty
z%yxH$FB9r_(f=PRIs51H+uZ9<l_^iWM)~I4(z)dPIMvt%oNrTe6w_0$Tl$=QL#dZZ
zZ&-(AuGx6sm{VKtl5JMisF?M}`+ahP-CXj@Xw`Tg^+>yFv?IR?R5IL*)0}@ZFOOA?
zt10xlP>rSF-}+aDmG3@3UfJ%(QO?#es&Ns`wa2JA`QRY%ZHgFAF`eCA@|6TNr|hdk
z#c42(clM-+W+ZPDspeQnzA{QRK7Msc|FcngS#fz)oa)<^?a9_%9qNS%?ylC49BNm^
zY^->tO4r+n?#b$fcsEz;@yTk}ZOC<$YTgyE%=0o<@W@M1yIxePdl|bcL0(2LxQvqx
z@mjc#(RjbgGd6pYdSNH*mUij|KQK|LR<`05^DbDuVcQPwgMAd{#r#R1w?V~RuHq%b
zM#{-I_!!Gq(l1m@HH^1>7fNoru-sVL?9ZQN_&~^J#pGwaSGF^s_F~b;$mU&2bw8~d
zKdqXY#;Lvwck)F0h)rHN*IgH;fQq@6@v*@zl4!HP@sfv*Pp*8A0Ao45eqp|;w(*|c
z5R#&H%J->je5^OkNme`gxVu^{cBtnoxVc&lPEyY+=K99#&TS60lY}M$ODnyrl|4m0
zPXk;1t$$TGUVD3hhsN0r7{swu8mn`)s!Lhd!E*^}rw=GMh@yJD`eGDkL*sZud9Aw)
zfyT!khRf0N`h3H?M&_D}f{*6+jF&vkBhUuwUCm}E%IiIJu4a$o<n^b)Zqah1d~-`<
z8Mh9N%46@5ApftUk`ZRib~duOxtjHkl-J)T?Zyas{Z;T)r2D@$<J=!gW-H@#pZYOs
zJBDsdFHkXCjF&1;idNh4ibTv4n|h`cOZ_;t-2kP!MH|8vZ3wq!2%p)k*Tw7<YP-L{
z(pa@!zF9Vw>h)*x%@M{c`f6v^5~+j1g}n-Xywwp?0cOP<iQgH;ER0w3-D4^~DYG@f
zSV^z%pKnSumKu!x*YG6ml8|rYn^TRK^oAia9}R68_J(mb?PGhR!OdrOoP6U5j-P}1
zrjEwzdY_bV`9`w4%X%bQzM=SZHa;#_flmcEjgOG~Z*x=L?ri)}3F@r1;C-fqk7Cs`
zicc5LyK_SO^nvl7vtguqCfiNdpd3xE#p@iA;p&-o?z#pZ_!glbWmnInqJKrGIJ+7r
zINLq?w+v^fs56<TQ3209UsdM+;(bstKeL;0p1UDATIE>w<X9Rj@-WVNB3`Yi`1CTC
zIx9{^{5?gjSXSq<E=f^mH3EM?ZcFdKDm-9(Ho@ptl_*nsGsiqjcFC?*%qQu-WOdeu
z6j3!nt@zn1EaLCR`FJUreT>tc(~{I#r|689bizvV|1G~NeDr*2r(3E0)e>gg8GVhz
zocv<2%esLcxC`E=q8;GWB(-AG{|^=Q`b){|$C9C<jb!Z|YQ+cOmlV+poWhv60s4bH
z>wjfPPh_SWbDYnT{GO~<tU<Cr7z<+=3$Hr)V(m>a{+Sq<{f(nq^qS3&j%|)sdwt~Q
za%_!Juk>+uIkJ&s^)5$xf_mi=_J*<Ql_cbiacZxVVA*K(iV`-!Sl72#dEBSOtG!tD
zIid(xh7B-I3oC0^uk1#Xfu$8#K3=`@8G+UDSVTm6oO<P7;CT}L0dB@`5q?P|RKf-t
z1AI>4Q3FdEn3VK^Mx|S?>kt%C$zv)t#;Uzi32=*1dyT_wa-!O+4)$M3@BnNLkq7uM
z<P<Oi|6atkK>iuL1+9$!OIlP8`J|Sn#~|bG4!z9KWkXvIb2rkpz`O#ij$EC3qmbv*
z0oTC@(vCr23)&^fL!kK=WwoLD`N$h+*DB;a8hyrKX3Lw=YQkQ<&iYG?dStk}&gzYP
z7r9-mdW3hOtbPgV5l$ED(>OJOUwXDG=(mHP#Hb0|@aIq*ak=TNcZfd)>XOwXxAET?
ztsZg5J{9{s&_7;H;Eh3R5b`YizQKQ)n@({KG3GS&g{&M(Gfw<qw)JL$n!t6!Ix$W?
zG66X*UOmEBLaf6nrV+R%MLnW~4K<nr)I>EQi^xo9c}vd9flT20J=RIkKSpLK98ofc
z($GOz7NGeYOBhJu2|M7LLZNQt7(p(6QSX^N9s5oEtHD$TJ-?1=b*H9i{8egAMn9d*
zSCDhb+z44v`(WZDu#ZMQPaMBRVf_mGL{2g1FwXN}dbc^sckF7yL4vzb-nU>Ul1(PK
z6#H$kJ#wyA1P$f|D{CRevgoz`iT@AC-y>%se@YU5SI~MN`4L!yBDSFKip*~fTB}nu
zuYX$m!b`m39B$lZXcI55RKi9WYZ&X?)4ME>G_l7B<6>u>>TWJe<wSX93V7HduUv}x
zLA+e2g}ckL$}X?mpmSOJ;{FNvt6i>h2EUETa-Fxq)I_;X4!Ad2UTMJo6LFcy_2J6{
zlaSY-uL!0i|An5HZ7ltg<dq*FM?F?DM;aFyvJ)sL;3H#e-#RRMEi<8=g}Ddr%CL`&
za{_ZH;9Ua8Q$PS7+mOw8lu3~5+(7>U1gk(l3RaqqGM+ToiC0^e=w0E)7<KywZsZFR
z)t1|lRrE^0Xk$Rr_Gl`iNlZ~&t_NpeVSWg2U{_oABqB3GZFwB|dc3-wbyGO!RZAsf
zv^KL4xfxAsEZkFspR%jlIg!IpB&yrr1Z&|*Gs8zBufx7MNo|>h9HKEqjxpBNZ&%XC
z7#9OR8ODHsiSRL{g`cH@F>H4j;9CIpB>oO~AG?y8Vcg){UJtht3R{Z$E6hWo-{DZV
zS3;jpIjqjZ+mP-cdluuN`xL(e&|N~GN8AJC?w}3qhyQf^yrR|Z!SK{cP`7)bpFkWJ
z#_%hYy99kT<a)?o{GeoxHI8%ccc{r1ct6@VUQOPt{Y}FJ^>}@7L!z48#!VMe#-SeP
zED8B4K~45^*M+3xw-U5q4*-Mh>hUl6eS}TPYBJMINMG<aaY~|kd>Z<@Now*(=x4^O
z#}|SD$oz?ykl(QTxHF)~aaO$rDW_bB2lyMoUqZSMr_Uf;4#{bfF~^0N?P~G~>_gE1
zhFzqn$CrVD*cTIDgSdg%H`0i)$eH-_jg62o6#pi2N8)&kIz*+>T(&g%I6ugvIGxNd
z$F=6&NK%iFC(|;@tcyo!ikch=_J?r{Xn~P$_=RjIb2jot?0<qavB#3>GL`sa_o2CW
zkefw6Z&^0}2k@Iqg(dKRNK5^&|3UZqp`Ua|$#iiz8o;OJ&8B;P!LKL0+3;1Ma+bd#
zVZ?t&TpRdKArB(n0{(z~9R6o$(XYtmsDqcILz1Z@13eROGJj4!M1^k!e0j9vDRKhw
zGJfBq@5pP%A=$1874uyaDrNPqrg9pDuCdCphBaQ`R50Is&AW`=@yxz9;<?=%F3Vh)
zLmd(FGyTpN`g$+qq1Na6S}%BzpYeYALZ1#Q!+!V9H$Dqj2veF~_ip}NjCp3g?mffz
znUx}*;TT91(+%%<12nSI<A!&>URBIDz3T#zH@yo1(=G4B=S^AvqNKtX(G|3qj9cCt
z@Cv-`9bh<xLw@RQZ*JpMCH0Q?K7%hB#eCO0%ruNoUi3pvSynkeLq}1*l783wq#;M6
ziM;21u%`M49WaEB@W-Jt0kuF|xU4n+t7}aNEb%rQPHUm@C8SaF<Nx$NsAV%fApb;-
zDeM6*v$ap1A9yeKiQ|*j|4X%SpNHO&hBlfl^`SQx{2Feq&@k-P$@^z3S^gNu8m(pd
z0QwyA{)9fRwXE(z-_jzh<=kDN1q84|5}JcQ6G&(!P|0}Yz1e)(BFp#D^dqu8k(0w@
zwH-3uqAKx!c^7t5vmn@nTQv;{bO%>J*bek2tSez}QP5h#BJewm{si_kFqsZHgS`W^
z?;(#S{wSE<N>=-Wi=P$p*n4q(bu4b1skb>agP{>%b=ox)tOEVpv}ZQyl*lLE*Ubay
zaS2Tt33d_r1_|sCIyH~@r`|iwTi~dU<_8Ymdw49xQWGrIQU;WIH|Gd+F7>_?wgyM7
z_6y(v9L^A+jq0fsGXR3}^!+I0ryPw@_{C7nTcoaq|J)2|f?+2g@(p^)pf{Ytrueu?
zHMHU7Ce^K}K7}EJ!@-7K=oJ{|z{Fuw&m!+5gEGubQt<Y1m&}IhFe!oBr%fcyk9U{K
zYXhaTK|08|3Gk47^zomN;t?c%w0tf|7s=W&KLB!*8d{I8UkmwI+Pe+8F-+Aco4%Dh
zfeR_-2KF(}y53Wop(QUPH3q`UNP(t_^lT&Sm*D43%Fv&*CS{b7)*1R}H3fQ6g!u@{
zAd-%zue27;CrIl%6D~@sQ3^23eHLbvCOE4)<ev0bNUJ2dCp)Ym?U7l9hulNnqH~2D
z2CuohLW+{)%SB)`_EPN4khvI#<fO=#yMWzN<emlC3sU9F{oPz4KFM-V8Wd6+eH(a+
zlI6>1k&W=B;~&vR?m1NFQk=D<34MCH5m^fJIXq6al`l^tVw^+1oR5Az8Ms#qiGe8!
zxdncy_yxnja11FXQ%~@Cs@$_YaVN3Q#;*aHrh%P_SI{3vR?_{Yh0c*^J|-eNUhX*x
zk0;c|(m$jc_zgG|Od;YDZLEVI3_S-TSAv0q8e*Zc+xTg5EFeP8!^d-#kaJXS2U}7R
zTje3;K|S_u#B=aNcKyAt9w3c!CP2ddM@V~`af+a|lpRVj<smC1=m5!CABAitX$vrm
zWZb}o6yc{8lMm^2{9~~1Mju7oR?r0w!@q>&r;!7&PX^y<BVTSnoCA4^o{q0Ajdxa0
zk+1%ycZp>wa-SjK=0v$qU3Ztramalp=v?A0^n=`7;(dpFwFI1=B=`9Y{ljFr4@*hW
z5_uAF*Wsa$#nQI&)llS_#9Q$%#y$giQX9F?HthG3<*T0}??{rbdTT{kC?Xa?#rYt{
zy%<Ndk*^K~*C5j}5kV1uAa6ysfSr)%fJfWPeK@zoG2)&0H6V_geDNO1A5a{Jr_ToT
zTPW@jvV%sfAnp|UU+|kmvD`o^&ejxh7IJT#_uzCM<2jr-K}8A4%AjXWeKidIW3W4@
zwQxW77RVePVJB_@`d-BSfSd<;N92jvxqTF;DYhJPJbu5xcZi<RdSWvCtU48^%*ohi
z1RmeCldrmgpVDU`U=t#E-XJzn;VvS6O_2NSLvBX1c7U9zedtROf!znW4f@t(xJ8BS
zksD#>1#j^d_z?L|kS%)A3A@&<zknv>Yj>5*aA}+~Ia%%fi<?fKo}~8X#z^j$tQPy~
zb@E$DYVkGXnGUsgA9tP122bzv=>LWNd$2O{GO%`AwYV2}0{LU`aT~Qbinz!WwRj~m
z>&@aP$jJ`1_)B=|p+AKFU3gb(cXb`5Y0lz~R8S#B?L7dGMKJLz@bVbs(Vz^&AsFJo
zPS_(;)#7dF--3ZxYUHo*Z$i8t+zEC;KLP!E{69nPOy(l|D}mfc$`NhU-Yg~AY=8cg
z@S>zl=_HLZ7V8Om&`vF8^HvIobhcG{&(;d3V>S_fh{C^yZX>kcpx=t81ALb<&w@Qj
z9}h!sFc<&Rl%WjkEY0Xp>`&NPMAqf<8nTDuZlWI_;m#wU-gVKJKx-uI9O>qxuY<iC
zdMo`q9{;(-^JQl_iFAB-S<c5#>HNO5Jbp^DTEY@Iw$P!LJOzDI)Zwi3W7Ck`-E^@l
zkUhaw$?EU`y)Jfnk~;h<I5kBrVe>rpbNpC(#}33#i60@=Fb)qxJl#$$QHG7+=oI3(
z2m+cgd<n!i!B%*jCGiI2We~IWj-3QSAlL%Jkq}=@RZG%A*7U=>qStFY$m6hcR}oto
z9#3#A`cL5Lji2V<0X^ShjqTk|9X?VUA|p9jhJQy!H!@i9e22`(@N7y%4dfGGAo4Wq
z+!e$!D2LYuJ&<|(KXxD)t=PBYcM1HKic`>UhUa(uR#U+_<ho>LRUaGHMlERw`r{u-
zULQ%x{79PQ++kNs_`$x|JxOYb*3uu~9fX{LUl#r~;TegZJS8>IS0qmzWC^~nu+wuT
z{Js6yRn+AJHYTn=I1Rro_+O%)bDB*2FUZ}IxfzU|bMwU`+ELP|m9r#+b409~c7dJE
zhmmR;+i%uU$miH}yd0?(<svVMRf}d~A04F@u^ngqD_Sk;>*lh45~mjZg1i;KhTuT-
zJYlt3W7M?I!3NQ4njbuUK-Lo0viMiUuLXY0e^$k&rft;bFK@2nMfdUOfM+b0h&VOv
z8!{{+ViOU^!R6R%5t)tsj!@I?qwk_sivBioBL1I6scDO`7lQ%Fp)dG8r9xIB*8BJ=
z>5`;4i+)2~MjE#LtU<W7BTR}{i@pYVn3DDs+L|$Hkr8_a9m8gdbtxUeRl~}*V9~e4
zH-T;k`uouLMV@U}iz;A`L1yi3tx36DBCQ7e6Of-EYaKD)sAT$Z;V628ps6HfhsJsc
z@+9ywNwxiE?I>&nJA-^h*Sdz@KLc4Nz56w?H-7h#N74u_-j}3BBzf1arVT-6tFMT6
z4y^a^dw}0L<iF4_#V-&1;=YnumN|PAjn|G!bBd2cp98-i@?7G=iQi9N)>zi%94)Q0
zYHE!K(~!C4u*SlxHF`gKF2UAz#I-_RMBU-Yvu$b-t81%2$K($3uh?D4PVlaF^viKV
z7UhsZCNuXOTFZ;r6|o+{gXN8NA1!1xW=#U$M0QV9i!Ote5}u78>uKV+ep<h#d*^`>
z9G2<$e}<l`p!ElOgw>tZ0B>XT*Fm<VtV`kl5`EP&N@jU!UXShYFF_Xch#9O#iz4vb
zi66Hh)*;xnCjfrfSJHy=v~VJNt%p>w9{KlS{}(NtiTx;fmLc;{vWRD~);Q`=GAc;f
z&Z0<YM?*J-u<p1W#e5jLi{LoIS%X+z(6PL>I!IR=e2RTL$ToW#%VMh^9m_@AdY|-P
zf}3dWLeiebeiHk+7v*k5&laCG8Xj(Xt=HX@OkZvEX^&cJFtm?hoJx(~Bkv+fRh&+e
z`~>oPiqrb`E+pE}XK%k~16T)gR&mcx5!NQwIgFkK#1+s+t<US=Uk%KsIPJm3c;rar
zY2=v*_Two>MMfWUOByS6>wffIwc=hh&Pm>i$UG@5VxeX250)YB2A#tOoOKm-#1c0W
z{Xpys&<{j^m3X$^t-jRr9sWCLbXo8;{=cLDnR<MQTMzDcWAs&$Ce$y|=90!#l8NUj
zD*X|81x$g+?6sv;A;Ux>lt7bore+ZfbL(EA7IEufy+;98F>fK!25<-s$q%S3{pjOd
zoglm4b#q0XiIcM;-Ca?RL^-R!-W9bhUUnY<PKlM>xvz=(1GyVNU%fS2cCUcnym&b)
z3i}GM5d0jTVc5GR$XUOEhw$Hr-blO=|785WfM*r_x3I5?ma`^mZL1?qFlO;cFKRs=
zicejsag(f_IMlbxS)A%oJ7Q$_OAuTkkZsARJrsHbc^d?DPSkx8E9rH)jm=UF^`wCK
z#>uk#TR7H9l-+*@=MylHfT^GYZl<#H=v~O4LU4q<CaqG4ywLY05$`=kwE>l3^`sfj
zEGuCRaQguBCTQQnd<plbU}qZ4g(>Q%IN7}vzYWm#16yNvLF+*}CWxqf&=>hG`kC0v
z!#4>12=oT@xwLl?@{m|LixWFaAb$w1byqU$GYC7<@m<I_oBq)F8&O_A<i*sZ#XrP;
z5m|?wg?H4ubUgEa)JgJj;~Uio-kR9oM4yM8gnR_~Tk`9`D)4pI>PeQfHi2QpF=nEU
z!_NX=aW>#|JdW~DjFBYat|4j~M&8_tT1qEO0WFX-t3+);e;nCBr!1ogbBvsI0{sEv
z3(=pp%kG+78$YI#s2Vi3IesY=!`(uZk%P<LMbw}8zYosGp5jgs4W*gR-w7T`@O6}r
zaP~&N21yLTT93X@%i;+-MGuD|Yh*L<A3zIPB1E-BKNj4LAJaqBE#!CLc|s$!A>JHh
zM?Y#ddOKME`E9N4`0vlxBr^h~Va_aW;iHQ1U}A|XPno;OFo+5=h&WC6aAy~F9oZjy
z1l_`;`lxg06F@U@n!QbuoW*TpRCl_;iCl}!+88fp4BbNxr{YZbc#`q#q`D2JV|PAM
zG8;)tW3tvG6rx;&lMHD*K^sWMeN)s_^f~A^()YEHkI@a<8JSM8D~an#adYthoT8NU
zcceLn3q&f0#!_=*mVp39PL>jh$eESVm<wPgWws>pF3hZ4qGnO1Hg{y<7=iu~%qe7u
z1KIiiujc*T%>N-O&LE1&8cC*^v`K60Whx3n-cH-Klcku<7pUM3+7^QSf0*&V#cL;H
z6KPW5FuR<%h3%}icDe8oJ5NoJ2ZF;B<iz%DTlt{>3ao*wG<{cUYWfb39eB))mlL(U
zDh6kwlJPFHOJP1DvqK^G?4fCia-vM&kwm%hE^-cGV@XsK{bBUQ_}v1hV4sV9DRw7V
zJ3%hAfqSv{!2UTHMqDg$^}tW@TZq00*&V+?BM;E|ca>-8?{O<wxPeW#A8}HAno3Or
z5Af+<xZj{1qjB#8{UK=j6Mh1Hb?7^wZ>2?~H<ecD3l&qaR5!ekDKa#MR<k+`y+#{3
z(L#qHv$_hk!NNXN=(osBF`*l=bD7eLEW8P}0F{hj)*glVxNX3ki(4RJYz&0<K;~eF
zmQchK<elhQq=nu<??AqaoC@xuIBiRH0d((x4UyTx2z{HlXyj4&`@&Ozo%tek56EIP
zR6D55525LvN@g=AyFxdFPpS1C`sOk{u@(7iWG0(XPSwIzkTN|Mwg$7Xvo;C+7MZn4
zDARJ{C6Zl5UV~f$d3jKfT*9v&c24=wRq$=s_^@Y!@4~}nFI4hSGMh^|jzUi1P)_k@
zN#7>vV2qn_X0;T09_O*xBSBVGp%TefU}s)Ryhzd7@H>Xg37WX~d0e5=IYe3>)&PdK
zXgH;xF~`JEJ`vt{euBOWc4nr~aWHoz!%*~hkSCKt32VVEbKwmVw!=;7(L!1nunq@p
zS9uSqdCxO+ETw7#`dyISm(bPdm4KE~K+~0YZo;D()T~6cbEfb;WPhkx#XTDhE0MWw
zC)R<HA<n31DP{L5WPuXeoQfM_<_t(Y2=P|v_7ldl+ro|@3y?&uFE$`AM;^z~UkEy5
z<wBNBp@&J+7r6oPe(*4-CUU<Mx{juN0q)icO%IinnuWD6PlQf8A+6A`&IwhhwjZdS
zh)O`1blo(Bz|Tk+=#UG8A(%^q63|Ne(X`tkFKMcGMcho1{qE^p5o4oeKgARw1qNm&
z$V<3BM&!iHeyqA9-auwlL~uSWxde_#l$RU@kD^!7BP7M>$C@YN9PWybl^yzDa8v@V
zQr&8P{K=4rM=`SB1UFa2&vEh+Mr%ZZ9mjO5w7lusWO)f^YQ)FMvR@1s3}Yoc86tkH
zf+CjV`8y1=!13T7Eiy7v3NTfTk(bb65j9|-gCeF=ladiBE!O+-Dxq`*Fxc>$8!s<e
zhu=`_Kf%0;I3>&`?K96L7fY0g$OPGst9*n3ZqAtq4~@wjB}E!`XrFpSvAur~7R3}T
zT{oQ5+H8oCn)~`i67)U6T<s%1(h7@@;ZcE~k`W_KFpr9rm$V`;XWkOFO(VXCfvGN{
zV<L{`SUM>{;|z<H_BCalv1Bqn6JzObVv_M;R7O;R{CASDT3y0XjtC<Oj~ya5Y2Dj2
zjz=hdY#T?kL7W3F0=eEtyqhBXZ3J6Fm!A<Q&2ekWYvTUj@$`FEyqe1;I()EQ^^XS&
zkguVCJ5kM@!c+9G>}u{|(2BnjnLz)I!7~lda6GPnH<6W$1ZhUi+ztd*f}lSEy%N;i
zKFCLrYk<7Bk*fqIN=*%Zi3IdWlopzG@v1))aQJL&ZxX{(YX3KpCzDh$CrLG0<gzmw
z{+QhD!Tq=e<HnQ!+~K%2L|+Yk{bV(_D{=xUSsI3K0F_}$tPs{{Rr@%ky7hD4;?e#A
z3|~U<7X<GTFq$SXRKgE}oO9teEpKNBhl1N%8s(5yb;vD*@fLv*1oHUZpD8+g3IvD2
zUKBh6J?}#Kb6+0LOI7|gNMyvm4EqmY1N3L0`yKfyJmZijJ)=*SvVC)l5X<4ltQEdO
z%Wq2Ifsj+{qp%dtms}Txy@SIL;(1vkHwyevGiRho*)h455$ht(C!SXma=8M9_fAoB
z%Yb)i*XQVWU_S)XKL0J~d3K)LgE-#J$!(1OBC-;nDw$2(PWunR<GW<lpM`BWFCXP9
z8L3?2a<3rr%0}*D+IJGQ)}DqCGy8{MMPC*>x6}Sl&}#>>8Gc?MXKOgGVdk>n43DIL
zvcbFfe~kVP$LBGA$3gBe!?S4s_cGz5;O`2qA^tJ^Q%aT0Hq!WNxd(852j_<buf=%)
zPQ$g%G`E!k13rho37UI^v9%t)6Pi%uA?TS~!?}UYRnpr^bDU$*Y=t8PF0CgHBm027
z2jSlb&v<0*z-@*}57S9<)yJMn;hJAv{CI~@d(Xz7bxrsMGJS`<hFY{9-Ge_<X!s)J
zAmmH<vCs-1?NI#_w8w1iq#S2$NLUDj&!nthKyRFOfY(87SbP9hCi&k7<9`dY?!DNo
zY%gUvbGy@=r&RC`<(8$QhRCzXT$2haAhRM0Umd6VZ+``q++o|c12>C|pMvk*T){n}
z)dHT81ka373&OyMiK?BO%-~vyY5}W_;Cm^my)#%RRxRj=Kd&m<O?p@Gsc6;yIr?M7
z{Yl&wc(|$ux594~vIBh#@)GzsT)`isZ%KR#yqu?svyW+-(|#O{8mAUK#A9EAS}+?N
zM?@khA=5*_?3vhkhdlUOBIhIX(x9EEp20`rRl63~1w8{Ocm;k8!QcrXBRsexyxe{V
z^O{M48}=Jia0R)?i<d*^JyfQAz+*`J6pUFhs+~K%U`Do`^E$Ygpc?2G(@Zv#gTEnZ
zWn@ktdjuGRe;M@M=*H&w2iesE&iCL!_%j^^hhk@%3N{f}qRAw0O!BrA>rH$-MQ*@8
zfJIza4o4pP*66ivXJQSu;Qt8!x?mQ_rks5({yXT_q3}(Em&IZ55bD{D%*K_yI&mLj
z?}5xVsa=3+wBrJLL4ALK^O1u=-uSYg*ZA=B(hf&Amb1JeTQG#6T@=AF3N~WwiR?x&
zb3!mz0egFp*DecK2nG9~{}R8E6!9tYb&BPr4}KqcE!ZBuufQD~iTUt^Q`}Ab+u_G@
zJva<G6a5VQS!pQF{@R!@qhS)T+wkm11?))%Gdn%&u0xa`Mr1LOe(0NG=PDh1o#wN4
z2`)y~y6Gm}^Ck8SD)<=Oi@gQ*<5ZG`98C*Y;sw7)x1Itg@|TIUo1M+Kl_}k&QF^;#
z7-*{NuusGH2{9313a#Y@jo@MA;La!b0lmbfGk6X5tJp2n#PfvUiL@>i*#X8Oa|S#c
zB8SPPWDGQAJMF_U@1QBo3FqZC`ybG?gN9v~VBXd&IFFqjxB^d*x2p<zqHhCk2CJjL
z3I0GC+o6AhG<89(6Lsj%GxE5H32s9Zxjzh^K-15H-QZmS-^8a%W=}?30CD&5dxW2M
zf-_R=Y&{0wgZB;mb`e(<{WrupkdyHL2J8&~uiy~sJ&HdYjdm7Q!DixEfd((6J={MA
z=Mvu?97*#_LFWB}HsFuMMQa;1X^c1;QNW{z;LVVP5d0p;S;0x0l@id8LC-rC!H>~v
z!}v{-j6nZ8B&lFI;@JxiwsE}2fICU9je`X62XGNdkAS?nTfkF?;3LE*f=9LD^c2xc
z8gKA{L@^9C1sGP*$4cN()5*Z!`NU!@n5#{}ZxHafpkNJhYYIC_B5k@GP60b1QUZsW
zPR{Eqz11X*-NEqN333rLXt*O*PCLWQ{s4Io^3^ywjTx4eid@7D8qRKJniAiarBA;^
zxkyRx%j$`(w<5*RPYP4y5vHn2`+kxdc>w<q{JGI8VjC~KaiW~2UD?@bODlnQF#OGs
z8^fn!zfWGKtni0+Ijt`C_fyEzIbB*+Elpdx4a2=DZX+<~f^D^w=8sG@>K9GNd<OPg
zs5_9A4&a?lx#%z~tk8>=Ay<gOy+?o6<7Lq(rU5LN(wJ_;b);wWDEueX{R!Z<w@69F
ztc)3DnyXY8D8=ciW1zIzha0G(-MEaUMT&WlbgM;LL&TkQkao%241>1B3jnKAk~_=>
z>_NDX0egY<!9#R_5;<6EtPIUCEooB3BeU@4#3aEXKs`Q#iPhG|c47{I{gKV!ue4r?
z9BT?x1`UyBEAHb=m6fVPrG|=C`_z7@RMo(JdXX}0sI=U)k~SAZdInOhfSy|7K&R=7
zQfatU*>E5Z$3U0qq>~p`gP6rFWuw(RT4E_gP6XE_sCleWEOTSkAZ|!3`H5;?E^ile
zE0D*_CKk@oJa^<>$ZVNgZeo7`ZbkNhZ!bJXkTq1^0MMDB2JHafgZFLx`l6q%4d?Nu
z920X}5SJKBR-zhI2mBPHV$L*~BV+7p&=0uhLO+yrDbOqie~N|<{Y4`Ez;9#KJl2{P
zX1F|d9xQSAF*RAfA)S&jhWqTiPcVnjN`I<nX`R;(^B1_WWVNtb40;2*8SF`T7=^IA
zYIz^}v)H#o!{x%#k#g8Kwv<Qz1vnFbmJ}AgR}sXqv{cp#PtP)~>&%5Eh$+?*8b_g^
zL`Kd&OBFo0F|ur<VCERhU*I$%Gbx;hftEy=#-Sfi1HS}sk?9Ta4)LrdExenXrwkj*
z7F*u?h`cBp<O^*w>MqD`6qE){2e1G+AVv*}z+OrNm|89I__0r6c`ruI8-ZU8Y3_mC
zpXHrIUyry@{5pa>1+uVq&tnB+VbvLQ7#`MOdE3zIN|elTOv$xrUI6hW*sEYq0xN>7
z&4O;C=j&cU>@ZnQ6PHTd8SHN8ZScC`R|Y$m01Myy2x^R94Eo=YdA6DN5PKVBD{>Ka
zzKI+}-7HEh@4;7IyZ%lxjd!-DSmtTVS;*OAvv?7#ja61Uc}3_iP-Jc7!z5XVyn@b1
zz;8Fn*%z|RBYqdygLt;^EY0W)rbWwxWQqiTB<URDKg6#QvW57g#Pb!0ym8ufA={MG
z<6FpwVJt_>&*NMh@+Qb{K{f~HV#v8$c{a9dqlYX{J0V6gP`JKZ_=-bNF@7QVX+5$U
z?1kL~?=|oNLsv<kY+4xE9ZhRIR?s|FotDcmao=efPR8|Q_%%rl($0ai_*F$e9=wjM
zBf|&Co$y!Ub4)dS^O8_=_YtJwCzzFtOm?HYp<&XnETshPp!m}y9{E`^I2iRIn1ww9
zEP`k+cI~vyBk>UQ+FYUy77O;}Fh+wciGr-eSE4{~{94i_+MG~}crKxqb~Hpgi!@xP
zt?aV6@A-|O6F8rOOgmAEF><$P>BvYp0dlbNEa?Ak?f(`|lC)J7UqPP0Li;uq`;d&q
zU0xYlvYHlYvznQMVWhZmv|t^2ZV`gCv-p3jh!;GHbE34o!!f=^D|1t={erQ)B|&wJ
z0-MIF$GW3u+vC`LUMN}+uO55bT_>Nxejd9aQ9afHeKv903tyShszZBG=_mYWASWlP
z4kcrfG^5+Gqlnyk9^>U5nZ>+gG<X3T?X{_B+_m?^mO{f#h#U~7Itror7`mlkG;wzH
zY|<Rl-UsVsSC8=`y}XKeUc8srVOP?#xtWe8l816~I~YT-@I{Ved-3Fz|6`wlx1-f#
zb+9uL9;=JIkxI(q$IAVfKu)!*4qp0^uf(d3^7!#|*s%|JEtOrw&fq*Y54?`7>A4p>
zro)d{_#F>8p3ceAc=Kh9I*ex_xq^{<3J1T-DKn2d6myPrDQpI`Ezq<=(;7NnMQ~_k
z`4LeWbP>TjK=L&jYNG!;P#$kT$?l*MHbuIxKc+ODDg~N*l1e*JZ&Al-0=WP>{vd$%
z(eYEIg{d->=diF$c9a8KKyikMdGxIlc_=Aj!HF^Iv4>zY7+CVl1Mqi|aw^RIkh77s
zMlsXS*VCk1l+gKOHXV<~kScz<6lh3%mUOz5UH{l#JlXMb@b?DfK}7xx13ONRO5h$U
zm<3+tSSW!rSSK9|#XS?Z%OF#j<1*&y6r`ljkmi^M(Z~ce{qe{KYf?hK=@aR?{{Q`V
XuKngMma4jaQeXSqmaq74WB2+$=Tk%Q

delta 358527
zcmcG%4_KAu{m1_tIOl|VFw~=>9N?%&Btrx}DitagH7X)1GAdLoEGjgtQ&FK%or-N_
z)I;5Dk>R5MXtt=_hRzx**U)USMjIIw72C+s#>y?={9d2uIlu|~Ui0_6t{+}6c)vfN
z`@Wz1&-3@316ICrapjSVSEp;5c6MIsq7B6<DQ>j3R=)8y{PQBG>G>X|Mf2UNMQ9GR
z6ErOkvLO>(V1;L2d}#HeiiSS~0vO{fmG31vnpU||)5=$9TGet*t7X%5XJ}ge>6+GX
znx-|TQFEE5H80h){Yx~h)vIX-7HeADB7V|9TZ*O~UdYd6O^Z&_H0P=GXg=-E)3o9Q
zO)Hv9z1f=9GE39;xU~w+8ApLwO>@w%;u#d6_l46bz=-xtVMCM2GX}@WniesU`~>pj
zHLYMAc{_QVrtM}tjxh2Tb7-4>9td<Y7(Wz3KFIJrAj6l*k}0rg`mM1uBWJ%92uR2w
zVSUo6@p`VsI;;D6V`jpn;L<k&fl7YL*B;_o{Iu#H$DV&SJ=U(>;9GaYx;r-9bmO|x
z8?W2sTX*9vzS3KK>zEpC)60QC7eA|@5Wcr7`sUd9$k}xIxL(Zde!=L4ReCY)O>?bc
zroF)mF-7D?da_FDnmxJs>h&cXLOY>tY7PX-nIH!f<fB{(<U%62fjO7)%%2mPKz~Pj
zvEFi4MZ`{Ht9I$Z+V=wi`N<k+z8i14x;Xh3#+#-knPrT*+S(d0yGfbMka!+-^FsX7
zl&hqSe3h7SW0d=dOH4+Yu`_T|pwL*=m*<DwZJ9gqwG&ubV>;h>C3GOei|T*TXx!-N
z`%Gu0Y>BUY;v-WzvZ%NIAB#G1&xtPT8?Sy(FXsAp8@(`w`|8_<ZunDgy|JhT<VJcj
zvZ%MNJAP3o{^CRz^-X5&yNmiPyF9}-7Io;)-@mABW*K9y;(j-}sNEs)e(Lsz_;V;%
zDHF<<gB_Y5#wgbkM;oK;dTDe~-?p--yMA?|i+a^BLkBXvs2=ui8aFz6>+eT9aEyI<
zWGY7%_4|%gPGH)umJ?mntLX3ObOw7d*TW7sJlxgq7+Q1Q`u0j<+5kNq>B-2V-tgT;
zl}>28_MYgXUPFIJdm%?^7CReF7>m07qwinT60?jkSNjf*E~-ByUQ6BD5dS&lDy4IL
zHJNc^QHO|2Oh#Gx>gb|ge-ewDv;RaFHSE`+0~ub_R*o^+Haa@(@Ms6>h>uL=$fADV
zkt*|<^ZJP{Y8d?;ozCG!ZRHrF8DqF#zh~&mz?1q)Vp=`9k)Dh!>X;+dSkxRYA18MD
zwbS3xUdW>Mu*<VCV^LRhfB&L7*dZxn%+*>h=wjr{6B74Q*Bj#Rqg<s?iff}8H%7Ug
zIALRy?q7{A>iF?2>QT<k|MS9XoPK}yo6vy_FKQ8cH;o$|eYJhG18KxZrgCIazwb!p
zHbXo5+Y?>XpV8mZ=^S3vqJ2g$jNv}oVGj50yf;_MY^0ML>B-2VjyY1L6WUQOA1Ah`
zPt)JgUa+W<RqXOKVJzwq{UyuvtD4M+F*8H&k1k<rNW8r#5NHqa34a=$!Co_NjA;dN
zTCbEb?I#|*fIjbF1-Ee`KCu-%^FU~?hgZ<bu1woTKUaS+y8RAz+>v=3S;6l+EM@Yx
zanbAIXB8B}|9w|<CjA|qyWthIvUgF<812#z&C&kxo`+Tv(>m$lNKZyqaLi$8tl&1T
z6(_cW=g{BLUdRe2vS+a|V+FVBU)iT$m2F0hnc4oAfRUE|6otfnUj_oc5dS9SDy0x#
z)n?on(-Go?DrHQC#Dgn%-pQ<B@}U!5!B5&kdp*2@jpkjX(a*1$yGq$OUuDEc=51sJ
zzwa27=Y(Xga9#ZT-z)eDSBcTN8(zW2w~P@QqkaElL%RpAHLsTC<VJcjvVvodLFt5+
z%!T2^R`7FXZL}A%f(O{YX~I~+I6c)d{i<FwV$95nuF(~=nl~>--1$%13GvrZu2Q<f
zSCSbw#<Z0<A#+T`gDd#ANvz-@PO&F;4Bq{2Xs?G?Fq8e3wvGNg`pM|_qlu5q+sF!j
z-!UkYcZkbb7eD{^3f@hBN9S&M1vA;ZsBVn**3S*S|NMvVT}e!HksImB$O?`*28|Uw
z#8u$LR`5RhJK77mG!~opAI1vq`}+GYjn!ruW3G;LkFIHBNW7W4%^}|Y+31|MnQ>#3
zal{#%G0KL6qnE}XIa$<AoODlYQD<{{A05c>qIR3Nj7CS7elgmCJ;X<*a%54z@37>_
zTie9ttc#!jdr@c8-{X2Qx0?fs>c()V{oT+Fk>9>$ENTn6k)Dh!>X^e)I-zai!f;}X
znm~U?dm)P&Vcvfji@NsP?_bm;vy3rUTmL$`sF@-0JnH6!_@^mXDV^i1#EcuG+((>{
zG0KcTj4tZ@=`8BR&J$hKH+T>l9mw#a`q{f_-00~0OlKuVzVeBWOy$U;e&3NQ^Er{T
zbQeGW_oBYRb>p~R%=Q0qbWvaZhoK8C>#B@JEg(12laWOobEF!JI`K##P|a%yuK)QO
z!kf(6XfM`V7P_S8@|r^)ctKu6upTvfVt@Dyr=C9}s-m64g0L}H_OC`4m6+BS%tc5&
z;~ziGyBwqZYTBxlf_%BmxUsmK%xxH>l{U<?u%=zi7_LlBa^E@IcYEoE*{8c_U-k0J
zuyq#woS2Do_i^g(qM`pi-8X#{I?&-0qnEv(>PE+|`4=6#4Hx`!kQ<q9J<a8syN`?C
ziJdoYqF<xaI=qW{+3{%H818*c8+!jMKgnDfL40&e-q`55uX3q4u`OLspN`*Bl6g~N
zjBD#Z0|8kK<BRz4mad+=>(fA>n{YK0K>_4KBDf(M%$J(3p+CoMWv-tYqe!^C7{}F_
zmoZP`yYXew*s+GUbiVG1kDj}WQ+gN4Dky|}$OaF%K-#w&4<uL7uj9AoF>fBto#-33
z%nm;MHSnFSx!umWPjdn2BK^O&b_M-9erqlcA*vf&%Q+SZ@Y3bTS3f-&9oyk~5xRSp
zWA1h?tX-7(-&?$xz8$|sCwnN>jV+$@AM<QFyvbgAGrGm7+0MC-a@{$xJ>)$4b==lw
zIn0|LV{2=fO^!0-tBYQYZpk_)YVLhJ%ADAiPNz@DZ)qT8OEQzgTRKd9bW8ng=~m9~
zC$^>e^y&C5^_n-Q#+X(xgO$NC?I%9GrAzc)Pt@E^e>>5&noXaM+tS=_jt3eywlr;U
zbW5_q7IMQ|J74db>zsQHmy;74*G&3#+}37way$%gEtzZO@C@!IKDwp$gy^|zdrx#+
zlj+lOTbkR>u`s+PJLmNX8Bp-6iQMSc4$pJWUHxSs&_lKc3gQ1gqQ}#(<F>XSk(r}f
z*zoxyipzzZKQdtWtBGSJWUGmexj9@YPHZ;A=-+W$oz-gI0Gn$!6bKBTM(T*0TiSfe
zm2+2c@jbDv{PTOa(#)~I7>upROb)ME6>)Pb>*rrNH@Pnm=%rXK6vF>KgP$<!(6P*2
zIFYk$1pkb$kvVh(=Wt|u94W!=oqFXd$y{~z@XpM@NnGR^cnS<qG#_%oO=EJw4^unN
zwolh<Vj^aJ&O}6!(m+xo<!3erM!RVJ7pH&WsAR{Lq-6D`GotmB1=hLK*qZDcRZ>i{
z!Jqrh!p7V<^vf4~RFTi=)eXHAlXr-GkOl+cTq3~_S>S-f^tl%D!3AA3+X%&w1bu9!
z8Ok6%xJ3v1AJRsXn$^_ys9=sJc>~N6rXh?jy7?&=myD1~X`z+$kCWqPl!atV$7HAL
zNed%p><%gM?3fbqGkQX@U1PFleK$K6Jux~RZECaA#`g0=vbkfjX8oFw5>;bL#LqYo
zlHEThYu4AyJ2GSY$HtV1pOF@lO)>RI@67t8Atj2xQ{tl3_!-ZJWE+mlc7<d+j>|@y
zcg9A~qD&p!wzQlZlFd3UTNRS6{7!b(qWBs6L$Z60%N`5K_8pf^;b0zKuS8P^dnGLw
zhhz)KW-A&(vUR2oAKpRP6^$KJezJdSZa5rL{y=DJ<YYe{yFXVFvYNM?#;gr|cN_8M
z%@nPNZlsSwiL_?bUcO`T0s9%FZ&XKf$L8kFZ5qw?a6U=mG*n5|Oo)R2>wI#$Bli2w
zC0=d|M$aY1T)JfhGFHwdv-P*H81Jsb$hkz0U`fc&X0xDi?$eK*(Nj_Q|MO`if{QQP
zHD*vVwYkCsnJ0bS@~Jb@L&|tV%jjP%kDuWS$(DR4`*HGgJ$*&QjHZwhjo&FD^}9o|
zo!`k0*LMw!_R(qT;9e;8^Fp%O-^mWwuMR2U|4s?1-x`u_`A&AY{y<2H{_m8K`d;&r
zZ_Hqlse`>6u3r*TqUbv%9$FbcqcJ2~e_XaRB-?&m*2%?wc=a5n4sKgo&JM|D9+&lp
zWXr#kO<ff~qa`HUd|b9aB-?vjHi?^q;a+)69qg5~TojTm_)d27syWfa^=i$`$QeU#
zS`|H#Yz?hO4ZU-f>*A1G2b-K{<nt%o6BwUFF>lATk=EptAjh2iltWLDPpru4KyE*R
zr-<{;+-E=e;0tA|>p#4In`MR`t>rq^LoeI=U%uzOi!GLa=<lEZw&gGSj%*cHGFe}7
z;Uc}{g5t>?yQ5ZA(!(V46)ekdCK;*doZ?A)ulXWfQXRq6v-(3hk4<XWeTh-QZ(cJk
z0e$&}FNAeYG^)9B%fc3$@<49!<fgc4qlGH-3Sx<hG)nm{DxUQ5O0yZSc^bD|tFO9f
zvF^UOEG&D1e&nh}dfUZClQwp&G`3Y`o-Hj;>Z>kMmI*I^aLG58@K2_A^!F~k*Rof?
zeD(d|Pd|B^{^{y_E#K<bT~-zLO0NE|%kB-oqu?R^&is4BxBsQ+<?zcFg-u#xoSQjU
z1|O3by?oA<_ApC_e!-d-Xy`wDZ+Q8swc%lwCjGg>7c3vYeChg2ES8P>b65SI?cQ>A
zj1+!o!~GPVan1b{-f@kxQ24as`zic-@ySy7-RoYX@DHz#ikehvES|AUtdy2kr$2wk
zWvA?xWs@&Hn8TNp$v)j;eEdlCKyjG18}`)c^Y2_+QR5{A)r&1!1o#)R2S9nMMLU*a
z(R|Ri(4tjBq%?J!Me9kU9JrxnnMKQo{-qYJ29n^w5{p&@5zs{27c(Iq>RX{Z*`l>V
zBlwdo6<QG@6Fgvr?gbQpM({%sWP%5*(9LaND>Q;1iXam_V1@2e$wMRfp$Ia;16Jss
zPaYb<Kfi(kh)jrv-cv|IJ(NNwxS=nRkwO*Zfg5_~S+qUihg@($Pl82jhDykWXy~46
z(RM>QWPuZUJd}gV3ch7SG<46QAe2KEIH7AcQwU{{2~p5F%c4CCKF9zEbhwFu4>G_3
z9r273N+BI0pgqo_H9!fZL4}oXhhrHr6oVHuXyfI@Iw*n^7>HqAp%w}u1qNmkgE}aJ
z6wsh;1~DiGFIb^Hn$Ca^GQkPm(=GbRTUJ&0oOBu-&@qK_Pzo8~fX>Mb4$2@CoX|Ci
ziGec6ged6b_TyRbK?XRW<7C={Qpf-YbWWrkltJc1_CY72Ba$GLK^8<qkAo$KD#!;9
z^i5z_fGWrXH}sCD9QYv@T+lO)C5B4KhG^)GuxR_C7K$Mqq9Q7oiIZpms-Y0P5CNTb
zb_wu9J|uw#?KV0K<&X;=IA&$$p#gl51up0dXP<^TD1i)cLQfb2ftm`w6+s#}pvywf
zp$ZBh1+37aG38JRd5{Ql!RX);w;QUU5YiwDdWLwugE}aMEO5iIL2hy3Sty5!Y`(dn
zZ$Q&pp%#iE4II$<EgOJJ$O8`?`!An!gnB52Oo)cw|FBW0g<?nt2Xq~y4e&!AB*L+8
z*xomM_OS+00Ey85H4Q*D<bwzL|4j^Ppa7EKSU(+rYA6IRL_p`i7#a8=102xtPo83-
z6w)CIy1(Khj8Oem1q~p)5CNTi-26f{6hS&Tq4yv37V4oCG9emzkJ34)g<?nt2XubP
zr-Yy!vcUztydKd4)ldLQFi`P#J|+tFPy*>-h4#Pkb_|q4Iyj*Fuk;$KAP+py-@{Bm
z1C&7)IHCIsHU>3N2uX14a|Qu5PymTg(f=7EfNIDG5A=0Y8LA);+|c_e8wWq+f(v>+
zVYZ+WvLPCJJ|+g0kP9y8>7p+9As5`x`w_8^Sbskv7hKSDgvw9}*$@rgAF}g6Ib?wo
zx;mL&D1%Ihg3iCN_TYmIa6reOS$imjbclfV511_|0q+MDtU(9U0@aWQF6jOf8vq}q
zgB9A|XPuz{Jka|`ItJyC1y1N{ryi6+CPYE!dvpwZkO2<p_yZk-(h9z%Lj<(HOV6ML
z(!dIb4^t4zARFA!{|;*o^-v0#5DmSDSW2jcVn~N5=sw6)Lp2nF7b2jujrHHnH$UV<
z5@^u=dwLAzkP99-_BJVK03T$53;KS?>_HurKn6IW=K%W<)IbrWfdjhUVu_&&3f^M<
zQxI0@c$2AvO2~sm82BwS0F6)v+2Dr$RyGQCPz>qdfUe)LB;bcUNQ7f=&~d1TQpf-&
z^t?_7UuXSm5Jiv%4(QrX55W)lkO%|6rV(g>Qb>mgXx~Q+N+1oaaCk2rgJMVl4Gy#r
zgF;Aw{$CM;`U<|4LMB8(=P#Lh@IeMRpyL<J0QevioY1|8PJ$ouzyrr#qm$48KF9(W
z^u5Y#K^>Gp1~{ST71qCnZ#7T^Ua&%YGaCdSWP%gAUuJ{ghdl5=|IcX{YM}^{VBjT2
z26a#bUT{FyZk7hhAsbxK_af_m;6-{2rH~E|=-$O13ssN@Zs^@f4E&G_F6e2ZQ&0)n
z5DncgupuaiEO0{C^Xywt2AL2AozF4T&s8u=L<U4b=d%Q%3^E}Kx}KpjltC6aq3da4
zPzIS01)Wb(7krQb4(MoPCZQD4Ap+W;WWZ1YX%&34!r`B>Ge9wTL4&p(Od}LQ3TV*Q
zKshLeG_XSZ6U3khlAwP(dp7tX8=TPjI30l!@WMbnZ9zpX--;j!1|DNQp%(JN1ARZG
zF{pw(a6|7;SPJk%F1Vnlj-`M~$cAX>RxAaSLl!up>rrM7${_nu*58fje}tU|>Y)@e
zAsTvXIio=>6hk^hLAOo^pc)Fn3lY%yFdGCv<U<l@(Eboh1Lcql9ynIR4Ah*&S|EIC
z!K;=ZSCssSrGqHw{viW`D#(W<7^q^UpaFc43DMAVKa&JCPy}h<fUf&!1FE0^l0bvQ
z_p%z`gDi-KZa<T;+fOGU3!Kn(53>bjkO5K9aW{j3QpkV^=(vl?gHp(ZXy~nElE4qS
z;DVkCIs}!F4KC>2O6Q?^E2aRE1Or=`9cTa_WPuC%%GpJr4oV;coX~S8jX(_)K^i!q
z^9}|8<&X_7=)Ik?Pz?o;1OsJkuYzy&Py*@TfcD#%Dky~va6sp+i~_14A3V^13mt<x
zD1|JDhHf8ap&AMy1qN<rcY|6egd{k269+>p-)f)$5}|)H`yW(8K6s$7l-@%X<bfM{
zZ)B>$54qrio=wES54qrm-i^ed3i7}WeI>L9RVA!{9>NX1H&77#kP9y8xt@_gC1gW1
zbYI5~59N>rPUtFT%ApK0AqqOLWuE~bWPk%Yt|10K$b_hCSpTjKREBcM1P8Piu`Eyo
zNzi{aJqABygA+QhVwu1P>EM9&^~9hA(m{i^LY4@MAO!~2u@q1Xg%x~Df@5o0OQ?YY
zNQC}1ECp0UK6s$7fQ~>F<bfOducSe!hf>IdXz0Cy`cMnSkPcDMeK|7?)lgB$H!nm$
zXFgL4e#nO;khc-rFJs!E9CE<}$5t~kXaFB%feZRBWgt)oC6ED5=*gqwPy<Df297+|
zzv~j#7^<KEQosrw7qcu-33-qR0~c{3fkr5UY;Z$=E;9i2Pzssggq{l-DO5uNq<{wP
zIkW*~ITiFA;ey@^m@cS+B1i)VbY;^)sDc7W0V}kh&#XWhWP=O(&Z7;ehJ5fq-?@w&
zsvr;C(33^Spt6E*xeyIKKVbTy5;7qgde5OjsD(mEf@5bh#ZUtUkO=*m#Gn?6zzbGr
zKZ}uq53;}oeP>b@>fqcGzGZ+DdREa>sDUC#0|#_vFazL+Jn%sON*aM$C<3pt?6u6P
zC|}NGKs5BEQw{u304X5vqPCsEN<j%^Ks5B7&N!hO3cw2y&~Y05hf+v~2xw1Zw}TQ$
z11lU}Mw`pnor)1&(4cK8gM=bTfq^CLCQu87kOarPj00+*5R%~7V#+}+6haCNEFuPV
zPy{J3kV*_{p%9W%E2x}8gHQtnkO=(?868wZK6s!nnaPGK$OAX@E?~C+KjeZ7dXkt$
zsDx~g->&RFm16^ZkPcR8n@>k7_*MWO=skssP!5^kfc8YD35p;Q`sdLxsD^y-KwknK
zgDS`aH}uY>W8jBea6ylU_MsB8AsQ;W=dce$Ib?wg`erjDPzNQD0Z!<d#fG2;iXaUf
z(B-CKsDc7W0V{OGGis=WJV=CrIMyB-p)8K|&qlbRKbEzDdMJfVh=yJlBZgWihIEL6
z?ifZ0)ldjth=9(S3<&&?4@saw`wWhLD2H6|z_I8G)*+g`6MT>bF6f)i3_u-}KsrQ0
z*EB{7e#nPJ7??^M&;X^72~Oy7(m|+(LhwQabWWjz;D`JQz9qrHWTpigzz3NS4Ly_C
zAXGv&L_>EJ4L~_$f&A8O*U5|y${+)xpkpF2D1i)cLQf=12vv{=Q{8;)b<k7rLoT>f
z>g$#{%S*@84~T}|ajXK=LNTO66m&;W6{?{SybuAM>Y>;9kZ_}w;XnpNL1#F71jt7^
z@<HA`>JMYCK|a>O`wm(LL_?2-p@Sa^APELo%L7mcMc@UR)m$xufiAbHSyyenZ^Nc_
zH*eO@ubmp1Mef7loNjq~#)Q<RX_WC1@6=zfO_-3Df^Ee9$*s1(VM(wZM!!oc$+~wi
z@Q(gfZK8-aJ@yep9MI2w#4VAxNX^k}9<hmelM3_n{g2penEggeo(M0G+k=5$y4BEc
zEOGWk^e^1{z5_AiPGdcBd))epN9Wj3FT2&IR!f4t0sWj=`=mu$8}3PIc-%7jIDp$>
zY%@yPY`-21Y<KH3)f`bjb*uA!Yl*Y@&_9tPs^+&AtE~ZBC-v2Sto<<d;b7ybykamI
zs5Tm3LgPvM2LnGc8qcV+*)UaZ{k*z`qWo@sYu!9im7->kPo;b}w%o0byh*2{ULOpU
znW@F3(y>0bp8S(}HdLvZkZsgrOWbPPTh!T)E;ds#wqEQ8w|;~=Zwv;m66J|V)iN>b
z+-k)EYLuc2%*0||D;W%2?$)>c)F{8&3@_38318w?N2R>`H-mv(GqFs|#$3p3KQ_mP
z%3-#DXPIrUM_)iIW;ENa_DQst==oG6+Bz6G&#flDO*9>SE~Bb9qFHXWR-)BJf50%z
z=s9k+OQM}b&vxsFi2imkkm*)K5=}y%<<{dKH=<{{RnG5;mJ?m&);F2a43<ZtZA8yt
zUwM3<w6IK+Iq#k~2Lp@UDz1$Y=AsvxDVg_j?5VW0eVz?9&rHa?w`1qH)gh^)y)_t!
zH&Zh2N!U30^u(z)RII23o2+lOzP{F}Kk$T0;xR^iylkWwJCn(w!hylS3^O4c$;L)A
z!wp7ax|xtoG?AF<*0(pzks4Fz#~}tdfS%;mj}raeVBlm%^bXN%bR>=JSnNQJcL$GY
zIc^=POPA8xZsOzIYU?|e<j8KcZA=;IO4Qqf0V{pnamr*Fik6AjLd$UukaI^V@qj+*
zXY<v`hb^|qM(np-q~w$`BY3KiAs;6GUy3cXTYo<oI2NyR4qN8iz36WYonRagMfk7d
z)h@}^qW>MtNeiv`{&+R<U2^^Ce+6^WSx?(w;GgkoljQQyUzxc{GIf6Z(a;_-35$sD
zCf*yb4!vuc9od2YYiJn;C!@9=91MJ+fBEE`$Rt#EXbD=8b6!62PvTYFA84f#{ZVL{
z38_msNoh^QKh&2u&at<n{z5+~Z5u#;pg%y=bBOb1yxJ$xZ1f-X4~hCv?=jx@h}NUu
z)fYT9$KHy1Ctj_UXfOJpzU8Sok<NDp18<MnvYenZi2qK1mlDOOH^-Ea*{LP|TS~a2
z7;g*ajWLB}f9WCqx>1OUi#j|Q*f*w-tU)^Qy+$Ecs|fSUF@<DrsUiLgebzH`BKM$P
z9aBP<xtsVal!%`s3*dNnF!1v+rDXBah`%JIRNo&h*2qHat}(@=dNuK#Qf!<w-HdCZ
zhb~9Ty5e=LUMGPUMgml@{$Vii{78TeB@uXTBtRp11fCrUFr;z<&x{0^h(-cWj|7;$
zHUdwL1eo-G0*&-4MsIj7!sdEUu1`31_)nGyTPF5rMzW7&32ujxba#-f#WonpjOQb4
zEx0F)WT_;(vD=Mg4M_(d;Cb9g?k8#YV(Z!4J2*iUpdX9Zk3K)!?nnMqWc)Zr+l2dx
z=@@D|t}b3b@<RML=4t?^Ovgld{x}$TlwI}%Ip3fkk<4tn4_PY>#6&JLr?{SoJ{qBF
zE0KpsBW$*p$U~zMHtl?WFi^vJ;LkLlj(#v+Kh!kaUWD8x(&%k9?g7(Lr5RTpukYJw
z^tKcCW7Em9{)y{yyxRU3noC0eP%^XadB`fWkSrwbe$&Y+;_fq@EFtb*)5-eb{PAjA
zC(XG!1_SrR>$`T%wr3*m7HRaZ1b3I|=v^(Y(sXR91y^A@Hr0*W8n3o~NOO)41_N8-
z_3batwtJD~@%pyi#()ZNcbbj?`EhrcPL>UKyXj=vaAomo(-E2*K;I^r*>=yL2Lrd7
zg&0sa?iSNAARo>buWx<H*i=34X4A=X;cnuD^AXMUqBl!sw%z#`?n=x;3@9CUqv;q>
z5pI*|*i<!cqv_aGGwue?&RsOuiM}pg-}Lj@c5CNg;96El!b#{2Og7;>WMRBI+QoU@
zkG_I4?Z@b5bRMmU?ndV@eWINo4hFK=Z$xLJ&x}_`MElU`@oM5H=tlI?c(p=wJ358A
z5^X&)7?@8NM0?R5juz2H=(u<_B)S$I9j}r<MYp0SGclt3(GK<q(VmY619lD#(RpZ%
z{)_gb{~M>`y3x(({y3E*x*L5oPHh$K>>3R8#Hn4PGtr;KsUxC&=*~Db@iTNI`u#Yy
zLUcR&y*Rb$Gs_~I<Kw}=AM|%$o3Gya%wn@;U=L##+I*-(lF%3KvDu!*wTUzCM6^!q
z+xm_@^VJidQ!DC|!N39RLR%*4O-bkhYL($y#hJC5uy5#_e=%R(_yx7Pu=}wKZO%`*
zI7mW&=NC3x7Oq8{S*slTOQY7x9%}8z?$KBMa-ppo^{P?piC@}m(cOcAW}_DS3%e@z
z=lX}gJk?f?dI>de9Q&o#gxjqz|JA9sPSlG=oZVk@e8#mvf1G$aYA4DpUxI7W2Z`6C
zUNGWPz76-hzNY0=+W_i0lvzIMb2-!RC0>AfM&dRpU#)L`*{Lts8)IuB{*=VWv(sz6
zxW+hr>)!b`*B5e1mxPQU8~3v~{m9<=_HyKoIDO*2`SvDcL!7<>*@=82PTz!d^l%Q3
z)3+njk&nmehma-6`Z#?ES&w`yPEY>ze0v-6r*ZmP<N)#~ar!o7(qB1i$Laf!1xOXA
zA4OIpAC1%F_RqJsARmd-bCA8rTDpvM{f+Z<oW2X0jeMB??w=o7j@-s83nO=Pj?^V`
z*Yhm#A5!Fv#E2BF19J~sd^AC?dEFWr@pn#c->u`7yC5&|JL1%#f3V*cp?z^`2pw6A
zE*(=|?p^j1zrozbBCQXz!Pth{-Nyyr-8&dqKc<o~*Tf5?5_64Q&0PP6xklzP*T_!f
zdCcl>62?oX9AENi!F-cRN3M_z^NmbnzLE9FMa*|A^NmbmzL5jS1m+u=baXJ_rf0|k
z<V>?|*&(X2PUif#%sFx*bB^pqMlk0{*FOdWVa)k&=i9Q81Nud8cpQt;xNt7k$_ad}
z-}Oeqc;jZZ3EMZC_iCL4{$}RoU~u$tdpeq5!be&L14qofELaJ)V>G{1t0(XWef@!X
z@))hP;o3&i%d`OkZyIS?ilndP=->O+JX=2Mb^Whz&9POY_Ue-k#M>KDEwO6F*Yenp
z{uT93wQ2v9`?!Y=%(Eq-UerH65Tm-j=6O07y9*m{_n~&is)^so9WI&=s2-Sm(o*dJ
zdb^={7)94fN5*q0(LDcRH;z^NzL5tvv>vPeCbi4Z57HUeIJ#Sp*%qrOznx$^fP6q|
z>y>X?ZO3r;8|`{EPd}Guqg^gvS{|+<RvkS?yOroIv1;ajEXnp}^zE@K=Rd@|(6_|u
z+kbC9LpuJ=wNPw=Jq@|h%*Y9%2zy<u8j?yi=nZB{c8M13`dB@uEy3P{Tw`Wr$8~<q
z)h$--`Y)9-(3i#PLv8Z(UV_OJlVGnyUS!4>Yps}^She<BD)gbxHxo-V*Ed`PWA$AJ
z6YN>Yv(5N2&4<Zkwg<Sx)}zm2iVn@Tw<6D^9SQfMSH<dE2|JGs1~TZ6gwxS0WA#IX
zi;ycAn}n;;%VYJpcV^q0k?G9SAmL8*8L|2%!q)!`22PJv+a;WYJ}p+?M>r3e#%xQt
z9K9@7pLlq-y%D*T{b-1A8+u8szLs!5G9^}R8)BgD|8fY%>PHTnYmkji6q{f#Lwd}N
ztU)8zO<Msf9Y(v%l&rx3Hab?{_HKeDQBOVK)W1AD#W+vrean+I-3oAm@}nmk+I%o=
z#!rfUBqxmPH8Lugl1(`Wc=V2aWEZJS<iub~Hs!-dGUXO>jc7+OCkx(=A0MkWS;DwY
zLr2i1_qa_P<nDnzT*BFCD;+27Lx#~(SlFV-CUk(8Z%0nb6H>jhwGQG#`sQ{n%3)!)
zNXHQO8)J&ezMe+>KNMRSS%Uiam=ZFgI^zA(kp51)EwT;QH>MOfo!T+t|BzDV#XTu7
z82D0;{3Dl3R4>ZBj8^0Ru5TpXg8G{gmyPt|{;D4&?$U+=Jw{wck&XL8U+_Mcd(`I=
zHy-1gaG#FphMf62h<~g<@cx`g%`y}?5?aEM>Xma}67f!#+Gh=$9hryzAhgVc)WvcF
ztR(&?{X<%L7S$eF!jZaI&VlX3|KL&+ZDBmRqYvu~{=}nu*ihh*ORbe?Ho8sULez(P
z+og6%v>tsxKS;C{^`=V=NwgQ;s>gQBu{*<u0&lofj-B46qkkQ`ZF(pBN-^<$`l=7y
zQH-Ys^Q$q1<lNXo{Fg={rllM6+L%HzyHVC5?zC2YFefq%_41e!vU?N}|GCi&t5l77
zaZDjuygkHs=@0!mUPVQOStGl!FN`TByNJU!#H|&@jAwZ-?wK*AWCaU}KTWAQkyWTC
z$CQv2Y$pCQX~%es@5Jq(hc3qw^BL1>AL6NaI6#w01fCcP(33m@+eZS7pq#+tBLT+U
zNT7Zsz*Myncx)uV1osp8=}3TOah){8>%<TJIYyO^597fdTPIep>Eyv3r;Ox&N!DT?
zHIiK<TX2sU$*2h=yRo%KGUY=a+#`kpx{)lDq!;_Jk*p+HfO|-P;KMm~KdOekdIEb>
z6Z%1-?g+-(j@c$gCTjrmfJ@(c#CWpvj2jA6n~qt^#{F0vrx)ap*kK*A!=iskeU~Hn
z2;WMeY9v6#UIO=z1lXi={7~RN2^dfPY&M<1y)J#zM_e(GewW%F$qB6*eUF5VC*)?_
z-KL{TC+;qnzP8Kgsdd6opwe`*p12BeTsx3kU25Aznkz?dF$>8G;>t}Y%ZIzubh381
zJ4`1F$8Rs)?$UEU=9wK?=2BZvrnwUIZ4x$mSBtyVbo8zTcZ=!RR5#9NIyU8q917g*
z(pP-KGduDom)aCXa|P(lE<O2EV?cgfsp%L{6YfUS$%^4NnNF4rw^1AygNZ|d63#A@
zXf7LlgIS0H`Eb{pjsex<u5;;e-NvR`amA*S#ll@Hj%)hKLxF2J)l8<jbo2(Z5Cba0
z6`76!RpYKU9h+*#U1d5p)rl)~=@UQW>K-)|SnE<NrqEmxx_}8LoQJ%E6_Ri{`cjv^
z>oe}bkr%r3BgjtVxwP>)&pVTb0%tLI$aLg#b_Zk$a+yotj;u$fy7WWHHsq-;eF!;#
zoXhaP;Qo8^P#~TyBMXo-T>3U-HFAne--m2LM!NK)$X?`045f#A?<qq83rpC;y*Kh(
zeM=8l9Mr${ov0?%KlBe#ov6R+k$*ka=HM6LKGm0_(orAk8&M^wKk1L7>QV3N2T^UP
z-|K^@0n`C~!QU1~dZ!Kre#={{A=m6ha#blL-l|*vK3{+EZ??!<-2O48m~(AE@n1_R
z^QHKH?61ZYW1%$nG`Rv$Oa-F30m#F?I;I$_t5p(zg<=bB&8V04ciH;7zuRm*xR=Bk
zFU4!F>74t;s&{6D*=)Jko!EsoKk5a2M=!4@^xABDaL<V|YxQEE5v!h<NiBEu5Rb?^
zdKcRAP){1Q0@SL)?J#OdkF*x-6Gp8YW2n`KeO%xC<-$nM49>BkV{oJ{Vt?iBXHK>;
zYFA9y{K!i5BV)>N9BEC&YmG7q#`UKY|InCn9DQ2EOwOb+YNCs_($Ei#DZ@d;@7{3c
zG}|(6cWUuJj!~PaV{b+O$k67!Vn6<eG3t=yJTaU*gE{F_9{&Cql^jdXkG{{)!Fx>n
zy)kN=<hs%RU{2~eU7R^%)KST0qVEpoWcGacyJA#M9JxkxWiTguPCLGWeoM|8%c(O)
z9TDwCZ(+>w=puA^j9MYO7JVoE72S%yBSvi(-H*PVF^TrXan6iUL!$H0x3N7p+K;|9
zMy(ayjJ}1ji0(%FV$?p-&Unt1j6-xL`lc8aHw*1UZ)Oal8_}gPYOCmW^o=ok@>>a$
zWE!k)PJ&~WM{W>Oh+h+<Z|j%WBMLEvMjcf+o4b=5e8G1srOG{D3-QaO5+?@aMKSum
z{`rxvSwn#f#*~&LHH&zbxrs$u8Rkr56ME^tt&z`SSB$A+%s%ncnEg47#5#K@u$b8w
z?L{wO_C4q#^gL!?bS-)ovoE?89b=SFFrH5O@l#m=$$91s1x{uKMCYN$u>y0^esnl1
zfVMTG2X#yT+zF}1T~asx8$Go@VZ2ukAg5<2@YP6~H;M6oH`6lFKI~^B=|x&2{v$Ij
zhfF*6gOT)N%{q4|@Sd5Lqs5CoIFk1Am(YiJqu<!JtX3`d^^x=vtrh<(BW>J^_hUIf
z1$(|!^CZZLNNO85;(6HTM$*ePKmJKcPcZJ4o3T4)s%?p33AQfucKxOQ&ap+zlbg2>
z{u^&gLj6P^{4cL%pz5Tee&M&Ak#G;|vj#ll7Nuy-m}(k1B`nU~f&TGKm6L3Vkxya`
zV1J|^{C1AblgRN(jkzbCrsbmVM9;I8qi!*~BL`;__U4)T)`0|D2eOpj%!(j!3Rm`-
z>d*7pfCqiUOf@v09_OO3m)h#WQ#lb<VvCGTEYq6MYt4pbg*veXGxc4A3HFHjTybaW
ze;!P>rz2O-)F%!l*o%=Di!`5=Yq2>q)uyB{J~x0q&rC^Ez1VYR>ieh^eJVS)nUUUS
zVpq&maSNzaiay;;$-Z5WT{ctS6iBciKzc>W>lOW&)S2p#RB<N_1(MCgVm`>l<$LBM
zaXgWiArsB`5`N2+YxzvIHJJ*B(X-72uR&pE&3r`O==Lla3b>h~WVsPS$I~YZaUVL4
zwOL5K9v#a7CEkj5%~abZ-iwZz$rCkkXYx>B=1g@|;_2uaGkK&YUWAUGsaB*AuSQRw
zsn$xo89i;L+9vT%^wgPZm&C0LhXT%->WIXX&{OD}#PiUTXR72>;^pW_`WDWBpGA)|
zw7D4VcpGh{GG;A>I}tNyJl-VX2i3dbmO098<@K_B{C^BTPL{-v{kNH}Toh)t?Z$tl
z?y_3u**cMZYNyr0y-F(gFCxu{@O11^6=@?=g8b6V$lBLqd(}qCv?2d)W@JMH*uSZR
zl1W;`J&>7^r7ysKp%&OJJcS@Xo1xZv<tYUHsoEmpcI3x1)Gi4Rpg&Rv?H2BW7V{)N
z<2!djsj{g;;(t*)PO{8b3s2%h1hx1-jVUK5t^LH`H_8QXgz$eDQ%(lu_Hr*~w)eym
zxf8-498*qCW0l0)q})P#GxF^j>c|pKquuD=8QQ!sJC|@9H$$yhN-h)qRxl@<^5Nf{
zp|(q|5&hd>PKMQvZ=IorBxhYZ6!?vqGhWW{;`fJk$&s2Wr_Ms+`<T#Wyc|%4{#9t1
z38|^Z<1q1Gs4e3xb0RyDuZ9+Jq^6ukGZD*%0?jOdG?RjUDYVRl)D*d*6%c<>9i$n%
zAGwpBQry;renG{Kmt{mgH$&y5$ugp!Q5z-fN#mxDF-tfb{iNC}VIOkG40Tk(_2?&5
z<OI6dimV^AdD)Blh(D&*OOfc)I1`U4A~&>|#FZ2=KAlj4s~uBHrn!!|ZkA$E_v32D
zl#+d?m-vHbDb~$-Iw$xsrDQoXi2qo<<e*c<$f_|#WT&Yme!mnkU)5;A`o|QLh3p}I
zk5Nojp21y6)EV5FjVULKnNGaIC}$k~McA#<&5;i&R1?`U8lmlGBITnII@L+!&d~@X
zv8HqLHX30Dl8D?s8e!)0h?I>+nDKHVw~a<vrbZ&SO7~(^%E^4lpbdYE=?j<h@<c!0
zXZlLXyOwjkGyQJKXX0-%{b9+M;5Qro(G{d?v886(9YwkYf1~Mh$=kc}n>ZX+uunNw
z@L;UAL|Nw8yvP!f#w#8LxEoBz$o;tM)y7HWnBOMcb%v8S4cl?W?9eM^hels38Ls3j
zxqXxdMqcyCCbD5PLRBA;qR|MGUQgs|BQpH5M=O!5I8kKKd@p*vS~HpEof+JaiZptg
zj$3CqS@0s<T6#3aVm!N7W7h<&tTeV@hDu%~*B|tiMvCjtDz5m!VzRW@%Y#<d6`LQl
zvZ&b0f>u@(yLyI-JCios(3jF-C)ciibe^G&*FRim4h1d=TKbuZy*OyuVhQ%5pk<4-
z*xVUv;#suWg1(SGPo>RnbPn5^W{%Er7WdXc%jmq=?4V^n3$W)0t*j&VJkCkdW)u2c
zBQ@LJj?M}eV{`-99|SF<^JH=_Lr<ogTg=9u9kjBD*i24QnY3AtKFdgPzkoh7Sd7v2
zVpjz%qjR1;6vzl#wwR7x8MJJ%2)mp!``NTvjXr}uPiO4S=+l^obBK4Mm$3xV#I5HH
z1s2UvThHOD;6*2KnigGzPM{spwP-h+_yM{VJ(H;x-H&$8P@6=1elQf6$XQr)9y(%%
zIwabU4x<mEo6!T&Dme??js7}XtrhLe;!Yu2Z4;e|{#&%#C)$VZj#fuSH=>V3tGIK~
z?dXnZl_T1EE*ITswN<ni-4?BOi7rCF8Lf_pu0`*URuj)dx1xU;tyYNcM>j{SO`<*L
z@fvcp+AcZ|{cN;4B-)RDDq0Pl7q-aWg8o^wp7Zpq@p8q|dhk2c`dDrh)}0?_v%Ajc
zg#t6rO7M_|ub2EndpWYsOtEIXWsT=rB2|AKYq8lo@j6|9k9RF2vbpy+a^`y(>G&Fz
z8YlN7+1!Sg;<t%kXlq1Po2dum_z*(}{zry4t2!>=s-Z0LR1HYg4E%lK`5Xq)Z>HA8
zQ}tQ=-G(=-cH%3|{5uy=H7Z9g1@B7LOk}y4TIi-~8U7B#PcX*agum4sEW0mnir{Zi
z8{L*u?GYDp0+v*Q&5PVD(Rt>FEwP)-N*sN>v4G#G4oao{$m>~yb|$A6%^8$h#s$ii
z%PCYXn8j^jHu74L=1pcf_8RqoWSWp0%nV1e)`=}rUrNStk(@M<!5dTTRqBG-WJ-|h
z&5YEk#}=xcl4(P(Gcz2G{6-X~OcgnYOwz@iF3pV0LjkrxZInzk@=Eo%$QI-k>Y&J8
z<mGBmr0Wt+l4^kmnT@<mtr1y{T&*4u*@V1Q?G@RH%u`>AbmVbbRIziB>Bx)K1tLq3
z7pW~G>yf!?r^q(sh3Z3*1IQc|nSe~Xl#`%ZF0ufbtu`X<)yOPkF($}efwl*K7Dq~=
zMPAbA!7V2{kf7Z2_z-&ZYR)au`u1lM?3qX}hxMf#IzDu=QEa?4(ukcerDjbqj-JE#
zxxqGB@ymFQ;`mFnB&dwlVODzzftYB0;_C_a0^~HKBrm0+ql|LAmWm!9tqvvf7HKcq
z%07a&x$=hsL+aysmbnw;J(*N3o4_$ODUoHCqpuwM&(ZuMt%*RdnU^K)#C|@SU#vMU
z9}0AtdD)@Uv41x6UgOn_5(4dJ-q=~Nheq?MS{s1_W?uH~0qh&2`9)gN6+?j*GtXr~
zE5N=wnqRC{6WDF$WhPs&&yVK4{GQ=ZpizCbfbpG1-gPA}G>qn#XxRkn%)Cr}Iribv
z{8FunK((2d9k&yE-)MfB<|vSJfziIqUpjWHx`55uijjA!%?mBFPV#CsnA_Bj1(tYw
zGxFBy>d=+)x(E6eseP&~YK`1b%}S<bI`TTTnb$E?X#r1;h4^Ce@peD*+UaV00XHs9
z=&Pj0Tz+|?1Cwu>RL#0pc6jO3cv%!LHg~$pS;H0!(HBUs=Gdx`=SrVq<ii+R6E;ij
zT}THykUuc$tEd#7Y$MijnKc_v(Y%;5%*JKKim<0mS6kQegj|DOHeEfxHZ0lRie5Zj
z9g%n+df{}Qwk_sUwX2X<7!1!xAJFrHIoU&e_&L+nwsq8NK*tAjviG&&V`wv#C-r0K
z>A{?=jAuPp-05mXA@y?6lW1d+MPBGA$4xXG&-dsF!OUWQBbKZ2bahnfXjk#1AIvP#
zl5k<PwwUv3K02V*FJhi~5eqY<-d(}k>|V@$a1)*v0mOH$=RUa|KVbMry`%@Q-<s)^
zt4MpU9t!-|^tqDH#`BtiRIil04}Z+`4U(_Ne`ET?<n68aucxUaSILbl`rm5M%M+cm
zXeiJ>O(kDVI34{jb-@zCMaX|nQ(Glmjs8kKF5zZm-!yed!ky@Us4ofItQ+KYgjGv<
zJtL!t-7W?Hx%hdu0^}!Via%^-wN>Gd7~Z(lG-Lm4ru(F37yf<qmDF@x!%c{p%2-Ct
zH2fjM8!wg?VGq*74V)Bf(QSq{pXOWfzh|tH>qoyG%*kHjxt1sYX==qa<nqu5f;rhs
z{P?$~sqK<$M!y-%$zIZpZ=I%wu3_koVma5up2m!Lk^85qoNEaep!cbb680loXzALp
zMYb07FVsaVEVEQjpT%zL!S7L))8+o*T5bYd*Kx;W<R_#Vm-Z|IFH3=iwsPc)Mo!<>
zagr6c%dENXbnX#4@y`cq8rS!T>v@_t3Iy+R@Xx5nPv<OAf_z$}c~!2*KBWdF(}rv`
zGcrK~*eBJRGsq;}z`dE7k?ASG?ofLrQ;lpeGcq|X*vHKwt~e=-A>toXvFYp^Q6+Mk
zl+ink-yqY{2|UV-UC)dap>=b3M?SFIYOoKfjZ$q7a+_IgnQ>+7CQ!|cN;Stuxg*<4
zIlC8GHBB9rZ~^*06}epIAL(b6Zy?-+zFV!4a67Vcn%XAe0rXb2Q^KB2+<URVNH`mP
zyE&9?5e%gaUna*<yvi+M4{gBTV&o?*GwwRu2>6Tw#%EUhv762ML$j#wzEK{3sXxb-
zh1?jdYAm#mK#5t^xWr+vGxGYxf7@*bu*GUt23u~}$d>!?8-mr1)pnK2J(m<Pww#Gw
zZ*F<*X>7R^UnuE0wmRgRU{&MZqm@8`Syi^&i@j1h5~KES;$-67Eca7}@5<n0l8(RJ
z^iemGFT&@WK4lg8YW!uUFO+;UezoZ<CEtm^)bzV0Z@o$Gwv6@<OFjvIiF7CC(NfZR
z*ozIT+-H(5$6sXnT*)`$b4_0=`8NE8rf-mZKR(Cw`z7zXSuS1Dyy}yDCO+HrQD>1a
z!Jn^U&tg}oMV`mrxtU#|1%0lBjfeVfT$bsWGKWttbdj0nOAub%Ii}-YqX2idIQBzi
zCi~${vLB+)k_JXzf@mjl=4gbf14LGhMp$OgEj*cyM%Z*Vk(Fvtn)e}BaQ42L=IhbR
zC2aJz6_;*0s`TQ{P+y*H_SShTFY}v@O{U{c6USK}nKn(u`Dm^hz054czS@jiYC6sY
zowy~Y;}Edk#_g%;I2R=07OM}>p}9QdqG@X4Ei_k-PL;6HyGC4!>F8Y>ZlUSeR6j1+
zaPnfftBm*2xZ?bP$LdUU5~pC=vzMSxoyH@r`656qc7D(@rWWieLCYL=V-tf`*6wz0
z91P3HInW85KW?SX0`%NqF~;S`dV-dLHDTvW<3ZTmVmo$r(8|JLXBn2aGVb8fp3})~
zw3&^L4;Eu^K5SgjGP-(fY|ygBR;(*%*<vqtCa1jf7`yXM-o9WS%4jnkJ&is~ya?@N
z$!;fJjXs&3?R;+CThQaCsZFAL(c!d%wz<mXuH)eOoWrw`->5;6<;btpf^1|H^6zSm
z$WG*E>H(3CEknG*zgJ{B@&omy$P(mxD)s_oJ@TNsKx7;8Ewx4D0P=OUQ)JRsUg=gJ
ziY!3Bq9SvU)yNmsa*-{_=hQ}#y~rok<04%Za%DLvG8?JXpvZFMLu$c=$R^~E)f$nV
z$a~cTA{~`-&fj~XWw9+2d6#-9m&<K@t~^-dD-3UboS_N7#mqmrmCJD#{!a1ys>5Bz
z>!}w}^&e6-3(sq(c(ZCb{uaZlbrn?IjlWquzwdzDtWqzg>aQ=NYV_T5lP=z@nvLIR
z<`-5{wGw}Wcs{a$yv|HLcrjIb@YkAE<tnVX?vd+&(R4tn=HjnbmP>d=#E)D*Rjs&7
zZeY-bhBj}Cy7B9#s_l|<`gy(%<~Sg<O#GUuYDjWEbb+DGdX4xir>eDglWRv`5zI+F
z>%CkTrmB6C^P=+&ZPqKoUp7_6-9xSxy*ikadad|Nr>d=z>qqAWb22{9eOwEssw0xi
zLth-s$$f?&e-ZumlWRujPF0&kccU+4Y@(g_bMl|64vEf0UqF9F`_S1_Rr0;)M)diN
zNpw5<ys2uNXloT`e6}ari_V&=j*2cq|A4XFhpt7RGgak?ZbhHXI7IiOGpDLuqCG$4
zRL>Yh=b_J>swUpgJNAC`D)sv1!Ix*6vCAnofoFv-beekTa(3RRA93E4Qm5K8kjX}g
z;LBqr1Wu7+i9DZZ^|(1wGGT)8o=_V;p0S+H^V%`=OtU1Xz{DSOo-j(Pz!khUmXD7L
zHf-F+_z6rfi?c$_I9sq`nfPuz^|$|!K{%`B$>XJ~EOQ-;<nNDYnFRjjR6~;Vp}!nX
z%5pXm_`<2y{)l8d`s3lG+yPl1;9Wtd+9ydb`j5j&*-45ByyH}HKPFj={@rj=Zhl$`
z{Kl!aO0pllcR0zcXr66+G}ft(NHP!o(r{Alef$JoaH<v6B%9GsNpha?t0LXlCm5xq
zoDcFL0y8D|pP5*lF-ppZe!xu0+%#hEXM_)sYDeE?retQUHJnQrpQOBK-d2%q$-ER{
zZ)SAcSgIOysajPi_r1sxwf-s|t!08bu{Wp(3V9xicu3y#kou~tP@aeIMe3#X7LUCC
zQiNMea}S2a*{aZM)M@K2@wO&pfm*YkmfMk+8Rg}}3EIPQOGIscc?0<aqhaHbtN_d1
z3)`_(A<vY4%{nQSKgY(shV(4n-i}=ERNHFkyQa&H2HiT<mV#WWK9-sV$P{z1vLMyi
zWaj*89>4dXlbq`MtNG=NUUVW8F7fDEF1Lm@AN4cwZf5);9<fW&u3%1fje2~vQ{@y<
z?*Q5v%*nv|@lj5->tX7-9~lZbf;m~LZ2UOdlzL@oyP?hPHR8irMxA<x(fpA+%FQ<B
zWB@;??%Kcu?%@s0ed43?FfQJB!#)rDjhRlV<xag4->*`yVIrE4|57_j<uSdMThDg<
zKTY3o4Y!^H_^(WVSn?hvZ}1!C`y`)@|A*<_kC6A_kD5N?TJrVyFHK)6`Br?d>1!n4
zi~qZN{aQ=xNhw-Xojj*w;%#2!U(NU;Eg#ck8h$Ma^Mz@=S_9^Dbx|=jT9Kcb@umEc
zK;A_#4X<SVWJqofin-<UBL6Z)9etErz5?{0ROEGp{mA#Gs1=HE6Z##sM#AmL-%nB7
zBs_qAOYM}f=cgRoQ`8X&XQSUxgV(Wpl_OtQ3$8~tA@{2_B0G`4Ru71DJSNZIdqt)r
z_o^>NmLOYH><!3z<ge5PBHNI>!F~gq>qoveMeX_tn{(C6t;s<NXChyoqJ|_~f__QG
zmT)`Kh}@+%Z{~T{_~QuLVFEj+sI@=k7}p+`dl*ToeF+x3JqiCT1C(3=x-nQ$&R|*<
zfhVV^eUjXRe!{GH<kB#^y&J!sc_e3dZs#43DJt$UbSC<-DJn;_5B<|AYOCl*^iLRr
z=yr4+(<j>c1ouo+)WmwU7yT%UAi4<s$P~2+ZL38;9PF6!aJZj9%@no0o{{&UA22%>
zw}KZhoDFijwRaOIlyqd3S#OE)h+a(KJ~}P+YS8zX^$wNrFxrB@OGVzu#C9Vq=*Qy}
zbL^0tuQd|(BJX6bBwT>LUG0>xA9)+2k#H0G7Bwj0cI3@dRPuJh1L)1_f>QeA`5A9`
z7@O4}e8gtga`7AGaF}B&LtbyDPFAEE@z+Vpe0pxfUSoFn!QXO$)Q%C@VB}Tl6CBj;
zC*?&`SzCQ&9r<khdb7Tql6=^;MtZ^$V|g10tWjTWX440dSD0;YyPwJE$6qcfbJMOy
zd0Lj1Cm6qasbvwk)X1y-4Q#p;f3Xypt)AfGZK+pZm0Fx?@FvR?TPwL-b=S?t0p5eT
z&@`$1*;085HZqH}6wC!;;%#}zY%}iF$}s1fW{FmhInOjpwf&fL)g&J^x{+CGhtJqX
z#M5%QFifhJggM7Fi?m$K*<#{tK4hjD_iA;RvrMx@Yr&jpnx*{pRk<HZy@hLv^%<UX
zIgFlUL6guc)s|aW=RD*Jc5DfkqnE3L5^hAMb8t$y4Sj}+y_Il3@^lW5M#8RV<+*mF
zgfo$8?9dV}K`&E#C0vVK%Kj?h7W5Joc^lzwq?i5kDZ-BD<Y{(|guTc`?4J@YK&Psm
z680lg*gqxQgkGoyCESioX8(Me@Bn&&x}c1(=XoxE9J3P6MxUx4m#_~xe~LOJ;d=Bb
z>Prc?A`?0CpCR0fo~M@IPT2VZ&#WB05>7|YRS!tG2<hQ`BH?QE9QC1un~}3Q1w2c*
z6Fo~UxP!2@iOVbVE#V||yxJn+JY*d6E#Y!>tU4&+Mx=}RmT((7M#bJqxF0!_`F@VD
zYbPJJQyV3miHv5x3ENB1)9KZnJgnBDr!nCYZ$VF`Tjj*N(N1RkdE$;;eAbSBN!*K`
z%#=&K06mG0N!*V<nK_qu6FQQfNxU6Bfk}UX_yBqwUE4z3^CItKG3zgI=F3A5O;)=^
z`_acHt0SVD(f^#RCN`nF(Y=$^3N(k=jxD@>_Fz-kI7_B_SM;yZcHeGZxt^>JN%=-}
z`(!mFx*dIJvP#}bU8{O%E9EP8QrD|q7yTmIUi1=|q{(U@W$d-+S0}5ZqFd3sC#yL5
zMZ<pd^OIE$+UEJWTsn4C@}>kQ=v>@(aeNXLS!X6V=RAvh*mRtc+HuvUlRL?m<+ag^
z?xIo>veHa&<Cu@T!*KG)v;4T*n2{Idoq4oxvf8?xySyIs%_{P49!a8`d6Ff31t-F;
zx2$&lu(X_WmrKrvyq*DU=7FXG&6~;+K8)O8R#9Ew@aCpAK%_`LPSTe2ikw#uiY!1D
zszH&}$hB(0J;)X$_pl;+kyoe(M7my;mmv3w%to$OUy3Y8=BZdevI)t1w<0@{xoV3@
z$7^y@+$l00nXNuV+DnjGEanpowGPcId-oDPfXoc`JL*5u?|vd@spXRN?BRqpS#7$B
zvF4&zs*Mt^M5dc{EGe(m-fJnJG43yu%|HKVQ45~p_C4bXOS-*;SR{Y(>PgFdz2<@p
z)%%1c%J!Q{n)aTuG+6GmKO4pWI>WEwn)wC#18b+K-UiA9_@i|Plx2tIPFp8`tL`Ip
zm&nD)r<CPq$SD3;U5mO)<hA^rIzE5!Br<z~roE)@deU;6?MePr-EH>|K52Pv@X4@y
zmcG{Tvi)$ls{C`<2bQGQuJ}vXGx3)DUkm#-?6Rn-<?K0?z}NZmjXx1+pZ(gV3E@}7
zPnG|v$GFlFue3{6)8@bS;r#HQhsCWbx$fFc8;UL|x$f!>8E0k9UzB{aSIfw}W#h)%
zFDy+}rIC*7)i;rj$Z^B5#jh<$4*%4`|5nH!u+uD+7Jga{9ab4`{0y&DNh#r-mT)yL
zH9ThG<u+c(qiCs>J1H(GXWMz>e3HXaQNu%k{Eg8)XKUJ>G{zsv8~J(@73#N;|B-Bv
zFCWz1$cGlI`RvnbW|M;a82OU2q5i+*{{Vd8PkR~Knwf@A`CE+{`FzHcx#9n28TlGS
zhx*&-^=vqk5*^?fU=KNi{~6>$O&d?VH>B=nHWKRl8Ts9C1NHs`O{ekSoBWvn>SQH<
z8PQsyX^&B1_^TZq>eEi5w=7mI1*Ah6CH&VTZ}Wej@Fy~-(C8vMKK$jD1rG6N@+S*_
zPW`7SkWAg)8jI%sC;vZ6vZigM?(<T%Fj#v%#s~zxlfQ$wmIn6X)2W=eoByQ6ZT9q+
zRQ@_-LpA77-$Y$29HMTHTD3SlD^hm6;W@fit)1eCvrJI0EDoP%*{D8T9KO=>v5NJE
zPq#Fw!6}X(SQ^w8aeLJor(?RM<F%dM@HrOC!Pj<9b$nw{eM@-o%g79OtK4Wuv^qSU
zXU2v!Vx^hk4wW&_5v6jo!lP8mY2hwaIopw;YSPK}WrVw4bDtf4I85p}Ro86u8)dR6
zxO=+8r81&JODjqjy;gHtc=7-HDp6s|J;&iw8SxIc>dOpIRPJax#<%E4;~gFeIaEV}
z!y$cjJeo#-GhB{rwg0qm_y31Ys)lpJ+s4bxziO<Gy7j#9_bubq1?PvSOnTR%-F7s5
z3d`i?zq-+iiqy9A!`E7-t1r(FPdtU}ox@p{X+sg;_>$$igE-9@wFcIx71`m_!tTsb
ztFyxwrrdvDartnXqTjAD|DQ&wRzaMdHbZuouXD_Jr+Otjd|lZ6tJSm%!s9CBVN6a!
zayX_~!?mla!oC`{i279=w+Y1Y8fQ;!|6z-E6?y5u<gW?#klj<;K%5^&y)9RmZF1JD
z<i7=K<G=sP$RO#khigS4?QdSqVV_7sHZTne*}%0S4Q(B5NQ<KKH6fL6qI2^@>Q++s
zI`YGvyEfQ4X}dVs=0qmK!Td;v`K6>vO;d|<!e>ry8yBwKKpEmuQs(os)!Ll!_(?Q*
zeMr@N*Qjkd;nNoNPYl;K5@(T(;$NI?{70`E*9VR33I9h4|JhNn-bKvQ8<(s1bHbA+
ze`(vV-AJ5rQ4*hcnVNQC_|mWmYgEpK;nONw=*C6lr5geApVF=zuxv-04Y9EQ9sbj$
zVB1p0&v59m#$SBl7h1!$OUOyTB-c%SmUpCIUDUleShtV-_~2B`F{VOO-(DC#uA+%f
zTpH|z6zZapr8FX)prFx-LK?~oY3QDiPIS|W%YvPd`X5tY9&?8$r;@s>gLOY9KQ7n_
zPp}h@{XdMI3B1(v_y6D1w2+ots+-nnPl}>_ol?<WDlJMkHxemPgf!D4TS>%_;<m_^
zkez5+?Af!28?q$(PWV4xpU;`R8{_-`^>EC5&N;7hUhnf>KJU-xGc$KEB$h*Bl#!6r
z`vmD|BybE!I0B9I0?qYE?1aQgMnd=xz<2gY#K4ZmuE8Sv={8t26UKfW)eq;oy!$Ye
zGIotZc5Y(4PFKE$@5VrIj1iRIt0d@(URA<3Aeek9;>H<4`ONkRZiL{;Mo_lbz{kOb
zRjR@n32=7<KY>H(v8kJfFh(YmIBtoh-vO`(Kqb~(t&KRa$9mX=mv|eAdst%O-vHke
zNTz#YV0#(%IqYSu08PRIBwUwje{@j=W<jK<5s~9t1nP}mL~ss>q+g7<<Gf%=kH~h2
z^fn^GzYYF)a2X=SVEY*M4eSR6L{b}0h*myzA-RvY+_Jq3wy))GYPsX!`+4{|UhW;_
z?rXV)e-C_rFLwdh0ja}>a<c268o*11L3pzh%+m~bv0ZBR(C9G<DmBm&3?ay$W5R`{
znGKc2k%NPwvh$#*YlWX?wfr6>*$^X(Z`flbZ0W^1&m%hvvd3VfumS!t@I%2RGL?z}
zKH9)zN${5uIl_okIH6xaWG_V87!mnxB+zgpf^XPk+xh!%Uaa#yBJ&~A&WM1o=AIzH
z2!I(P#emzU4jUb9P_+Yosuvq`_Y4<E8Zo0%Lq<pICW@fj9<%12%#&>HmEwITOFapC
ze_Jz}TQhze(=cepKAO=HKh;xyn_zJgSPeXUgJvXQ(#4B4^PgrU+(O#X8KVlS0BHhX
z-`|WPz}*Zy1G|hQb1jTWwEm=kNI6DfS0f_7EyNjzpNQZa5Q!73o)_zav1~Lm>-AEI
z#EggnY$d>WfSGz00B&I5IoKmcgneTYH+RpGO#&jdAX49m$nPoQ6k-<<e8Uk&tVW8}
znO4i`!hcG_uOviWLn9*F(*!sbV1`Hxa1-p2)OT&l)vel?x9Z${T%NPeofvd(qavpA
z;Yx$K6p{2#FGoT7Z<_wd-+#AjnOf0|-DJ~Lt5Hm_BT~b8Ni+jQUGnrd1`p}Or~q^Z
z!f84X4WKiTk_f?EE7gE~g^r7BM_da_da#$2b7wM=bnbb6NJ)2S^Vr`sw>)L>D@UHQ
z(37Wr&O|!-l8B#>kGP{!PYq*W9|>M<uZ{WzYx~r2U>DH#N|-gUWj<p9|Kp!1<{7<3
zq%C3Hc#N9BzZu^j6Le#9y75r_G_d9OEWI!dySfqIpc~UKL|kPr)<ysHV#2MV8>?aj
z5CnLR0OtbCbYn5#D%eF(3C=Wvymk2&^hOS%Mv%Qj5|ewm^&`QO-x-3{@DoA#y+HgK
z*hR24Te*N?k|c+D!Dju#(5-`Db!;N20NV+0KHv<&BCs{Fi=gb2jUdOahQ+)UeMuw8
zC2JCs`(b9RtNxM?Kim>^wQ&?fIle@K3($)p&H=$VNosk)F7^mM0KvN0L{Rvb!CwR}
zL$CmB9qb|~`&mX%GddDUy~@%TajBoQ0!DGuKsg-K%QjF(rKX(}t&{3^QnW(#M<T9-
zxoAAUx_=(wQunX|%%#jM;;-)?YP(K~#sn^D)=pU8Fl#Nvj}3S@;>uvfkoN{mZ($+L
zEIALbi!O~#4V0}14V;H#rh!HGMBKrK{TurLwx(mEjVtdgA9V+zF9%NgJ9?)UjEUCi
z9$O!Ahj{)^TQS+JCAIbE&j-YnJBjSei@LIwTQTlryKrf0PN#DDi8x`(TbM&iI@iIw
zDrwEEEr*Sbe4cEVv+=*2aN5}Gsw8=L#8vdL3Gbqz3eBzHVb2A7rD3b_8Uy>?*U`Q~
ztrR@zfvW@l)&uJg+br_Halk)V;%Z>urS0WY<2#pYP`Q7lsQVt1$|cjf-l^ZmM(b2A
zSQl|Wd5|y1rkb4`t<imQe$@SlUfm-9yD7~QQmI?wuu>UI^FE2BiSdiYh~fKQkhWfz
za)&0?k@jaxE7LuMxfX~nO1dN3p{DPOxZgbZUhA=L*p@@%SEC{Pz2L7yZ!}zt)PESZ
z1@@gvjlD<6ER{bldRU@d<*57JNXTgeK~gx01diHmCS4^LFW`L~N!}50e|kZ`GD6z#
zZ->x6BcxdO6YECwBBX&|4EArscEH|Q%TQ?yx}0s#!1)WK29AtZ(0@0f&kUS`+aqq5
z2cF~geJ%RF*u%~MyTY&<I1cQ`9(L}2EXz;~@JAl_Jixbj;27ZD29A_sb3}`^!C)>X
zd2#tiJf%NG;<lHKx=--aY9_yGw0<ReMu~ib)vV~Yi2KZob=?FjpDqZeSB$z(F=|*T
zKt2IhqtDFtG~h1`{5=(_X+&^K;^qdn`5+*&5h9-(5&0dOsx&@&Xrci4mmYknmwGa(
z_gHE{su1%wFLe^^SBCu<dksr{oTc7##xFtYb)?>FspXePoI8j^lM)W+pa_m&-*~~U
z^@vP>$k#?h0V)yTE{{kI?5l>|f&DNea=Z~Kzy98U$a09hVnpOuzIUqrDbdP_qO}qC
zx`)5U%RPqNuUT$kDv<jgFLxU38-~?iGpTC1`&jO`y6+24ykp3{!*a_nLY(_DMZh^I
zLX2Q<dBLvoh;)X?n?^(dN)X_Fk4PNsJBGa%do?4{*NC)ORw3A;OoqtYMnrxkd#8>r
zR1r8CzUSc=d%1g(`(4W|Oeu0d?Bz~_ec!MfvFBUvewO>Q(v5@Mv&g;Ea?7s_aURVS
z0q39y1qA!h3wE_fq(4MHFd_<Y5CI<dh$O*oHte(5s~eI2M&zQ`Is`-(LL_ZO<X5(L
zs>g(A<vua+TRr>|FZVEVZ?W9M9766*UhX*9r?9i9$cv|14UOhkE`fO)szXo><^B8Z
zQcq9dRP5-hXDm%mLX{uO2X44Cu#>&d)9AHYNdE;(#8z<36gL2=jTEnmxaU0V0*~T8
z_CC*I!#}|90sj=3%wpl7_<~^v6TgC$XJA;KgnQ}dkwJa-LgaZPBFDW1dWJwEf^$Hm
z2qG_f!7lZPl%!VMjfn7Hfq%{;k_P)Sb`?fvwYP%ZPin)YX#GTbWyHN?amRqIXe0+&
zAHLY=kbvVpINpb!iY32q$g!PR;)riJmdJCj7wa;QV>vk9k4?oA{#)=b=}IM2tt8+L
z20k5!ys@d5PNSE~F?b%d#L|Bc@D)ou*b>LUKHy<5_Yzkj@xzu__#eQ(1}>8~4)!6#
zPRCx!3NR$oORvtI98{ndL>@IFa{P%vJB$d<;c!{aUxxRBUEvX_1(C;%i10syf72u4
zAo3V?^^(@<cY<DG+cz=w;)G}{K}S7dp_9NX8_yH0qwag1Ej_U2(A<chI!b=}b9e8c
zS4ZI+4kl=B@?u@-(QF9KC$Xucg#Qivd*Eth3YG@E(!dLGIFuW0r_)iLNnB)!rT+uq
z`<7VKu{KfwcD0Aq-)7Xb)w#DhiC0--;rD_65Pc?Z66~#py#ad_ur!iCZts%3BwV$2
zgMte5hsYWuBFDc7v<nYa0Ox>6j9|BU!4`Q$T0>;55fT1x@E?QA5Q&4m1G_p(>%@DR
zj+&~Ds$0t49C5d&-kHQT&0Xd;3bzjfH{I2#bOskJ$*UvoPBV|gyvxiJLvtMS-DW-&
z^T%eMYUss0!!z@ln0K4GR$xxVG2eq3OWw1kxjE`x(QIV1wH})~ShmqL?@Ju_H}BGj
zTVmMnu@9s)+OrO~ftEM1e;3`K-p3Wr)%a=4F24tRv+|&4+nNY&lXc)AeM!V!>jitp
z>a4wLIX2(d7!d_{hydT97m;AE3idj~{)N4pca77Tt?E*2Wk@dDmFyV51IIDfI0?3t
zfKwiL-v0e24?(HG^&a?qz&{wc8n5a35qG19o$q0rg1y1R&IJ3Dhb;zsQ=WkjhR*vc
zUn<g%tBuKQ^p{~$#bkPf2LA$<iUm)6b+w^Cu(ra3e{BWphG{38`{mfwbHYCg{x|e0
zSR5$;dy8RJnGdYzMr3A4k-N4_7aX@k<Yprx$Hxiu2YL~~Ik@a8m=|$#ykOsWM9Ni<
zx}*^iek1rl!DWb~!Ok;m4Eu)xkyPdW<titdb1z`7<(BP9uzy=_9%u*m)??gLyTrqP
zYq>RQI+J_8<raPu_z1gz%&19%z0|PHvG?#sO}u0@;TB*-pC{u&dM^r?x51^cuE8(&
zz?XPE-<+Pm%)?#)wv=HtYKjsOccq72;9*;Wy&}Li4HI7oxQqu*172j{A7RscY-;@!
z7S-*CMEUXma5a~{H?7qjJFR6}RB9t}u!}wHtyV29sz0)pU4>0UM)>2v9|B(0Qg0W4
zo%T(*Q0&Hj1XwMqa8L4*aC2*a5?;|j<ZK*uT9M<i1S)Sta1Mw#f}QIHTjLSg3z2iM
ziHPt$!B_N%#K6ul?3>t|1w>L?rm|UHNABsCTedjZN|yU1%Uv{^N5vj~wU_%7a-V0p
zh3^ht8^=r$(qJzz>@(O8w<54;hDB)GATPMASPqeyMnsNX2vpUG;2ib_!7lQGt@4QM
zgvf<PMEI`Y^F1O3U@tao3VWll2(O(@5yp^vmgSbM71+ZpH*P`h^u-aE@bEW#xi^w~
zw&fQ7Xz(??+{IwW8+JAJ#zAgAOOYu;`w_1OMVJhc0wW^F)&$~yaxf%u4vRprLNC}Y
z9+7PjImL(w-v)eLk4OR7Qw@6$_9g+5)JM}O!Yp!6u-vk>16$8>>m*tmao{I<_#!X&
zc5<I)xrJ{JzJZrJ26mERAIIJ_$jvkfi!jBF`P>D2jD-+6-H6DsBY_$j5uC%`m=$qn
zdcjtDM0P;r3?m|ZC-6-?BE?|OO6@v_J;Y?(wAGkL*su1V=EqNu#@fSnswHSgklMy}
ze8|?FRDMs;V>{UpO~FrdSAK`3rk%^(h$K0Nc<}2zezV{=7@MZ6AT{9E!sAy2cBo;G
z#a_o!pJJ(J7u_DD-b?BeEVcY<5$C9|P9q%7;b<Y)a4%TOBQhT%!;FXm)FwbHk4OR7
z5!h9sX~g@iU2637XyZh|1w1vkn1jI8HG+jk@ajP?1_bv(a1?&3P2flT)*->*kx*@7
zI&zHkV%^~JTL{0Cu&GeO$H2FZaNjPoxWoXDHt<PuFd`F-$Y<qS1S6^(BWkP>kzf5(
z_Zf6jnva%^@!&UlxtEfAoaGjz0l7PRwJ8RBvSBA;uV<;*PbSr@VV8vuTx*fKyQP+2
zBjR)pYjAS#SgSV9NrJ__V9Pur%OP^C5mA7~1nBA!DFWLAyE^H7Vtrx5cw&lSTsIM;
zuX<VNbAi`4o~Iel(yNXPFKyu26F>Ep{F;)cyY&^m;aDQoabB#OJeq5u*&CbsO8CRU
zAIpllzpn}a_c3q+hXzJuq7fN;R(OTc86w9U5&0cKpk77<-++iCRzEM+a*xP5i1alg
zi8Aa~^>@jS^9mLNJix$LlAxgxIo*iV`MPCzsRNPzMnrx`5~q(5!8ah1zA)kjd9hY_
zL>_?1z}Ch@0gfU-KaWT;;8xhxa~BivOY6C*>bY1s9>TW73Lx*;;3iC}_{4H0D;#5p
z%Rzj@U6`5CCe=^pnY*bMqyBFH5Ewg?tD)kF5!c4dx`F<~{yZ<vjK(Tq6-%+W3F}Yo
zQhyP1@M#g(4mFy*$LZ9S-c`;vc3f?-sjp=_hpU{Se#1_VxDJNhjeQVRDW9q|l5Kh|
z?(4M2q;0xPo2kigpxQ4@LUdhIFgD^kdGKegs2#D@;<~1zr51h*_z{*`*ELD7T@3pj
z_Fmo&wWoHdoiWlEc``0yqc{pM$KglH#_+7&17GUxGF#K;t{(OxuqPQ-JJbTO^*rol
z9=1K$Sb$}Rs>HJZLo`S%a1w9>1MkCL%ntQJcBl*4&(}w<YDvG1){X<GYMp`TqNWHI
zjXdmkRxMrB^k+Zc5F5|{{}lM~;I)3n5l7x8hAoY9mpTgdnc-AOxK{_R3@&Q+LZq<~
zk>fK2Dl{TE2Sikk!@XeNdqieIq^S`R{#o#+dPEAq9${F0BJHDqNNUSP>`VGriMnQ%
zTej!HGG>D!OeS|vBt4Rc?jHUJ%dH`^j@->Hx9~53pJcf;WMT|~qYPUY`|;k8IbK8N
zNQ@dXGG0Z`oe4NIWQrJkl|1m}UblCq+w(l^#bD1eta?2T_D~Odg@=uUtsGzzVdB|<
zr+VN5z*P<W7{ofzV%BbbgdoX_8{X>M@JT_rsH=jX)^+)vMeC;#hanMs+@LnE#V1Bw
zz87nQRZkZ=V-h?lt%gx!NCBo0;9LUmHs}IPzrzfC1<E!?WQq|v|BO<>aykYg)s2Y!
z#uG=MGZ7JdgXOdcA~n5O>xq>pfl*94L!^cgQGinjaK1+*4Y;;}pTpkKh)gvibFMll
z*p;k<NG&5Gzq5(MNiraUZ$KnYth!;WM4p`P_ewGWB6W<2Y|{vEkw>HeaKwmQkG-uC
zIopV=c=Vfq$Yh8(BO<>E#9`MN5WzPfQaqe%x2PBF9*;;*h?FoQ3UDd`aQ~-kAX3V(
zZ)5LdMA*wEiSKHss|G|iLZqY-k>9z*;YvRsGL186*f%WGWxQYyctj>cq_h!HfawI7
z=MhPRJ=n0Tu(vZJ=NOR^u6Zy<W<lg2BO<?vYS1Od1m}Q=&KHMx!S3~l^oK}UBccGO
z6W~&hNDORw!+wUnvk^Jhi2RY?BB;oAh?EP7B=Y2V9)T`5A~**`(!+Sx?*)6%BQhN#
z6^w}R=Yzk}BT@|ZJHxKW-rk5zHzLQc+z}92Sgw%g{vne*Ii5kFMMebYfJgzse(-|b
z=MfnOk?)O&@MnVO2Il@Tk_7uxs>K{WR`?@s+6Am6>@V$76&7f#Pr-h%(9M9iFrG7v
zXWMz<?B2;{>1X`3%*d|~X|6Sz_=X)vs^7d=YdxC#*ev~uO{<LXeZgPHVz8fQ4DcTY
zZi~Z_V^ekJa*Etb;@>T?^!)**Eb)1nj$(82r-!}GOI(ur?6bte4*-9oSD#|Ae;c+3
z_M@x-=Z6JIxb=PVg0Z^~B7YeXISwMwO-2OgpaMyP?ec=%?hz>mkzykv{9y1aJR(J4
zKQ`<@>@5Q#siWs{iY&)){m635b^_R2EcZ;y9S6VL!{6cMu0rllEVuAO!LRgk7l8fD
zu%ofJvLaj%79sV=JU%8}3lg6i2{{cX$Z8{jV>ko|^o1AbPLD(_NPKQ2gg+7dtsaRO
z*e?xx8up`&#D$rjxb2(PK~FS?$Q~mi$B_iO&4}O}5J{iF6Mrw*T^^B!5ZP-)gdYX|
z4v$DN*l)0FuG~hv--7)#YxzXJ$`-9#xoB|2eQmMN0e=k7MCa=v4!mAPuQ?+9NyOzd
zEWt8+k?~3r`xOtn&g0b_Uaw=*91(sDcs{DMe~J`=ea*15vA4DY&B_$$>A$`V7M1=G
zdBcdv@nizsYeaAk3KS>U4lmf<9+B1%dCQ0hKMwo`k4OR7H?gZgcN6cAOo66`1xokl
z0l(Sr$Np!#RPP1RCMEmwHKx?G1>Bc;&#aGP{X0PG+0KwlF}-W1O_(A)E2+in#MG_@
zT<E0ZJiIsaQ>de6o+@*!dbbFFjx}WU0@hMytrdU=#<{5e5Hps%^=!CTc^5p}*^PaG
zO_i7J3NCma_ZpmR%fouZzKwnG*wmAks>iw7Nux)VC;f{Q>Irb#zh8{#YR9$VLA{54
z&I;WTQ**9%wpd=_Ujo0$@&;R1uumEG6YTxGt!sa6UAJM>)>X#E>>xJ-&TL%^TJz}M
z17GQFKYP;Pr#)-}>{i286KSx|dDw*>_IR+*23WSPN}L4zln0Ihe!;-sVE>pF>#AdR
zI8NP`om+y7j&*GOpT`fEJo&vs>z^Txs)uiI(Gfp}NB3T=AFX=2`q|i;o&R=>>NW*<
zl>pBXfFT=P{uBXz*}%VH|3pMm^-kwHwOrMxd&zRk^g7TNEVp(<U6G=rBkn#A{*#w`
zFRAaf)WYuo|Du=Lk@|kamZtH$tp>@=T;0_2wJyh!WHLlH7!f(XNuZaF2+qOPPeIFw
zd(aE^vk}n{s&!b@JzzwHe+&GpMnpp>4fbKfR>nRs7(z7hFpPtF68I2C4Ivq?rYBzq
zoEbuKZZ|&affsqhpg+Ul5f3{D>>GyF5Gp=0;vV;~S9#dM3BZqe;JJX`^1!;!`GkSr
zg;;-DtgD|nHiWKTd_uU7<2mX^{IulCZ!N8Vhd7J~`GzNDh-~s=Jw>dJnTw7eS?Zp|
zs3D{Pw-Mky0%!<n92NmyY2X*I_v4clS2FWAlDNncOTQZ2`<8gFC60q#?P0fBx9MVM
zFL_s4Ug6h(|Io`@0QOeHZp7Z#3NSA-e!4t)MNolt5LshH<XA+YT}A}wpaR9*eZI{L
zw$&r@2}IT!5#d*Y|JWnqAaaLc@5bIIAd>oMA={GW<i6c<%eDgSZp%I2awo}smxtft
z<=#o|J1w{HH-rDo%UuNaZo^)Ty_Xf?l1vfWos}P6dqQNL5s~8(0)1gba1M%~Tk!XI
z!JhPpY=p>qBO?4Y;J@^U#K0~w?B&>dheh~)5k;6x?yD`gY)ip@Ww{q*iqM=V`5u0g
zmwOwzud&?1UkCmhFLx2x>kNAx_T#Jwmu8Ca@s{U<kuVD)OO1#eQv~|Xh~OL)Ax^NA
z7cA`&*$$EGjfn6!fd9cGQULZw!>+`Bd{~4Mi}f8Ta^GONWm^XJC(C_VrUSs=<l#4a
zxp$CzndKJ#Ch)&_xnp2gq%K$-ZBTwWZrXT!OxW_NI~GTqCNSP&folWqVimgF7K`7v
z{uvZ#C)=W%@zY`<zY9q6hmpiLC{X$cp4@w}9`Z=ef@Bh#)(YV-1pg=a{ojo$20YKe
zhvU%Ih+JVr?l`_(FkJUSWUdjB-z)<CZA9=5h$M-1i5Kf(kH~z8%r_$7>${5y5MfE%
zziI)#6bIGmRRWetEm<0Ek|=5#aSJSF8}fEDeped5zYpFP6lx#*F2_%Wl3#-GB?(W%
zgAcUV<~2@=%e+{Rc>ET^?@DYcl<-OLrAzqF3IJbW;O;oYjmSbHQu?>=10v-ZR*Q^?
z{N@tqAR~frIIoGd(2Mn`M`S5P78{Z7<vHoiBS2ZNP%*$)8F&B*x*L&2Mx<a;rJ%=Z
zL1fzAup;uiggE7l2)^OaX%=y3<HKQ|d>-?NEQiRstud-3GA<y1zWkf%v0}jIU{{a5
zMFjn3d${wxZ!u4K3h22R7JC%%9>()3<N0OBaELdD=XCtkbMm{4G?k1ezTwCs)%jkm
z$332F;CUW4(G>o2@O)G1pQk)UfG;rcsW=>KM0hNbB)&Vi?A<}nb%w}HBO<>m2~^dH
z=n+P3Bu=c0yjUAOBI_V>p%FP%J-3hm`Ch>a0AFn2X(Z@rM6Na>m%I~><^B+vWklq6
z6>$zTBKQVG9I+B!tS3Ao4?tvgrsou3F#&3NL}Gx)16I$yPeA>R2lX6(K520@R<a?#
zppbfEDGOJjIeme#VruMl(I$t*^7)RtslP#8X@9S`xK0mts_``6%=(<vq5DGz<{0J)
zX8r?nRWsL0$($&z%CmgTSn^g@WQ*cm@a$m!bs9GHvuvxl;A!ABs-Qg2@(ud}_90Y+
zw(DymNnYG%FFp`_j%^GVK&Ru!4meMK?@*jZ#9`}{2)<LIE1=>-c$V)4d)~^c?Q3VQ
zfX*-?3h*ugnivsnUtKw#<s0@b5%I2ohOv8{j7_^&*_N=0JREGedu7bTNj%j9FZQ;e
z!>GU%4?GX>5e5#nuVs0_?_sa@up_|^_OSE8w(zj}l+e&N2L1?UpIZ5@vGVo0eM4{s
zw4KfW3HYge^4r-vH6<0T)_1sv{l%))jbb!svp)=*x=sG?Q>|83tvL21q(>NbFZR#a
ztr9QMyjs+qXnCan5Zp1~w8Om?(bZ1!;D{ULVSn}Vjv?<z%PV{__%>eNVz4I}_BZUG
zTYHy=?XBsWUm5ADtDS`q8Er)5_)+iF>>D6B)&u`$i8VYHl6Z_ImjA~j?qG>EJc`Qj
zt#`wg%HyfLH#|l%JWj@@;UU{K^in6V;qc(OP-(vO9%2)L!xFEH$I!)b4}1yWE(X@{
zC@RIb-o3=vc-R84Jv{6Ju-!at8f-7a@?9(U#n{x5Gt1SeyqDtiM6ceJ{^a9QPu>`<
zQ{gzzcg>j8o*VTQIdb$yuaP6)YfntoUly%jp^y32U^_AO#MIX_%T=p<Zz-O*qh|ug
zr9b<`)P>8UM<@Duarao!_58RwAC>HjEyH8{aqK#+=+|RU^K&8tJnXG3ELt2=$PLKO
zGCWwvp5eBX_V)p=(I%f85k5Is!41L*Wt{75*CEy6rf9>KukbZ@w54d4w0}X1bD3*}
zj_!BTwJZ#O@I(FBGPUTYXny%)FjIseQ2W$<H$_`^A<)rYpvcK#$7@z*0<|KLg40f?
z6~2;1NHkB?Ssp#6%gF0bZD%2LMOA?irEzVGju%zkmEeC0r}rI9KLp>+qf%Ebk5)VE
zb4uSHeP!&5am<MDlz#}n9~G^i`fz!)RNqAH4((SmQTX~g_qWP8o(Dmzx{JhJa`Q`Z
z*U9333jUYj;b*>=5pO!NJ7R-OT()0_r;c0^J-TxD=i{zRm@}p=M&}W!b5=x;s`PT)
zbvAc>`T4KmsSPWl4Qk=t4YgVkMbZ>SovF_1n)(ipHuW%9A&JcWhlHl>CAo`CloOFr
z=yk@6;aAnj)J^rdnVWgZ7kTiFn)b)E!|g!(E2f@U9<ATl(dM9y-62*VAGJff%zd;`
zYzXD}@vs%q{KOncHo_%Caw{ZNa(c`)M4w^T2X@kl-Ed<kW|AiOiJjVBe^BU`k6wO^
zRLUNHT`m4n#5KlOLr}3MoXpcn4&32JU=M(!A)r3hKxmp;a!a&k`6Dp1GQ?%BpL+C`
z=rL8(+>C05TCM+<_-$!@mDF`NN1IooUt3^Nn{}P^`|wm(21H^!4Q+0DRPHV0Q5^{E
zjzXVlXkQvCof<ldgzAQ=9IK&6;-`kHmi1_;{9=gw4rnOsDp{^?u^m%LLsgOLNFD~Y
zMuw(+LtVqvk|J(@$G{zmS*^Mc?oCFdj$9dS*5bQ|+IL(Dv_#ycRnA?srLx6;oiH0|
zi*7pAK&qxHt>Sz5RWOq=F7s5V%G4Pd&+#dU%6==dHfws6q@g8(G$ma3XX7rPIIzxp
z9Lz#W<KZ9@(y4|I>ErUbYG9D7iEBjg2`DIIkf;@}vFCZ<jkDUPo4L(Kq%K?)J*EoH
zsg4?t;FmFy4Esu{?hDG*9r6G(v8I^->|Yi1z6N@l71#HlU+$nU?a)KW7_#d9L|#uv
z4&wbCG?N#1Nb0@?d{4PH(25*tAZ(_1>fG8$E%cdz(2v$ir-5)XVKfjX$_J@y_;=7f
z0bci=>LPW}tLoBEqw3OiK~Ce?t9t6BRnbGLlynga903B`yv?{=HH2UK<f^-ri%=4W
zMP-#`F=bV-HprPAA5hh2S=aE@$kd3Bkyyl=BO>mP%p@bM1io}*o_ua*<yEXw=v0ue
z@oznh7R5NIhm^#i(JDh@Myqr^5&bCLN$Aez{Wvel8jj;nv)F=DH<Qn`U~<+^U9u+H
zqFpIh+A@6z=t&aQ2hadH7`;j-{kgP2I`X-*IX)#dWL324h|+G(GjVqijw;JD7_O!)
za_ovs=GZSwnj7AS;Hx3>Rg@hYL!=>oy^sQY_YaZOkZZVYUDkN<9R$~=ki9l;jgj)`
zdC8Oh^`YT8;x0y0<_ED#W4xXHP&xb<8gcpUz)#q@$f=~7%<FWcw+`%pajbEJU+SPa
z;kNN8d@ny$MEPTEQ(Rk;H$h*)5=-~qKZ)Hsl6_~%WZQ{N{__weo5JgPmh5hlZMueO
z`rx!6S&}Ca--1=oyg|>jq-UDqKd?k<!>!R~Rd+oXcR!e2C)<kHo29<HHCi{aK2>RL
zG`~XxkMHr&VA)NUO|Vsj!;zoRs|%%{K^ICl7@5g&*oBFMU3a?hM;tZgU&L@t&PAn6
zu+XdrycU5;TJ!i9aO!&DmeBP|&>gvmW54T*==z_H<dxVSfMhfLj!5vo-;AXU<6wCN
zx?#u#9B1ZeCUJ>iB`oV!J{@<zT5^s0*(4XPA2N$$B^O2=T0rtYEcsq+?MSYv*aAuX
zjzQB&hK3ApDT%dzI-Sn@DcCY|b`8f#rqiF`4x+!b)^%qDABlbveh2e<2XZG8zdz#k
z;it|WOUWwJc}Jlih~J64j%1Jc)@{*Bjl^^%QX**GA#T^Rard_YCjqPquvMzw?a{hf
zuI1cjT>moH8Ms!*^_bKJ|8_0!R^qzLT<73g6IXVScSP&<I*>pGHwV{ZTuU(~7U6mX
zs}e8nV}naq2Y5$xYRx&kPEXBQ!3U8K3{}yMC)7v4G(MN$`XJR;5n3X<(dSbq>BrOa
z(oqyQm*d<$pXVa<{3qa8Ec0~koNdEZc^R4s8=HsZ7_a4W=*qj#2&OTtV2Obt31Yg-
zIQEB8jA8VtH6(@YAsV9Lbu{vYH6&Gn0bNK-q-0HT^Ki`Fl1gs#Q*rmXCD*AjNpj(a
zBbR3-FJ{PnX~|b$dzj?M;MW@2W67mIH9>MI3y}FJawo6s=8*g=OMVu9b4V`SDC7!`
z{g$UmzSolT;Yrt?<T~%QMZQ6=RaW{pS@;rCUV$iR)-oHEE6$4dbr>wh#h2?quy**r
zYJsHtfI!k+X>q3T>aCzboZ>Bfx?peg+KSj#lj{-Uv{(OgR&lQrOan`XiBv?oGmuL-
z&Rvm1U3V(-8aNHCbMc#h;J~tRRUyY1su<{QCH`8M+Os}dHt_~=G^P}1E|pdOG03$X
z`;{%AvODlqBksWVD2>pZ>V&*yZIFHvZIDidSio`aHdJw&x5eF?d!wfW$+dpYBe{U%
zk)<5_$zvpc$B1fIdJVpv@tc5LXTEBb26@3#e4YVc?L4)YDxDg#Hd-}N(!FN^q`Z~@
zU2vX=EXxWIBfz`(s--*&cO7Y|HVNI3_t9$%N&hxoFWo}q9Ae}eLPd1_PQ_xf#dtXF
zIuR@m;6n=}T`_^ATZAZ(N|!x`3JCOpk*W%&GjY1(*8|yXv7~>OSkhgETuiJCDcaA+
zUJ#WiMzC}kEXJ)I*M(rmqTgzPr2B|K(k(_5C>N>p=7`$@M%$#_U_NB<^u&Lv1Ptft
z%uYIy*-8heoB6JUdkUaVOWI!z<+Q|qPJ_@tEfw4oaoh02B__Wq>Tv#Pfqw>lDf(C+
z8*9O$kJ&wqt}?#T)f*B17@-#Z^JJYFtom6ekg9IiGjaEv#kv*TQN#k`p0!xgH6+&Y
z=oIThUT5VxfmCx5LcIV?Bj+AmA1e$g9LMqV=rzx!KXVYbZqO;zDvoo{k$g9vIr1Vn
z&GQ6)mmN6IGbfO$IVO5FZ=z>5L8aYxf@!iV*nFmdNSuk>%CQE>dPD=Hm?`kGHRM5T
zkJAvfzYp?~HRM5^K+d8iQqD%Sg5_>Wb+>tI+}&r%wX$78a^Z9dur@1s5y|g0U+w6Z
z;@cO$bCKK3_g3DwB7g7^m-~(B_ZYjvR6DEw$OiPh<Vk-m!<m=hXJ2k$7yKYLO=Q`Y
zVw3;1h$^EwvyN9)<R0gewdkiKO1JN*$|M=`4}|$*3H^?BHxdrO@USJ5Znc+eImsTf
zWU{TnCjX^~l1<@tJ+Df3FY#Yk!jA0g1;Lg(PO?YAYM5*Izst~MG`dGZ0w=onQN|%a
zg&aNM9><``DE(VZM(Hj<k|gjZ;{kKAq$}Yb1E)cz-FMm4ko%)GgUu0}6y;0n?id_`
z;qi|y&piQ5!%48~;4m1U^N{<jJwY#VT+DUK#*nl70b6${Gn9iLZ`g!h9U=Yo)aC^A
zH|XT3J$6=41Y=X5rg}0A7W)IA9s~pDR$3tG)}oN^MnrM4iX6mArz5Tij2a-H1vFq7
z{udz|G`9T-QA7(?he_R!I3G(=ZGN;jUS)};zk#gMEk_n{oT~u^B)&C_72AhTPhzpY
zxHT3>y4w>3lCsDG<!V8UK(_%>3$!o2lon`Ds(Eog$9@BnG+=F*)cu657fDCqHyXLa
z5=*~~ywW9B^1sC>a<w46iJyZAgT?;Dr#Hb)!v8J{B;B0^l5Um7$<=@Y;@oMZeg<<K
zamL_3R&BW3g2}L)U?_tV*!3LeA{8UpIwSQLKF1S``Qq-eIMS^nj&ye;ij#{}(UTFk
zes3bY%GB=pSBC1pM}MUpy2@?|$<!UfT}F3|Bhh?h6UTmc#OaQ!&36U1%keG1?@}Z!
zU-jX7L~Bkl`_OB{mbqUr_Mv6t(Vv1`W37|^W?Cnma5r$At4hV(JG(B775f{XzQp1`
zx76ZDx1Kl&DOrz#KvQ`2)~J9~l0YdVRU^ux3t$3%ry|#*=Os`2j|oQ1VBx)&weUtu
zB-<z0<Ubd=5hvw&idQO?N?l*Bd_t{{QK}olM6o)6`jhB1^fy@&>Av)mJV26VmPEF_
z*yKMCQ4&=}_kYwK8=OmCKcAc1HN(}th)S;rbGe#P2H^mbO~mnZWVr>!;65d&bh>=F
z=|JYeuX2fa0l{twgT?BC8Az;2=x??-(tS-F>2&#^I0uqr5Gar%&>TRl;CVX9?i-S8
zKpvW7Kvr={a2k-pT`@4cw78m(FL101XsiTPCda&xtos$)AjqDnK`2IZjj;?xFqZCq
z<aUnr-aA<a!QzaKOTu6=?YssP>@57|TO8@`C606tSe#sAEa3=rDIm2#7Z+F30y$oX
zZ0Fc-fn!s<0N;byQ))9hpjWsNEyC|${O>^SMAG;2gBbYX5|iK52`Kre6YFK@nd*7c
zKYTL%FXfdeEAsjqfrb=@dzNV&Ux8j{F6md%8tEQF?&LUERni;zL5wg~EJA`4h;<JB
z3oVXx8;B#_BS9SMpQ|iM0xdRDwEztz(76DnBUf24=|3l!bPJKYAf;F~z4i~%G*z4S
zRnWj#42<sI&cN?%box2fxRl=#NXkVPNUigxP`Dlt=-e=ns|{v2fwUJmA2~<Ch$a0V
zVo7%u^1y$PiW6%Fm;_~y)dO@Q!Da%OZh@rxo<Pzqwm`W^6|+}3AB@I?_C&`qCN9AL
zUK0CbLZ72Q4?kRD@;f^m6Jh^ffT22rLOXYDPZ%J<wE1Tzl<#KZqd_X4g#*K#{R;fv
z;n*LfMQJ`<gRcge5?yOMx(m^1jFmt)qA=Wq%+iVcVhrf>WVoLei^E+=5v$xRY$~g4
z4`7>x{#xW=oYdFvS@Qc?<=&dlZLm(^wz7z-B*KI-ek;I@B;m#Q&$e9BZB;JKe<fQ(
zvhkKo#%&k{Sc)i_O8>qkV?HN)a^m0d@a|v_TR^e`u&SUgB}(M1V0BlBk18miMO1J$
zF%zr#--i}>J@2(r6>*+F1!t8j3Yk%b+AWY3In_u?{}3dly8(HO7+DnwS3Ss02xG<g
zB9R+StU35k4C8c)OZhZ`q`c7rWwka4R7}5~1_+nDqrn_8G}U!ev_@hQx;nH<y5_{0
zi^DQx285nQK1MV?ST~aKacoSQy&T@zC*J86uLsz9Y&LOq4<emQ@s}zru59zM%|m|^
zaz0MaBHED?H}%%0Xm}QNRP78vsv5oNPAUvHCYNx07J7{%=^uflbjuMvJjzwIB;+QC
zv0{8C(TyS20{o|19O<4Vj&w!p90kf%wK##M7%3X%#u7+7pv#aU7EJob2qxW1WXpe$
zDyD&hjZ}FsCll*({D*$UR!*&x@_7PDxyl0NB9$c22?SENwFPq|7@g)d-LEG3bBNb%
zVfV9V9B!E2U>49NI;G!+EJ*-(9<e?Ne)v)YISFW(fjR<eNuq`5Z%3BnzavMY0x%;C
z(-+KTB+{y^lbiODZ{*06hyyy&KwSYHO`@v+=xlg1fH!j_ih&trn1NufAdyyDEeE&Y
z|5lDfY0lOo!Dx)}K^Ql#kcz4Qm!LQagStV6jS!G-9irzkxdvIBZWtW~i}7(bS3t09
z@E>b&q}xs$>F%~Txw<p`WP+a?0fbLpeLyt<ku#V|XEWr-VN+krRtwv;*zQ5@#%U*_
z3sX(eN9eI`7qZ5zY8TuIh;tS`+2S?>doC5q#BI;0?QU_KB`{9MsMS(|b&mJ|`RSj)
z!5}IiAWs{E^6^<6H@-03=v)uxSoEr%^iM)ky7kB_#K=`WNB8vzW5p7D$<v)euoQq^
z7D&362qc}h`3jV)d`0wWPcW)b12A>KXhppb(Net&@hTKF#L<xB40D8b(dW^6t)2os
z1mGhiyY(?Zy#e8pcMOUCwsLB<oJl$RV5<uW*>s65{RZR{V(F5<9Pedng(Ufw*zp$Y
zSiX<t$^gJ80L>y;KMQs&h=Z_c=~S>!ani-BAFPOAeeqWX8sZ<L0$Lp(L>|HSGbFnL
zX)pu8XeJz_{x2*He>?hS6#X%%FZml5E=@4$?nmAwP_Di#pbG|t!D4*u)lDGQE%*<#
zIMTgB9O*dw266nZOh~D+#0b<1kXq6l&`e5PgkCLql;ocy+1*l1OSnZFU>bp`Pa-YD
z4<nD`{{@oW`Z$<l4AVmWe<6uh8E7K_eHbS@Q30T%0jXsCFsnPYF#O=p8jjnbSH+~?
z0s-kBLW+r#t765BrPg7rSS9uUX#~3!KsyV>$6MWN1d@(<9R%`AmZ?|~f!czpOY(ML
zN-$P*9(WY_lH}O~&(Vku=6@XiW%0j_`~g7EZL<eg3_yDWbOKNkfY$%VkgswiNI%Ap
zw3vTC{LA65wO{^U=ZIg7f1;xSx&bH+K<obF$Tv9>Bms0W|3Uayz+Y>={J+hSpa}oY
z_-jlx#=jv|U5D>R<O%%0)5p~RH9!D$GtiNMW-&grh70s0Al;Px*Z2U_)i6!KG$xT&
zz$cKc`2T`r=P7!WUx(58AD_ITfb<W)-~&%hS^IgY5Zw1X0o$E}_pkH8Y>CtNw(bYV
z1zE?A;|Abkv|RY7PY9Ry&BWIyTr@2;&MIsEX9BIp7^5ZittCfcn@#Jr1gj<6V5h6>
z?AE7gNh2dW49q=QWMh!=7p)}YDsvp%MJYnj(AX-#v#`~2yamay)#Z;q72=b1kZl64
z8m&A9+hlMXz-;nxdJd}RSeb;Yv8DO*6ph11CgM#oHl@Gr$M7WblKKE$c7Mfb<>4VH
z-V%F~%4jMI^D>wSy6nmnfN2V*HeGxo_6=E;Nk7i{Khq+ug2hzAKaO*J1OZ~SPywE&
zg%9HQG?J-IZS=aAl)<f591=G(vWH`9Mgt#0zZH26vYckZ25SCmkx_C7sD-f|iNnKL
zYzw@83;eT=V;na(vchkNtd`>EkPO+n=(U;2kPWV)k|aLL*tWtrm(q&VR^)ZqmPVId
z+8CH4!Km*iV4p%Vt$f?OWZDzxtmyZBoW8GQjXX{3fAOGy9Mq~bgDL|Ex+Lp3$uSQ+
zg2V)HXOmbf+%sNcjVYBq6K6FxN8&>*@#)xh2YIPK?}PFe5jay8C5~qu7jRtJ5-Y$t
zB-YCJJd!D|_If&d`iYYd^RqzUVuT!x?Imzp%w7~ucM-H0a4K|K?J@*bV=RJk6=OUG
z+ml)2p@=TX_y?8CI*xOkZ;6F}g~VD%UQ%Mqn^1r20yGn7HO3@~s{yRZv^g2u)~v(@
z1kSXOK2H)Weh|1cxWg==aIcXtn4VrjZPD5wmanW^jj@;#RZmTOIojahn&2bkoyl(q
zNAs_}x@O5l4OI2G1OnyG?-wpFy7+33j6&Ce*V_2-5<}dy3H*<LgmJadvulXW!Z5KA
zis<U1(~2P7=icaRi>;U@cuiB25*e=hCuto5XxuBnblRw(+mTFBb-|_+b|w(RGpWC5
zq8cTU0-75kMkz?(<_b>^$vSkUiK{b48;I#_@g7qo_@n{O!K7TtYb4II#)slAY5*<T
zZ2@e-_93EY^x6L7@h^eDT3&$tnXFFLgw7DwM2=@Fp(RSCDL_$*05NO|u!ELwMgJO-
zX?bV#am0Ez=-f1|EeS5uh3C<Q+rV_kPhF^6=<32U`0x@#+&L5YAOF;aiP9L<hqJMq
zUdU}qln0|zOQieC>%#+><ELYnX$f!DkQm1Yfmas_e;y4{%vX?1Lwca^iBORQ%5aH^
z(~?5~s*iLwXh-6wv3-Pyc6P~kJs5Xo@kbx@^QqR)<<VuTF@tJo>FNbu)tJhARihj}
zybzZ9pX-&QA_i3>iFHyTCu;x|(5V{IeV0`YzxAp{l160uS=A`w_&e~bhVV0~hGM>o
z1l7<Mr8m+C$pE$B#sGc`Fw?s8taU%2Q-yR2Y7ZHmes&{`IL@xn94hoZ{+Vh_ry5#M
zb^AiqU}NtRD#uSa@e)JabgvpeqE|KMVmPCaM;F+CL06q>NcU4#HPrf_v8%PRUr1|(
zc^%30z4p!n5o>HBIK4Q4zX8m&c8<07S9D<W76O<zD*TRu?(Eb-9w`-de_*dqGTHy`
z6s{9Huyx^>_0<jL^>og$2lJk@Ns<?L*4y2>s=~>KBJOu>QB_WzIy%rAoi;v4v>axa
zb3EnzlZ5gggS{jETJ66?TIcYO;J?rOwP1I`|7Co?Ms!IN#Mk^^iA3Tqx$6Nw(P=GO
zibI*s;k0=JyRK<+v?!=v(Ss59m*JM<R}q{B*qccI|AC8vo3zUq7U0+!hFWmGLv(?j
zU4c0eEVe{zu;-IVefu_|OP2p6_y9lUl&bU=-(2|!INo`e@xRQMh4g;6>D7~Kt*19y
zk4;)}a0m5xnjZfIzXsOh_jG5SME@2t1l%Y@y{*nX38{>p7_I_ROmBbo6(87c09eI+
z7_bWa4l<nM{{@Z%{?rnyz?;CTxbGrbs{R)?2KI~njibBwjSjDbo}hJ^kE6Q?Hy29t
zb5J-owAR(@AGYj0l8hsXhMZbgmg6VE@_nm#N!}-sB9eS*1l2LmKv13X0iu;+KS3=w
z*`;xS_gG?8rap;Pm5&f59*cO1`B*wXC1w7qMg#m+3C$b%YpnbJas2n<Pj|FDT>aZ=
zq|Us2Vu<(ZHw``Y*<f0yt(Jz}rqspnM60(hsycdAo6+tY;1$t6h}W5g+#Tfo`^eYm
z8FYDfqd&Dph)k!RdxuZaq%pHjIo2KC)^PdD-xziCPovzcKpIhEg*vre7-koKr*hnY
zS0&#}RntGP*ZKUCPKrVAq;=`~3RrcIO5BiEtGt@cYV~-;Yjwh<8T79MP_wVb*p*g)
zh;ujI|7WYQ|C6wWdksA8jy(isav`G_`y18(=}w~o(tUzxyWW7;umPzv-i=m>>_}~X
zH(ImNTR<Aa`w3inaS~8_e?%s7d?q5umQ>_D=I5KnSOcLuj5P?3=e?%LDagP23A-4F
zWv=P+ub(mMhU-STcZ~VNV9qM!zCAu~qtl7>9&{5ky=C~Y#5GS0xTH-P2Ce1tuW2)C
z>F`lGY_(nf!Z$`<_0vfA9>LYGN;r|SDB)+w6pkCD#=px4QER$)aZ(}DD0@=~)$M2G
z4375~f@8lA-Up*TyaAt{RAe{y0lfeJRU~XtEw>ZA`tXsafTk33tCIj98rw(Eoeo>+
z_8?O^hV4#z;sfKl71eQYRbzfZrg5B`YfZP=xN2G+2Ukr=?RT|E6(axDdm-1_E^S;N
z<=1cB)Ix4D5@4%wMd>ENRl2W`vpKfP)^=Nr?b9fahpig(D>9wq+-z&Pr;Ke8K7C>P
zCH_Ns|Nmnf4hz1UciD9#uXu2z+orn^D)D20&o1PHCj@#1od(+mbZ2A+TdLj%(L;x%
zF;_>zICWkA+%_ZU4<DIhc*R`)?4L#?tB!O}6HdcRDbAn>O7SgnA;(sPI_^1~G`yZe
z*@q&iO1~lJbDX;fwcWE|G`zH?44?>FPloYc%eF3r|96+wbuWNd+aK5PnpViYMFKo;
zY#&E=CTyi+J#!avY;5bg7me+9lzm~V=KPLaED#c`3LL9c;Q*=Qwj0~qu@8o=b_yeS
z|9@i}b1xg)jbeLFAvYWe@RG6Jh;A}$rTYn)#j&xCx%-Um%P9N7R?YbXNphT<ZC!US
z7+ms_{4cZF(EIz*tLATjO`RC7g_C(dhZE3z?3eJ~KN)T1w4FTd0zUB=Y0`eBg2i20
z->MtGKJGRcvDX0ghnSe}L*{Xun^?>}XawHDXBh;v2A#wE|00msio-vN>$^E5e!zmf
z4{!xRe!*u3@8ABfRvZwcfm=z8hb_h~a7D!U6`z^^j*-}ZUfy~@kcMtEK_0Roy8*5y
zh=#l_{a*b)5ozRh5#v#d@ddcG#Lz&Tvp)v)|5u+JK%}vY+!J??Sdgy(-a!zJkNLb`
z`F|qP#ML9l<6#W<9k_MGP*?s*es}5rIZ#vAoj{KfNDI(U;C>wxPN%I#I-E{TgDG~P
zu1OAEV^{SFFW4WMV5xc^^GTfp@eOm}8@P?YwNDb4zcTOxxYW;qtLHX>Yr=vi+^iEQ
z1em{&gIN&r&@I*lv^F<OkZX`@5p5l=L#{`(hHORhkx0@d%S<gb;z@#Xn5T6mGb{Ce
zCHkhk{tfQ$5n-agIZkoBfLC26+`xN(W^Qw-qdx(5*6tIFfNO;=Q<5DAtB9alw|dk0
zHv$utr&4Eq!l`vNu3Qwv^*4t*whi4PxGuv@+w0{>!QBzJ3crfHm*2UCM8l^9ax3~g
z!*^;O>Z0iRZgB7o)c&Nr0^Cg&Zw2oi@z#X#x&{HK6EFY)ybVJo3%H;;fzXvgL}(#c
zozJi0Rc&}W)qQugO3yKG3||XM1=HUwF2Q0fL}_FN-dCf)2FWhiGpuAUUdopBvC<u$
zP2IRVTC4xys>4rcGkjQ?VZ(>tK}3z&#zZ`ONSF^q*nPn!`0#*(uPeb9I>k%6=TebR
z*?Zqk7><H41Lz9M5j@H|y=2tgh3#;fsuTmUiEtU@7D%r`6zX|MHliLcl)a>VKF65T
z`rhcB9(J&Y)ds@DCie3cx4S*i2_9(WfuP_{iV~FsvknZE&5J>@6qQv)mqO%vB)g(7
zK=$P6obXG%(f(QN(PSc$%s+9)@R8RIA6;hjgTwD3m_|_(g3TxlTYC_h)}v?K2UU8r
z@BWqt_lGhqUV;zMJYl~a{!b+fO7SCm*qTvE+4f>n%!85J60lv1=p?OjZiiw1I4*0R
z%Igp^gkbL(!;ZghSl=>z9~^cqfi#ZvH=YlowF<%y=eRq--++D@lHKYTiTV6At`GiP
z5_VCN$=!4Kuxp5wXauV^1e-#+6^Nhz35N2WM(8ELE2nbJwCrWCe_1sEYhLQ_MR#Ie
z3dipqxx2uXg@op1{Pu`jik~Vfzw-)1pYr&n&@(R+L4dAIOerfM{=`&}brw()v-q}%
zyWR_UJ^_WPh}?)?^HPZxF)yXdLqzCiu)6Ln;#F0CnMQ5DoC~Z;O)l;bj9Qf>|B@j$
z5QN=^i{LeRL>QnFLDr|bd=afV<P~t6Il(l&=7?Z#5sABG_uJy`CSaiQ-XV8IPDGkS
z>WL&>zYH73cxggb!=`Y$v9BwudsFqk<euN0RdKf>%pCik5R=GUSy^#eOPJe(FH*H2
zd$6%k)?I7kZn;JN1xOVl3sMc)us?F0?knT&7TnYWXx;0DVe%@t-TYN*;2wUrV-EHz
z5R`oY3yEy{NrwASoJdP*p?`q)N&Ko>LmCeX{i@?DzjFAfu&a^cTltVQzSJ@HF>y|#
zHC6FbYwpCp4jDvTJ2#{juZWg9plx$#+gt*uYM&FJGHp|Un#e;039FVEzbfu7!7bCa
z*9ybt=i@dXeWpzVX_Itn)BV8Jrm1jHn=ZvqZBhvv(I)wwfXMHFHm%@e)0XRF8alI(
zZ%C-|has0^s74SOdb2o4*9763CHMu0EhKx;3%G{k*h+qa5S)s%gXD!2sRkGoX+8FP
z|J~)|*T&tIKvWhDv^>fpNL}PHU{;pytK#koql(tOS{PQY7H*5sXUZ~&vPh@0JdC5t
zG7*23Wg&hli>lv{vdFI<B0s+@1+-!@zG}r9d`FDn23~C()r<`a7JdcrRYuRDbJ%0w
zGPvVP)FpyFc{8_pRoq==T%V?T+Y5Ps1-uS2ZBO`BK<O`qgLK`ICkUPS0v&OK2@=LT
z!gVi-yR*S*sa_9m4Q=FwzZGMS;M4l0f^%^?j0VbS?Z7ZNzr^V_5uA1e{;hb8t>UK*
z@vX>vOzss3s@^eV2Kr1f2a`xT6;l^-D&{EsRm|yoxN)s&s?!?K5;-<Q<mgwkh-#j1
z$v%YG1te3|>m%o(*BFz&2V+dSK}cqdy-hM*lV#$q;W&9Szjp|(GV5&YUXqVykTn3b
zKE)ptu9;9#Rw5;W-e~Stu8O;vmVY~WUL?QV8zL8>&nLh1m(vF6dLX*vk-IlqxO())
zh00HQ{TcDGq;CX9gX1yI03*KPM$dsHj&u=7TntECn+Hg?mdf&y)q~A*=#D%<7dGlB
zw`*nG%_5$9Tk&qAw?+O~gc~479?;#chz|l8<7Z)90OO{{_(@2l_YdcAAr3~<QEoO6
zjgZ%f{}M%0BO4>*(W_|EUrEuV>xDe^pG9lw=G+o@1xBGK6uLoypD_-Kwv8N5Q@D@!
zM5`o{*Uh8rPMPmbs8%jQYYRcwAZu}H_Aed7rKl|t9wjRm7MbrF2VZY|ol%0yoqSF6
z&RU!GHOEuHt6Go2`gW=%*1eHuz(dI924}(aE&RkI&T9JGIKNyNR_kz_C!kl;rC&tT
zr8^#Zo*=o$%h9eoO`mAV)8OtPxo~}uXAem3_@k>NKh2WAkMo5j*Q7cEnS@@GMfy0C
zMY?uKX0osibejq2PnI;3<#dZzipuRIo>qhw$eHMY<Vk<bu<-PrxC_m*Y?9Bw$SE=|
zV{428%tF#6n2gVBysB(FDD|N|Sl=cc5_~c?eG|V9X@L(0+sOhe?sno%wz#r&wzwBt
z+$s81fgKk34FWHn!Kv|(tHbZZ5_l?LO*{Qn_#TX)mH;*Fw55DLwQCQvzeL=n^_vsB
zn59#2S1-<{VA~4A0e2LTA?VeM(w|Q+N;eGAZKzByz6NeBQX+`g+O51f?goR?1Re-(
zsZHRFAMiBK4idp^Jy3bf0lF|?8@CyPLkXxcsesorEJUOq^0w7Laath-jEWQRtxVhX
z*XDcC_M`Dr+t~&O&-B8*N2<aP(Xy#;zla`8=cV=g7sCywKNf#HxYoj@y8jTZDYt{H
zU~Sz>_zc5W1)C3_rzn`}+X@+hUIml>0tzPGaOBPZELc0Yc}3iv2u|%A1};VWgd2do
z^H2N2`#Sp5DWfd;6*xalay9T6WTYjR{vwhGI`ljLGkJSAhvX+&^3mXKAh~dZk$3+|
zPH_|L?3g2?E&0_rKSOd=sSPsLl1qOv$)y{E=#E$Jq0+&1C;1p~I$allTb6UEv~}Y|
zko<8u-3UVkKN0!(0ETow1W(5KAbO;@G!wTMA9TR)Mf?t?yIAgmEvWwPQg{4N$K*F-
z0tWu!%8%fM!)H3fpdZ7a14<2_Qp_{=Rxo$!m56J|baZnV3dfp%690ktcanc<?9VoA
z%(s&pk8cn2U5M`xe7gj`D>J@_hjBZ*U95z?%vXPjei**p<O`3Fpz(=6g}b<Vxc9`J
z>g1_8#pA-J9EVP$RDbRLqQY<uY)_cp=yisW{+kYbzyRGS<W<7wI70*_)0v@<#k-zF
z&ld6nEUHaM<aiAE1e8HPSR>sy<WmCX?)<K9*YddQ2Ttq6Y2cRU?EH@5l92lD7p_$g
zTrE1gzQ8qW1YgFmP}<SRUeZ#v;Q6>#g*d}vfN@`r?{m~b-P9TBZ`7rq19j<6K|cF8
z^@J-C^m{kA67GX6`DB1MkzByBh;C^5$zvqvIxFaiRro$na`k&xq?IL?ejdrCI~Do-
z-^t0%b4uFcx|6)6B|it?a*_*JfP9^myl4d<GPmTn<NE^1HIU-S(Ux5LOGqx=6hyam
zb9P$7b$2Uoin}%d)oU{WuFBbKU4mYF)rDsUVnY0g9J&^+wKYVsR?rZUC`5D@)30Iy
z4N0^!XI&E9iL)*TPD6e+XC+jB$8c_IzS^+fg|9ZNrz5{)`Reajb-=eGxYEpr_Y1?l
zVNZP9qt~fj`nJPZ0i~RcJV%hMb9<0je^jfJfyx5v%6VIugkh$T#5-*VnDnxU>j*~o
ze6>-Goset8+mh<B0v=e>qg@w(ngIf?qm|lSo`L+C)ynj8K7fv|T3H5SAJ9te>5fCX
z8DZ($Ls+^b@=|sq$saI|!?>$~4gnN5&^!a>YNZ2H&&X;|csFFVADoQ*l|?oQ*_e?%
zi1;5uR(rriU;fv?63ft$#L~@2Udd)_WQ%U(bL5682d0N%E-_3lvISro8G+r9-2nl1
z8wuARP(uvF&XeJXq2Xpwd$vc|vo*0qvi*on{+A)|8M#snX^rYPsc^fS*j`U92=;>d
zDW1j__baebNS44)aW@k8@E~r2FFs)`iBW+sx4>#a84LUtf&XA5zvz<<!CtWV20r%<
zSpEJf*f(g2_7ww>BaE!{KSNf!E07QWO*ZKEWP)JL4D>ai(hw2oN&{sP3C9YU7GQL`
zd4=--!s$lWt<#V#B>Jz@O;6h2+=zcht~Vi`(57Sva+Hyf;a8YTw-C{jF-=Ra%mE1p
zi6ag34VZ%<F3cjs<RYF<@$qaUunWwrLi%5~hK3?}7<7RlLr*R+q??XpE--dO;t|OB
z7Z^!qz@gx@a%w+a&gS`nC$-k$`40SAgpg@Y+oogw{iAH}Dicg|U%_r37;em`f~aU?
zL4maz7BHQvny>byx8SQi={ZPVmTwH-D)?%=e2(v1lwBLG5lFtSjI|uf@H=If@+#z$
z|0sKs5>_+Jw_wUrc3~D9CRf?@_Yn^>vf9TML3Skm=OKwoS!^9_tK+Qgp7t`MdWHL#
zJrMG@d&Lavn)soP$?v=gG>?D8bT!bECr|pzY~NHHohtAJhPSD}XdF+{{I7*U6_8;c
z6+r2(M!xxv3dFDHPqiB6J22&_fG|r8ld}R~>VVNW(Pp~x{&8|Z|98kSP`c*Ob#oDd
zX`Cq7O3FEg<TH^PS=CH(@^bj9<$Lgbhn9~;butoFBgh|@;V)V)C5u+@=rz}rOJB#&
z;0grhdoUGfxiCzE0F$fbF)*c!tag&CAgf*Eg-A`v2EWdh8P!EZDrs!L#Q9y=7T`D@
zDPxJH*RK^xcMI~ve{@sv(ugZ<m><C88K%fEMmGHXJ4<XZt%?90Y;3i8Tn$@o9Oodl
zvPxS_sSYx-d+~h_vfAZLKn}6Q(w88ybhjeE{);U259Tw8%NpoMK$Q)&)<8K+3#L36
zU1@2vS$+SY^4FBOD~FH9ihS14AZ<n$AbJ8A{Mt^Y{Arfdia2YqlxOOEPK&e^n}}3E
zk2+8KV<DG0lfA;3>^p246SDQhCjVK;Z?sm#n_A_#pPBq}5;wKSUlyLo7@Uc3EpR2k
z{k*_C2>gQumaV@9zSsh5fHt$h?-01o5}p(+?hu@M3kdu@ST&>q*e_^^_KA~_pDd>I
zy|w?7Qv1K(|IIe&ni#=;G*D$g10f=e_J6`;5eYEG4CG&oKt(WnAaDl$XCgmaEa{IQ
zM*mBhME?E{0tE#7%|QC_^I!-Fqpg`Rxd_C-{AvX9z<ddT$@rgz{9&=A@0%c)l-h2U
zpqyF#6|^SJNdMhHRREn}7;U$N$weRu=1(vhoVpmPk#lf%cZq#CX>`izRz~L(f?k6h
zZGD>=oN-Pff8(n@{R!V<TBI}M*~nk$QRhj2DC9DOGtC&<g-wG~wkp`<ukBmqu(1i(
z+7i91K{TIx153Asx7=t!u?7AUa04&!HUfWafn{rGfwg^8U{$28#eI*sHF~f)Y1twe
zoLXW(0;?0;E2ZiGalOK=lRo9(pW%v#wi`#fIHqmZ`4foBKXSNF(33Y$`WwfEy3f$5
zIzMCBMRl|nI2ZX8y(%DmHHb>5J>j9`n?ioS0zrQlu?~D;puYe$Hjws&0%f%>z!ZS_
z+z7l4sf!^n9se1~m*}-ul>YAyp^hswSAkf3DD+=<ulL`Z*kb{Ify74y)P~`F<SQd5
zeRar5r@f*+h@k=Ob!k8@2D!b4`5R1A!?0IO1fX0kDrUfaV+4K$^f3fxlH>y9YYQfQ
z4T4FhT~qD<AW%fCR}JH+LNmiG=KEIxC>MbkpjV8*ZzTEz0vD3xBII=oCVee}NvBOq
z>^}$;umO3^FcAneH_Q^PVgltP0O$=sR5Y&<pkvu1Xlgx(%wh)Yz*dKTm8~Z>t@)gj
z-0M;9@e{KL!hia-c=D-!EbomD4VPyc_Ljv>v;ln6@P7jL<nj8O7Ff3B*tF)ek8p1g
z_y{8E8HxhGPv9L_(=ES+>wYnW-!bgrV0%MKOFL@!2iWF-JW?7P+<(FPwt+eTx|xKk
z3MAb=K>AdMmoS_mVBRxK3oyr%D2YGgEMRuzzZ3CuYy7w_7){aN>9*a4+E>v3a{;}N
zL331wI`oZntP`#QfpU%LV#=}8FeM;xq+wY015B>&jDz{mNOu8KL__DPrJUegghWT>
zYAK)(4AcrxUs|ej1-o<?#lIzzRZ^D(v)M4+!K@;Y&J{N!CGl_N<w?+o;Q|6E4M^qr
zgDQMVd9+Kp6xoVi)sa3%OQpL7(I;<mRY!wmi(#S=Xla-t!+6z61mA?v7A(!seF_k7
z^Qr<mhL&bV`%FgrHf$R0vbDyhlgLV>G$iz48$FicKYb}7`PV$`FV71`d-1jW*<*`a
z8|;--BNI1XD#719w!ktj#Hf>q0v|-QcE}0;1P&^tIrf}ks)1<>8Lj85kc08nhh;o6
z;edt2vxcby<|^`NIbWszFAJa}lAS0G=mi7i18PqqEwihUa`<=7k*EO7^M<Jl=4uk@
zl3bYb_;<<56MRKK3Ft)wRR`3OL|RVQAXNZ#&5@{xO~Q7=#K2rbB3*<EQx*SiS&4!!
z2J^CE4g=G9DDz*-=50uI0P!4&in*|R$w2i0EhUkz^@KSL|L!>wX$Ny37!Brq4BF2a
z%%~jgboZjyV3xkV_J309;@}tp<r>XN`ulzZm4v|2jAmhUZ6J&{n)eUpIG7D!G?*KM
z$v-h1bjwi8XN7$bTU|&rPGGE#5r20(ax4K)M#|xcDHuF4vOQqI+M=xM1-p!34_h$V
zV%V5{L9AZHDaeWyC)Pve-wgjm4FFo7??8IzNM20c9yLHm0QK?L(k%buawJe`9x?wT
z@vnuymgPH<<NqB$;R;CbH~`J<8UVU7__Xxih13Di2g#mJ1q^`43{xM>^^6HEy~5PR
zzb}$Ko04FjFk)T7G=$hqTK~nYAAsyqB_X!a07nC;13=5}I;21T|LP|EH{q|1rM6n9
z_Tu(CzMb&(_mu^#Tu<T`rMKQz|IeI2NAnL~w-N&bGfx4Y7#He_j6@G~jUl0RTqFkM
zARULqYW!8rn)t`5rk3ivk$U(KMEq(7^I!96m4O-nx`A?Ptrn<0fI&!hIb&dM1*2Ok
zy}&#_c>nSn4BBGG0kGgZVw(FJ;r?+E_BDI>LShV(3h*XZC|Y*cBf968M}5u$F$Kv0
zR}&{q;@d3oao}FgN}SG0T#$8~<ajN3<vmXO|F=o3wfi0~@l-NS%Sv2C;yVo3hs5uK
z)3SQ6hdT$%xmmb4xZ90zKQ+`ROnz5HpMlw5s&v+IF#+zf#0s#J#9C(W^Aev&fb+8w
z7qPV72~K077Puaa1ub+NkcRl`VSd6N48a-==xzfw1hkB?pp{ObMgYbl*<+y?%sOk~
z05I?QEu>+N24yd3b{sx|kJBgjSPK>416ru{{c$8S7G{!o7Q%l)r8E|F@>mb98Z8_S
zZZ<frqW62aivi3=G6aMx0JkI{oN#&kg(g=T#u*1yp*V}NgP?COz<#wQ7CuR0t%whJ
ziRX}XZdT$LiLVV3^Y?VY9g@gOoFp)q{bQkkzW0xl9A9IJ6<{ujwSYbBC7uW1lB~o9
zoSUx$7o$>xz!ihjYMI#R0T%$e6yd-9B`yZI)EJ)xutFB$SXQOtSq*RyP8nh0=aX2A
z*P~%x>d!pCf{a%pnG&myVkExa5)T3Ran_)UWmP6kX*0xwF68(|Yq9Vjkys1b6JCQC
z>i*kRpfW%uPLucsfM9}!smq{nQF<QW**LO12h~~0Yb5TH<FAUlo6KJeQdj)%!S_XE
zdJg{x{>#k2kJkTg0Pe;46=X&Zz??;Vl-U59|8e~9!}oRMf*k(!@L!I94A-Ife?rSQ
z;G0G=vuQE7s}W7*{e4|T%WnawHT^Vz{oDN0C6D5#X_M(99cwgaj@xpa<oM=2snNAd
z@(F0-)Z<J@@49(hd}>OjNY)<3D0ItspY(j@;-lH4k7me6ADflWJbWsWYASqY7jp57
z&s^gu-J#xi^f)Gh-IGYgJ(3yHmk>Z}h5{_0ItuzElBv#(B)$pBH109tIEpjh$V>y5
zfXqW+F11+FT}CYFHfdjx3AUU-E3$$Wv%OwmtvCbV!0bwsER#H_talu=lH<$4*Ru-G
zp?X>ZipZntP1Sit<+}_gj$??sV1k+v?kTQBuYHE}v&MzGE6}Ob=Rzo1$kR3gE<&eD
zNp}UUlWsGTY1~S3tU@wmRjD+MODqJaaWom=psdzuYG(jJ7kS4)7je88d_8MjlGbUx
zSVJPUj_rHUxU2BtFlgL`UgM^HX@8k+2Fd5plE(m_&9?&766qGw66v-enU>s2@U>Yj
zDWWCkg3DCuQmfQC=m-<nxvD#}QRk^Q5j{-_mj~*<Ql*Y_UxMj4!vI=(djQa?`wnvX
ze!xHeD;LJybo1B3bu9i`w00s(a`?}||9t%GLrBX>-OdbVT~}^JZU&%Re1oZzzMij*
z=!bMKFK$=2p}{YK#AxMt7T{6>G$Ftyu-lMZa^r*F-H0#Zv(QFV>r79GronuOB(BfF
zv>r@nTB5}Ojv|o;(^JUm|B)!oRqsWHOMz<#PF?pba$9a(*^nW?3sVE?m&{LG44yXU
zsY17n3V%wa*b4E{<)eZh*m#zUsag0nuu8t%JuKqW<hhGH_aZ9dXo`6s?<*6K7{L#0
zBsJ&_t0KMyRt0<pSyz(l-2VkGrjoM_tipT<SVegbS^xh@oCZ7IYuuls!%NL4AU3^p
z)D>WBNd91KH0saC8wsj&*@Fb$U@a<(^OH!NRul#R7gsA9<F6LHV*dAM`4^D<6eF&l
z`K)``E8FqE$0yExmVcG32=J+vOs#H0GPPJUN#q{LN>)s7PQYJfS2qqF88+kv{I9`R
zUGxy5?(x4Qxr=UY)$a>t&&$6Czh}XePcsi){@71?jVsyPP5d%?t=6g{g05&0PT`MI
z&owMrbJR!V;u(ro2_VP1{84LqowTi2Fv}0L^>q0|d-fVWp_e-ij*Y10?S#9i9Ul;b
z-^<A3aARl%*pE|P8Zjy-S@Tu9rZiu5(>z!6A3?ld)Nlnf|8#5qJp50j`7h%85^^oR
z|D*Zdci4}0XBxdbN;4{E!A}8RLpJ*Stlg-CcAo*LF>Sh+!~<B4)t--#C&8Ray|wCX
zIRMkcon>S-kVZjPgXmRcDHx`Ndsw@#9G&H{?dc{P+dBzzF=Q3s4djXcB-_(XHL~}E
z83<W5?h|BlHd*pJ^g#=!7~3lWje)IpS38jF_OtzOvf=3H<?;uW>@}oeFE@l(8c?f=
zRg@V}2UI5<L5Ty0P;ZytXHCz3+j{00UdOroUK4sAH@~MFY!$hS;IpZSNWP6c^Pd&z
z<%U{C9s)Cnil~CS_4H+1c1iYEq^CQ<Dx$eL?*B1%CU7>F-yeU5v1A=f##Zjw$-Wc0
zBU{M6Zw<zhL{uno?;s)})Yz&SB+)7@6kXc2sZ`oWSzCm(OaJ%h`+e@rcfRiQ|9N$q
zx#u~bb3W%h=Xt)*-M&Lbv@SnIwt&&Dv=jO7>FaC;ShA1dBLrFDzd+suTaEbcV#rmR
z&ZfU*yAx1v*ebhY$bLYvo2`=>WXY}sGa9nGx_*v43MNmon*Z|I8Fw)QE!)R&5{9iD
ze2u*2u#H8wiy30cz67QZWR=}<<lQ{Ub~b}8*^t)#SlDXKe}!xXq<!;cB(KuC&DPZ<
zwvX?aT)m?ihBGel37he?F=n%MjnPlnn8Yhz4PD*t$kk@3&H4!(q>;6V97aBZMD~7_
zt5RLf2%GiGVEU4^s&oQ504DYnbuq(j*6YAbAn9*(O3niGIH3P8Ys@0N7-+7uWS_*z
zT*wOl9da;Fve%fAmh3(-{UEF4P9h)cycU~nSCj0rbv^|3<Un_~+lPH3@*~x_(|46Q
zLd*O)3#VIZw#sBYUr>a*nYaZ(6SM?ggFt@>i17DF=n&3g6S&4)Z3(;sXgdV7gT03R
ze-JP^fxDX|0(Y=J^gi{o_+ZVOxAD;mA&F3|EIdwnm}H!EvQ7@-<OQ5)DZZEE<p1$;
ztr>-nj@E}>*L)ElS|J~3|G(a4^8YyLY0_}g#X9*CCwp+BhWiBk|K_8Y$;3xz>mv&v
zdqX%;BYcLFqs(U>^XjQcZ?hLC*SJp15uCh=56$Hd$QS>!pFZX=enMSwqzlCf9989+
ztS+NRkW2)BG*F)yy52E#y-atHuNpCZ_4MN!5a3$B5?v3|&Eu?g)LHtO`tfB#x%qW-
z;74ixg3EX>?9@r;8}0F79hdr=#NtPSCF7Gz6)IId`C44o;s~K!m+}Nm1^W?+-b~{=
zigyG#iX20ZBPWoP$oGh?jlH`;m@k*tJJ_KIPHP+PnS;}!>?v~ilN<v$h5j_M+P6}V
zLH2<K#su#Cv}GdWaq?AnM~7bcssX{j@HLOEMSe~p+ra;12^?q5giqYSTaatM&h_2>
zo`6dLz5uvw3m5pa901hmdOf0dVH;ytl3!&vcMKnM#O5^82z`RZCpU9-XVAyf2aqzl
zr^Z=+{;WMq4j(`==JQnqkHdL56zD9{1br#%a8*55_X~OvItTVNKY!z=%6fu|jJuUh
zc<WX-DOIGxrsU@MP<1aU#FHGVEBvT){O=6lKN$E6@t5}~x;GKbBI<!=IN@PJ1)OxD
ztR&)m2s@QGbz3r~{@2)~J}#M~>7yFaW^(fuA~*545l_zW;Y%+i_wj?Bdrd?`w7^Xr
za+RB|gb>AFksqM>3u3X^_r(yc)*ehZ59XYUalcDPz-&-HK&?EW9v;xIzCd2$SAwC4
z31uKxiXy6Z=ka?1@z?Hq5`1t5YhZV1v%Q-J_o^t<G5ONe<itXW^OM`)Ozl$<XY-kD
zW%(P@4n67w>3<sFDKbxK+cH*{>S*i~W3vBUIEeg({EetWKM?%R1+1fgj<tGIN2B~<
zg6HoTERY&3RH)GWV0$+c6TnefGLe%%kn=duJmo|D8L_2?KfIflcT0}SX|1^G!K?69
z15&k>2$htdKM}R(KS)T7egfQ|a=hB<e6Gc&J3MHy)dSp)#U|ii$OVAW#g@fOAOY-D
zE!o{kwPf=*QVqQpoAe!7Y|@=aJd2GFrlR=QI@Dqd<FKm7;Q|~A^bb-4y%w7su4A!D
zcR>QaAU{>#!u(WHY)j!Hcks%>C1Z2$lX0+LyXyGRQ`>X+SU;$!>)>BNap|R+#a9ab
z4DNXj7?n#bbPgDCL2g+wCunMc1D5a(xLRC{w7%9NOPKnaj4>Geld%SjIqopX2a)^;
zMKBCGe<MTwl5P*0IxdgEQ3lowVi0@o-H|@4X?z8da4$|6rxCoi4e&k2^#~y2c%(oc
z0S?a$nk&F)R`Mf%Q^YtCLh7O~3t8#=uo+1A4h4+Im%V95XXjf~^)X}@aI&h~%QY;B
zu?&j=Rz@3<@NDi`k1bUVe1h_RvdNEKT>2SX(Cl22$fY5sD%Ka6)+HC2UV;?PBbPXe
z)c{O1m%qrx$sJ57^wC`UYGXsETngG;rs(jcZ-gaas9aRQI^-h5AR@zDx!lTE9c-{q
z3iGlv!IX0$q%`_+z@$&@=ISm*&oc$*W*K#9qCy^jo2X4>W+Go<0I1@;LGYCnr!b(n
z^rRMUw`CV6j^dQXRK-!3)uuQyEshk+qd13e3z|gBR}=atd{xXMNICS;;`F09(y2I?
z*y4=mqX<rMDqyJMXhv#N92u5GWaur9J^)xATji3D?Lu-Xid|fKpO)mpAt2mpCSR1e
zqh^~-xa14!%GA${Vgf5!jytK{IXDVd45^4dk<ijVL}=+6A|(l}dE<w3xY*3ke2%~V
z=J1T5xeQ!+rZp4XOPY91Gz5$cnTLo1SH`FU&SkWxrwxeHQdm<5$G$N?Bji=Z5>yfY
zfS`(A0=XQ0w21u)B%O*_93RnJWr%3p5B63W&i+6qGAxhC&|AbH!Hf;|A;j(?n2Mc%
z<VUX#lRkwGldhfgw!?xzOL+XL!vfR!x&pY;lq0Z_Sy{}ilmf(N;@o*8ke=3*3%!by
zGb;m3GP6>^@_3nezrsVX(nvgd&5HDy<RV=o<WgM5o|S=SFSAleJfQI;H12^$88D(z
zl{Y6}X&ry5kHjF8N&6NAq{GQ01l!|IrEew&<Z1(Vb_eCs<#SqNut~$;CHT{<%iqh)
zy2v+1$`>#-b4`2348Gc68NUeQ<uEQM#_4fw+;(=CrqfM>O;JosQpDHr`y0hla%GVs
z=v6G~H&QI=nj(pL7Hfz}qFBXk=$3?@Lg>n1RIGX=*non~YZ_lFbcQumnyXQ8P&3b(
z8fr4x;EUm)B10ct!WF3vvx9@`NKu?rg6MdD*DIUw(wR2Q5A)dNOS12lQ4+ig^*9C*
z3MJyrkP6`K@^C*lkV2tKV65!l#`t%#SD_M-Gw7AQ^beE0bS;p|d1gP%9G)IDr)}_O
z!0jcta4nIFIl=jW3&*1%!GE#A4`95A;96G|khAEuUZfw%dXcUM5?wD%DW%?3impGc
zmlW2^uh!om;NnvW3Z@cr4m~Cb(y#2#j-%t*Hsl0{=-(NvOfU_uVo-pINNEC0!lt@q
z{tKmEFcaL{HQdhu-^}gmt>g1x*U7Q<`y+n;w0<?V-uk`4`fWEE!<yFdIUIMx@wYeJ
z<9@MZ8sB~ZtQB`PO*D-rx*VXIs1cbraTh)<<XQN`O&YE9H!#iSRAT-}LDhSgA%CIQ
zY)Ze3*_5sWayc%e3t#?nw&M{dd3w<N15Rr=8Qf!b4O=_sZ>&amoy}RWQLwv+hv(Ae
za~APPGYW#|aj4#u!zXAD5owLowY89+#)v)?|1Y+xK`&}>D>VpUry4XTNXxkNxlQ9s
z*N}ZC;>I0mE?CSkFf+ka6(;?jt629@(|it{>r66q4r8u*&4A8N)JtWqf@Gmry`;aF
zdP#RRQajIjrI=B-2F-WilwKEbPm-Q+S0Y#FtI}F~(fl&#)^Ba_*%+T9xbjOvj@sbT
zuOzs1x(#K6JcExiNd!M)gI^2oDS``k6;jWsEiE4n9wzv48+;zdKNDPqs)ii1!KJ^C
z;L>$N8s-^%v^jiB(3}Lvl`5ehxb1!13%ufGU$;Z%I&;D@l+n{Ll=F^AbBAFLK?A`b
zu*IyAcENd_+#RqAc06>R$#3On#z^ixk6nI>C%aj!dHtgs%undmJ>%(wS#(12S_D%2
zv}3frvxFLppt>iY(-LD$5-stg#oY{U4!AlN)&W?@Sg=$R2kd7H3js?5c7=s?2G%te
zY^+J+VsXmC?glm=SbYoY2CPS{bw1AQWnF#%j8qbo^sy8-;REQ@%P*o^%#3QotciSt
zz9Ky-eaSYit_%8VIEZmM$^QDvhpxYoW)ahH8iy)iZRBGH5^*SfIxQmIwMZ)*#@<ZE
zn>1SFAUJI%L%{8bwn+Nn4jf`~b&5&NwS7>D2ru)SPl0P~3BH}?QP{4?RfP2}tt^iE
zpSax3GHlO4U6WWB`OH$6eht*6<Gs$1X`Ltc2__lthivfc06s%-0lOpZeZeCH|H1~p
z58JZ@S1Z&*KDWW8Uq^81`XOx!ur^~C?s{|h=Aik?1|JJ>2f+pGi3EMYg9QK5249Em
zF9cUZH9)?x!KHtI;L;68u2kY7e&MIHLF8NlCYoe|e+^Kxb_2j&(OJVbdtsY#4mS58
zy?+cHXGxogd_7AdBGCs4b@64QFJ48gv7SQg!&pxr`XgPftpYYiBDchuSFEiLwy$8T
zL#<Bn*Z6ERu-%7kNpOE+TYwFNGi1m#!T2=>bP&+M{amo6n}k%tiSHob1kT_({i?<M
z1Li@CxzS=K6U1|kcQ7en-T=eGOVGixRbN$}`OjTG_m36i)(tbS1Jnu-@Kv(ZX>>5s
z%a>&eS-yp>viu9%f@G-!U~}Y6OIZ5V5SDJT=0A`}mRVEz8ifU20F+@tQ!OS|mKk97
zTe3Ppz6Mzx9)}{mePjcW4ZUmG{*7@V*y_aC3VFu{mVPaPrJIIS&4X-^!0%hkzhE|4
z%*_@Pi!3*O2$}aRfkS}Kvut!GX@fkCo+>6t-w$V~oP*;b4tP)5KpN}MnCzz`^)0zU
zHq&1@-+jN3hTQzDbH0n*#5W?W-z>0W^fij8<1cZ%-8$A_s&zcWI#vcltmEHsyz4q%
z1)3Lj9EWlIELhF^XJG#!5uHBTB0H>4=?CIdx|v90ktM$Iuyr~$fV1Z<=rEvh7Id2h
z`G`1}tSNjw0*o#<uW-3Z<Z`3uxz{0O36$66CZv07?6k~3C)fp;>*&%RdBKv9elR4Y
zyB%pN<}rw8K;lJ<`4-Fsi<xCHF^Kau3P8IofrEhVhCmQNGO`<eIUY(snr9=@X}gX-
z8@ZM%R1wH{pN#}q=P!ZN&Z$Fpb!VOD?6B@dZG^K+p3(Zx*-lfe(yJR~UdEZ$zMMVR
z(>=Y9fW#;Qd#`?m<+R7z>h$_NwmP|{h_uf(!ZVmmY)fkXzrgrkvetR56OzGzc0=ih
zQbOtOKw9Kc_5c;!XffY`nP@SyEha|UO~z!tIRPf5%yis+0k%5orXpi}Y%^f{FvgfQ
z(phF|C*Bajt}S*v%sv!`NAuMP?DDgx$?gvJ6aAy)<`MLmCrH0E#no-LA9plg;+eCD
zbEvom$B$yH3P?YkY^0lmB;z4g<7M5*Hx(@A2$)G0quo`Qm=yrCO)oL2QFP|68?#YH
z$LNXiF?18mRtTt?^0kv{>g+xanFIlEp9HAp<JfAi_$q{P9GUAx(*t?TX7LsJkz^s=
zLgeZ^GEX6kCoSeEn8_A%m&L@&JTi$dA6T*>9VK_cR>#N*$c;X>L6+0KmhIQr=7X$G
zK)sOVHn8+51eWd|q*ESb!vtPwF~`8%q}wP6wA6xPu?+%RVcF{N_#$LEIGQQQWFOfu
z7oqzs*>AAT4_TdZ`XZ}rVChFgR=SmXIig!0Y<1s*bPGBTXqpAB0w_!@vXLA3>^~Si
zAJbv8QOusY-mJk!y>g%C|0OcgY4jFkrZ1xut_7<xR<mfWC6JMhVgrzM=+P!fzY%h%
zoGavME@W#N(+bkqCdOnx5$SG;-)`fCe&<TIBa`PUx83Ys$>`1ptmAmV+dPiX;ds4u
ztg*+e;~T7FEzLWu<3I2_Vl+>MABZ@9b@zaWz$%eAu%$?Zb7II00{ozLs)5Zol}_is
z9zM2^b*^?1nljB|3V?aSVs!o!#z(}#gu#R@fqY;}LtwDxe+Zy?)|m#j;7q#7NS{0i
z7@W<un1W!QvKSrBgo#BULUZ0}3FHS;1_DF1Lk&abTW1>BiZkhS+|oTj)o9WAidG;6
zXA3Q+5SZ;2qvMt^u?PgfEC8eCTu0wEp*fv*E6)%h=PEeSEW$|LDWlyqr=G2Z5#2B<
z9$VS#{GY`|<SuMArzfy2Oh&pgjzsQ8k2*p6n<0nFY0fNqXfb1IPR2s!76t{-`7PCE
zyTAteleo=B@F}yfd(6sWHQZwzpT_Y#kK-daUSb_<Y`*oY<D2}dB6nH8f8kfJ0e3DI
zYUaG6n1SPEfWbA=@o7dU_vkbN01vaTaKTY=W+`^M!pUwn8S9^}a8pmXS2*b(N^x~J
zp;L8E;`kD5Qy?=6xf#7Ga8m1kI!vY1p>Qk#HQ$~JIHe9kaGC|30kgnjbS4zWmz{$#
zU~aVp-iFi?&KNp9jX`cfud`xE164RIN_jVug0s91i@NRS4D0ZFNE9I>ofpO;(=9pa
zXF^UoofRkKkx+y~%WWt4x`WDa7SLT5q{E^>u`&t+nrR9A0H!Debcr30%(4Weza3xF
z>C`kO4+24a-F|}WzX<#SXt4$9*d$Oa0uefXwk7Z*m|_snRr7iUvjn8S17FhV&@wF#
z0=i>OC}ctB04=c~?Fj<KB*6844j5V_p*&MJte?BIo<%XA6)=i1eQi=>!x___KNqQi
zzgfsCBp%zpSpk)2vi@5?RLfZbhH|7>$F%`JGa$!t58jX;V;yU32V>gv=OMLlI2+MD
zy5#sgj(d&fW!0<qxz`i29Br|c!H$HKw)XkRwR--oJJxwvIh~sVXsiX*1N0mLwYT4i
z^u%7fmM5UQLcpY2OaM#@fwYA$K>A`Iju9s?ITUBc1JWA(i76<~eCl9v6EY6HdPDj-
zOpSCpirtE%SW7g_)Ld^dzk*q6F*=F~6KguNLL4$CSk{eT{Q{ZlBC-hS2Ph4>C{qI6
zU@_Ifj3!fED;6UIu%C<gD;fYZ(PEl`*+m>(D?&^7-#`HKVgxdPZUm(2oFbzVR7Z!D
zTaiiVRUPTWWGbD`ez)N#R&_E+bc)6N24=a72{~s!fnt>>1<+(b#7`&;>+WPq4Ch8N
zVY<B!W9oK|^<YfbkY&hV{4GQ_AZp5g;JD#-o`mdf;xv1hV_I*=FJI$K0KAcML>+gf
z+xuC^8r#H}E+TR~6o-qDO*xL8=F7lgUyHc}Oi##YJ710r!}e~(L&j}bFaxyyl|d;$
zn+c@-T%h3qmLUE>DS-N0Oc5}>38ZcIUSuTpOJf8wU<O%CX)s%eqg_^*&~*To#RwDt
zG|+;I0_sa3ZKo@caoFnyf)`~pk^7NbOc^j+38dXrnDN-(dvP3|{{tFqLB#;|Cy=(y
z`;dtMR>TMtzMjvASWH<kj}b__rZ6{Pzw+WhCIie+Fly$XX~2>+b6DrUG(aO5P&aE}
zE}bdeO5{!)#p>n&jXB(6eh0I{V)WcVm{`r6MMbXzqh_uE>4pR8IX(Yhi!7(<Mlx6h
z5*lp8p!V(ik^6A?6jBU3VmZwdp=!z2Ssm<K%9-`G3SU9%OJl7V(^f5C>G;|1@#Wk`
zAq!_$Tfk)ingG!LoQ|xH5j>222Wwv+`!?8XYnJ^QUjUsGGqCSu?US%?hP}4s)jI#L
zjS;{A=m<b7qd0(p^n|wFHONiauS5LJr+e#lv6xH2JV8%r>lJ1y_UjS<Y6@J>{@>X$
zYYeC@%(V5w%sc?VTPkG|fY>$Gz6SO!u-CS`7I_f+i>3)uU9o4{8tJ4pyOVp$x(8bx
znzNdJy`N@M;!Jl8b+DA-9J*8gbg=A(9{U98UrBLw*IE+I(Om@zZQ~;G5QcuzS&-;~
zy{cIP`@vLGTlG35bTfbq#9z*g32~;E1(gHz6y?;@bb)TcJ`?emGXkb37`>v>3e5Mt
zv$tQTX~V1lyB$Y&P4~T7_UG&^u`vz;FqMO2JVVj8yB^VNe)`t$%LI-fQAZoWrjTJH
z8@LU)pM8N-e1T2&W9skBD~hHzupFEqu(sg`Jc0KT^c7#=Ac31$Tw7%fPTT5(9^9*7
zUi0Cys8n+iCVs+I0Dto_4$^4R3`H=-_c%=8W;U?!KN48m>_eWwZxZw^U*I5tTY^(B
zlmr(VN;7Du%Rr`M`~>3fg%m(7EGQAsc6vcOoiH=7e-iQcLKsYIWk`ZSFn{|J%pd_a
zv{0@c&AY>8k#Q@Vp&XndL+$U6BGF#hPvG~EXr<H(8RNO_I=FIV*cIGzaN7AYJ-GM5
z96*W@-ytBNfpHtlxEsJmF!mA(L&*C$!}r+mxSb6w{0ahVN8I2E{1HJ9I)QCmt^Wv&
z+uFcA05<mp4&XSNp?bkkI`88EkK5b8!mlE*Hn4|1fj=hbr{2KwL)_3+00We&1HjV&
zwOwxY01pBB42cF*rNRJ{Ea9HuTKNb^+_}x3+AJU83@ROP35SAma6f^ydp+U_{3SuZ
z@-dE#i!;@1;Ev$(_sJoq%0zG+CF-mi-(xjpRhzKzXZuor?P%LPfxjkbmM`NJGOhuz
zAeGX_)Tx)dDZK!0AGZAG{Eby!NHcLkzB)fHXsTO#?I)eF*Ix1>^19bv^DjW=*r2Ip
z0oq8q0MI6~2YJH_u=YvV*R=Lp|6Q@y+TV-39m9TaYS7fdK0pGUu|G$}Gq4@|1pkXJ
zr^5ihMWQ9tTnEX#HbAZ8Apm<f&sh~a8P}?bt_nSlU>|+lg2w^IuSjn<HojV@E^d-o
zSNTyd>+Y_|lZ0!9VhFk)`Mti!W+^sWJl|WJ@z$muHYM>q8anrk;qZ=4LrYP*cAj?h
zP;|E%amw;=jB;*(18o^{u$tn?>0_SS`~ZockZ5kZ@RLD1ma=40!7YW%CNK%smvn3K
zCEepX>O~8E3P(TtoP}{##%3`BU|)ZsJ*1t&dLBDj^0+kkXoc^gdfEZb5=Rww?yR8_
zUy2cr1BeNw@<0CRm{Arz!@9<rxhbwL5uHjs4noVu@Dv4y<<O~8(yb>u>7GQQ$^C+p
zbBMQ6DI^!F08njo9l(CR?6k6@0B4FkkDV#vae463?3R$7c8lK#r0h7nJN;7$8y-5z
zg+0kt)JsjuWCFpLlgSpqm)T6Ddyq_|d&-l^?>PU%mq`#JmBFEQDs_*oR26jE9dxO>
zhQ+AM)T_t`81f&x1M{EBFL7G!tj5bNKpSs20NQllK)%QZ<k%-;A7|~gZ*|9BTh@N$
zyBPLK*c<GtLTDQH?c37KdV=&cavFeM;p<JECh(YXeWfSN57U2h6X(tvk+E?mzjbgs
z23>IQIJVo7GqLf`eJiq3`RKGIs%@qRM76`bi-b<ZV7hlq&=jzN<^t$NAT`r7$glqq
zD1|`r7PlDOAaI(x=a4^R<3cXOF>$64K$1<+O9Ih9WUb!|TD?lPWeD$4VDzu8%)c}K
zY{j98V?%y(33k<NX@5?3tGa{0{}4DIO`xjkOVIiGy|n9tTWv7^0h*_<1*@#T16E0&
zMK1g&7#2)PQHxcj{spTlJ&zay<sI0V6tG3~QUj(5%I?BY_vzwp6!W>)Msdcf+l=b;
z;1xcc>e^Na$GUZ-GAe@Y9wZBA#jL%OsEfT4(4|WD1$_1f`;yp)R5Hy~p$_gm?E(;o
z@qaN70xn^LDeEf;ri`_;6s({xSeU*zgT2bGL~j`ECbCmsf;&QqHAjU3`M;!Ycz5XM
z12|9J;JMYt4JNVTGmR^M)YzQHsWz{bIQ`guSv@Yj_#|$%5f|rv8J`U>qNo0e<!)|V
zer024+2<a+(Ih66X<WK#WAh8{YEX}LxVyI{Zy1x$ZlpNWsFQ=um!39>TWy@R>8o`0
zNMB{s!dLo5eCg{C2K;K%pN{=x($|^eCFJOTXYaY6z$A0d(p!HCZMy<~au7ju_xo&q
zHgA47dH)WmCOK^+@BsE_<#PZ@02BMW;y0MzELk<uRLH7{UPg{VRx9EX-RLmJTzS~u
zX#TWpAHc~<$jZShNXb0O-e~@?WH*Bu2w5ffA)-w@7uiY7Ml{2}E!#POZh@^%Tl<jX
z*=+MB>vqRvlUVGdMkRh}Wd6dJ8tQ(09m;N~w8?xXGk4QWF^L6lZd7=s{^}D{ay8pc
ze9FIRqxgP}%s;jU58`eWH4v@WkqUX%V6r)HYw#$TLDWDs`$(_J=<dL|ijZA{8_mDA
z23n0XsDW1EYsg<>2d*gc-xD^)T(D#x!pD7(75*(mw^7Z$xkkQR?KQ<5wrn2*G#IwZ
z?jWK&M8+mN*<@L=3&7kCSzTb?ME?FSvb-kbX|J2icb07iPVR@T9K3_*Hl?xG!cFE|
zOZG`HLm;c{K1QnKNp^}kYRQJQ(r3d~tNd-`JZ!a5mO}C>t=nExP2%|v8(uiu&>X=T
zSNViB`1&H+UbzyR8l#`4nZ!R<gc|;}y`ed7v(CgpI$4Xz`$&yEvz}^>*{rvN8A{fw
z(kDpOJhQ&ZoU~al1rsLecXd#H5758=L)Ih|VWT%EEZGe>Sq)j?KR{~cNp_m~!IFIz
z%rM9*xlfT=x<ym$hMH==ciB20gL-42yT9pATU-y>{;;_-KL;zE=-H=wEI*f(&#ot#
z*ZMl&ea#mX;hRn7h@kn&5_kau!yzEThmcU+JPAxQKUxAW1KJM(?OBQcO~7PA#0`9l
z*-PM`tq;9Z{VqOO^F}ujU+gAv@y#{J!sFyta~LP5tdlozasVe<ih&#_u`1#5G2O(C
z<iqsVhhEwI2p?J@)wJWkn0GlnPG*=SoE)%D-p9!&I8npZhRFZsW2Q;Q$4AzOKGpx3
zZmyw5xB@3F|0g21nNc|T&~;)y#mSfW&|H3o)c>FT+-}nFQ|DtG>C*58j(YH9);5xp
zx!du#ySntJZ{czgogH^*ra9<=_lgOh5yR#57`j`{rygH@WBR(qz2QJ^5;r@};%WZi
zom`KSzj3|=A0O5MX_iUczrEf&AJ%)X(0ga=eL{d*92%5s2G7A%u+Nd`%{7`+Jl!9>
zCDICMjp%mXS0Zf@=Of$-o=B6&g}MF1$qfxY!)a|EAH``Rdx{)>f#~+x1F;>9toE(c
z0+8)7o0HhbU*+6f<M`YLhwxPcf<NJF4O@%+e2M5A^6f={cGXwq!_#K41zaBQel_0N
zmoNrg0`M8YSG90~UjYsR)OmV6k};gW?|@xNewW?7V|d{Uo3D|t&?i`Ymu9Z+8}y#<
z`|}bm*(6(s58xz-!!JD!SKv^fEaV&XJgspEZK&t!4x<+#V5S2<`Dzbk<$y_258cYf
z*!+F>CI+hRC)4VGjSp4#JY*{l=C~jS-y&B7P65ovKrbpUz2LdI#w@cJC-_JxMj4VR
zD{$xYK`Feg8(~cSud#7`Tr$EtQ%q&^(L*9A-CM+W&S&vfSs2#6%hF~UDb9^3u!*SY
zUcqS{5|z^q1Qg-#5dB4qPKX?W%?={WJ`bj&2h-V$(Q(1c>@^Rlvj^103v!s}5Pua6
zB~2&@xyxu+RWcPDo-bZh$xBGE%RJs+p0VFaFy!3r;y~&8A2z9fu+YK2_20mmnyEO>
zRx<a>@(A)edejNh|2!b4<P&&G`xaw$sgA}@Gba0^NLPsTKy<@URj3HTe^|~=Kkf3?
zZ}OPfp>SfPbiI8|>b+X%)xP!KbTct!a1<aDIXQ;(!~rh_>P7O4GAfGG55i==@{ycI
z+jI7cxA9d2QkC$vmP*RcaYW79%MvQaR|vw7&c%7-3b~fve!yCKRRFhV=?QoO=?xH-
zv-HA)`1=vqsampIMYUve5_t!`mY($8SbEZZk9e0J?*f}XIxfhemR<md?|U5HheLsW
zK;A>Ir6-4vvh<|;5fLFiyJGtClTU0pWiC!dzN5@Lip_SD($ZUAy3W)1&{Nx8_;_WI
zi!bmKPWq+qzMXGcmH_u5OLqAA<<4;<F38<31_sSj;D9B(46Z$wCJ@fs&a0XFXBcBJ
zsIeepa(5c(4<Fr|oFX`{5pCe7{2lHSG}~PsfyH2+hsP=G`xP_=_4PqB)A{yXT*>sQ
zvv~TN**|EW#auOhtSHBIUX_#i&&Uq+iA0tD`ChKBaDJYQ5Oy@<>bR14+|18>K@;Iv
zZ1xiVd2r=nwhG)qyFo<lFvso=^Bgu6Sn5F;ebUo){yR>G;GDhNaom`n8GZOe5tbma
zO#p(*{|xd1`ndGjlZscl{zZOeTM5c!Fflc(+ZGJO-9WiTult1W!tPRA&MU}CcE2F9
zD~1i5rrC%D2k>Vduw^;}Z&AkADdSn}`lbJKhdYz*^`43+F@rB*s#$su(+<p%O#ehi
z;5ODQrBSAr!Km!7P=Wi$xHQgz*@M0uzNEjS8?V-)yMXW<!ujCo3jVaK8Sde`2`7=>
zd}YD%y&n?0;QK3}e(8H>!&l|c#4MjPO>;~#)1G1DZzs-WR8{7`Asf+a+NIw~iKHt|
zT~i1dd)h;06!~lbr)ggc?vvcp9{o=A9P=<n%zIEqpEB)oPG)8_&Y7<4X%8~(n=HW}
zu&oC{wZ|XGBj_<rkp7qcIXh)xmdj?wD&t>c=NOZHhx}Lbgoglnu^`n3N?<LHdYv=C
zghKAO)_d@07yxU+4^hCkDB$0KSvSs~ZJO_X>69g$J@IJ+f@X{5ND3wij+*qpkZtIr
z{WyqXNT+@ri97Y9-Yro-ZpBWy{0!f}J-HMkm&X}XE*d-U=F-_*O(f+q8Ps@ws%WLi
z<*}KZt6RDs#0`+kV}O;*26EX=F6RlVTo#bPU2ZP9n<8wIn8YW6K`8hYu#PmcN|29`
z<ABC$<WaqY<_R!bQ)>QtG>mfPoZ#f@T~lFt^j>V$^e+@OL(SzBOuYU<mNTHPW5B$K
z6X~kp<az?f?z%MYksUNE!70z<1b>G-{{`1CJuc0+NaM(D1*R1V@dTtAws9Ag-X*w>
z5dII<mZqf8H0P^4kbVD%`@pNgGoW}VcZ0JuO~e3Mg^l_fn~*s~gDa2f1ern*&6SGY
zn4f`Oe18FB75hiHcc$3lUIN=2v5i&iIP$$8j0*T8B<fQ?EuJ`J4SF?;^uN(8arrrU
zb!1=+&U}sU5Quc=y9*X{8qfs?#A(Z7d=2Jc!eG`}0+^Yq5KwpIN7kY*3jyf|Ql0!E
zepAa&gn(Lp3Il5S2QXB-sKx3t(*oB=Wan*{47zx|4fYxY)5o}N#mX=bq1PTM{hV%G
ziqKt(MEA&TRJqO#9*3I<o^dUIM*yJO<q3jEXct~xr(GJ)y=ae|YYx-eX*T{I;#477
znHNIB=+&*#?<NQ7Y9iAK8M|BOnNdB1W*#`5oi~H~Ja-F4_sF@quB&ce&31HU=*gis
z@~-Q3t=eoRpM1_{KHnrmbS{q7cXIq0Z6`vtkeLu-4zk-W(vz<_SX*7!KgU+r^g#Zm
ze7w<T9HvL-W2}PJrC<+HuuCx3KC%jC_dEMYeR#QhqO%6(n}rtA0>}m+MJ;4Cc5B_O
zCkk;>xYHz&!UACG>{Mu#rPhj61X+Y$wU+)7YAszuWHv0KwU)olU|oL;%;9T;<}Pq5
zNi%S-lcsQWklS^GYSly_EyWCKa<>gW5#vh<F3!b}#WuL~j}lzECdiyTgD*6D34RYa
zy};K-!R<sxN%Cf^iSS}njEN4JC6={-Z@^jsuR!J!&`UZD>1EbdSL!dY)m6GGGSAvZ
z_nH*uaVfSXNtR(#O>Zn~qbZ3@J(;s^2f$Q1EnEHJ2`yVT6=&Jrgk51iAE4cHQ72Nv
zZZbAHF|k`ZpOXu;;SU>N$523aX~fY#hRrneYNQTy#U&5Z7NxN2m)>rEe5E?M+u<$~
zM?2hlGXF~1_r!h+=5pFEJq`25xy=`u!|aQ<SocFQ--daj%%QhQo#8Z{54PCs1vbON
z#sPZ*SOp8)qKq7vkKEndAS!64Ti7&UPXnvuko#%DMY$|7hgtTwS@SuV@4);r$Ncey
z>^iw~xyK}7J`;1z)lJN6IZCKXl}2WvPo$%z-$pY^*8*8cp0QWLQj<h8-VRQ?P7vIi
zxtlS%>)aLHVVq?|@)2e^hHkN$jWaDP#~Dp05{;0hkf3d{S5=S(y~8Floxq7CqB$&!
zglr<yi*k{AoJ^!_jog)ICd*75nar`lJLB*zf(zFaS>_9F2tEp1o}wf;4Lizh!W2vB
zIhu3z7?!#&Qss~_4Crj2ftH*Nq-&1cisKk(gY27ujJ6K%fKYidRNIwD#-i6$N&h4X
zN_Q2q$d_PfGbPLCY~;&LT#ukh1*r1&0JxtF1#F2d_hlI7Vm96epNnk;f~$EeBI9gu
z>7ORJbjiqKUvOf#u)*&&$=!qIdVp$_z5w6L-6&ea5k4O4_C$K-y}Y!Uv!Q9|9y7sm
z7yNCwD{dQPHF3SIkplM{Ecd&xtq6CmpvuTZ%U$|s;4WQP<ZgWUdOT#guP}$Z1<j2%
z_+WtV5M02v$QobpAi*cu;7hTsL~yOg%aJKIxb)8wT)JzKB^L)LwzG?`G-(8%Y=e&k
z_%6W(Y>%w<1<&G4(+67}BX!d4-8c8sXt&5|rnmJcn=kRFlW=Dw!}@b}cD=Nj!ilUO
z#wGdv7mQnxO#s6rq%V5Z3DV~uK%4Pkn-rXu1~8`MqQ>GGlYIx|4gyTZW`hk>mNUnV
zX>6e%-|l{4hy?muzoFj&7xg$Ui{nAovBrv7$9m2p$CGfp$vVCi$HT)cnEsbJ&%c5=
z9tc*6oCDjMM072yh77Sjr7wt2={VZ*PRPY<ohy14RT*qSe*h|BL7e~!<0Imp0>BKj
z1bzb300KI=*Fc6^U()}<Ia#{lI$+;qoq3*BI?htAi8CWC=nNnO0fDYVgi(n-&)yu2
z0W%!TrA&*S5j3CybuqpM(Z!gZDf{J#ENrjSE~-Y-A+&GIMw(|vVx#RuHeb<5I;3_-
zw&5@yhtVTKfb5g8Rryb0+mP&aeyoiI(W_Ua|BLLU8;yi!<&k})E8keKptFGTk-b1;
z5Mg3vp8}?XCGa+w>JZRrtuE3DJ-ia6e+B}!kwfN>`241$4Wz*x3<{8nge|!zZKmaD
zxDlM53lD1LG+cmyU997ez`l~>m^%;WH=V6tjlF99j<tT3z|(psOeIXj@y2<q<@csL
zOC+lc-$DSaQoaMY2ASvtR}blGol5^4PNf@%%)gkd+ei57ZZQYJylyeq3lp-XeA;FR
z5eF0i)Xfrj1JIf=?h~vAfUZTaCr;9byU}Fm`XbRMPCM{%3;v?bwuvM%@Ye&J&a*m%
zx3#_I+y58DI4&$Sy#T5K1$>VN)XBXk@|-2DYe-{)7#gsrCH*e8HL0486^$k6)qv8!
zKn0|msP%s@PPCRibL=?F>dZF|Ea+oEZ(7ie0EKbqFMIkNOa_>ymaI<u??bi;_Pvqk
zePkou_+r6HzMZN$eh=eXu+=%F8PePamVOt3rJIbb%!6!@z|Ab?6EOQNW{SncB5T03
z)cV(nTxXVLoXB-vSw#@<i9D+nUk<>Ip@8h>LR9~#x@nDG2PEkqALZ&=q0<RKN6es|
z=iHb1rP<T$oLz7fA$+!Sv+Eq)a&sllH1~3LfO6`r+8=otlJVF@=RSjSw!v1_d>`A|
zkkpx{719p9=3e@jsEl+sBlqiBT&%f|bmH^+7W65gcP!{u7Zh{u!(iH5vO24N09hSJ
z2O<$4*%ZiL<&b6lAHcW{Y<0@H5=pXwrQbtf>82y=a1xVj$2b$Pm_uORvzQqc6Lapt
zR1*PR|33g!7Xmuzv_q;|XVOP-Cf#i0fxHM@%}3`g<})w{EM|_y#3B#|Qyq+RcN!gV
zKcrk&0T_Yo^_45am8zE5LiR&!uYjyhMnR;e4J`dW0!tS{9?FAkfWUPu=5xKk_Mru(
zSx_vtSsnO@JRt2}IuLh`+12;PXy6s*3UF#*;XWcs9imf^cbz1ue|86RF{z6&NhTC!
zIo;gPy-GccVhI<M`i$vIJsP`(F&(uABV%#25jo&E<h0MVq}xmE8-q)_ry`oIp7mP^
z?Au&Pb@h|q{-FZ-O+)KggYPh?qt*~)0uCQWKD3T2;CM)wi`8G3yH`IPH?Y`}V5dW>
zBe<c+G%%YH4=HztWGYKpOcgMp_XyYtz%b-y0FNM^fDR}KC;^b>|5wV`hWXb?whK}Q
zz1Ft$h4Cfba3m8yno7?h)bSO@S80p+Q}dr-K{{3mGzpOBfa!n&fG!23dDN4NgUq9z
zP>e_30}~G>I*k#obY(5sx3FD1#yx&@!!{AUuAI`(<;vO3IsY*by>vdyR=5?1RL0qY
zH}NxtjxA>${s!<$ve)se2T}pOvX{OHiAbkomHwtxtn9PsvGNx47nsr^3(~Plpjg?5
z0aXH|bK+7!-C}nCN>dRVRY*1;Qz1R;xdHjoSD`Q$p3AV+bcVjbxDgrYdfp4E%mDfX
z=|5s=MxSu#<=HBXDO-&lWK8xtroCXZ{n`eqNTcpwNHd>W=A5R2ByhR)`yJTta{Tft
z9**OzV+|g$j>lTZsz{b~TnWebtY+;#_?zR{;MjmwCSQVWOd`70_d)VopVEJVPw8|_
zdodST)<10B&ZKejS<n$c$1F(aG=Y3X+?4^QfF-bprdc<}J&N@M6OUdmM@ZkEmm{RR
z4S5z{*)K<gs4v(5>{s7(1CT=2;a8AoLPomW4nPW8a?&4yoOB$a%)VS1<s@VvcZtOu
z1#`k;bb=B_357hRbux<39EB}`uK_iMfG(kfkfPR^^q=EQI-QDM%Y#4)zKU4PF)-g-
zj7~+uKp^CuTX(GkDh>#83B@Q~7v5A+6Rt(p(<{Xot3p{d)|D}B!BdeBaCj0y?R;23
z2hsp%f|Ax(Wd?eAd}ZLPgwB6*ror9}YTK2wk8t#Z&sh*>XKe5U?7Ly_Y_-@Qj1fG@
zHU6}<4`AOHd+oB*kdLGO>A!5YDFA)}po>fy05jRowUyq2Yyt2I;@{6xAaxduR=_uu
zxEa%>OZZ6SSM*x0(tk<mrPHx%KYp}1X}N~t`6apnvT%0Jf{p|F(SmgB5@-^({(i{-
z^SdQo4blhL(6p7$MEEn3=2OIfJuzT@b4fG*1ZoazU6V&4e_8_4e+>cYbo_cBM^V<f
zNK>^xEan85Qx>D+moTwN2f+O8kPew(BCKzx)Y`XiM;-_CIpQyM2B5zjkO|fT^C6i!
z&xXM~f&CYVKhrRnf51?<gufv-hB(n{|6QCZ&g=ZI`PN`6gW68-K%T<cH^}dZvaQU{
zH-~4weS5gid^0!^{c9cPhs=wRi8}60XJ4>>HTDu?+D+wmJN^zMe_FpG9J}ithlefb
zJY>c}M%!f=c?RQe5f2#$lL96SjGFH-m=;W<F3A&+@6fCHq|ahXq|+hx5Pm$<7|Llr
zF0<cS&`Ch2El8(WfhNUjJ}^fu>3A~TL#EoUMS2IA?~rJw(L}?LK4LNd=(Xz!WU6g+
zF0vEAQN*8V0MKzj%Jf^vwIowrS|=gLENSV#g|u`!0e^)bPo~+41|faYV!jUnI%`2X
z0Sgo>Qv>LPWnF+wBV?+5T%>n_IfnQ%4MO?{i@89Yi6pA+dLHr;_Q#QsKhun4ZaZ#4
z1qrl|K-#Yb+6~}Dj6e}EKY`KJ?sYI-xZ2%>{SxFe?6a?SzKdN)!za;y1gK}LR{|W^
z`{Ml@b@mP!WaG&7nC0~|_;Lgmegh{XZQt_|9qAJIS+EfAq#{w=M*O8f_>>cu^JhCo
zhWP?(E4Itd>GTXr>3tmG@dq}r@S6y%eSM)PaAATL@deHZ#+d`)wB-L~N+yMv6Ya?h
zkXI<xX~aJzS%5wQqzhG9g7x4+qHDq;<Y(;tSG_E<`_Kj{g#D{bp|)_vcoq9INVHe7
z*S`Vuu?4AClOd*UcQNuhfL{<lF$1xK7E>6^>jcuaE6f|%pYsHYF24YnPi->Qb$5XN
zWTT6PvN;RDf7vyZ`V#<pl|)CW`*@W^$EYLNdS4~UXc=cd!;YbV?B=mV^^g6{9I^q+
zqwh&PZS!{{zv}frfB6Db;tLC?4PYRESr+hH41z)IKgV7#!zl1uquiihp$jl&SD^ci
z{?PWj42d>oQT0ZM|3w>XBTU1T=SzTUkT`%hF%8-_??K)q_<6)X4UzV7<{OK-1k9UE
zgLX|}-opMLBs$;GX~+WeHJFeB<pVU0K-#sIB5wmQ5c3C00kjv8mTP@5C3t%&N?ymU
z6yGWd`yK~)909MY2>(Z4=08lp<w&$Fl?YtfSCtg1@`}x{2*CYhsNGUz-+^o#_WlgR
zklkm=HUv|~M>gn?WB#Kh$zUG6*+h8!nq?~oe=&#JZtwMwy^O#CAK4&eU$wZ#;Ld~7
z?zzH)O9E3>xZ0FW1UB+B1IBMy#ytS`Z=SPj?Mxr=RD6{pJ5^<Py?!!%&er&<p|>zk
za$jUM>$<xeUk&`WBGwRW^(XY+#D<3f#LTygLPv$S(Q`cu3?S6{G42;@_M<Du>`1p2
z-gg38i9{!-CjM$63~N(%zwjG?_qzbq7EKwc=&N)V1@RWvkbreylu!oa?|=^wSojMB
z)?T&76SyuxuRx-HHi8Sb5Aj)GfYA!9r2^W4^{4h!f$<or0`FnNL#G0F+6qu}^E7&`
zTE@)uQSJfiDRipJAc#CLhIg|Wd<LD8lWwBtO3{MuF%j7Nvxz>|qR1lK?Kn_b<=`>0
zlhgZ=XjvOTvLO=1?E)9RiW{EW!0j2h&J$RE4gKq!-lP3%AQCN)3<5mfVFSy-W&&%I
zUF8Yfh@ee8fj5H7BJlIz0uXL4M(hmQ(bsx#O~EuP#QaAC3K#+S9KdKvXHZg=d8f@H
z8E2z?Sp+=2Aksk|M}6vXgx-3=CL#P5lF-hb?vP>rHF^4LRIPkj1R(PwKwUO;CRxX2
zL+6nK*m*A-!4}*c9Xo~svRmNkkKLAdOZ3(H<TRg%WkF1Tq&R!~NRh(xILzaha1bCV
zImkyDwIi=XqVwJwGHsD4ZX@^%h`$U@RTQCfwu*bKuh!_|@uj^l^nk}%I~-l*bC!a$
zOn~f>&UNV8UhXw$6Szcd3F0J>$<Mf;3GZzeG#S=jJ9JO%wK;D=F2C456ix=P(E_wd
z_5z>{@-d{^#enePwn4MO+G}&{jlDL;Cz0wg?9;G+82hRuFc$mk+PWLkdhF{IqW(Ht
z_U6F^wlvM_Wjs;hhsk)VpL3suly-aw83$Uwy>T$myQyNAMTJm1N6Z!iGHubv+MG20
z2>2ws%dTOM(F*De*;cw>PQgf<%w~*u=xj0zJx%l|`jY5};Pt^VoL?~7f=>H^bWhOH
z(mjYo`=Sf}x*|~`+7Apwwt<VTt}uNOibKB@rOI<7*5fIUHkOBw#yE&k9p@em8Ev^k
zIyhCOFStRzsswyh33?u@DjI+6BsctsmSr_%(VFZ|P;I?D^>eE7BtdxSROK#DRh~ev
zsthCeLu1@8tlW!ERgvx~s<IkjCK9bm4}4#nt18TY2J^cdV6-gjDT}JI5}mqU7nD9U
znJy;Vk@`6Bcl{`uY=yPgj@K7^?RL*1En?UozA|X;lRfdBwcQVZ*7XZW`xt<|09FBj
zSAw3!M{kLt-_xyL-TmkgJ}8FK<+h1Sjw|1q1DZL+Gy40nt42K8mF&PJn^>U?{@00s
zeG#pIp*-%#@1<QQ+!KQirb27P7O*yt{*cwa@gfq8L3R{mS7Wc9x)u8=^pu)&BXSM)
zo}LOtmz>j7hNfC)S*jTmVW~#kfOG@+UnB#NTniSy3Ci%_q3-2)H;PpS^P2}4t8TNe
z--Bt#=O(>+KN!dT5oOPb#JPV)7-#Ek2D5QCgACM2k03pBXOJs_@Ri&_-3C$u4G1Li
z+RPMZkS|UoM8*#SsAfx5g^X3E&B#EE|0`ny$uzK<DrG!xurlHuA0!#9rgaFSDfNEe
zxeC28`D;%0wH9C5n70J9JR%gyBz~H9*=L!TnJ`YZt{336cmucF0~WL{ebkQlGDCRi
zeE;5s-e(d&O1|vlQI|OjBlp<-Ch^_FmEVuM%*?}W4f0%y+eccuJh#GaBpj<jLyIaY
z{oNgWA!#nAnk;os!yaxqwDwfP;fQyx0+jGh>~Rs)T-Bn4nx}2ZP;CFJgh8<L!K#GU
zgIz!gxl)D9qkuaA{*M}Z?muv!S!i=xhLbJiCj1k~b@-?j&CS+~9}#wJvj9v@NZ$`;
z1iPPVu?HEYtCC(DQbG11nj;sq$}9q;$|%!DJ=`p{X{a(|5N~BN=*GKjcIw7OWT#Gi
z9Jv}>Eh+sez>6A0SB0FQ_nW(IcFS?{DA~!u(?|;7YLIhg&WBlSvs(jZB-yDN5o9cw
z*vZ{z?y+P;Qvof3tuE?MAsqqb$<_vTTQS`vzLi<|?Y))F5}c`v7vU=*r;9y_rN?O3
z)h6+k<WS|;Mpce6xW**D99Ma7Qf0HuR^(osZ>1t4xdWM$XGPM@Qd^PrV6LMgs^DJa
z`im;U{JZmgznOZ>?M|(PWmH6~;2A`J_E8(eXyl^#%QY3N%}w~w!drolZIBiIdE`d0
z)nZLw$gDOu15$TB1Skcz%5EPr1yJnyOE=Rj+1tRZfUGWm&mq0R{BLB}m|O7?65EwH
zc?`C4unU=-7hAK&++xXY05b}*%I+0pTApNAn;Dj@R?>Zt)hgPF^eJRII~#}oHez0-
zb(?LiN!*!P>BYU3%ygV_SxZ=gvog_UOP{)v&&TH84%eB)XGc|fKCP0u4T$nw1>|w^
zRP5cz%slg4Yi8O!9|kj;JXM!hkz4c3b4|$1vWcbvT1}$5Y`=sIP=3gN<>|KJI&-@v
zdmlcYfUNK@Be&&AcAc5+kmdc6Er7<rR@uFV+@2@fwdM|&tn&qfZw9&>;yCOVaQJu$
zHhMk5doioZb?L78Y%QBdwaTQI^98lJt~bfegC-;bI$>>tKq>^p_;qA<o&?sJIhMe7
zFyBHzd)CDNCSY;`KVU`?c$D>_&s2qu;zY~*CY<caI}4AK2TdAIQmm6*I5~k2Ek6C(
z&Hv5ELnad+V~%mc=HlkO?)d{wv?!+Iq%4w0IXy%&%wC*~wnX+p<P<*CaI^krAE8Wh
z7$;+`6Fnh4gA?_^9GqPFKM~nr;#vevs_Vn-$HzH*Xe!@8!vEz**Rq`X*=Uk*G#*E~
zG`xqS>Aaqx>lcmW*ztn~+89H(A%-r~jPv-qErzf3J9ftxzBae_hhlho&|L5FbVm$N
z9J*ax`Ukt&SE|wZ>u%r))6q)K_s`?QaS-2R5*MbGUzAz??m~C(Ek6O5wO9a3bbDX7
zT5nRZ=$$+6qH>Fn#fbjM>JsE0WGS)?i54c20>y<X^yaA*Z@?!nI!-i)a9WN1L=N9V
z?gg+%<wNw$%ezdgL-y8ryfxOnWX|maHub4E5og*2v+(s~OV`hSVOc0EEP)#7@9yDD
zcq67f8wno4G}wZda4~xuS&5m>?dy?XGd|XjT}ghIUB(#K<{j*&pii*)Yn$=1FS>UT
z@67`CL4B3=_W(Y!n)1gDTz?_QVI~d*dJpNt0ME4?LQmCmb?>7Wp;vKqA3tB`r;1pU
z{QJ+L^~bxnS#gTFGPy!;eCQS2pCMb5S_*#vxgYGCVD}?#E!oHPs*#*A^Pc;3Y%=|D
zf;!<}oLor}c_QO{#D92ve$$sRwYtVq`sCo98D*~W(J@h<Z-K*gB;M+3LAo@!EAs7b
z^(OJ{p=6U8fYUl;E~jk?D8?Tk%VE75kwd^H8boc`--EfzgISXYM#KhrFrlCawAKgm
zG9L(tk|z8KxjK|orF;i6?;-xStWAdR%;U&DqHy$98R5ix4wqjRSK;15_a;>sf-m)w
zDb90(4P)FaKP1z^44_V6z?|W|glNep707RfF;<uAXzVOwvi}HK2a$)63`7;GL-02z
zv6rUx%?UpHaAGK~d|IKjr1C@EL`*^a)Ei82IXQ@A;@|+;d>A96x;Xt|72Db07jhQb
zti6dNa8?6SC2;l>m6V^45jE=uOX!O9<b5o(;h1WnmBuugg(kC4kd2r{7h0epUp>H1
z#gW~{F|N(0*o{Q5g(iJ(7MgU25br`e2=z_YpB7rA0pC1u{e>Kdn{X)5XGo9%Ei^fN
zzMiZ59K8sAf}@A|d5E7XYCZC=vy%5(-fWd~SB{=%%3h5RJ<IzXALj@06Z{v*X0V@w
z{SxWLEcVWqzPM?8kv{eDQ)3|?o$dX(bFPRBa*u}wLDK<0xJlTKpFv!MH1TVZO-y_z
z#uyBK!`Mj1<o_$=5n^mffBKd9vgPXpO-GkW;BBn_gvpm;k^a>y@s-l+y%b-#h9d2)
zA2eO8w*%M>$D8c3kgZW~WojhV4w}xGX%-kU-BaCwU-RG^^yHKfN{{QFKQTRPXYu&!
zyP_)#LD>#rGmxFW<@qQsqu=s;jNiLscd1RJNiWy#TWn>=sxn29jYx!Qcf*##23~^Z
zZ=~}LcAL`=zv@kA6zN=xor<#;yP>2byQ9bx+3AFmsZI~fqUm%a9c6qN>4iQ@eJIqW
z6ZLJD`V{DhdQa>`T~%ugb=g(6cDbl?MQUtgy+pS^JjM((%JDm-5d-CLqJfNVuI>za
zPW{f!H~ylqk9%sdiQtC4q$xPn;&FoiLM@Jf*_>YLb#E<_sYMg)R0}oHU}_<|lgRd*
zS|m}6=9V8L<{E0DavejOp^w&L7`2d2wRqCjV!W+IOYBq&mAnzPklhc6?7X!ILcImH
zqCOkj<AY2=a~#9X>1W^`Vr_<>?9Wm5*-u($;yYhZN2G^MCLCK^jt^0)j&Kz01kwt9
zBC(}^pV-nBh2yitj(&p=zh-l`zs)A8R?u7tt~_nK5ZsUC#+nY9s3G=F;5Halz`2Z0
z^>m&8Mu6vWP7KGsF+a7%wzC8oHeDg8I)9I}MISBVZ~{rEB0hr;tp#34bsDPuF+Sm^
zDr&MCl8Fq@Au{w<F-#S&vcX<}*yb^AwSUAm3B4Lh`t>xFbQK5|ZKwnS?(q2AMDUav
zaV7w+H01~^BKY@o)K37lWu2lFr`>PSt2mk1=W}M|5tGKORI@xDCSE6!7VH#K6}@If
z`VYuOx?=En5tp%NWs8YpR%(a`H131OPtf?;(m2ZpUd=g&N4wlS9yN#AoT`J<>Ej;a
zopNW>H<^!UhE0R?r{9V%Gax$8ZYo>NUR>0|h2~l=e(vc~E)L~AQ07PWTxW5_s%dFv
zLi02<&x&Sx(tfvxJ;(d4rVgehfp5cYXDXzOP9wF^t3uL$M1`a)Nu1qzR%n|URWoR=
zu%Ry_^!tSVL!rT(XLc@dlaCko$Crw0nEvkm`0^nNS{L(*gxLv)@ohLmV6JqY2BMe8
z$I}pcDUUZFO(T{cKg{lN$FuK3QXjmkvJ#r7sfuVOfPdLnl?<v<4_l@BG`3w*Sx+j<
zFGxcMl&c0lCRgc7<78i+xjt@^Y6eXM8+;A8GXxi|47i9dc$na&Z1Cr>ePoQYazf@;
z00|6eNoZg#OG3IhoJ5yIezM<*!{`#*L<Siwi89vV*Wmi6a?JtvJ5m}wCJEB_8<2A%
zt6d<!xs);9PYODi0i0w7=uEH|2rvoMe#^WurS3D1Z`$tM;(R<jgXe8!t>bTTJj~;`
z9*z^OV~q`GOn$pqzmxI%j`iCFzbiTZKRIq#>&7rK0UVbDtVLCamidsD`3vyo^r`RA
zt(>krYx)VZmzJr3xu$UtvD%WjI_yuRJbKNd^e;1u(p?URSBV{6)AE<I(>`g^Y6MLs
za9W$Sz<p`grnM_!u#d3+Ig7Jldgi<FWk%%o@s!DgQ$>8J4dvtOo^I?*iG9H4EsQqe
zAf0d-wo11->3&SQ=anvY1(@hU>3s-ML*|*7yV0LAl`SM0$RQvX1WCW<-HR&pv`K?a
z70gwk5wK}Tg;dbLRSop2ko0@0kaSgG`CgtC+HNwd2TdF}Wmg~EH)JPVBDfEtpIeBg
zmqlN4b#;P|!T2hIE4_b^{5H7suMk|isyI2AXYgmtD1zs+!JC51BDioB!F}kImL84<
zj}W|o4Sqew?Fp_z{forg;L^WFaOrB|<l{Vp?=VROF9c3k;nv{3&D|UB<b=%Arl4gg
zqr*L2=K-95;V{f07=+*@*s_;3(th_$C*}yd*RazY?(A>weM5@##R2T#7?9l}645_~
zO;Pk}o@?oZFX)5<`EkJ3et;W+M7t(mP(Ays8AU4;vGzl-|62AkCthiqt*p~*KG<_6
z8CY=(8w>0(utFAA4p?~yRws9~=gr=lK~v1arUE+x>=K9EPmuGi<QGgF<|Q#NKy{Vm
z+0kx-CD8HEWb(f)W6<FuA94o0E)>%D;zA)^HRLUliE*LGz9q$J*I&p?VRnM4^dX>l
z<QE1KQAz(sFIQIw{nt2*y<O}yd#eS_S#a7e+Jie1t0kyBNjYt~nV-xWvk`hOhVpsy
zE6%jC<m}s?E{W=pI06ZNWcPM});Z^9Vs6DYIn|xYf*Ad76OsNc5|OSR@{KnUz4|9M
z`JCR^WzxvxHygYY!0!kyU@hdRFSsH2pEmd`Y&)d74N(}|KWuR6-zK<p4UsHwaAG^(
zj&*~-Xp#v2w+-GM;1PlgSQk0w3m)eA$zL{j8n#!bx@}bi+kb3u>E9)|bghuXzThMt
zy|c$lCar4FoCl~m>;v%om~;4oGhGeRKlq4O7^BBA@`%ut=3mQQ@S{E5xb=zqlaG54
z?iZ}B&i`kz)!Dxha?0Aux2_BZ+r!pYr_x`r)rqu8Jm>zOea2b5@|A^gNpRm_JRp@<
zW-u;+e1~4wc<BdnDVDA)vKJq|OS}^}D~WFlSj=}|Mq11@7BiXn-s`sm$^di}klyOi
z32{PS`Vlg@E43|#?s0PjoO)ikb3NTtWix{P>Ps?%B#&e3e6u$T<AJF}!>|-`%rcgK
zFpQ<^hP<9fmH`N#w3s7cQY_|Li<u00&mqOFOn`3+fN{RrtFz&+J>7F*3qZg5*rvet
z2eHM`VQdGbx@W<%*nV#VOFxvr()C2%%!6#0z&}~cQT;KN(H7Lpf+ojc8wB*DC9sD;
zJyN-$5BBAdpV0$JkiHDQ*u|X}M$2<_J7oiDtSn=)?~8n7$^D~u;dyL$2{@egEWqvu
z+^-B0@B{1kb->j;jw|E%fOV{~>eg{T>v%H${<Rh4rRwm<v#8kS4vyalpUe0NtSb33
z*lTrvE*&?@BOh9)8YqWT>G~s|X0y#E>&#&a&OWx7H^9`im;n~!BjR8JU=D)O<>paX
zga&cF(X-Tc$Swlqb-m$R80J&UJVLOZsqRsw61Yz+3F*s2Lb`#-XL%6M;*(3CxtLJ!
zO+d9RXpjZPVx9r$kR`AS%%faD^`g&Z$QS7KJVW{id7dHNRY>%C#$`0cQ;_jK&q!g_
ze6HvH+AMXh?r+z*F8;clxI5yLF|KXfW9XhSUqM1^U%rm@bkFClAyJ4k5kIo8cR^Ou
zm)3Sf2;<`z>!^AqQrKr~F#g77y%*bFsm27bOG3Uzuia4kid0a#!N`|+ls$zC?zNb=
zz|;j35@?78P0ro9?rwM)pT@Urb-X>%(>>Y-!Cc}ao5d5JSFqI~P-mIUPVPD8TL^g%
zfx3<9KI}LIh5|BNoJwT=QF8Md13EZKzt$d{UPZ@4vkwBjvDGo62J!}aRY3a6WFy^B
z<aizx$g0eT;w@%Bn0k7iD9|tqnoJPS+I1@c=q*6%6P>Y##_W?FF?wQW4BamCCce}s
z@^v!Q(><(rfIw+qIa4U-+ZZeJS1|6A>Yib0W4qsGA^qiKF5MX9WFDEPki@$d^EQ|U
z786Rfpvf^ZkMK?EcPv{SCBN^<t8LhKL^R3#$nNSe%jtbf_El{Arn+aID=>P`29`cR
zVCk+$e$aC)UAO;%HIKCoJ0%Yj_-PAz2T&smn&^Ve<QQawV4kvMbzJ<Rr+ZZF4yLS+
zY$On8o^i-B|F2=(FO|=HV%PxLZUakSmB7+XK~CpUT0`JxE#_S?O)Tani<umQY=ke<
z?f_Gs_z5~+ro?Qj7tQk+saa(6V^8<|*&D}~`!dR4Uw;l;^~y2%OLdQ8jj`Q{9(98B
zV;~p3LN?+;7J7j}Eg=o2GAKZ2<P6zP1{JV@nsX&<wvh$9pqP6lqd8x+j=u+cgU4|*
z9PhGzH8#om)vG4*t0}MQ`VARASs!lDotGyQ-*y}aal9L_GC2--RH}1OG))1&WSvSs
z4yV%T{CD<ZvQ8fve7$TjKY*ERF*^SVGfC$^%S1#RP=rq2V+ou9G&+@6;{h~BGOaV|
zC*Vvv9nF5vgFp~p85Z*+n42s{M>An25&t3r2GB-J;3S|ishsfvv_v*oXVOo^nRGgC
z{gnrS3_AT`i}?x6G>g$`OPG*nF0w0-0%#K;y*iKvXmrfxe91h5jha(7KlOB<pA5%w
zU0-wRlggX1)s()6?ch{yx`SOCo&O(Y0DXc6n!+yHodKF?3u9_djWuHofOCE;LV(GD
z>e)bisV_WeChtvuFw5zo010ffem?<wwa0ID{BE^=KPf@|J6gv&zsa#G(!e_Ao=V|<
zgSm#+f75ZC#WH&wurhfc$3w_Om+iL5W7etkE$}Iwj&DUTChKl;_<Aw~i19``i#@{S
zQpd3TD7=@;NXa<!1a@351G2k2mFWD_)$(5Sx>`!V&0Z~+TOyxAq_ZWWGogt1k~7ZV
zoe#ofB_L(>H2>Sm(Mrds_Q(qKIxI?m9cM-9#v=uBme*NPx2L$z`a1xLp{Y0klZ>pg
z<fLykfcckFhs6>&oD2caoVsfma_JUy2vAot5=Li5Vf-2C31@_cyWbM{0L-vdp4nr6
zHL}L~lD>_u|5EDMbZH(0!Z=%PL7xHY1_5DoY7!<U0ZvWpEP)Tf3{Q2hnw`L`wZ5cp
zt2?_$sZ&e&JP7F9Di2uD=YV=xj1Dcr#3aD=emxk9k#GUb-TmB^^(=~2d}QPy#`w-a
zFr>jH3~J9GiM-9Gkc87V2oFtbRzOMmq3*8_yW2C>c+fg7K~H=K87)~k9?sHAvyL@(
zgfZ>;*CFrXw;Dd%TEA`Z+rB6Be__S@j^i*{hAlWB;H8k#-kyT|0=Rl^QciEAfSG48
zrNJB{p!W7r$T=`IatCxUK`?VIrl6jGFDH=p@X^R0fNI7F6aaK5AYz!0sMv^9-ayB`
z2QnYMHYw?^f`D{mkV^QOjJ?01!%WRWi}^z5|DG14qnJRk<}-^!#sbT_94t<N(M4n|
z@+YKg#mbaGi!7!Hm=z?dYsEO^Z!opNggo=?lqUe_E(@v%=z9X`Vle?Z52#LzKnBd+
zU{sxhBs4OWV><SIkj3a#9qEJkl1^v8D)^x~p^M6sL8kXu(3gOETaeCv0>!FM3YaBe
zh@Wr@%q%jEHsx}<eHmlwc8%S^m|g?80lA>(|MhX&72&bDk{%qokdN?GEb81mE6h1<
zsdfAt;Ex~^bv&4EpL*PNtg(ZP=^7%(dQYz*(k;iYQ{4>w-DEL8gPEgm_Uj@s35mnF
z5#k}^b}XQqE$DYZpAbm<xiI;#Z-V#(rGS}cF{i<ViKAWiMkF5lW;RZU$I99XWB}c2
zL4N=`L?CUm0u=zzJVu}Zm|HC744AnD(r!8#DTaLu8;FP5ae~~VY=(;o1^)!}If1mD
z3RE0G%NT)D08O`n&VrdwAnlq{kp%2p#RwGUE^D`0%wJ%>B#w4XVM<}&x-j$aFI@(p
znSj*I9|O8Bm7NX12xJy|b+hyxa3-Ce8(e|2$=LhXdVt2f-D181)7N73+&~!T{O37r
zIMa|tNoNDXU&6m4z^6ylbZe0<G~FGHRe^-YQW(>=Jq@`MhXavf*h!o#Ayo@mUnR+C
zyvNsL_zKOj&NMiILG9IY)(%I5ea^CQHp<%P$9^>S+MaJlu8I*njD3o=S2-tQudP}3
z?Op$&Tx>G{jIn@108#;HTfPMe#t2}rAC0}%_AgXp0X?Cu_g17F_Q{BUHR;ZKV=d-y
zo&UaOPPFw3ln9^$;_rz7pj1mN5n_`drma`RuEyS9sQ|>rTl=EekHcQu?sTLh_Sw@!
ze0QD*Fb)7?jdav{oU@>gSxr%R&w}9+aprpLbQYA|l2jb<PiMgiHee<6lL@FTTLC-8
zAecqG8?blo>~N0K-9_oNFV8?K0O*YPOP9fYXD0&Efm6ZXAMFOd5uK*sJbtpMrnYMN
zsfdp*h`*W<{7eGFD=MLc%W+hf_bIT?-XOxP0Po`r-{T;Ur{Ew!P~i(gUfb?WM6dZJ
zP@b^_9*0EzY{Y*GSx<HYCj`JX>_h#d23jsDzCgzJIKbmRHn1ENCa|{Q+dP5C6ZCpt
z;2?o}TU=FeMZjrSy<Pinl<`DBH~4^Al&YU)TpeHwU!{UH8ZNRcm*RUICU9RHSomTD
z)-F5C#to@dlL<P-7dS}Z0RT0xzcR0j0cxka1F4K{f5hJ(DPa0r%s*hhr5Ci*33D0t
z0}x*?giILFAe&)L7+&eiFoO*IJDT@#78wt;846zlqT1i@M52|wiNLoYsGLfv7c$Dl
znIXq{{kI$$HUPMTXSv$>W_y6Q0-BCQ0}5y$JlGO$1a3Yb<ByK3hr$r@KF;txHas2%
zUU3_RaPS;~wI$B+1ip=+xBCJ|U_8_YZUSzRFK__IxXA7WL*=}W13VsK0}KBGaYNd{
z(ma7@5p=dMup#hpfB_iS2ADvJwOcOm;O2k{A*ixT90qrtCEN_$5+C76dH!giw^Ug^
z#u=1)q-87zyI`yxZJsCaT!PN?5sq-r`D7coF1V|GJr%*POO*AmIZMe_xR1j;4%&?6
zpfnk4M_cF#JfENod>N;Z@zvl8QYmdr1A4ie(kgHhux00Ts*}mjxS$Egg)R-64i=yd
zWgq};Ch5qe7=X;OLDR|FYa<zi{q5MUMQ)5?pM-r!Yp?Y`7<;Y#2au`Qhy0B8mI<0J
z00LxC4?tNm*ZN(IM3>V-fOjFNvgbNT=AFUmRr2-<)I4WZ?8MGc(dbm`aRhrFmvEFV
zPy{fzCVkZJ@zp|I@sq@bkgdr)!b^i%k;{nJ3dIm~EBL*;$7VA&T0G0G&3J3m4I8Kg
zy7IqmV@MX8Yb{6V?)CJehojq*;*{l*bT~#y_rO6piX#UvX$9f*E+kr<dm*tBi6*-X
zKN<96FH5EyxW^z92Gi5}l5P*aq+6u>g+!gL!qI&`XJMQ*wpm;aaHzl19@0)>J&&C%
zdE5wmw89TlJ?#LiiK7Zn(V0XgZi*3)1BkgNmH+Wi=Zq%kbp()psXYQTM+cWc5B~S~
z7@h{;uo*g4O1cQyNq09AO>PY_*7}l5A-PaXfa;@80EhXq)6$NX)|n#DV`qwZ+yZ<w
zyG>-L?P9&nj?=r7Tx)Ei$t_Jq$v-96>Nww~Qzkw6-xFjqAMlkn6Y2JmiF8YlXeJNf
z{6SwPL5Q>g7p>G9Td8*Fv_0rrHKdn&oyx%14Djw^r2d&=6ExvWR%2TW(9Sy)fOg%D
zNLn@^$37YR_SRne)-dd~XKh9n#jsDp{wnONLTE7dp|&)$I{F^uegJxjuQzql=TvnL
zZOap6ewaf~B{(-uiIj~qN!CFM4i@5I5!j_jdThLN7l<srw-B&IwapBNsJ56#k<ii@
zO!t-!nrb%C1OUqjq-I)%tox5ZDFmu&aW{cW2dAl9fjkr&=iaX>t2FmZ2S~CBdPyL9
zHxB(?-Rf1ctwY?qakR6Qse07|tvEE9ES_D}!mgSvZNm<3RhJWZBZ0Rds_Iax`zXJc
z27?cFMCLz0^VGCpm9-^cmGoX@(|>~5M3|B~7OP6N2dgTrL^l6F*c7m}qt6$Wof+mn
z_uS2|n+oPPSKzq1&8Tq?wp^U*;`TU>w<5}@2)28WES%M~_DZ4&_DVq4D%o%I*&FQZ
zV;@q<G*?|<u6ep2c?9FUO9lXNz2o^;R_9k`-IQR;SW8R69`gkY(-#d1rm`#1b-c>2
zMDEjj5{og`96f<(e!Smuyo!0gaubDF-1bV|AkiKW;d9$2aYds`R`tHbG{C7guiJ3i
zse!wu+gr?xK6kbehG}*%TJMZGw<~b{>@kUp(=S|zFA~bFf7v9??_GES2ia2iuR)S&
z_}|^qA#Bz_{291XIA;$D5-;I<o`Btax5F_SF9B?ITij+8NQKqbpaMOGcxI0#q(WtE
zg@zOCK`NwU$!g?ojQ^uRp8GHCF{Lfxx%k)vVd2*!J8~1wg}GD*^O`VS1ZXT<xH5bU
zc@9wQI}_|SWi46tS|()GXX}t9V6<qSL}JXQCoomEWasH%{xWRk;34E$z@GBv0+j<K
zvP=Gty7vH!;@bcJXLkWv76E%hMX^^bC<@{L&QhdfHzpbtCH59IG486^NsNYMG$96K
zy;iIkHD+vy8heQ~mPAn_mRO=ODF4@42D3Ncd++mmp6~UW=g*VLXFu)zd7m?9X3m_M
zb;(JEtZ3a@wy((f*JO7}m2|TG$XNneF(F&X_C7g+Z0G;PwrH$qQU$w@`{V$3sUl!v
z%uN6+>d!IfL{f8%i}q@FgKL$)46WT#g|~;77{~WItLTkS2S%gqg3p(1-~8LQYf@#s
z?Q_T(K-)!Mtz+BqZ`=NjO{%IlUL2^)X}mZ{7qMlNBu-K5+5YpM+9mnwWT%3$3$h}A
zDVs*_zur;1r0Nc`{A&gCNE!%R(YhSA-6Z|%J*7$2bh3lVSqWKj?OnpQh@Ag7vb!aN
zPIej?yKS%)04vyb{ad!X#rLcn!|G#_(jY5Zx1Md^za_g%GU;T+>2nQa#nHW-?MrgR
zDKwX@)c@#Ob&|=oLVY8-T<?9v(C_#!N6{XA6zwA1=Bl@8I^lb0k|?u^?R!f6>m9I1
za?zVKpPaX7lIVgBZ2QUi*Zsd+Dxo)NG&wmmNlXz|inH`ml8W2(KboYURg2DidnGrW
z><mEmLKXmN4cnoAOLnhh*2#WG&LGH&#%*Lf>>$etQk=foO-_f|9?89kt>gWtCtZq8
zTH8qQW~;@;ajNN`FDGG~s*ST)NqRUrXc1UOSzH?SNi{rW$)Xcj2!X*65QI0e9TNm>
z0R2w_dnFH@z*3T)K|q{{zW&by9MjFB!rw|^6fUV_h^NZW!4L=Z9x$3YHKL^RMKMgh
z>grm?=BekcAy4XBbZCA1m%M*UWB2^o=>5_pYJ2M#Vu8^Fh8Xb&!06*t``=*L=(z*Z
zB0#)!kj;R&0V2lOk^eUkFb+z)!1z_i5O46C!4SLtI2b>c_;-6!ec<L=y4U@pJn3ul
zJjoM#Y%|-*|CsCC5r-s0Nm+UbjyS3B0w<e0Rk5?f)IKF#%fvBgif$1c^N8<@T>oS{
zC_QjWUtFC2Pca8{$X)*v@;m9VQ~I)hnf~p+D7(*L3s-BnyZYN60a2IBqG-Tg?wUj=
zY+)<@hbb=5As0obuw7!i%yxzCD%($Njt0ZH0u49V`G+ZbU2%N^W<B)p!EDOOK;Y%F
zT_e5@hgUte`9+7AO=?D<0EyrjTRNOaOKm&<`F;7Gn(J?X)rR0n!1lE+0@@0g_|Ext
zoj?b2E6s#Z^TeX>nmf9jhpyy$k$Z;RaBf6J-dAjS<cSGzmSY-k;a3L;Bt{Z}>-rW2
zZ6ojx>^?d`N=w$Zux)2^{sw&mT604O{2UC;gD(s?;pKoQa(1vi$6iXu+t~=4xQ+Xo
zO)zQ%&`o3$r05ZaR+XQ|6lu82pWmR5$#E$Fq#pjCAlsbo75O{aZjl>6ZgV!hdtdf$
zIJ;;+IS7rDqf`h6s*mjfASrY!IgU572lFKM1x_)jg>PI^5g7sC*v9}E0T2X*?Eq4M
zy6845q_g?<&Ew(AgPaRq1+zXa7nq%>CkTHdE)YM%`Zk*YB>80#!SaezQkOrIr2K!A
zBZ&R%l+(>A=gwbpiiq2anEymlJzAU(yFj{BbY)Arq}9LZN`8vPzSzzHs^#BiKcr1|
zgS|qBL>KFDm#7j^AG%xz3mEAk|69+#r<J0JbPYRZA7OvUqPwHVxj;$7xe2|2vjN>B
zoR4sdgk5ZRA@YFjA)Dx-D2n&_kX^Pq{m<ep6HTp#n}SLNeQ3%nYKZhZI6*W-VC-gl
zM1Bwz+y13NVFHw&WU}}Bg6Zvn>VFQj<^iT#fYpYSfkWV1x>5k`VH1P(u}&yPeb1A5
z#!W)Sk>*Nh1V@?(+spPFVMRw8b=>%d0&!52MBD~CN5t%7`vp5DM}8L$G-3Oe&G|qJ
zf%_93PaJ4=SH7iCglBUAZUj&yeaDuMLmX+M&_0edVcX9p2!#Ul6e%M``}wUy`&*x2
zJNfX>_LogG+yeu*51}tt7R2Yf*z0ku`GCEVPTue0&$$Lq2LQv_p7GiF@+a<JW7W+i
zIo}>H%JmbuTgW{CCy^RMTGYQw<*+bGMtdKFjO5R~gmes?N;E;>v`8qzISAGtU?ChU
z75ucWoqric76+&w|2rv`&5Dy+IwckSbCM(Dgx+{juy|QTd_IlcPcOVBhU+lZzGoB5
zY=z)FXM5p<CwfqvB2JN4QK}{Rl!x&Y-c~DSSsZYN0EqfrYN1|=F_l++yj@+)Kj_9u
zMgMq4KnX9GH_u^fsINTt4l1f{FvF6~ms%#|Sz=)8Cm7G^Cm1+RQ_=<O>~hDlXp_E0
zw)5B+lK~>8DYio{xu1Hw#v9ZLm0f++`5s<njaij)=WcYn=boFr(d}oq>GlS$%k2%^
zybOKRNkMLfhL-B2Hi*6Iq_&7o>ZD-Az}!h8ZY9clm%d+4s{D?@{{8Hd?KanOQ31N8
z+LPVfy|*3r#8YT9*gf0TIB#_F1f+OL_0ol>O!&ZO&K}><74aDC>&BK1fA+Bxnqf-r
z%r2DmmWG?9kEf6;9Sqxg&MCtC*_sM?4EEDc4~CuZ>grT4Go#!~&jUZ_oSSVuU!ooJ
zab~iUpYkm;Nz|Ds&v*8Cm1$G~kHLQK`Am7yYV~RZQw5cdJLzb(*Rb&wUwXSI{Ke}%
zo3T@jdVhv9lW|<{O1~@mWJbHB_RgLUN0rB8uwR%tv)$q6P8<zYa%aBjR%r50y;@ER
z8fUQ2nd2LhRnn>DUcFA0#3N9vgxo6q7dFdh40JPkubEg@>M_J%@3?eLtH@C2!u9#F
zxE@19^6FM^?REC3mpr6(^F-%#jdp9{_GednDUZwJG1wQK?;8EqzP}XoP90a_rFW#o
z88N?y`6c3IqqC>OP7mq)bipD&qQq+F#{K@hJRX=2zZ0?g(qC$MZQWPurFZ-8zj(b*
zHnzM(Rd+jes#oJCw5^k)Z4G8O|4W`%hZWv<X<wIa4Q4$0i`To%$cit$(<?b~^{&BV
z++x&G-HYpucXP{~HqOmh(oiOM=2W*QhUNB8+#1;G&e^P-iSMSA{A`i3J!g)Rzx1XO
zd~1$!X4M}`&%U>mqc?^s@2<*FrbUfak~&OQY&XAB`n{H=7-oH{w0O6#GV7hC%DjHt
zlv0OETKjC>scbrZLAhMrrj&g0u`=cH2TINFCMmI(CM)e`&Q?~f`&3D(FjE;Fm8DcT
zy+Zlsjm|b@ORX=I8EY;nOTvaKbz|2l{g!N0&h+i4qzp+@o_{c0+1zf4;_Wh7d27o8
zrLI|1B4e_ZKjyDe_N>cMDn^;C6NjkEXxC4allMMT_KY~8tnD{j8PnFLtf{kCX|toJ
z^6bhvWnx2FxtR8ba((PVWz`y+a`eGU<$m9v75~+_6L+~84YsYD>sebGo-0fHHn9#~
z=wWR;z|U%}a9mllvySyx>oQjN*gMMUy0xu7uH~$z6nCrJN2RS}v%RgK^fFlo|Nfow
z=ERoP8LpMB4L@?T4teXbGGpd0rEZUh%A}#CtiO)Gq+EUKYYq6Nvh{XFv(@8!Z|nB*
zKGu;VOIbIssA{d%_-C6EnpfG{Z+02$^o13yb9!A;T+T*Y)sPp;&~NHn9}K8tz15<Q
zHFi#UYtYd8))%b<tPQ)kTYsMMM9FT@*m`b59qUJ>Yg*qe?`^fZl(QzzDrYVAd7iR!
zV+rg14_aDHBV4UnweBd1uRl==ch<0W8|7~OzF8@2!~15dwN)MK_jPk8?r}4^+JaZC
zS6)<_p@i*TsZ6-FS=sQ}0p-Y@MamENHz^Yu&s17}IZ5eTGfTPWJw~bc=tCvw;OEK_
zX|6J-dX^Hh{|jZ#xmilitdErF(=Dx0<+dn?ma58`jjNQ6*E5vVS((a_O>>lew?`@$
z0>4!DmtLjJjec8MUh9%_^xy@>{h{5aOrQO+l5%2$(r>k0abN6i9e(3m<=TS2N?60m
zO3tI1%E@8#l^yG+DKj?AQ+__RRymhtS1vxCulzRiJ>_&@zA|^j`^uaZ%arZzA1Lh;
zzgK$Io2wk^IY()JWPx(E{7fZK+pe@W_g4;Io}&21Zct{7w=3nsKUBUrvQ4@5dhWyn
zVk}Hv?P|T7W3av!)6}}zDp{vLtY=M?PAOSuJgtLTnXNmXxL7})RNLC`xQq3@wm#N5
zZ+lr!S)VF>!zx<eS+_~~v~>yVoy>~X5xr|#-(T=TiRkf>a(uJV`o*x4*5T)VQBsmB
zTfOr9t%D~#Q>r}pU1@j5-MVw6pY_91^{fjnZ?P$@M!HzPO*30RxK+;DrPFW9{%U2d
zrr_gB-`<t2EmO}amk*S&j?vt#hXehr^GcSnj=xx-{L$sHa{oy=>*$#l>!t=3tUY_V
zTRZJ?w=Nh}#%gbTQn_`lnswTt($?v7JgnN&n@YWrr<MADxL8BVm9&QJEMu*GtU#&v
zd0p$%Rk;(t7rk(N=^dq7>^@~{){n};^z+Kz<mXE3A1^A)FI`j?x*k-jeD|GFG2)DJ
z$i7D@m-(ZzukJUB`P>oZYU&=P@!g%u@!E_b+b@bWF36hj_^z^|##hS2PCFFW`v;UY
z{0RA1bH7$**S(^|eDGLV>UUMCcV)FQy_CtCc+6x?ZL`m&oNBBoi$|BRW{z;N9>}O>
z?Q`OL#XoI{V(4>MdA53^((AK>${S4&DhqvnQPQq%R?LmIDIwn<Q-Z^HC@=2cRU$+7
zDIZTbtekQEhvNI-m}33tJH_*Z-<5IUca&D2pH?z@Us76`7b)3KuPR+~jMgc?9#QJJ
z?^SI5_bY?W<xV^-dckn7Q2Datb!GXfn%1qkC9I!>*09bm_(}2m-OGCG=zZmn$+wi{
zG0&BW-6gAih?n*7JCgND{B@;_-OHLa<R8lKCCt`pgIuk{Ld!5hT&)!sOj6W_7He18
zY;8Tk#X4x!Gv#=JWPR<ZhxK6V+e-CIzbL=@n5|7y-K@<U?XfABIy_Xo{3UC2)C;B2
zEsJ$#Kw0bjv>%o7gFURb-F{K}T=KP+IPPg(FjlfoEx4dGe*BA4Vb^6vJ%3$kQLRwf
z_@bn>)~N@|zR~xTVV6r-7cM)c)cMWd`lMVX>xVDQ*6S62R8m?zRyOP_X*FnNts!ob
z^~c`FmHdT|6~CO^iAUYs47OprS}UDH2Pv*Ul~ooGi&PA&%UT~A`Y5|nVwF_OWMz@-
zSmlf<UJ3g(PRY#gql|yOt#UqOp0fLobxQU>CMw^}SgE|1(MTEJFH!laSu@4;jp<6t
z#n+UCHk*~_c>@*ewGc&V{JC=Xn|GC)+p8$mf;%baQyVK2j-6IYO_*#`o_p6;MrA0<
z(fS`K6UX*dwr}`Z8GEFG((llh%A4+!l-o^5D8oD|D#PpdQ9AZ$qjdh*U#WK@SotWS
zqjF;8NaY`UMk(4RyV54#SMeCQP>J2#Q%U<QL)kxmtWxiXEG2#4Xyro8IA!cnyOP^`
zfYSd<qcXI0gwl9&e?@vroqXKwg)O6_=2M<mp#LUm`@0J3b!~rJ%ZmuG{`<G~fbDPG
z_oCspzjN?G!1lL}{t+3rzxVJP;UbRv+zCIpm2j~=scn=7TBEeTN!tFd!g^iX-`4UX
z0=B=e>m3Aaf8)Lv1Z;oj;DZ1^&;7UdfbH)+{4~Rp=@Txwv6aZ3aLL`%(^jE_oVnQ~
zg?k}Q1}VI{BA1=X`ekyIT=rw^uS)JNTJTd#+;-3G^GYj;7N`Zc-RGFgzMx@!6**JQ
z%y%zvTNNp1PB*Fx^WEQ6bMoEYKe_8}clkJ6&dg1}>+WlOZCbLnq#rK{zY3`q+;?B{
ziYxr;KD|<{6dKSuL94YBIX_NY@~KgZu*GY&a<d+|KQg-BP12V5sGmP_@2loKa(CbI
zoBL-5LoGGyiTe%1!QAwx?w($@u94cobCNymLV|X18ggp97Sr7L>g0MBz{Ov^KMD&G
zs&@(V1e>E`B1}Fq*~7k6w1ZLq2TacW=3pS@zfe8g&Erd%@9}%CTRLhpUq>#mYGyUl
z-F(kr&P_L)%Nkr(;Zd_Z%umeb+6KF<L<rSlev)PmG}`55QJVPz=C|XtnQ`c^LaGIx
z<_udg@{$<M{12GsE1J0wS!{n*vfRU>9ah52Jl+->t<79WbK{~lb1rf?dNoMc6>a8g
zCcAtiUNe_`by6+#GEcJgqi8FNckZCg45j$eNNr}pt1J4~MyZ9~kVx#RT`9v?sLv*8
zy?Yz%?RzI_SJF-P_RrAo8}03j{jW%>g+7odpQv5APf=5{);pG>laODb*GbfR|NMVJ
zqLg{A>$()}%FpWOrOljeN}Jt3DPx}D(vjX!Gs>8s*m{djiG$}3Iw_P+^Ft<}m%(1F
zzbZNVfod#AKlS#Y*tTTt%3eBUJS4tg&s9TiLB6Wu#kgais>br>Nrv9J>E+GdWemP*
zW_|O$Qoa_W6tXx*_Vwqj?$;GLOU-OxepIq2pLXMsqUEfe>gNqauQs4p8<}U9^4(#O
zLXKGFEUw`ps#W$?vl^KTTnrsZ&T4GF=2{pjXW4S6HZgk{48FOUP0f!CF0E6v3%ME1
z%ms$BA0=xSJWcj?UnFT4e2n&XBNDZqYE}z#zHKWO3zl$XJA=Jl97*h`cE$ErB`1jo
z##{|b1afkTrf7_0Z&x=_yO57OAFuU%1G)i_>i6o&sD**%NiJTZyV6^k3tV0DqU6T8
z=^^4Ga82Q3Ry*@GH{Y>X8yW1aK1z_+xJvd`&pOI$en9q#lGo&BbugDOc3<O-zhAuk
zUzciu)jZv{Ce&zemDWLSe4nx#!sRu$kguZLziP($UsYq2`2){-(b^eCZgeVA&5AN#
zD>pStJL7JYqMuu}GhE`M>vh!53{^jm5+gW@5gfw^?z)LD8SPNCu74n3#As)7vtrDJ
z21D1}tXT66L#3{(NR+HvS3CZ4igu<K(i>nkE1tNnYF2{zhAr)e!QOUSvV7}x<a;Ud
ztqh~Rtp_^Kre5mB_W!orXi$ww=9#uMQT%F>d}}{)cZ!@g022T0Vt>V3d826Va_q6{
zk!+r5J4T~Nn(S@EkzAMCMkmW@JtceFV*9I-9g1diYKatPacNs=c~Fv^wh(EiCziq>
zFjl@*7iO=DK{0Mkc1I`kG?&{912v<Q`H3M-P4CQDL3A+}_+H0899}V<<g_?=%}tck
z-a{XU%+2abyx~r6W_NR;i}}t<gFWK9A`i*Udfj}@;4(_)V|s7SoGv!2`1q#T#ps#Y
zQ66%Hz=g=%%zoy4gJ-8Oc}TL!9&t2E9-?NXnhU&2@#!!@lfvaYUvW2*YUXYvm4jd)
z7sv52T30n=AZP9i90_Vhn)#8fQ3tK-bfeV3i-V>nU&!tdrk&|(k{Y;jjO1Z|Cr<0i
zuS2{lsTRIvPPd(T=2YP<j9#O11`L9o?5~Q+)6%)d!RAj)E-6tOr-Naf4$SjDqYqEU
zYV*{L5#~bMyxFL?Q#5{~#vYNKqRnfJJd5l?=f5iXEw7x2zvUcqqqs^rid~D-=H*g!
zZ?ZP;O-NLX*XDit3P}_<&eL5rjy8W_dp}VteVWc_OXHVOzgT}&@>$Vco3zZ+(HE-s
z81rZwKXqx3$Vt{pKSVxB)aGqR&PKNUKVUTA57qcKQ@eRqigioUN<T%eqgw_bXE8?Z
zApPl{xpdR3lFVF;W4RVMiri)_RHxXljD`0Y3$LnZG42<<i6#EZGBC%P$G5rh5x=Xj
zDM}kK&S+1{jn)RFne0i^(PIquq`~ppjcfQD#b^T((KmF|Zk$4T;a7KzGuJJ3!<+C~
zvDyvp=aQmGS09ZtXM}jgX#>8&G9gwQ&<^Q?{FKD1(b|oLWDM!34cL#o0MM_<P52iQ
zmlQ96$D3<;o+6?;9(Rz`{PAYB&y9QlZos4frsf#!Mkf-C08S)qMuIk=4*s8@@DwQn
z<ci#nKLt5~crLX`ZO|_v@5O01WbA7x<Ar`&l)3hf`J3)He1V$|j2G@VDc1(~QqrrU
zSA}mR`hqC!Mm{n;LA$|wMoBe+%|;&y?0#r<rlS_1ucK#{qwf~*1@CgUyc4Az+06pX
z<!CK_j7f?xqd!FN8lxTIU9||mcrBj8CE|HU?MN%^D)w#2InmmYuZV9Lqs7}<xOqVS
zX{3~_#os4>LzEV8!ap1TCrJNTEuMGkA_CCo5%)FmUm7K~aDsVeOCP|zFjk4xjvNO3
zE=9RcL`<b<I(kQv%c75o*N!wsu1wM5)gAAdeQSM_pdFb)=44<s<RCKMq}(*%3(y$~
z@oK?)G;|oA&#-)shZi*?8qkpKV9kQ4ks5Dz)Z+O`G>85H{yW510;vRc1G))LQN(`(
z*JSMPQ~3t^VsILxD^wamei;6D&@Yg88@V24Q#r&6Cvu#(q<33mY!au%e@F7`G<XlP
z2gPQPTnL%_$ZjMrrl{x$Ui*tULJ@A8B7URDS@iGGr=ZU#t}<~?DDn(h9sg$RZ{YVu
zuSz~|k4L0YmwdI*X8y{hORQW+-C;LZH!pi+u!sI3)N6M07q(?pjrP!T3G%WO<e?<F
z&LZ4TW94OSO!m+fadMq?l09?~;U6Ne#>vah5Vs*&URDj+iM*M}-RLg(e<W@)dV>VH
zP7X2=eI@qN$ie6Z*wertlqfHI13j`pHBL4!beRzkoh6ga?Mp4=wkdQjVe@eJN2-q|
zn=>2D0-!mGlORxwh_4{%OGF70?_lqZT#l>(VRh3K^C{ojvD&uB278z}THCgcHM&Cy
z+O}=z-(Xjlq?>EC?24r{miQEH+ZyDYSgkGRhp+~5THF3)Op4dGoj}hggK1OPh&VC}
z(mAtulDP>>dos8y4dcMw#(^AmGC^xw8Ce6JW`>PLUyXlLqSlr-9K%`*guPSEbq#IR
z{Hf+I5E;|VwQ8k<$0;rBES276CDR24pCS8`{{Z<Le)ZHe^EzAGdW1cW*V-<^ydLu?
z;I}7f+sa_ig%0<3VVx4SZQrpRX$NjTao<w*8uoQG=qY+%@~y~0#D74Xdz7{<5S%*L
z-La=*=fW6v0~*=bD}qxGeZ^VTID@JC-X!hB72e$DgnD8V(j{IyQ6ITBK}+szl!8hm
zX~`TVK{?oY*ENVb$;*)K60{Swkb!Yp@>+uwv>{nL!D%Ka4SAnDmHZ6s4-&N#<FL;q
zFAM39&R3;_u9EL*VnEN}sH%*ULoUb#`4h=&0o_Z`0>G94a++YyaY0q$v=b-skHG#D
zekDaq{u0>`{}<#}!#@oFhIlRcJ@iTFjnUshuZ-TEJU8OMq0wBnf_fxt$^3i%YT-=I
zFUg{LcM`SaNmN<{&ALRqNYPGoKn?-nU1U2DKP7G(m8YZsg#R~W4g9=d9dwOm`{TFJ
zoe$6hqqGyEWz&iOmbi~;*<<kg;P=D-E8XjdeHu$dv)LW>!4q|-gL9d<esueE^1Wfs
z^fxGk{Db6m2JbZbaPmWuXYfxX9+`X<y(Ib3#3j?C6R?klA1~nteFNT7@Yd0e7wGZi
z%fubR--GY%7tS{47S8raA6QA|AP8O=Av<JsUCxS-wcM;n9wiJ(4*RO`BEN5#EOS{7
zPSW-5;j$+3>*;b7c|xzL<BZhxwYnUPU48V}BiHPJRe@1A{pQiSNR4)QfAg4A%Hf@@
zC%uDIGoN_Gx~vhOu08R{HOOk#Q;)icy-z)kA~JvXNGNK`8lBovor1T>Dfr!E9Z{>F
zdDL<_1=rkD&pcQMkkwNK9(!GUu&7zjJwnQk;*&f4Ysj+3(HT4rspdcTIOQ@!!0dhD
z@m&r1S2|!M9pO(vSrTd@tzoj(09jQuVRfO0ughtXdZ>^x^5#R9??kaoqotNhx&Yc?
zwD`Kr6Q3>`ElWIO_$0b*k;u+4Svt6M624O=%jIafy3rmSLibmt>~HO4`F#Sbx0mH_
zvCo9YkJvk+@50_TRMxyq_Ta-Lu)r2Pqn)gA_6RPGR0~`zn|!Yk@dV2t2zMiM29aIS
zPm5h~$klSRk30o{jiIt!Nk|HskV`^jtt--lwBDpurgf`G3n%V->?eUqMKUn7Gx)m`
z-va#|@{b`0BZnXtI*7PgzNjz1N7yFPTLUu!sUUsZ%i1VpIpC|&o{wmsy0?TS-**Uw
zEm%_Fxs%Ka6pRDVCL#{GTXy(@rB%gpmfRLZEFz-@@`Wh5#B6EJ9=K?>TnqUJfr9&G
z<hKNzAwdl4*^n3t03${oFA{|$E}G7-Lg^aBs|6mGbeHXX%1yUeTnsL!NLDjEE!D;F
z^0d^gA-@1(0{erN@!%Wm#!R5ZemjT0mlEnDPm4-)MoGqiufSX_K5Za0_fScTw-_lG
zy%;fa?h<c{ry+I@C7uCNoxc4T3ASUWe~aW6HMlN)y$bO6Xz*9)O+cyyZ_&9uky#|%
z!Oy;czmH{(C|=6a6tSa}rBT^*@*3m6M%Qj8|DtG8K`F~>mw|%Q>e7(#J&ZAc;_pyY
zv}ghSE;_FaA?m3z4D3utS{X~aZA^@ocZK_*ryaCBChZa9(9d(La=n9=x)?n>MoXQG
z|D8xJl}UWWA5mKBTSj}thaI)l%jjE(Yls|%ohL34;n7;&=g0==e&7s@((>RCQIhzI
z#I?b`5Bn!pEpLOtt`?THq^G4mA)*_R+>b<r6Zth279uy2aiXJ^w*-GpGN<GJRnhXE
zV1FHE=ds^IPau9lq?Y#u{u{_z=xuep`7r#2_}_?A^UGOOTk21!i-6^3AtC_SuB35w
z$lHn>O9#9FwgwF`<A0ZqVG0(ph;kgh5!_6q?jb(_xb4`V0RI;HM{!!J5B_L$4%&zs
z;Qj=j3-R&j&(TFkEHJCa3YJN>)CwfcrYQ5ehy#EpBCk_a+@*@WumSlRcD~LNv69|D
z1K1?|ThTp`PtciPJMv3WG>;<9;<UVx__;wy<r!qeqi8LaJEe&8;QfJp5pg-l6;D)S
zMax9nIQmHJ6(&x39kI^<*AM+;^1{g9M_nWOV)mBktQw;6$o}Y@t0H2kBO1LA`)=xV
zC6B{1;&bYSp?_r6QaLOmYO+rrpkD>wj&39Fq1gKs8G@-Zsn8w|C#win!19<&MI1pt
zjlPE#X5ddiRz^1^XsOqcrQ;prC*m}5pP_H1C7&R}*exFrzW{p>`dN01H_`>BO|j>L
z&qOw25e(L1uUJAgR<?Z7?<?@L(K~?Si>yS8!in2K9Otfx5%|TrfgihZ87=SvXDW8l
zLmILk_PzMOqos54AH)A8dVP5C3?L$g9##t~Tc+DmI{-T#xLKt2CF~gPgUFwe?~(o#
zyB!*w$08DuJX?-<j-T0e#Cq~N6X!?AGBb{NLix4G4V1~E>}mX`@Slgipt~MBHwF=r
z;Bc{w$TzA+UorZ`0!=lB_HmrEY2zXEuPIWIpi{KvB>EcqO!VzT{9@>r#LnGHL~V+2
z8zBynR4?>Q`dBQ-9Y%^iuY<i3G8b}UxnmM~2lNc&ROBGmMyfLUzM!61D%%Ii-sIob
z8)u^~Hx3c3FXnN7A29@3g1me>hiQAna(Kj$$Dx`!4F9Lthhe`-J~!+UrReK}#BV3A
zB=QI3ZS0reQ;NJb$bCjeUsX$b{Zw%-X$sP0B2U5OBKi`L8lf{oPc2V{sbr|Dt69#}
zNHqYoThUVcBOj4=3pYzEdFzlPX<+V>>Xyr%w(IP@pmvG!bsk3tbxV@_v#=5L2z|3;
z5BeU-DJduqnTL$RUx>eTqTHYRo1mE~@^zje1oc54#($)f+<%DC9^{!UU*{?jR2w^^
zAm~Q2-2WW9nY_Wohj*5*kHVgquNoU$CZ%2H*bOR7lCPg9=!Y(H{|uZqiqY#qYXD&d
zEGRlf?jMQXhPY0|1p>l#KIkUJXk^fdPV#k*prBLuKO(LHq&XskUL#+{ega+1Z*0l3
zjRWa@GN#AM*T)m_0(vZ{1XV_UjU0_kA>$eub%+C@|1k72=pQ57!|XnBB9Gg$pbJ#s
zMU<fPUF7R=$hI)z)dD8o@^yAY&{z1ms0QsUeh;CkWxOo`5LR`9y3vf&B&~vWFl2lv
zc9f*==qv6zgEmpL4Kkf#<FMzEC&=UinooQ*{%^2Hk+%go2|1eh#}xkoy%zo%$R^Zj
zKwc91EOAO}%Hg#lUY_#MXpcP8Q8slj*&~w@WYZ9XJ#ta3JY^_yR*Y<7$3^}cE1UXA
z_Q=gq@)RH9KEWS}e<}8($j{?t(`fvCkUt^!<KK(jJw~2lCO#Q`1vtyWzlVQilx&(R
zMs|QD-E3kRIC2e0YDP0l(`Kd}1k{g{r*NW=+!ifQxdy<E1bGU#vXQ$w%2SS@e+2-C
zM&xfaK+SK)<j16@H@DP^ZIUccsX}0F5-uSZ(6V79%tornjWD@@-HtvVfFsl`E0_Z0
zj{Pkd@)k^FXQcY5xn+*c6hT@8(%!(m0oW?Ie<u6|@--TK1^Y#K6%w}&IS3hw-wv!R
z@a)~lE##L%e}H{1esAo<vA>OU!M-?7HZ4RSNr!V<k5tfEm5*FyQjIM+1$C$6U#H$Y
z`a|G1hTb6bh42yiPx1eZ&NFw@eDr2?JXin7lhotLj%<ux1AirC4tgT;2>Nb#Nyzfx
zy(ai1%chOUVDdO)M4rI@t{5#X87;>#{)RIG5+z8=z{z_&k&EbrS;$bxb4`g{3#k+6
zCOYFwNca+e68k>#k77T~&J{ZQ$i&4YvO0}zO`I4dtZPJ?*}2SkBOepr3%LM)iU|^d
zmbtcHNFGaaKE`JRdmw)SB#LCwqrD&zOVVld5Oe`HhxnlsV}cmj7W;e1Z;0dK5P6S1
z&;*?46ca<dHH~2vDDorhame~b2d#ACw*J;vYs+Yxi4#<09!OkPBE87mNrmBLOd#V2
z_5nv^WInn-{xJHQ$2*bd=!STtFL}b>IZ-x!g}*P%Z0I$?5o7!&L-P@O81~8F^`kq*
zL3JN}Fw*u+H3nH0MVr>39L30E(@2?Ou_Udf*mjD{CTS-24Unmcew=O)N8})iEF-TU
zBr=K5f|QycWXW{7Os1ON#?snsav|Xq{h;ne<;W^%!}Mnoh;7MyNJZwYk@KJ_&K*-g
z49EVA1Xi~rJ0e*vdfB|<X8tdwTG$p6rm<9-OPfSnufZq)eH(2N2g?nZT!BFa`XUJb
z|FYu$$QK7=h-F%%QE~Fo&D;caiIWq=O;i9nr{v%<@p3{pZgxDeuSZskmyfEOLM$!I
z9w1^n5g*3NN5vh^DWqBu!r3Ju7d1ClPPmNhpCBKVNj#DuCp<)-K^jkbgKMCFkNqZb
z_mH#Dxex_s<F_GeQ^<<sXg=Bx|L4SokQWVJJ>+}{WnsULZX&Lc{y^{+&s{<-<IM>y
zj0Rr-Ow9<jv~0AGPx}eKL)trpr_rthzzm`6N$gcA(;a(=$jA@1EH@;mne8og!xA_}
z21gUky+QCVkl;WI9sxP#*1=XJSJ~h__&LP{uLp&zV{i<l?jqZOt`@Xs=9$37Gk7iT
z#e_8?d?z{sHi4ZT{Ft!k=sU18MGxjEJDP-k3%wI^CvoCV{4?U3ARD4{{S2;#9)&(0
zy%acy(K%lP??Q4b5<Hs5a()ON?4}ySIN2o_QRV~Mgub~>OShos(gsd8!5peb+X2ey
zF`+$j3dx7C??Go~8q8_==rxMnLas!AM7}q&17t14)y2*sAG{2lZ32({$;jqNE_=Zi
zSJfD9nVFQpAsoyho*;^f;WPrd5isV;VuvNXCqU7eQwLiB<-LXA>lFDJ(jxviItOUN
zZsaE7)r;YlB_Uj0k9NVrA?;{RbTstIFq82i_ScEz%oIEk*@Frru|Ghc0TXpcgr&MU
z;SL2E|3}qp5tgi4s|XNl5gMg=OD_05N{Rv99D7+JS729{bg<NFxfIJrB3hBleTd_j
zX@Tw!G;??{8WNVF&+jN7tpnmBLAs!WWqMiy6O`aqFm8x@Ch|LgHv_kiG(X^aAX)!+
z^u;>TmY`3ha2C=QBPTFn4*s6Jx6tb&{lK}0<Zd^3HSsHuj2?!xVo_@()Wn?*oH!sO
zFf-2yR%zEDq&N`OC00v**%1`}6e5k1<fDNAWRjsSiL_j_RpT|1pA2^8OXNu8rUb|9
zB+5))Gf9{1N)_z9)}g$P{uuc|qI~O9?7#CO3>PM)E&4R_?hwcPRr#`uoED5emlt*-
zh`)({4*ImtavD2JVZxWjrC-^>Yat%uk{!h;?ueohydURooMQ<dg<OTsHAo5P-I8C?
zbJ0VQJ<&6f$0)|Fl)|Jrje)8(=q%sjpjKK?ocliIIV9I%-$MKW^d#&{$vcgm9j{F5
zBH!YcNG*(p#CgCy2;NOgnR_Ydw^&qC9+S*GLg6XUEiTnc0kSVrv~VAOdLWHMSBXPs
zY$yZ3Ig7p?a_nqn8h$1x3U>_rn3*Fk?J{@=z!5z$1AOKaYGDjlj$3aNv9qh3#&NC8
zr#ajKD$T{dfc_?sImswj<K<g>(Oc8!TtyXbgl^H7N(6pS^v)o*r@}oLbwh6q0~LQ2
z;(kZ}4e5iwCw|ebmywKr<(G%5G1fBIkgaCMS{gOti1cHE6h1Rn_WJ=@Azserv=AN|
zC;MGPzMUXvA44+X@>BC;Evh-2NpJZ1SUFqGh+|>z3W4hCI7{71*?kT6@MqDoU%JsA
zez~LU$LT6OK90crILnfjtCM9vPSxS>C(GH<$UrKUA(FX^AG;!a36Zx!n1`H%+$A#i
z##?HYtpp}VOn7xNIYPqcBv7&--tq;F$V#x>K%^%U$E|?hYT`!0`A3jfkf-iQwCwer
z2YcqI;T@1agWv*bPMYCv0x2tr73m$~)3qd41^0uZW+q$mT`q_=r>9t2m&)!y(jio)
zd*R~*u|p}WFlMU-DVB8KcVa-JZaS!p-0(wm4%fQy9`I1JI?+kB1?G-Umc1?OP-g})
zQ_1;(oMd!%OL%#D<2FSOkjr5n&IK)-)yi<rlz!@_&aAv<bL56|p7Z0N2>$}fI16u{
zBKvJXwvCaqa|=3KGL0?s;<d#Gd1N*(R`X|$CY}-c$084-|AM_*g62PqM{n!nw8dP>
z!@`MI_jaTIco}JNDv@Cc+Tt6?JLqabH_MzF{@qC|6RRyALc)M}%|8wO7=YE0JnZ#X
zS9iCxbg53Fdab)9%eRnEuLJQBIozIy@n~ytW%L=4QL}njs<-iHaEARs?QY0@gas1D
z<HE&b2x~}*O4#csYyQ2_<IyE_9vAwnk9sgg{D)wj(bH15zJC>-2Y!pX7JxtC(wu~M
zsK-zV`;NwP(hakUx)*!0E6z9?)zh-PyFd4~VfRQ3Cy{4%i@8RJ%|d>U96;jR*m;7r
zm}R-Jjo7PG$c+3FKhH(|8(;^{|2Fyym`y~V>VSXEGQE_49;z4NT(-iNiu#$oSVx`+
zpt_?M$BVxmVoksqNj^__{Uecmgu9@ZWqP!KIn=t)SU^6{GZ!-j2^*NA`IkUG!2UV*
zZG;~{DwN%f9lHMg$m4;&e^cx~qpOErxA>N2HEr=oA`T>Li#Zd8X~I+RI&&QV8>r_<
z_=5JG!Yta;0AMcvVYg^NN&GKjH2>$=#g1%6TnQvcYZ&it`E$n+7D3!}<U{29*m-Yf
zaRG71kt{HUO-FJw8#W&N-pG~60`O-Qs>a@yNtOJ+CAbNl_&dp~2p&q%XwjKjy)BJu
zWdYj^%R|a?b_?4<dNBG3>|9*KSikaD^Ltw|ZM<5ucncUoV2PeMi0+Ae7d?&0SbfKB
z0>J>%DeU#}chci{;=Gs_kHo|I#Y}9&t{@Mh{{t?fM|a_Wg#B~$0CXOe`gcRVlcf2_
zOPt~RSZ3O45yVU|Y%a7eBh7$pN8Uz?Vetk@<tSdfGyYq0$|L>jC4DUuZ2o;|)(aRk
zfvzWv8lumIK@AvqqcbHAThUQlyzLb*d0<hE{a7tXO4e>(HcIjbiP}Kk7?B4hYXeIe
zB)JOuFX(fVw1H_RNnT6b1?>CrA3~Nxeu=EzMY}lwc>@3Y$b!z=%}DY(q-Zynq1OWE
zIeIcUYr(09{Q&mn=>HI}yY#bU*ar51fj6F^M0^fXQzG6?(FVSQloQDSp(98=@ptH?
z4g3mw<s|K9dGz%#X+}P86y4l`?1eoY`x@duMSqP-dBm4N@`8;V-dP*SGj`r%FM7N9
zXN&5Q%AL(X14&Q2Y6E$x)l#d&L>l&yAP&q%(hopyD{vd2^ELJ@MDkAx%FUs94%wIT
zu^<eD&SK(ELqmO(YMIl0pg(D6QF&oOUP<*Z!pjo&jPS2XtBd_Hux8TEQ*HtFI{5ow
zkATJ`;xozbf<KXREwFDPUcETLvc&dFtakX2(H_`8N;}L%BXC}{b~pt2dxCbDb)rE3
z1TBsmhQLQD+Tqubbz`){J%}$6ryXWFEATXVpJP7}t;PLD-WGHw=7FKaEk{qr9*v$2
z9tT|D2QgY)Tk<=hbEZ`b2QqInrD$<n!~*XSd;;gLj#}LB==<WexQ~#WRSzd18Rl^e
z_rRfm@Dg$0UdSy#FC?B7+Q1_;M&xk-#&I?XTt*z{lfbD+PFI0FiDNM>@NICIodw>-
zenXH*W8QXn7M64vFeeTiL-RN@2i_vGI`-@+Esm#wfnRsj4wplJ5U(8$N5&9e0{fe>
zFd)8JoEFC!J8%T?Tu=go=}s;*f#s<D7(Ik;X$k{g1&Z?^KNfu*{-NB54dQ^}4V$?3
z*hRN<X$)+S{~7W1kW+-Ddsh+9Q<%e}z<VEDrp<w);Q0+YPaY0eC9f~M`l0ij>#&04
zsvgJkVIWt^xL=W<q6Z?M(6bi=9zEeMdSVc}V>3l~Q$Fw;NU)CrJ#h9%H<8R)J&^18
z;cm!UG@5&vz>*YQOWasGWj^{H;yG;x_CjBU>`rH_NA95W7J$Pe(YU+BcY_?0;lOq@
zW-|5<A;nx#EgUR%M=vsdCDIqqW|&yX7z{EGmjb!YIJ)g186hCtB96y&fr0p$Zw2PV
zh*@ahEp*Xccd5J<|9iCTeI#!t9&UsGB=#xj(X@oSwZInmPa~(kP>n<BiNgm(5cKVU
z9HlutJqc_}^Sh%DfPNyyeq|qI0P+H`aAYUCiD&nLqw()2jzcMq*-_vs{I~G)WG{{<
z#(~oy*9kot*%5saabgS|q8@A6fd{=+<1ofVOYp~_@1U`*!5K__=KO)Yz8A+5YoJ8;
zUBEvI>4xO>pSb?myC63qt6{&3JPV&~)Tv0ly2v@SN5Xy)J}g)Sb^({gpuluG=N$4)
zI^#338aj-gs7=x%f|!EEiSs}z$-~eeW3NEbog`JnzLTUR^dyRXjqC;ZRpclde~fq@
zPabCW99We+ZZ!hA^v9XVTTDK;WP#(@vxUeuG`2JHM;aL=NDPO>IY76Qycy6Sl3NhW
z9jiEb#l>TvfSt!ffd$ybXs=BC+t~8~>4fw~=b2|<B>KC^9kf^slX&D=<QEh@g5*Wm
zxHI(35#ke&#{@Y8Bt}4@B+Oai4veRJO*kJ@Oq>dsKp(z~Jb^BHXaz~!TfIDU7N0@?
zmr^Yp2?;U4ndS!`q1aCpsY&;)M3?FQQ?yK+hQ~niYe<R#|GzBy*L<~blqIWK9?vnt
z?#Ih{TqwelV&uFtOhKNa??S)TQO@H+!jxLh<3bU}bC*2z&}io9gA?RbHGeeI{3<bg
zN*`kh@q9g2PUYAND~!W(XpH4hhwrcik;l?d9uFSEnkLA3Vk&wfUQT^XB&&USt<al-
ztKokF7H8<N-$AI0zgG(NF1~G9-X~AoYm6qmC1G#lUX1KYICphn>#-j}=ax5>MUb$!
zfWL$MI!ey_fV?x5y@%eNGIThvEXsM`1JAu?-k0d5q9J^3EO(Y(WU86tnEK~&tqGF=
zWF0*066PT!oD{7-h1<Ym?{Svt=Dad^6GRVWjJG6Uy*S>o$x|ck8^URJs+#qV<zAb-
zhNwH}O)-(%L|Jj;!a{6nRp^-rcfrrfaNfJf0m%Bu1H`F&-?cO~=UD|yTEUyxEN<lu
z#7lNwZR9h|U*HY`EfI7Dy8-(MP{q9<xA&>vB8Q;+BCn!zP=>Wdo~8OO@;GCJaZ8^1
z3vw8(xdToWSg83ESWd_jw~!H{dd7P!_~%Uqg?D`!3GZ>Yv{&@WqxURJ`Yop$Z<2Wi
z2BIDPLH-IU_Tt?Cl2i*OS~6T%o~6fa7T-?ab(B+AQuDuC!)8g3ot><W;f2eXqe<Fh
z{>g=yQYqRP-Ybn6kfe<<8Ksz|DcWN<<nm<gaV>)svm{Y_d<&UD9N+efSx6i&dd3VR
zPCaC28T4@o>Y1+E81<2zDeo}?7XomO0@(obuNXXTN5pvwuS5S5VBYkMnMNG{o_0)I
z0LKD+t&=upFp@V&ANR&?6gcQM{Jg^&Qw|(A<a-qU5S)R;iTLi=gNYy5ReL;E%)2LX
zg?@aHiXc3WAhIcyj}zIFjOyqok&Vzrlcu9H(;rhC=}LS6NW2p|CIbI9;;te0z&Hi_
zCgg46c=>+JdGxwe<{i?Qkj~m;{<Yf}Ud?^nfx5hgW}IS~X4?^`jd_Z`JF%#x^vIau
z=o8SV5ML9VvEXb+ua3Pm_;t`dJ8NUs;}<;_L;dB%c_N#FKLnXU9RDmx%yrsz9-VIs
zjk%2854}ElnRoSjv2-4>^Imcev$jy)2MORP4P_phlYm?uujO!88JZcR<#4hI%}vk(
z7V{E0i#Gvv(YewFn9z5k^OQUEF8-&;E#$j`w;Q_|`5tm8aw2%!kuBo2fNI3Oh5Z9@
zT}tPYa2RJpjFBWWwG5q{pykv-&L?OsvI|Mf)I)nwgy;RCT&HqMLLw?o%Q=g_82c#5
zB!lw>@?sRlu>S;}A97EO7Qos;=u`5xffGX<(~r=v!DYIk7EWbM1bj$D5Z&Mp@-&(^
zh=}DxvI-H}7+C>-6=Z)9f<c;t-V283@NWZ&`D18z7_r<M>P6)hNalq(g1IMlPFJDj
zh-XzZbTo{aD})XLM?tpZdF6D=+&%%d0liJ1GusMn+mU7<Es)`g7+RiWmcc^5qB$b@
z4`c?QbXPz{;u46Ph}}-}MYr7}z5?<Ac2*8Tt0UD%(|Mv6&<m9Xf}B#oCd2J@bOT&d
zf$4@kjP4(;<%Hueq-~rkLt}~Ch`mL$7VtK4QS{X#<XvD-VXsSGFmc_HrO{cD30Q#6
z8biSM;ILv7@D+CHv1**j<ggaat401Jc$CMVfGmytkh*uVpCq0KN})d>JCXMTej|1(
zct+w%U}q92>gO~iE*kq!=)A}l@H>9~>6Cy7^y_p-W%K}ea|09Ff_>vH=IS#UdZCcz
zx)6F6@LYo3NfzUr7svzhuwRCx*e3@ml7+sM_;}*@YD55!fkHn<--+x?{zhagI)k}I
z=+k6KA}`RGOo2ySV{}pP82Q{t1WXi@!&#8<XNS+DWz6k!MiW$$qzfeRy^Mfn=)9L7
zkV(=Fim*oII4-ydhQ7(35eL=V<Z~|&dI#{E#I+_)^vpl#fdTl-fXhWL^eKDn8F7or
z8-e~Q{)J-v&xQok-<(4vy-9F;<Wh=r=NfvQU=|`nN7HR<D1Ife=pZ@=sfyU`#OD)N
zh<pRRC-Fm(=aD<GHz98V`fc#UaC)7hiP%}E$`O*ed=4wfYGDQ>dSm1w8p;htfY_%s
z>4_K^Pe88^2<ui3FxHX-cH<XkiX1>kU>Bz`##&A&akMZe3i%d26G47yNbq<iv@N8?
z>8K|8Ov6LFLP{K;Lar7!k?*rec9o=)1fK#-9A`Iiva}e=&0x++WIxD+VlVEA|CXF)
zk;I=`Vh$%}4&EFdr-W{zSP5Ffy0YVh>dQVfLyi+_z-sKtkQ66@{}u@yM=hLVS<*d;
zZ~Duuz=|JmmN&&~$HyaE#%Ra;VlRl-k`}Ns^I5ERyc#PvXV5R;cS+EWcgH>ryI47!
z6r~*(D?UHspM#!+UoH61GN(_{F;q@N$GKq3H50U?caWDUBi5gykYb@N8yLP!BKvpL
zj`L(*&H!!^GK#!7>=J&lu+}q9JI+@%<YnaX)ed<zcxwJ!R$jShIxdsZ6;BWzzAAKl
zH<2~aKSkb;(vs@nXD}tzMc;rdNgQ9vOH$BNK;}X(--ywYyot+0-;2Hq|IhdtoJpS`
z`SMeeuv@^Dh~rBl$DfKR$2`j<-)qp6aB?flF9=97%NI$B=v*O=t63lMe*6c(w!zYl
zergXKudO<0`BAnU88+a!?#sW#X-Q@2|L!0%<KaCI8oDFX^2Cs&Zu;2L$hRL3v7@+H
z9sfZj0{ANllK@Z)KDK0aVr}O5c|4tg@<O%&;tUxd)3-MCQBa9NPL0-%|Bh@00{_^9
zJOsO)lCwb`gg%{o(I`$(^z|o}dr-Kj@`UOriRz*GmPRh|j-vA|)9WYk6(e~iNEWyZ
zC*u+b&p|4K+y#Sq$Xo0Sb@c*nMUsLE=Nm@HuOn~LTi0=a5G@KVuw<4UMkC{~Kq-k?
zn#}jel5#UYwd8xL_LY{3#+*92FaN>+CvK|!yzv}w_V>78TwdLfk!$?J*wv>T!%nh`
zLyljN5dY)*muKqb4z-wiRIhZ;#6LJDjQrX8Qj_?bWR#@d;vYnBXnxaB^m_vVx#cXL
z<&Ci^gVXx<f1~fPp`+6VG#TA@h-?jS(7eg0mJOOqgVTog?=!OR;I!dI#?&+^Y;f9u
zq5b<glH`aeM<ffpV}=YFpD?_cI=rN(uj<pp<e?rY=~-31U(&N|orqyW`}P?PLX*)=
z1h&*5Va({^V@4NYV5U}!CTe9bPd~Lu6H|p+dO}1L38KV5^Jrp?YLc3!+M1Y}suvrZ
zymR01^7JzJ6;}~S*1sm9t7=nI4RwaMXDl;WgJje<eujZzL6=A-J3r%K7e2?&NfZ_q
zx(KTtS8Nk`;)g+0A0JOYSAq(SYGWVI#SPYa5Y1l$u(o@4csPsC6Br-jOVC4Hi+=9+
zK8Wej>LSIhj_Oy+)7>V1GJ=0fN1D%gxSzlec#$ls7XRh(!C7CAKZlL~q^WH3Bz_`o
zA-^e+#*coniy4P6|2`wgS?|NJsKwTRh#hR%C-?=Nvn34D;Guxv;GW~<AK#7pC%rT+
z*aDNjMEt_Gqn<$;__je>(o>R>v5zFa_-_uMo%P*}!3k{H1h`P6;xK++_=G_^7SFj^
z0yvQ3#eXIE?5sDZ|1Ol^$FT^gM)Hc42Fb66QQ8r}peFa4Q*IAFJL`SS{FWWtujDo-
z_tX#kFWD$1@_o6R<PPfR)VH6LJjh8;V1M2vxjZ@T*~W2?IQ;#hW(L!mZ=GU~k?X7%
zkh_jejD>D$SPN5B$#6KJIsV>KUoYc1&9v}BKec*GQ#sQ+ugz5p%Wy1iPE->x82(Tj
zm-Vb*TDEwsY7_d`YA>PxRHl}?uBEA(`f*v$)+K#7y6xauo`#2%QjeGQtWvVHZW?m7
zeP|i=*Rr0~ZKaAzI{wf1tQ{Fw+oyywGGG3c)9tIPWrR4#m)Gr`mNoyU_zJqcp!(<k
z6kk!dXXm`cXI%3qW(Ur|mGnUIBf-T)MCWi8v)j4VU3<7z+bVn}N%q6e(dzg+q^>FF
z8L1Ah>*=Dd_V;xEq^hT^-VZdr_CHM-s@BHjtJ-UNwsaMH%0*otWU69tQAb3XTB!G1
zn_Sdu{+>P{xc`}~X4UcZQM02=K5Fj{ri!ZU>)B29Q%nOz5l<24uI3lj^(n%4P}%G2
zSyBD44Oyt3s$9vlL#|xabFe`e-Bm3FPfn02O8qdzR93w&19hy9XIVA71D~#h2$~f=
zd#iFALatQg1iAud>J?wAWs9Z?qUvA;Le2~VRB8nMNxrx<0{&tOh&B!WZ}x+uzy4qQ
z;r|aD|L^uy@o;dA371bQdb*Tzj7`R;82;k4(Mp_J)WCY4Ul^Assz228tXYSPgx!ZD
zL0m~hxSh?9sm18KrZOBDe&XP0J6Ww(-?LiDs<^i^gSzslMo=|1y1wTe<DmZPd0aMJ
zQuV(;$=I@idPj#cbCd>8#>k(5eGfY)N=FIqZX7njbOLsW$|A-#xX6By&!PtL5<kLL
z^m9m(LF1!T;Zb58@85=dOEwZYS^9YDrvm}z2dP3X1zQUXTi4)Ul=B>99Nb^^Xb5Lq
zQq8|`c60$xoBQgVO9B<5a~5!K>YPPe*29@{4&MEB&cf~vXJMxe0^h+|?;puv=a&TQ
zP9NFtuBok~@uI*6fJ9+YV1N!J>=uB8T_o3K6OH#{^H;An^sLZsS5sL^C0sO_UFi4|
z5VdF%b)<vOX)?Nxi%Ci&4k({e#0}}s#X~LG(6fq-PzOaRfGaK`bh)1hY3J1BLD;G6
zuy2H^U?Ix9g<b3>5$_38VfSNe$R<7uY=1U`EZJ)W$kGtv1=Y4XRdS6|ZS{B~c%-~7
zN&EvH64zp>4)hUnHovJoOwVaUjyrQv(WSN7TGFJU_=xurzE0#f#jfW|+BJiegkQ8r
z_(S#fa1E0ZoiLm8Zv=p86Wwb!NQVwdK9VdGRME{Or8pI6M}cN~fqH_V9@m>f$xeVR
ziU1wE%NLB!P8gwJwD=Q4vXk2hzmfV5mhE+x5SF^?pf^!=a{nie&-z6@BhB7;G^9Z>
zVf`w`&oE0X%qFP|dBpmNJf#oQ9rBoHh+MH}US9P?yaa*nq7KK0BcZi^6gDabr<HaR
zZ*nu#jX;1LaaMp^W9O72cAKL&M4TUhJ&6<DE(pkagP2W7Jwze$w_@L>H>ioo<<B-i
zt`<s*q=B@c7lEP$BCrjG+F%zgAlNasw4V*qYffM-{v<8!hv*yl1W}RSmi%DybfVG~
za$m<U2nv5#5kZ^eO;DH)`UJ-}IztAP9hdaB?+wxA(pCt{#0i2TE{X!}unU4>0E)qH
zr$~eoR*)0H`w$GrCkTrCX!6^$|6el9CRrgsFcbmddKY>D+(8H3i+#IJkeR9-m*l#>
zl%og7K`@dyK~ThnP#_GyAV{o(pdU4?PFO8%P7II1FdC;|C;-|45J6&b7s@0@;THsj
zKSC!c&dMEiP&bY<O()n)CurY#$<c%RAs9oPASmKGf^Snu77Ph?FwFkRAjLU>wQ(Z&
z6oRq%1VI513qT}!#RN5S6YvXy!rwtB=t)exj(H5?J9L80sc*+8wJZNeJF$;vz_1yC
zf}sd(Pl0Icf*`>~T}Y7tC$K;#f@dMv0-qo#@@4WlcfBMiliM7>ASnFGWcI)Kv#<r~
zpygn`OJ~SQ!!G<k4=v{)cou>!i4z1xTs#Hh@C$<C1S1Av_Em$_+6k+z6G4g9npXG(
zL6M(e10aE<mt9D58v+DF5n$E3Q22v%(2FqtS|`|2C)luzyJHZ}hhQ*qf}n^C2S15i
zL6BGn!QRwpt7F+jP%9^fSLniaI0ZxDivS>n#A1SWl0)zdg2ErE6BGs7>!97RZ_o*H
z7PRA%<`~Qlf;ZhwQYdkPAaORS1_e43AOOTEQ*@ypMQS*KHFP341A>0|1VK@#Ciz{-
zDJCeB>yKX$6#gikAO)qGI%YrY8yy7M|E+X}UdbID44=ZVHi3en2&_ec?j#F>#5xFG
zxnhuNIe|5DBKRQ$>*5mxMSgAady-d7P=a6`{DNRVaU&P4GZdH5`a0-v?3?s1<a}?(
zCH?+dEeAn&_HjMp1VIs3mjb=;3xeX>;24AuZ0Lm5*ookL2sXec2-*ZdJpkSyv6!Jo
za$^DnL*b9n35q#DBOP=C_RTuMAf2G8W}1UwMF=(}P7oAv4Z!C=2SH+s1|c<?+~vnZ
z#g$0}HF08?4Z~78L*Z*kc3&rgc9MN`g2LZXCnzRAWpsiwuy4@`av`(hlGZh;>mXPU
zf~6e<ZIX`&Y)pav1c)vaS3Sodq)0g@u%=D~S3s~VKGB6DzX|yR$SdwancNEaMGp#p
ztWMB{nDRPiCiYyL&akb{aLj`i4u(N6tVE#bLJ=51fi#i@L1K#rA&bftoxlQ|2<AYr
z3O+$l<ToRK5P8J}B?wl=Zxakf052vw{@5K-RUI@N`&OM`uukx3%dL(xVH5;?i4z1x
zTnh>eAy*J2wrCJSu(}ggGbe&uAy`f9f5A`yv;<%niNy>xk_{vag2ErK6GUePr)WmA
z8v9o|L6+O>!u4>RpQ8u6LC~ZV6mhM<AAw&GB(`V}Uhc_~p%YkhCx#jfUGWKm0-!Yj
zqev_!XosK+enC+9`7W5_&%pmnV3*XYJxtYI-5ehqr<kf6%xb%qp8kqE7C?L|GCAv-
zC`pw_7{ewc6IE%fzFCk0j9HFj^4OctbhmWnutBn@cLF{A+j;0v)qv`*LyafoKR`*n
zNiXRjb)b`}y3JFEs0Bn%9fDr?j}Y0143f8=*MPiUdY&lzA4`+xB}&u!_Qun5Og@tO
zZX3^XhF{gWZ9M(U-PbV#z<fi;9HW+O>sh5f4^kYRAUdEoo5mBChkAG`!u#mq;~n8u
z372~BH%JfkkPt%p>LKs`8RAFCV?89Ckp6l|B}Yhqfuen9kRA~t&QqQ&wRK{=i807b
zUDCMEc!Ez15#d|en}uNP#X}@pGf2PbxvlUU=~yutYOjygIklIylhr=2aU9+(X_9^?
zl%4J)LQB0VNz(`wlRLttFunbLe3se#zTqe4ouJ><M{YQA;&zH3ah3&5JDYnclk|s9
zQ(%_{dj@tvGlCFeZ7+jmfS>8W;RIVc{(18k#LeGxd|at~gs(HhbrvAv%2kKWzSAHT
zIOVo<x+n9auL_-V+moB2=hhQvcR!L}I3))<CD$VPs#CH|@&_-I=@D|T=(%wa%hWr%
zLs17vi^qJ|PEkjMc$SF{Vo&}|sOV@B+LX@t5Gc{ngiBVDqTkxBmdZ-KzcxreIYBSg
zJ33Nr_B#7*e+83t9k&<)0xkf!dBBLijVkI}SzI^mY#LD4bf~Vxeyp>M)>(dfeQ`T6
zAD<7!8#+Z1*Bs1D?1Cb(f}*7DFi3gD*H00&te6+hr;xg(lM=ob0DOX7Cnfno<fcxf
zAO1Nykr<uG@jge}vD+O3N%Tc;a%Hw|45ZsSF_BmHP3BMYaR_E4OR)mLDZ$P^Jo<}M
zp=_OGII-g(c}FKHz{)}LQ|y8yqf)v;ZoZy79KV;-Kn!6Z-osad`aZ(hg#nR8Qt?Ra
z{gpww>y+HeX)x7eFdY}kdTtoG3-#Q3NQvBIPPwg}avPC*(ka&#PI9(WvLDGO^yHv&
zl5||ZJjLl<TcP1T$IaMT50iA7IL>E2B5opAmoLF#969bN8X~i|@*C$)STTC<iBpfA
zJBc&J*G~Z;?j)Ar5Z%|9O(XdSJ-I&qV>*#|ok)D3epd*Qb2<?b_bxcg$QMM2ExJyE
zb=C<h`Xv$ie;iCM;3UCE089X21&PH(>?EJplPlvtsuSV1$4;2^%Ue&1Ztx-UqfSJ`
zjRR*TenCXsB8eXH1M8v_R+RJo^Kgh<)`<v!@c^tQshEgN@+Cdlg8zt4gmb=~FsbC)
z90!pV5ZR>@5piR{S&Lr~A=W`e%QZ+EF+7`=d_+*>%PwIp1tNQNBEt7J068QU6S+e2
zZk@;l2Jb<gNRm$EOjv<qjx`V>`*b2AE)ARwIuT+85u4N-u)R)T5l%#+Ao86~L;$=6
zz$Pamc5=Vfb8q1PUMG^Q6KVXZmt!t94k8D1A|h@uI9nV<*#87Oy5!0hgS6iXtb-Gg
zM2LK^6A=JI0NCn8M1sgcJ@+C0Lpl-eJne)@H{9wth|GY<VV#JG8zx5mHUh-H5x1#g
zkX+f!1<(mhaU#+UB1d&10$@1#JIE>CH4=?DqURRk=XY!!bFNN0k#|qDaS)jgk@dvc
z#PKTvM*_4HAVGv+2NAxOD&;tVS)GXVhR8;ph{zvBzD8a#5slmpdhWaUck4tt>qM%(
z;UkFHq?^ohHxnp`h`=O(b^|1c5bPk*8?a4IV8KpA+#!;y6A}5z<nMJNVkdWto;w}?
z9-T-R2NCxFk>9#Gy5uQDzS4<^z)k>ts}mvELBtQRtxjMePDFelvRx-4@;j5i--(D!
z?lwL5Bm8>>5$cP(e>*}tT-)Lx;?BPLS|=g`y8?6&zvvNSi^d2)2(rToteq2)iV*om
zCnEB@k$;H1;$0&_WT&1R_XPp_^e*Y96FKzmR0olY5Lu)X5pg{LI-(OHwrGq%B%2si
zA6addVyYl+$)v@YsOJ;q!;K{;9WkpO!zSc0Hs^Z;an~zWLVIs8NMGu_M7dC>Hu=$}
zrTFLrAK~i>l@sI?w@HJ>lA>Ij&ng1G)!WovZ`1L~gB@+EN1K)tD2AJe>jls$fJEmK
zTQuANTc#5cG3}j*)Pu-BbRxp{I{8015wUNw8Ke~ih}Or``ZT@u;|cmtt@oq#tMsC|
z0Dq@9poiXo^(Tw&3xjCDO5#KVL|ktOoC8}lfLN)a7*c+;V2u-)?9_q)TCf_Q=t=?5
z2Y?IY6nCXe?m9hp5B~i+k)Aq{7wdc-JsAa&wK@?I*B6{0!LbP@1Q(4{zVJBL2`tQs
zNDxFm)QJdyegIr{A|gTLBR%&h{{N4$^MH@4dcOGGfJhUg(xfgO1Qbz`rfd-DARr<j
zC82|WC>9WLH`uUK)L_Ga4H2=T*cVa6h6Pb<7!W)5-v8h8-fVKSZ20~8IBqg`&Y5%P
zmUrK_yktT~nUI>to)icfj*ulLgt)ky1kp~2B!u8_o+9LWkF2{VB!-adOo$Rq2LFjC
zBnh^Fb`9xJhV*nBQYbPEX}|DAEY0ZX{SkMA0Y?IyVL;7;ea(gB+K9UmoGbWfYPp6r
zmKjAZ6lWO4IR*Fiu39&^PfXk%aW`8CUpVQ`w2-+aAu&R3vXJS7%(jquK}b#AC%P38
zcdLb*MaVgW3I_J6+BhEELl#EdEwt&dP^%VO8zZ4ru7liWPL#LNrs<@%=W?CfgTHt>
z-4t=l4Vy!IIjUBb_R%(q*^~Pp7vuMeJYA7bTorf}S8E(kljs7WvpK=KgLKJdd?dvq
zyTC@VJ8iW&++SouL{J-o@9|4Wa2^M{z_0^oKZt><KwC2`#(De_+H|B=+o|U|_apxO
z$sgPqMgd>!fjfF<>ULD%G7o$L;GY99PfhZeEQq*EJ!~frdo<X~J!}uKzk1lBIN*f_
z9*(jbtbAjve94xt2lrgRGdNcgr}8Q87<&IZM5-RK!9ACQ>k5zSZmV7|+FRBx=dPwr
z!zg?Q@PFcut9-q&T)<ZucpL!*CS<G$DRfH%A;YVebJv;>#dU<}Z}1XAY#=1}hKRey
z<67kj8IF)MzYB$ckGoC~{0lH8BnfyH0h)|wkYJGs$u%MG{&0_O{@i-z!))SI5yjPk
z$3;RR#Ohv;rw$|TERSo23F%JTLhkR*`7RzR5kXxDvLHxJs084%4Lpzb>fVIv!Gt=G
zHccqC4QE`-gXQ;NKDnh4cdm!+?9GXe%!#=vY#i{3fO%hP0M>*mToQ3}Ja88e+!gS=
z6c}tzuoXRQ672bgty$Z-9k!|FS@k+J$qvr$tE!Z9^N9-%hzh)uh{_OYQ{~5CQ;k7(
zfk$?#l~4P^7i_9=+BBhrzY7o_swv(Va=~6`*v}C9z6qho3E-~s*cT;)zmbfRJOXvW
zQ{asdRf9-EG^DK+4Oy;7cCsgAGeX9j5aDkEpX~{Wf*ohrW(fV*gp3b_u>Oaiad=RY
z^$3}0LKL_PqJvEc!GVzE;)t8zkq!5Re2<XHCPesqz*qN#6oH*&*dG!4frOCXO|X)5
zJoU{$$Q*=BH6aST1)`cJgxEkxE@V?YvZ0=kHxP2V2@(EQ@U=Z539!=)8$TMQMOKoD
zCS*bV-Qg>;2$^9*6n8H~bxa7cfsmp_yp_|)6f?pT@;gG#Fd=GN1HPUo#4V1*-An`j
zi_#BG$RraIdF8<12(S<#gH4FyZiA?S2_ZJD2to#VT&H+Kwj<<Z6C!*e_-G)6`s>+<
z1mGbCJ^`gWO~_;u(r3rZ;nN}rImLu1ZY@NOO$f1JMXu%D8y?q4Pl(Gd=T0>t!r!kO
z#HN9gP>4gwFauX6;3E?<#e|Id!UYvshL8~^L~$$OX>LM@4TKaTWVpvQ%o9>XMNXql
zr@3qq+zvrYPe>B*NCRI=$=)&{Q%%Te2YeMw)-O1njW!{Q%ZKMM6GCh_G;odbxcZd{
ziMxd;8B3#th+qK(Z9E|fz+()&g!Zi_WSR-Nuy9o%<adPRm=MKX4A0>vgxEkx6t0-Z
z)qfu2pGBi~v_%Lx!Gwt55(wIPLSlfs8+bYGZ<~<QO~|5?UJHa&WN1z_A&R>Up7th$
z*l><q6LCGnMgD9B4JavzTZWKcCPZxuAvoF-;t<l)gsi50n+ch2LOwsBZ7@e_BczWB
zQQQ^qbTA>r214S6DCzBy4fKQ*BBY-Q5y6!Zbo7KI!S*%mqqM(cLS~qdp$o$o)mtKD
zfC*9D)$nu<g~Wqb<+R7=Le}3SJINEW3Lz(%5D{DhL03;m0_;G;K1chzCS;}wsj%|!
zpduX+a-<1S+@d_{e|)IK5gbku$l7^igFGP*Amk_$B7(&TiFra|VA~sZ3+>xY2(KI_
zpm(EZg!eSP5ps-#X#5qp6r%1XL@zU{B1Kn6+|eG{U{A<;gdA%^guf1a4^N0gNC(5d
zPy2f&WR?kO@NK)GA`^$Bq?42&M1eOz)YF6z90(~yNJo!sh$ke8kS-=f_yX{~Jt0Z3
zoeT0#uG%Di9AO-;vgdI7ao(Wt7Af_lL0=%}kxo_x(?Ke@#kzp^a~SSIaB~eemv-G^
zm4z#u7jfMTcQLqmgE&kGfb@L=i_VX@;|+KP!1<O?*nK7h*Qj8_FkU8%8hjD(xWUgU
zIPz4xpL26~2Z)8-KuErY%q<BiJU8MRTgWYhTx22hN<wl8X=WiS3Aw~V4h=$r+d4-`
zQ$lo-?p%i#A(#WH&F&4(ku7M`uB*29b9qM08@8gEybHvz6KF3_-KtW8Gj0An`}oA;
zV*K8c+qzc7F=p9{tIv^bSj2AY2zEJG?u>|Q>5<L1`O{ql9OGM?5D_$hV7Li6f%XL0
z!wfr%_FDZ)ZtHq;3U5mrlO<biBRC9?1Y3Mtmz*APZ9H(-IU%?O#4##xga_^gcvJvJ
zlthC)+`}H{VS9i*$iwyqJI2E%;()UaJP&2dtbAu#`Cgv=aBw%Zp1Y|-h*S9#*M;8a
zLZrJXVuQP>!qX$}V2|q_t6neK7jidMgEkGL@Lj==10QFd1-EtjI8Ai}UqZl*CS<k=
zd2`^3KuB#qFjvcjDDHTOCYTUn!~08w)bzMkdqUPDgpYs)LcqseHwY#{kP?y$xVC|p
zkYKS1IopIBfB%NywyyTUyfegvC@u=m6ca*ha9fweQODz2X+pZwzKq+t`l;JG5j28e
znhDW_icXEVdInxjdmV2=^=Cpgq)ih_Z6g_13K;B0&A}E<j<^OM_IPhj^kz;(QrI})
zK7d(80az0%2H1JvZXUQl;BqN2*uG$AdDvXAS%&RYmuF7S=iUKMw$B6)?vx|H4__PN
zPnhjTTyR^bz-@U<B#4+G!4tHtc`RaF%6nv!t$aEJP2qsLKW&;&!oLIlZ15UJ9h4nH
zDj2pB?O&M?^d{7N-n9L6UE#J4C6x%&bwz<MLBxI(2q8GUt{|kMM>gIQ(i<UFOo;F=
zgJ<I`7NWlmQ`xY+Y5&fIZ~;LGm(~1hDh9`~^$0n@geY)3MC{dp5M6ClkzB~CdSv?A
zw=Dds$Si~$XhMX44?Gv8Vj&5z)eKvM_OB&`{Ep#G@ZfHJ{JTKN9EAK}LKOHaL>HP6
zVgn%#vhO{z37(Ka2>Ho`2>%-RgeN2l_D93UPbJ`cE6H3FQa>5KOLQ|rela16`v9T^
zCWP2PNMa&yD={*~Or2L!kvRzY&4j4!L+}@SLJB8xTlcGh8xgR_gz%7npVGDo4j&5<
z@`njg-0KisYC?z&D}s>UJ+6tKkl_gV%Y+F32Ka@65bCel8U_5PfhQ60g9(8nL9_d-
zR@-1`wj<;p6QZ~s5M5zHhz%>kcKEl)HI0VU97!PLKNBMS$8iX*3Y3IG3IYFX;I;&O
zV?r)4A-z|;9SB*5kewz(ac{xHNiLYI#0ElQa20u6lRP202>FDz;I>W#TOnBF3CRV#
z%fR>4ac-9hnQuZmTokNd_XWqoPfdv89)xF!2_ZHd8n||QT&I-?iMxs@`J6^=BqDeS
zg6ljXQNW)WcpdGZn2-xi$P+Uz3+Bl02>H^4DDDw>ZZILl211hKdB2FqHOjxO%SOo8
zCPW15AXw%JDFXbJfj7{;+k`NM67ajVeS%jqT}6iG8xx|q$KbiiglM^IA4$NqM_f3v
z6*Rh}B(63>zB3_eTMxl4o{&Po-<puuX#e#5aF0)zkSjKauOHS%$eSiaaZkXr+=LJt
z2#Load&46e;|Yl(WUC1g!IKaadO~u+zGc|=X#dQF<eQK?tNt8J)|Lp_W<nJAG(0Qe
zVaVf-;Bb;a_O?eh))UeaA@7<H5j+FI9T220Hc_ze81_@zKQ|!@Ovv5e9UaV(jtF_r
zgeWel+q%0#C63^5l5j5C?vdqsLfRqZ0}~>G4G3A~2`K{mzF~i${R<OvkqP<W*#Clx
z^hU@>5~A@};6{j6n-GG-Ndno29$B6zq$5H;HX*`44}Og&q!8>5!~R42mnP(56SC>w
zuLB`dhofYJlpsWbn;=?iLI@6o#2`z0WaB&`F@$V1A;P~1{sB)&F4*U2cLhi0^DfO7
z_>ob|Ud5frWp*c$y0v=<n15@hqlj+pl7o0>h~d_PyWDWP1RaW%g^PiE(QuD|yE6UO
zt}I~UB;HP9z$XA+Z3&fZp9#UeRB*sVZtXT3JPG_-gP&6{f1=&m750y~S1e=`A&V_!
zZb?Y2A8#eGkXH#=Y9aGVLL4ElS;$+2TyG(V1|h+%U149|LqbS7YCgI?FK96jQWvA`
zoF!kUO*^mJe$3@PB;L>^PT)NxhP{mT{TO!L+FfFEXv3CC$Hn-)H@9{V5?2Kt#Wfkn
z3QgVG5$tj>J!$cPM|Ppjq3$$w<ks$C6C#495F|}Va1samkYSh7UcX<-t=%9_;_GPB
zNnCBGab|oDZ1JsKZVvAm@xU?fG(Ct4Jm!J>0e(IJhqrbP?4uqw$HNW<`?!bg4|bD>
zjm815H}G<l-DKsv)XMkVhtCCfRCBnadXhMmPjNZ)eltY6wIepTql(3N&xpr$uT@XC
zc3*Hu^)zi7M&Y}Ie;IsSyJK&x5b#q5UPZvoCgd^`((K}g0wLQG@~jC_Tn~s|H6g@?
z_n6&y&xpsh#uHMTKU%)Rgn*B`6Cv0FK}v|80V_1{Iua}~Aq!2&v+dprZtdo9Yj?W|
zQCxF)-Y_A=2Df&($MGf+kLz|5(w+8I+}holy0sHQ3kcpaA(~K0z;_sU1MN|7LJeg?
z-A$V&l-foyu5W|o_uv(~SXbT@;$gdcb7DAi;+_;%pJ*Nc_#FecgrKMkZwm3iCwSmd
zfbUI#!43qw-NWjM%+-dyya6v^(&LJ7UT)P}bnTE}OW)2ru<s)-xV2MYHG2O(MA}aY
z4zDc;S>utNZ{^dCjAJ?8Pn#x`@CSnb5Pa%tTL^ZoVb7;sPnAi?6(;1H{817TcbieN
zm_Qn{6<7tL9VUd}@Y({|B9CmYC**goElW*^@KwPVc|vl*E-~!swEt%pxhn%9Tw0HN
z=8B*s>k)Fj2~l80h<2F}f&(EAvg<suIi8U35mI16gs%jCw<jbD_6Ea_qWxb9A-~=j
zp5Vd#_ED2S$SQ=~XhIaYA4H#-5Ml!%g`FdAnMZb>C*%u++-yRGuK@lFPe>B%O@@uP
zA>dCd$<-$0*cr8glh71|+-gD;_eUPL=q7~NaA+XA#mE#h+Y_=GA-9>3AP@LGo{-$m
z+}bTSa7Vy@nUHHt$c(=VgNn>SNWKYC+}{v=YeI+(gd{uhZV!*^Y){A=2)W3F2>%cG
z?*k#!U$=HefEO6JhZ2~OYfZ=}&%GQ7S%{EJOo-zCgXl*SLTo6cBk%t3xX$r}Y)8mt
zCPa7_XKa2Bl$a2}ml}AGl$wx5Cgh2qt`CGPL&)VOL~&X0{Axmo4TL1%TIg||>j^1B
z$d$AOw{{{Z2f^>2kV3#$82D8-njcNbViWS`#(x4KUvM<M+Jq=>XCCv(gb*9f5xB1M
zxK1w-!Yg(txt2z4BqG=a!QY;cT)@{DcsuPsnUEzW<euKghPQSIIdf0Ak0@?8JpY;y
zVgn(1y=fLPI_M~7x_@i82qCj+ln^z33W1J6sU0l}_$&kOru}CVvebn9SLyknA{7~$
zb4-ZhK8Gg@9(WzG;T$=ZcY}zF`IoJr86_og%Mfy&2~pb@5R~_X6ahZhgnUo?FDB$V
z6H?)i9|Iw^5i-|=DDEqGDwq&r10j_Xkj){;70kbuqxU?1q{G?UIVl#{NilWw=G*8>
z9lbe=1V``KF}zE}(=*fSOCf!kPn(J=4PT?7ve%bF(qCZMzjYk=)jD&%btZRe_<{45
zbS6%qE)xX0Zy>5_ogp@yXOLZ3Lgvo!gsehHo(U2DTkzF9A-Q04_wWWD^?nDvztMW1
zx`8OG_m19=x1<$WNWWPJZm<q~-8oz+9qGV0>wx0EN5DbW0b;}X8AruLk8GCLg9qrr
z1nYqaet<xq$19$nU?&^4I_<xkkOC7@sq%fnwCIhHNhU;bKfzPOgv1FBCuoO=o9dCB
z=?PhnkSQia1V2Mi%M(%r_H@HWY5&87EHfdOycaH%;Ru;#LKOFF9^-$gDIquzl7MW6
zM|PGcB#DseCPW0kA*8M+q!8>Gv};IrGo&}$kU|mMqU}ATy}}u-SJ-D7@GF3~8ZdQl
zw>L1v6&}g=fq-)b8?NK6n1hVsdnj%*igOAU&$j!!qGl0yvW5Ig$O;RYTM|-8$Pf$p
zn~>WrWL`-~Zj*>R#X_<U;;mN}a%d0|+}}kB8A^z@y(v*%5o16oQ}Bdz5eLdsY10B#
zTV-n4)T?2%9&aHr>=N25P(0n=-DrdP`D>3H7vuLS+~18Lj!hw3ajnL264qCa=njQo
zmxC47;XNcCS?U8V-D&I1f$}sHB7(ypXlX)%<2cxnhFwm3lYS-lcf&c3kETt>akY)+
zRD2lN;`=+j(P)$h?%^G&bE&{s4}22fHUT)izbibHw~Tn$6Fuw%usI%f5ZJ>#Y%&fw
zX5f2Jc8itoCM(~GtHRr=j{HfP6Npp!6xVAUZx?|`)gv~zttzU`TSh#t`)nyNX<ct_
zt4^d%!zg@j@a@4D-`~X$(!;<H6L6~ux!Hs~c}4ir-7<vqG9ilV3(?UggxK&#6Cpi4
zuC<<!ISA=vLcqseKL|QNplzx*RtUJafs-UyYC>)?A#JXV9M90Y?cCq>Ga-s=15Za2
zLTqq<r#G_o^|<aZA>C<T&;8v1ZRjeA2--r>83GB>gvtfn-@vcZ-rSo|qnJ=9(WVKd
zwlR!rSFrpZ+}{;d=Pe>0wx>5IrZ6XtOkv}I2LnFdz%3!j1>DX9_wvBg0UwnDgB=1k
z=3x_H+Z*;>+Lv38Z>4&vPr_CtM*{nup^P6xuqvS7P2;%nhfEb%PK-V_n+i@Gz}rJS
z;t}i-TIaXfuw-{?-L=q?=tGY&+FXRg34)IWr-`c^V-ehg9GbXb<8FlrJV25}9V}5J
z_Ny)ch<7Vf<cJdZ!~J+G32}&sDXuGV74T~aRK=cv_T%w(a1AH%wP1q}=-80ovl1>T
zW&0OqSMI3s*RCgJx0RI2b!2jMfjm{2m&P$UA+*Z4SXJI<Vj_hdSyByG=;$?H@pv5V
zsb1$It}l4&wL0w*aCFs?1!qj(4%NDUs9q)SWy$Wa>hP<JVujo>b^}Atf^VF94ABSU
zKOSE-9>r5HKfIMjylaGC9gOjF78eM`=;tW@1NpIn0G0a{3_QPT^W$QWH}S}yfShL6
z*dwnGxvD_VTr>P|2IBJy9z4Hl!?t&#ck(O>@cN;hRP9KDFB<#ft}~h%?HBMp4J|O&
zqTrwNtJaD)r%4j^{Uqn{)UCRD7c`o#`hF|gTy<tqE8?U^6vs~sXP@eM5`YdocEkqp
z^I(kfxVz!#b1Cnsso$yNDt^vplBu-iF@5b+B<2!%pTy#>r3v^3;zJlH30R43W`?z<
zjrzvaR-HB_R1NutO?NUz4Vg>4{kBws^2G^u?uxm?;Lv!jha*Yfg=rghhHrmjzP6_~
z2Nw*USG8`=YIs^+%nQtqcI=F3mle}&&2ss>Vy=zx==+#9!Xq6=7>|58&{QwD{DP`=
zXVCZHW2fq4<dc{?oUlXaMi=@rnSD%9$rwr(G?MDVz3Kw>j`FkWl;ByGgDj=KVQCX7
z)%|QsDc>=qtWj|0{Hk?3p*%>L6)5K23A<wM5QFRck6r?<4dY;g2j7VVyjDS@3#-;W
zrj+83W3Gll-yq8?K&7&}LFMZRw06OaIN#b<I&*m!*%5QK46d&YdJTAG;57{{UoYTw
z@M-c4<Z+OUpS{UbT);-~9ST-O>k4*xepo!s;o5un){g2_Rt<A!h7%ik?o`N9$H7#E
z*I0#BU0IYT#Oj*R4*|W7P%YXzCRDyI2(61xLWl4;6!V3y=2Q7z-S=}lKZ?2fdkPM#
zSgmn~dJv_=ou4Ldj0yC_xfp^P?kN~psaoR^4Lt0;G;D4LY%bV{bw*2h8=X-nopnaO
zI&`LC!GjB`)@^VfwW!0c+35g(qE~PmkY8?3LAA<A$nqpykS1Y#1_=oy>}L}6WkcJM
z!18kCO@e&&k<g@I#6`>-BzTqE->o3^{)B2X(r`Y^jV)P%y<i!8K?T}s*ib)08*Hu%
zHj1E0m@26`!7v9;x>oYyANAtLhVk)IfarGKo#Fx3dw{$Q31Y-o@#3HG;wNN?&n3Px
z@zv1wBb~b`AGtg_wgq=U!8aH3mdUEL*QA4L-#9&NiqU)ue5YgDRaaw8u&iL2S9Cjf
z#@vD6YSH)i!Ck{R`El$lNtLl0F}2}V%mK!IEIc#d)s75r+%@KhBDS3$2qrG}Hzlsg
zuFD*Dov~pAr|r7oHohz7z6YaSR^QR|0Rvxw_@D6cYw*2IQ3k#_J}sm%JmzBlLb^|1
z7qtp!R?xFbE}!Y}qmk-+m3Ba?p8aB^@->09Rl$vW&!);Q5BSdp*S8++1g?($W^nnM
z18;**nkMl$1uLz|msG{&!1*h94dU+<=w^1J8vM+l#oZq?X!O+Z0u|=x;HGLg<e5jW
zs@uTU_PdGH7ax6sNT$F0%S6i83Xw<PlSqDaP-hSsPZjO}*9et=8fhT}pF#@O{bQu^
zwSlR<kuqc4tc;{7&T4MwhcWjzc(&B+B09JzeVUO!DeZgDz494+`?>!h(|i<JHLKQf
zh&UIeHXnoOSjcqz*E*vwHu{{-FsI#4>x_Iy(3!|F1(#p4*QRoyTY>N*Vo4LL!vAVB
zomnfvw=Rt!SElJ4p@5%&(J=oD<`#w-t-<wV7k-Vh{4b(g*O)%oNVpi~k)=AD6-?!9
zx8nVn`xKx${29Ol4cHwZf5WI$hZ7(0J{uEUhuAs@)|K+af(PeUJtWRSCs=YDp#0o4
zmS+#S6^(UitA~AN8s&e9RPy!3Mu!?RNIb~p!}q00+(X)I!}SC=r$nOd5%2RB97}!>
zNg9xx=j>bq>}&k!%|0KJ3vHHYr`M*%z$LXaXp)rn?eD$<$JEaj--)xs8N1e%978LB
z`Mx|~aq$g-^facxo&+lCnOHt5Tk&`ik4kVq;Cfu)+EnP+IY=IJi52g}+#c{6!PN+S
zm|iv{wjVamt4>N>Rxp4EyF9A%9k^;tb9X3-8`9$_KR3d@HRbZ3f*^Q;uju%ho76W=
ztlEarruaFS#J1t_QWN_CVqaOzHvH|Sog_9ZNPLLPe>3LZP-4;@3`aal;>HAQHDWn?
zLoDB1YypI6RR~K@?{nz=TYJhaRqr35_v`4rk`2Trf=lgErF&o9<<R?Wp@?|24hb6}
zqA8%aO@w^I5g{KH4@9K(&bw&Ua5-<r+`HB*or@d8*9_b{__V`Zins+C=MJxj>^*rZ
zXYf^V@0mCy=!-ZdVDoTS?xR8xDzqJ(D)cBK9-~6d!3@S`sY3cb1YbZ_FitgHBTDpv
z^{6fho6w^cfZjI|@{L4<d>3F6QFeuDxyYL__mS~c2ip|Bmf+$a8ljy1A(V4IcGW(5
zmA@_KJ_e^=Jw>k`r&pRIL$Nc}tBjrE4vkLjFcJ00*DRmQ++lz=m<TyXAws?jF^MR<
zSBJU{Z%{5HtO2$;y=o0^qw&c%5I*^|()6bgN>66IskNL#ubu;_UOh*zo}^bwekyiW
znZ2sxcD@&LFPMn>By53*wqTw&5%P^egnS81B8odDRjgHSHrI9e^y)<;tO>X!gohKe
z37=;3GQ=&+IGYbBHJ2hzyJNKsz60G$CN8c7gHWdg<Z%Uim8hgbjoyyA%>Y%Q&4_rK
z3h5*<5<9!h3e|HPsL(6cp$6n@MWS|KUe?W)wq7}N5h3RSOd^U)#P6jmRNv)Lq1TMC
z7TCieJd(Iqtyl65flt1Ruw`X-D((^+w#MA+)~hY_>REc#9?)p)oHBdWz~#`ZEheHN
z30ouLC@>G22>He#LcWVJiP)=C8vm?d%{O!rdi8)2)&|@L!lP+_*a+nt3ZZ;gVmFrA
zE0@TpQxAdDP`yp3HqfbKz)Zl-_c|5aG?iQ+qHe?Xm|JHeq9kmKhz@`rF%j}jK!ki(
zVG^-dukdFDooeKAUXQuQLOypS*u&vFmiCU=qZ<Em%xE>tJFDbMcNJH<#{qHBh^g%Y
z+61@^n@K|Ldbjgfp2r7SRRhLy?=xsir^ftV#Cd1Eb>JAVN06`+{wMJ%mwXo$=c4|L
zNcMy!Q{%-n3a}7UGHo7rTC#^o_UsbMx$ev$Ss}@u1`90K?O+c4{lXiYTrg*VslsCf
zyBCXb@_fof$QL|)r}5V>&(gq&CwSVS9JdW!@J+_{qjeQTld&n-RBRe1;%rRBg}jUD
z853~=BBu5WkE7#gn`f~p5rywX+zMi=(bgW-@pk#)OIgQ@hz6Bh4HurpvvT+@$L2zk
zR`TF{kPF%EAz8Eom?PopM*K?Sk?&G?<h#OnTJz{tGd%w}i0%ZW-Ay;EOJ;>v#nl9B
za-u$|XCHK>EbXhB!I$msfU7ng5!XS?{1~{Y*nM<Fb4DDEvD_W;h`ZZ@b)R)G!8tVW
z;MS%ECwVsFE@D->qv%<CdfT0!Pr&ZMugb}vpvUrEgUyE{Q{|Ej*(z}{{?TIr9R*nr
z0QVY^d<!9x?^+{DD_z(Fh*pCkJl2a2Ok?a%B>oKSLXubEk?IBWB*9Y#_bDmA9}VDW
za>je|OD}AVC04@%63e#;OO(-rD2eY6$)X*>90OTz;@29Fd{@9D-(us*(1ExsdXp~!
z0;KWMP0%6Kz7GJf?tYT{<5l!7?+YWg4)a7eBg-<*lY@dekveqJ`=p*3S!y|it4@x-
z@Smeo!TPi$Ns=rfSY>KN@Dh4PRf89xuE(!UME;4hOE#?E*+Z_QO&2q@O`@$om@~1*
z;cvs^lRRnyKTJ{Iy_h3^7mnuS4{yIXC>DfCqpcvgf}{gzyTKC5cMha*1or}elki4M
zsJ3%yQ~VrE33bYO+7doO!XBq_8h-54;9ejS<1c5t8*|IR$JNWm5G*C(Kz=?6yO{<p
zMV;xdqE!9?H7sZK48EEfe6`(8CQ>=3AX5Buv1buSRpNeGijQJ>RO42Fh>IQu@JhrD
z0(6U!$u|cw`E-9Eva}u-b4BRsZ6TLy0`5Av27?)bE!XQs8Z$LagHX<SSTa<XQCuU$
z<(s(H;I4x3WH1R6C*S#S$)}^axXKc@GY;8BrmiW#>meHo=oD;$5z0RuLiuzw->?sH
zD>%enV&d9>yBfY>U@kT;`7VG<KAo`RA}dQ>KH@Gjam@hU0GUq1!?8>8YmVrod~L=#
zQYXWFsF&`GCw0Spxv5i<nW$3&t&Gk4=xH83T^Ndswgq<$;zoeE(zxWi5H9%=#<h1x
z$)e@36S6BpGS?hn0c1MWvTEGbMkxO*2<4;My|fQ;Id8|@RVMCmaMuDD1?F1gk}m<*
zDD`rIk?pO{_6EdVW8zu>Tn3pAr(>`)zX>O>{AWQZ-$mHV`w+KtYs}38r_~id0^lMD
z#{!ydWb!S5Og=WMfUGQW`SkRxkj%xH3%9|Q3nmXc2Y+=|m;C3<41Jg2zZ-GP9a;FK
zd4ylqV2<O|*&gWJG*G73IHF+A^DyV7VR9{5>Po0QF)-%_n0TmeP8ulFs}Z??<^rk)
zK@WOVNKeP{^LT8I36TFxxa7MOTV2xA;!Cjv6uupC^MqmiV*;I(2FldaB%t%XM6=T{
znR;3TX1<3xI}H=h)Kj;OH|YVYhSHYIg&XPV1b&`~#qsO%tDA}?8P|QZ_;9*TE00%c
zqu?rHI;dU<UOTRM&z{Zri}XtDJxbMvNAEIOyoaw~#^!~BqU``KM$jZ;bB#>Ci)V+v
ztBk9x2^(R;jt{vUpV#Iz3fW{poVwjO8hFUQaAMt6<+6GHd1<2~4vA01+W>DlnjRwT
zeWZSbox|^%#}jDh*QnYDE#QCri(r%S<IA2%t$sQ?ym+s~KZO^cCec=dI;ibu+LZkU
zOx!!L)O4kWahH6Jcjp1%LL7*w%ymTxrr^^cbwYFpx-pfXr(vgCV)?H;tK`&h9jAtA
zwCSj-wyS7U{AJh!ByG)O5s&40e3ZfKa|Q3sT^U~*-0L{bzca#w(c=IgOTyC$oNmeF
zTk0iSMzS+3ncA+SP4NpcC2PUsE=%?p$!=h)`Lk}v&W-UWS!*-HT&{B*a7U6c+k)?V
z8EmAT@m~Y!4E&d44?>pKgW!yu;O=5jNEX$fZtMgX%fOv%Jo4QDk9>4E;PJ)9Q5O(J
zAsS+;dLPUi_#o0jn^UMe1;6$>`Def+-__V7h_bz|cud#ecUyUno`;#4hVeFlcrexi
zMFE}afzC(+<yx{T#6~b-Vqk`Om|1C<OdCKhm=Rz!d2<e7kGf=bIEmIXPO0~CFc~=-
z@)x`aWEsVBDC4EX>CC$XyBIM~VP9d2eH?5zPHU4^9veJ=o)y!<ys=yHX3U)iM>S%)
zz|oniG8zslHxhq6?U8C&gjddMu*V?HG}c9IF{4AWXg4rjAY*m8Q6hpzzGd*pcdhaG
zW1Sl61UzF+RJ<#ot`KRxpNow#GWnN4Cf_3Li84gRAj>gPF)+u$bsq6C<B{(sc;s7b
zJf5hyOQ~`Yod75bwhy3Jc*;O0{w3IDB!8x))ZTsbA+9?Z4N-S63ulK1@TFiD68{|L
z4N-v03WlbM%X>5CPW*;5aujf1z<R^J4%`A?0dQl+gf(0bOLzj9D`tmh>g&K<L;Uk)
zCT!{&y%}@8tb_f)zDB+a82{ie1S|shLdJx(Tu)2b1JG3@oDb%DY%%eh%1qeI?R+EV
z`dGsLV7HJ^=VD=(f_o8*+wAp9dZ_Dd2~Pxc4GAT38Fn3jSISJ-+^ryCKT9|O?CT^<
z81@EmuVQKW;z_*}*VhvE1hj~R%%R|Ywl@KIt;~ciTpkGrSi*r|-yq=v!`=dJ3zn9U
zeDz&_%hyZme+daM0(290D}dLr%#)_2Yed46Ea6Ece3OK1?E&^SaBpNx*w76GQ-gV?
z)8ZPI+9hCa#@>trNPj;@jGOc$J-}KI@K%QST;kh#@eg?MTQkHbh(F5W<5KjH2Y5RJ
zpb$WN1L#0^2k|-q-hyq*5TAHG;*Rm+@ABf`DIFgV_VGdhM|*%(9^l>5fFM3e{IOpA
zYA=3!hWH}lI}ooue=x&#^I54apYgwiYk4Oc8!<j=yoJWg=-I8<9gywBxD+@Y9G+l8
z$Hwwx+hMN<=Y}GlcIs%vd0<a2CT8=3w~O(rZ76LzA1uf2g7Xt>fBhK(5kCp>N@h~S
z>ER)v5aP}TJ_Yb{z&Z!qhTRKrcWF`KC7h$}afX=)W*8V<^9!*x#DA)UCHVqOJeNas
zS3nKveQ!Y5%?^*OcL)gJ11$4uXyx)=kGXDOqhJSteT&s_6*ys6g8LBLd-*kT$AhV%
zd<V0&+``!DFtGyr3_$v7DBQx=hm`=_axeb#4Dm_gqh5TW7yrdx@r-{FfF>Sbr3d(O
zF90f;OpS@xd>KKn3n{ZM+bgk$h~I;G^CcV_FwG1z70hrjx<ubje)lkdZ?N>SNg%AL
zfkp#b2}qaXJF#`df14puE|?aEnE_@b7+rSn!X735yR<y<#x%qLH3y{SaTXx`BQAIq
zxFxf=6>XgJvNzJUmJ{jTW7;@_?~n|>Caxu1<cx`{&6$k<5)yrY@h(iq=dm4c^me?X
zxfr~Pwn&T@<X?)p)+Xddzz=5>Qll8xL^w{{dZm4D!L{6Bfe`MIMRqs`yQKhjU@0Md
z%G}k-AY|v}m}_gjcm-@bu)6+#<6-N9tCtD;Ld><XPP`8GiFn3dG|Qm)&<wuj?g$ei
zvi1nM9uePpLK-4M_im{Qq^g;Zki)?S&yiLFe0+X*k5&NqAR54AcRYw+_?MasQRYH6
zZ7lPcFh5P=*(BQU!&J2<fOKU07(nWn{uCRV?voSvD`Lr4BkmA5G@rH}!uUU%u>y6A
z6*mDCj$?}qzWVN9$TXoKb4ODF9aMh9Qp4B?QO&UQ3gl3M8an?8{4U@QfNuo&hX-yB
zutf&&&gWyUy7l4%u$wY=qGfT?CUAzGXc%nwTu0o~stkWdG8W~R@|Z3rwX8QH?T8>9
zO8&x9y*UgqtuqK(fuNdTH8i@UN2Y`ajC;VQUT6D<Vfh#P4}&lmqIfnx--{)Asp3#0
z&^7+Q$>H@~Z7G5Xpe(E$##9IhQusn{<7<P*ojnBc8`(vbXC+q1c379TCWuqp^R(TH
z|85;E+kj|iWIU)W!Ivo10ia2uOXqKcim#GEyRMF^n3d5*x$^0EuA)J^E^75n%sB9`
zcw+8AOg$4bPUpW@Y1B0tHLg7hj!6>}Wp`|7fMJkrBK|gFwUmw`HeGxXGrWPtPa=K`
z@r4$DbUMHGe@E#e05o>G#r}uVufY8PmMT3TFLZn=&GDV0!P6vKUa8V2z&r4&mM%zR
z28Y!ut_~L;zdE|Eq*g<`2}I${Y~{+C>XDFaMzzLnE!LT&d-d`azIV^!rxE`K@wZ!i
z7vj^as4v#o&qRJq{43##iR;$cyZLBTYFu?S^OfsDRUVSYR7xbBSs{0T{~JrKkgg=}
zhNXI#+VOH&A^V%)F<@Ut@SV&*z4tBn;6{263+Z761Iz^Q7J$2mReVl{9wv#eWIfC$
zz70LpFcDfjsr}zIuc~PLQvsSd6(P_(5W%0b!bzz+<W%oYB)BJ*B3egVl<}`(i7y3r
zWO`z?6Q}v2crEqR?{PV_b}L(A5&TVJm2EwiO5BU2ebN#q=<xyI^y~&r?vDK6!zIB>
z&{gp_2<NJMkj<zs0eW6>E^$m$cOdQj8jlJ%cme<89}ZWIh8k308Y|&fp2}E_?;%VC
zI|z_+CT!4?7{Y(>1l*2*duh{iL*IJ>9zZ}p6EKf93HT`xFg<)`bBGBL@sBhxd9o$!
zkPAXlzu=RoGZ6JVqLknfOr+_e5)9+d9>-yDi1)W|3aVYo-xc|lHekVLhz>h19F}LC
zOAx}p#Ieuvztr<kQ9MO#Sbu<NN`C#V%A1FTS6rUNbWI^T2^)m1qBs@tC5@}{@7=p@
z(9gwJ-`cLc%Zj_-A)^A>S2EN`r_GfjhEx@XFPyY=#{XT5;xF);qr(5oRM8fpT{|^Z
zh5$YpOW`!da>4xxPG5kUy$E2Zw8TkEoSLNC)c&zViq}(bT6sw<m1rnQhNUHn!Sgq`
z)SO(&lz0HlgV=xg8?fSnp}n6uaVlU<O3eq^zXb4W6t_Fsn-V+mYxvJ*`2S@1MW$<w
z@ae-JqhduSpbEmfAwcu@VeAw9`j(uS8h)7+IvM+*M>;*7&(8yRROvKlI6b*2gu4Ky
z=Hv-BpFXuD2f!60iRPn9y&J!B$p0HT<lBg;YUGFqFWe4HPeR!~2gn3!r%85CZup1d
z1`}5aa1z^q#WwNfgv6;z!5>xis`Mp(iIl&;iTnbeDt8fL{-$z|X~%sWqTUdvZuO{Y
zpt6W>ar)W-3jzEKKzoo9_s<ZYApR@jHP*|Czl$n}TNl1>@B_(~f9h%BDt=BYD{ddv
z(|DiFDQ1ra-b$eU(KD4Rky?V~`MnKs-x1f0o``VH)G*0ww2daqSS%OQu&Fc8+p^Pg
zjABWIerrPG2SSj8QcWO@@0&DeyyX9b@sjT)%*HEtx^IM$M-1Z_t~bD`v{y^`IpGo{
zu{(v>^vcE%ur-X2$8G~~Gys)p6Q&-dW-&hoLs1CeEmK^b@$8OZO;k1~w+(+&63c)4
zkkI!s{*iz)Y(~NGvGKUK0|DG|EkQsQ0+N7U#D-^3>=5v-3Amm3yAiN~SjN!3gI_}-
z|6GPbzSpra@MIVYmkZZ>L2?(nBMzto$pw1FK<T|tZsr>i0jQVt=;giiawD;?VyEq+
zmp$m^2PS$Ic~&7>L-GRlzKM>j;e1rf`6f2L4AoIoe`Kof0n-uD!fY{2y68ege`rZG
zNv|S@hIcDAp(IE6sx-d`$_ihKxQ|WoeSq#kv4&(bw!_Hep9h(I+ptL`Wd4kIMR08}
z!E3>EMzAn%7$y|V`d9LGSOUdK04j4+%KR{8ehL3u*f@g9noJi{=8fQ3UhOmq@6Y8p
z2WBFeN<1bp%@wziuVN&QB^_1VDGTtC-$k$&XxNVpLJd=Fw|X95GsI|OT}_WQKel62
z(t4~8Xk24-;6)Rnd9p4yoFh}ygd{ic{ffi^$u4Mmf7N=CmkM(3=TmH(adJx1(bKJ;
z!kqw*dHgaPqk8Ua9$z6wJy6Ur^?-lsz{~j60r?wQ2VTRc4rq>DLkBchim=lO@>hm1
zbbwmWgI7Hfk0C-!X)+NKF_)hu;&o5Nsh)@}o`_B+;z1Lkd2=lyH2$ApGvO&Mg8V9D
z;fs7#q9@`}L}+D910WGPvPs0lo`~U|h=)88!%W0Fe3~DcS5M}KbLtE@{P~d^Px4iX
zMykLO9_eF7D*qZIeH5QAHV@OY6Idx*`T1?^as1lC<=>CHH~HSdQU{d;KTjvQt`9tD
z83)HQePbOzVJw~Xkfi3qIx3*0HiKwYAh=lZ_#`n|HWx;EVxPdDnhR%|*r)LU37)lU
zTXN0}-iWZ9Gv!m?lpA(Pb5w6c^(#X|#qW}I4k#_XxtI=>Dj6F|f(JK!d$;3J&pM5`
zXDxwV_zd3gi(5ZGQ!xJ_J}bY%%RkEUv$Z6Io_cGgPHlMHgN{PtT9}M`NAf31@V}3p
zPx^T#;Q}7#V+Y}fBlx^QE)s4Bs{vU@7kV%tTCDG3E5G60*BTKu?9YhExeH5;2(j)=
z2)z+;MNGpxJ<$)&2~Pu?5uG{>I9A@Q3gz4##I-ch<Cx<T`~jAL^FnMD=j`Bv+%MtN
zC)LZV5sRYwZou@UoeJOc^bp8bjRZZ;=TW^KOyL<(?Q4laPxC>r`Hy$0$M19?sSlCV
z>U7umE}O=?yQ~6x;5w0BeMrnl*ga7im^RtFX&B!#Bx64d$o_6wuFI<h&DOC4>5@jD
z;4BUaE_3qn_a)paqY?k=Gs2hyh)v;$3v@zV?8^ct?S35Harb(XriPN@!5*6;E0+OC
zElMQ_3((rtI(pdn4(E<{t09dt5)#<6ci8z)!CVCJVoXBVRuelur@()#?2^i*BHovJ
z{a4^V9{3V~mzIEwd!~x4@e=m;FqaX(Fat)_xZlH^<Y6w41GpjsNR?O%NYg}5Q1@Y)
z?4T$0iPgf$FinDwE`~$d+ARjl!iavOs^s&OEZ9Z(G$TI6H_B#2+#k0HZ#z)Ad|y@u
zA73@$mRgUL;Ilqqg3rKSi8!z9#g)@r8kTsNFGEb+3A7kc=5n`o*L$EnUZN#nGMBrx
zyUxRW=V6wDDP3(=sY<3E6#&v+@)$kpMahczc_(%Qer+Z4S7IxX?+Yxol`P=rb0Dq~
z_`)-qV=K84Tyt=*fZNLUs_ln4=rrWT+iRS4Q3>ZJ>;<gw1-=W@ggg$gH$pzazZ<*R
zgveh7A@Y5RrG#9>&*!EIiEiTiF~MovHOXHCXDcF@FMLM8CAAcMXXq<E7s6%N#?@-@
zYhJYGICm?g<jVd9QvIudHN)C*t8@!(t?2pBv|T!d7asBdfZYsWIi_4&!tIdHDDjks
zHnx<I+YJ00;L8AiM%ypgEv3PwX8Uq5Ev(cJa|uzIzrt<=v#(2t+eH8K0XGM%7Z5&R
zoHVvyW6J;+k5k$(Iff}=I6YPW2{;YK9_*%l;BudjxQoEGG_m93Q$wZS089W}f$5rC
zncrW+k|bJSy_3Kv>7B&=h^-`cU%kr(dx>EsawAv?{0Y0=uyG~JbahRl^kTr;fHZ^q
zpB}!#)c~N%dIy$vo$`jHjk^r2HlT08_F)o#OVUg6X(E1x@8q7rJSe^Y^EZit$?*!o
zsePJ1iLsZj9KOpTi&91<*spJx;Ae2RA}sR;)W$9JFckv~>tCQ%fHH4DZQPX}sEU{9
z9x$0Vpf>Ib4^z#<tOiqR10ug|KyBRBfK+b1mUj~6{*JcB0QObxa9Or>wdy>YSMQxX
zcNH8e`*(2krR?9saV<WT{d0W1!?G7|@wWck?p5LlXDqGL(nHbKUE`4rOe510=I>4g
zI4-%R=7YFO``WrQzv7e@WelZu0A-TeU$F=1b>=Z_>t=lwj=L~}Jj{b&G7n)}H`~LU
z>|q`%NBuL8VOw{W2O8#q9!?)a1gCm_4j7uV6Zld`V*PwJehsaj8e7Sd*KT+lfunRW
zxP)Jx;4yXN<Nhl#Z7$dA3N7?yP%bOxlKK#9-m@`x9z@zav=>Z(Nc+HVnD&l4F@Nuf
zvi{Gtc&-15#Q#L>UlxBC@xl79j6<7Ef;B%EKr0gb31H!raCMZUdyf$8?cJruc8>Kf
zo8H|?5^dLo#E-%<bS`*pAuH(A5&Ss}dNt3I{zKBsNvb(eo}}x_Oxo6+Z%I{&A*>cv
zp(XL!6`#T~ZBF58Y3FKHAC_11+&nk`D_;NAa`_&Te)&AN56J}(Yq@-ZuUm#0&M@w^
zGI#^lUcM}RagXc73|!2=(!fyRk*-#D&Afv<<n2YuJHy<CkVY9IiSE=p6#XB<pQG2A
zS4}&Y=V7{fnD|COnU_sFm+OJLd7$UPWL`Dx+;|Vu-NU>9rqs@dU`9C3hSAQA1El4l
zm*G!gEdQXf6@Yz>CE%n!1w9e0hVTcl{TaeP!A`)ZA^Z~GjfmKLBRkT3tpC)7hHpA7
z6&MahdsnOSqq$Yz$#s*hXG-)Yq)MbNY(i+}vLERtd6>67%!^=xs%!t(wwMy3@tx{{
z-t|DQ0m@wVBi$4a^S+1K0wzP*srr%bbTGXCz_runFr2b$V`vLNhl8iFOiL&%dwVwx
zu&{a=_B616gYEu5uwg}ya<wX)n_KaQTsK4ezsBxINCq%=|3ESwVvXHb_&R1-GQ1VB
z)G$d<Rt8@=d}mly#D7SikUs+8b%cgPM*aQr^9}$v(*xD?K<@y`yi(e`!5-#N5A!aV
z%tO}R4HAa@F{!KPfwlw6JZA0P$$(U`pTG>HV*gUHA^23Wukp3bP%(W>KYa!84$#0?
z>i#ba=P4f9Q5neiO3wgT`dS`tVx@gYx}oYJ9hBHMOiGE(qWm8fm!D@KOPBvBcd7?E
z!UOFnv;0T7VIHQvhxxe7@*m|!1Q=e8kh)_$P*IuXKgtaU)QTSITfVNC!YhjOXg_+i
z6HMl-V>@@6VakEI5{&kY3fL}Tbp6-ZY{5$If{t_}0o7qlH8(y@yJrKN)I=4objgo$
zqb*qll3hhI&G!AVFUt_f{A=$<8K@GVs{v_@Dq>%i0n&sT3r6c-PoJ-v%1tQo&k#Qg
zy98lr2O961f3#b{C4WqakGHE0!MzaZde64>{~$QVZGa%h1n7D9wcs@jpMyW^e+uZ}
zc7l%uc&@)u2p)t$WAerSi{MzEgNwNnOn{ysegwSA{5AOV{-=PBt`Yd|!c%|O5`xDd
zP{}J{d;VV}om>tiCqkmN-v*8gW`uJ|&)=`Yph|K=2<Ay?U&mr!yd>~g*TW-Ay)m&g
zutPDh1b0jZ-_foYT$;cVwjiUhBZP|wuQrsB9qxJpY|Ii9u<@B;MJlT@^rr@u`3d_O
z`vv<I`wjaY`vdzE)2fi@hN?h<Z*=|blCgc@q$#^LzSOZ?Klj%8?=XJoK(fkQ#gwQD
z_7~BY;lCXFo1arhb6(0w)GlPgZTaKa;sn1;9NUisQA8gP&&Tycj;iqV%HpD9qTbXg
zou`yXxMPAZ_oX>W|M}Kuzy|>C4!BK!zIl|E1F(Mq>S%rkmitV^4Iqxa-zn~-{4k~(
zasBZhX!ryA@eUe%2V&*nKAiXgh`5r+|BR1V)@sC^1TJ|0rw9i@C{Q*wkcMm{oYF1y
z9fV(kt`@j_`0izu`Yn1jn94QHo)h*eLCJTYGUiA)RM`P=x1?S8gRuzM#rT(ER5o5*
z+N}`1M$Jb*QK8e@{Oo@P?i%*n@`*^2*A!i??bnZff8FR0_y2I)=yoL0>>o{%_s<B&
z9-eUQgW2CXm=C@$3l0gFZ<{N)=B282;zz+l-vXX~gP0?<t;MF7=M+I(Yl@*9{b^Gt
z4#CRNkqVf&c&#VVjNS01%9oT3EqRXdume48`O?^Uv0#Zi+5-*pK>L*j1+NdGBUv8I
zv0$in_Nj<$O|@0!>k(Ff`SX|8RuYpg<cni2`m`fOc41NDU0kglzl<)bF=p5PyN(&t
z0W!_AF^~<;4|`jkOr7wn+VUShE9^Oh#p3nNE~<v_%kq5NXbS~*aUJ1O#l`ha{OppF
zPuP!7P0(1ElB@As+LXKowm-e9f>pzmXgj^R=sunZJ^Je43!KB=bal11{4(l|8l&IZ
z|E*(2clNlx_qZy;QPsGD4?#gz+><Q6^6)qh_@f7`1n2+*nh0+wy1K65YFW<?no=w@
zcui2f*Z0?SBSwp30v$ZybQXbn$5*(!gCV|=hMO^e+}@+7yID53JyyBotEyk!HtKkY
zqEu=+L`Cd7;;4mbr5*y81ilY8hsiLv?SUaS5W$Bkz-rr>r4MSiW#197HdY;AYTt=H
z9&u3uH6Lb?WJrD(b0~35@N3_ZKf50<QsS$F`TNeT;A$A3_MM{je0aj+I~l$@dJ?KG
z){KU1BRqqBN4|QP1l<O>CXXw4RIfguSFf@?_nA7sq#`%JFrq0OtVCC(1}`{I3-R^g
zs0H?Z{10M-Sl_7~o!83}@eNd<`I6Sbvn`Pr-yrf>%(Z|>Gc+5by61)ybS2<dnV`*S
zI~bvAt4Es>M6uc=ITTY*CGs9jd>bB(xmKaDXh$&TA*_Lkx?LB+I|)ov6<)#tN7lz&
zOG|hh;Kn2rxDi&jjD$H4^I-`~*b2<qBy4C2?>1p2`RF}mC{DQIv6wr|5*`jXN<x8~
zVD-yLm`4xWf;q&hGM4$EinPM&fT;q%DzXlXJ;d|87OM`8ixaHgw8rYC1sAT3xFf75
zbqGF_o-`&-<9RP+EdMeqm-84Oa<Gmx1lxd)2-^~C<aK0-KUVql<RDY0-sd4sJ#T|G
zNE7Go=a1Q&xDQwb;}F-BxLV~4zJ0xF&G<)5u8+>=-BxW*^S&YdIJbfvhk(<n`iC43
z<%hUtU=PM$lOoCg!E_b{z8Kb&+^H`ggx58}vSQj#j(3gVs{u|2u1~?;H8ZGoJa{Eo
zf#3-|E*OH+;BGDlaCN{Oh++cQu%zt)H76<m1w4&;j6Tdq6o_SoML(y*6X|es;xrnM
z(f&AAYF;T>$?2|}s|i@WE=Lz1&JV#YAUG7idM*D)2#_xaYgJ~iV=lrZuWiD=MtDub
z9SyF9gvUrE0aBi1($ytc6I62{mtoaBI09=-a1~-w6I6d-wGOeafcyG}@%sGOfLon%
zggnEJt~uQhh~7DR4!*YhT#p#~6|Ii`q4~U^rWkznNhjZdlS5xO{5%a3d{pL5u;NLj
zev01<nNbbAd{0hcchI_sT0$<sLBfa^^c12rJ)2m}Jd5E5E*Pnw7^x|YR6AlcQcu$U
zG*&WFVVyKkS;3UbaXTN3xpDzM?xO!ea5@CX8o{#!ZotYW=<YT^kYxhqf$s~xli@dl
zeIX6MgSE1HHao<VE4{UHf?E-XXg?#m459%LbupqBA$e(UqF~Bp1wHBE^1+t}ui>f9
zP@Ixa1;Ou+Ps1bMAq<avL$K8Fd<w5B8u!O1wuTP{n3$!A8HAYQ0I9660C;VmVor2<
zh^b^^>X3L?K4&TL6-|tMH4r1;$rxhd!DRUiV$A_xOyXYNoDDEFIiF@8Xg;?gF7;k}
zP0q^1Fju3Rz^5(XfBYlN9S{m}l}P$XemFmlq@gN)Trv4Sp3c=l&VkqwkY(P3db*wW
z$J~JcwKd#EWlp9t-NC3buaj&m=Fb<#+Z(1{E{DogGyJ{ahk@@Qd>n#p1nXE?HbHOK
z2!bDs;4uhBK+w|!a0Uu4ab@HCxCr>~4ZjimDDb_($JGO=E!wAmzHY+<G53=Zybi%w
z2>P0UU0`?b6W`A@0{<g;wxMhdRrUG7FvZ<3IJIeZWjvnA=NTJFkHUV&PhhtEx1SXr
zQPvVy$ZgASwDE*eOl^12ruf-dOFGj+Ts-oh-WZa&s|U|UJzF>Uc9H~p->=3S1AFgT
zj6ahnxF5O$;y;X7jcbhfY$I+>-0?<S1o3rjkL#)|3O*_wdm!R|2dj=82lm-~u8ZJ~
z!TvH%`R{^LzH_nGaJ3-6uQu4t^%uMUG*AwpwFWxRKxsn4E5Lnxq97CytqJDTeD3>+
z{~Mq70r?JM>B%<~OYH-3-5F>}m0Ei2)BgcvJI?-r%gO4Q;pO8aVw+&#lU%_{U2sER
z_rRz5R1$ZB`xh=<fW=jd>zGztf9!<v?EmC0zK#`A#+_j=+<_=wmmhA+9SJVNk1Hns
zPDtb%jJ1Ox^Rn&lHn40zvE;vzytd&^0@nka@)sx9A3EP<$?K71b$+;~cP98#{F;pN
zUq8EK&Xni8yqmVV2vge)v?+cjb|MK{(YZc6>f*bTIrHUhY*JHB?;KPo#@CR?*YNG_
z5P*%XL*XkZ6u~LjkzUVICCv)<{Q)kIo_%JbngYJl6IBsWUz#Yj-DRR?nW#31>2IRq
ztlh+4t9j~U_d}heDoilYtymRvUqGT6T_2Kr^TP{MS4h6bueot0f|uFcNC~7lNpUmG
zgA+5%m*eB^D^sc@%TTHW=U@Z(E?+pQC|e#?`vx44*prBOjj9QED%QTts-5I=sM;RO
z*9`Eg*^Iw{b1b1>8trN&6&d96sK|GKRgngO*W`y+$K%0%YozkuL?`5%i=DKOPVBrl
z##dxmFE-GN*Xf0Tr(s8z*^9w$=h~Qi16)&jUyHl(Cx`G_E|}5S;o$VXo<W#iF0Y8#
zPZ(kz+?XY8x^Cc6A6M84UQ05rllY+|jsYHl4T6CGidRJuTjpB?YrJ)SU1QIUl->(C
z#DFP12YO}j^>*7VvAF6?XZ`6QJO&${o|xtMHo;n2%?Z}=N!2+YQ(4O%sv&OY-F&S)
zSPfM}u=nMMm+KS2zGIy7-=guCa~?JV(zNwjJmGQ}tM>qciM>e0-lSr}<zmNzOKm`@
z3S<S#^kkRMP;Iw_EdZ~f6T)3!`Pz`bcrBKma6{b+dhvnvA_{nIet2j&5%Bv)D*tjw
z<(rR<+(#!ObmAlH#A|e7E1if7I1cMrW-m^0dGz8#OW0CEEa8QguuYkr80PZm#K+c&
zMquyH4-YQA!0ymXq?%Z_qoy$9M$#w4B<h>N*UxP*fl5+{KqZJ{<M&aKyw!0A%1W{o
z5!)z<fD^DzWme==mqSH1TEbQ&e83VWEMeId8SWxf<T<NI<G8^2;aRK?;1`UvG5(d1
z%C`WUu#aBk-xG7sTPNP56YtUq;U;07%k0Dmmw#W(#b309hmr6hOL&nbEV~yYUH)A$
zx5;|Z1nh(P;d!th*q4k`{yX55?=o!S-ksq5&*X`)r#E7I-E6)1m|na`F9e*9bt$tK
zr@0(@@rosEO~OYk;X+GTb|*%;Mt3p(FIz8;2b|0gPbLEZzlL9jO!-%F1d}fx>xr1O
zLuPnvTh06E4bvUW1BSWSFd2_+@p~ffbwFALx+7VeaeM6<%=pq<ZvPCcV1U~Km-cmW
z)un>Ev6+IMiD>>S-qv$DFFr_Y@DfQkf;Z%|tr9p0d%$`j|7v<6-z8Y@GI}1P6Av5a
z1TYU7=2F9C>Uq(sh<ixuUut!xtA|>h=cZw2rKydgb{z?t65E0+-YY}GyV)~{J&M>2
zSG?dBULRR|#BfJ|+Y0VvFlS=N1UUBJv`b|G)Q7MhGhjo28v*Lrd=}Py9|@la_b51B
zc-qjZNAts@+%RyD<L9cJE&n<uDOYE=R{MWd&TH#w)Ris((I|ipk+Vsr89a~2{dnB%
zT%y6nTy(B`J~*yv<!o3Jb59!i(U8B8A}2l@?k9{}ZJUf+XF+ktb;X-+<ex%5=0p@f
zd3Ny$HNx_G8UppGEd-C{hsUhp5Ilun&&uftb{)effvGDX%}E9(gKw~V)&#*9ck59k
zGM)K!H=Nen;{73?UOr>MV-T|0a5}mPm)4PD+)i*StOGs3J(nLIbVh<J)Jvoq6E!@{
ztPRG5UaaC?@OD62|7v@LHUV^#c`C^?qc7r-|MVX2#Me_<HF*nzd%;3ZOe?{Pw>Q`)
zJ>E6&-f6sQdn#_kXB)9LmP?HIbBG%(=2rRIPQl%4E+?WptS5&9UY{SH6-R-+8^7j{
z{7+CB`E&qHz*Sa7BUC1SmjOEfeA$3HVhLDQNgDyY2ONjXb~=D=$PEvl!@yLc1D{|U
zu;^`kSw3-8HLAE#3vluedUr4WYK)vak;EJG!!zJmT2|AbvdjM{)sgR9>@>*J${uXH
z(Y1UzzG3=+dB!lhTNWm*q5-Ce{bUW8{m7p!Wqs#|s_qX}xg#KYJU=|&<w3L-zqV+d
z{cgxO(@)MY(@)9ZJJl_QOGBj`PsJ&@66gSW6>7^GtGs(-Zjk|x1^BArbdVITtfATf
zZmD&u9k?g*!{gp~a7*yVr9=(SKq#jUkyr0SNFxZZHz6ItZ84nAg2I(0WCghEOvsVo
zp3DzUS$vH!zyc#wLlQ#y;@CC&5R$(-=58=tCva~VPDe1|$`Y~@+>KwATgv2f?Q63C
zK9wIHl_mpR2BCH|9iwi{IL;$7jOl6aW)spLA<rR1TspR0zmE>AxF_ar(*2)?pfkX?
z45;ImfMxX{haTJtPCe)e?)m)i8b1}>E%>#1<X_K9kWYt}v*F9Mdg4(|Ew>q{FQBA>
zbZQYO<MQEpzuW}$0`o$Cc-cN($&5?>C*YD#XNU7L3y8a1$PxzX2k1EiU233A0vwo&
zOu$iKp3V=?i8H_~z^{tR|2!3yPlwMN5U}^^V*GUp&EfEQi2=I+eA|FJL<(5eP&8r;
zF9xT*zCXBa;B*aHk5%W=dl_vF5u>(uXwxPAYOF7u7ht#P%H>}+vtqhzj&>UmbSYqM
zu{!a*mdiI51Dr#ACCF1-Z0^kwcR6v|Vih;q+hP~ukCJ>S$#>T0C2mVT0FDbSZ@l#N
zdZ4=!5Q4HrHqUm&D@;gmghGgpPjl0RxSJyGY7?TkF`kgCj8py_jq}=&Gu}?es}~{F
z(dzs((!yIJ?iwRi+*ptF%&)_;kD_-QnKM)%DEk9kVrS9Th@PwMdD?W@z8pIa{)@3Y
zG5+JzP;r;o)RQ5&yngruF_+lcMm!zxX&&(f5T9klY8yqHF56dN-C(;EyW5Dr<O2Hk
zA}(BigqQ9@E;#3a)m#_=b~|%H2cB!R%MAvUkKKmtJr~BfeCER0mN*yiCKBt4cqJAC
za|Kq?BYQ=VWqlqP4W%kK%gTPP@yvo}3_NjNCa=PBAh{CrdJtSH3n7{dC`ztV0PSKT
z>Bw~rb~5n`Fkdw8Bxh<j2aI|<5X}4ZR)>*mv7y9YhFNFh#RtZ$VCIi?5qdk%5)K3W
z2?=!|5qK29g&7kLapzmYlfZmPLS0c8VWWw^wj^O%zId+NaCgkjw}htx-c3SXRs|jl
za8bsDL)`_Ia1fXsB-E92F_ugG;xZHFxqR})E#Gk6|9?tCT|NaK2XINogu~p0mT)kb
zA`)uuEX5`fzqHJR<6I*W=2^lKU_T?Du9CvWrvSVzW5VGs7f>U{Z~~ysOcq^8mSATP
ze<PMY>0)<B+;|f?1k5f(>MB@(O(nhnOPl0z7moXQx8kmt8)qs<0{$G8x)KU}I>2Sv
z-V<%4n`k1>29t|OUAV5pW)XiAmOi12?uxhxhMA)4|I2`Mg}NR)2f)o(de4%8CL3rD
ziN=A^1?mRuT;gxZkSG_-BrsY9Cxh8dHFP0jqvXAJe3lO@WB;Gva_)+`seq$^PXqh~
z)zHOA;F$oIV|!O)l$!!Z2UYESxr0-?T=<H3g3S$B@LA5MdDNy9m*WdejwEaSbO>}c
z8x6tMbHZ(PC2gy?noXmv34+x2Hf`EcmtjvslEkVISHZZ7?&OaS7}pHqKk&HLz%@PO
zipSLWA&uHbMfMCt&!v$$N_mDQ&m;aF;<fwTh&`Jjd4l9KiB~1G)BQattOVO~&{1t_
zVf7e)dgS0q>ax3cNe8;YmiSDFc92-Rr4ql8L1-a~2NB<xDs3UY>zU#8ek1XE$xZL^
z-30DMEXYOv;<qN{t&F*oP4!!V-<pwWjK^dUIVgi~tQ%rN#C1GEv}M2G33(Y2@mEqx
zJRuQ;oMJ-W0sL-8Awx1G&CTE&>V}#Sam5g#t^CDM2=5N0B(EZ3O9mnND`M_c1HTWr
zJ78_go4sDV4(^Q%*hXN7StoXY-I1{q@$ge<rB6Y_f@P9g724HCm<SQ|K!mpHS3MD1
z5%6{f5j&U1+;Ff>==emi+nGPwK{>U#ZNw+F{`_SVWdfaMs&@g{l~MJ$3}qf(YHe$m
z8)>4&(-YC!?_c*sZzp&By$q`JQ9asJp98oE)!I^}`hDWlmuC#oqfGP^;>Qmzz42)G
z1XjH9XlJyQX3zfy%Wu3JYe}a<@;*tmC36IHAA#O$gSjK(##sDZ;&&0R?O5?UGKh4D
z%rTLl5&wBmS=L|au^(A#IEn!I!=sHT28q_UNP1INU7+5>QtSI;lJCM&d&w$#9*J>}
zK~+xx%T6A<3TzhGWx9sn>VZE2xEo6)T?aUci0-C*F~AtgQ|;Qe6KNs~`8}1KiD#SJ
z1b3n(7e{%LYZHqv_mY23;xE#Y=R(%Ql79v8>x>g}R4GBxG(j<b?*)gdCXT)&*M;l@
zu0O?P`;x?8V=3jSvhBPz=6ag)8vs`TtciG=CwvdMZ_<P(m>+$>>305RaJ|#2Rz!Oh
z^1IX(%b~KjB@&<_i8PrCy+q%W<cG9Gxe)aOSB^y2fGfvD%%bcovFTjt`qEZ|o~x}q
zZ5r+s*k|zklv@A1qb7L!XcDpk23QK90ssxs?bufWSn^bSl=%J@zf7WuS7qpn`z}L#
z5~7nV{#N4Sl>w-SY;x|$3;<;w2tZFZ+yh`!eyDyW@kiosM$hEGU@ASvw+8zep0Si8
z7t=(Q*ypiA#I^&YO05RdMSsRIRjLG~I?9x(u`9wv$gO@r_y?q^S-%dl7-a2D*jnOW
z%nz%C+Z|)V<e!f)`8e#jU-lBl_)D20j<)3Y13I4M0&z|VK<UYgsLHXH{9)oZ=Z7U{
zdvF~rx%?NBT)z9UU-wE*Zt}ZA$U0f_M*zi0F3`OO+ABG;r=ulbPy9>y;qdW_itA#@
z<=5A5$j7nI{gx>?x#LM{+}V;p4yZfH1>#xN0F<6QN%G??iN<{qNi;+cVt=M3(aKVF
zbLn7L!)QnrgHdH4<_{dXzsmqc0d+HwO1T7(DzXmyw+u`XGx>Nhu3&Wi@(tsS_^|;`
zpfKOu^HajP`8&|rIOwJ;&t4NHhA9|TOzr~y$3HT-CirO%-mBJYe&}oL@oj`pQ|d3{
zn+ryK%}V&<F3%%u>Jj!a!WQ_nfHYc5>7R<eUfCh?7f`B@h4invC*nCoXlngqBIcQh
zR)L6k*sF1#h?bs+Q6{3bN4Nn(O_%?SkOOqktHT1qkZ-)l*Vf~kWPEM#X}5Sr_qN0H
z!>REQq({(DpWZ%$ztLn~CdBs`mb!3$0VqPxQ>*+1elKEr9&Ut5a3kPj+PFusgN#r9
zCh*Dk1eW59e+fZW8et)X*+%#rz>)dklzJ54Ax0>FGYI8-(j)u|zH(`N+B^>i*PM=R
z0r&OLuv1Uz+E#(!^l#aLBms|G{=tOc%jTj8s$qhjhu|~>X%0M&RX0KMw?L45Ni5aF
zui@D*O;8crf2}|eAOF>f`zI8sEKg(mXBHH^UpdCgu4z5dIrlGc>isiV#eLukxjr2V
zF4)z%$4z|(akXCCsBH}@&FJ%3Pbfxpf(N(x{Kdid<o(RfUzfNRR>D_C@}xXTpT#PX
z_y8>RO+MB5`Jj-4?_z<7>eVr<I=dCDI=TUiSNR{{LV8@^!0Pb_fYsaQu&Vz9tE+51
zuX7L039tH_C8`2{pYUs5+iqz+n#9^xZ#H5FLwq3Sf0;>)UNnGAlSs$Pa;Jpj{0i}v
ziOn#Ff+5q<AOcQ#)ceWgQO|XAb?|;%Te8y<<<et5#~T3D<0$~tTiwe@*g<LWNfMMJ
zUM1HMjyXN-#Ph_z#2%sH(KdMjgP;1)(H;u$>{_1bi`Q)4wH`obYm(P=_y|_BLFG;y
z<gy^to%weV_U;p&_`WevWY;wBOYA0UBMkob$g4b(lgYogFH1g;bD(j?AuayyiE?Sc
zP}C$>>#yfe`e%3eI<+6qb2qL$q)|%y8$uiQ<{E;6Em&<-T}+Q9?puUU-_GkF(H}|)
z-(TaO0yp-+hk{Cd(*odPf$CWWFJUteQwL1yn-+YGdS1!HwDd4_HU6n@TJS-tL`6Uv
zQ$011$C$oAuQp+?l|823C-^71D&RGQ--B=2vv>%LJCjBAl)u#2@wYQejb}Inli97F
zANbAgfv&Rk<abE7^$i8RiKu#|%Uzs=XFlBl9%j3TsShTexy+MXRS)!m2WnDgnJ2je
zJ<JXd(-chRDo={LYJgN`t<4FPS?ls8>`lt7&8Y^KwsgEQPj){TUeA3@2CwHmUdG<~
zANa7c@hPs>=NAwB@{WP-dx$h%KR|R|hVkOr%~C@{j$~E_Upag~Stb5}>tH6FI`kIS
zjLvwg!o8~T7ZKXsj~=Lo2WkT-^Ds?zzj&D19;Pjr%&TFt``N?9>w2KW0c9Si$?i8m
zs?LuZCNPASnz-zfd-)<6d|CK@E#Zp0X&K1Mr2)gSn(AupY&!6h9s@HZn(F?rUj2!{
z{ovFb*oqy6(o!WauJ{!9``5g;CCct4P(=@PG@#5SpW^=V5>@dq$AHOP@+t054^z#<
zbkHY{Gnagd`v;J&_qy7B!ev)iy0@|FVD@#r&*g&jH(0GIUBITXDs=IB4SR=Gq04qn
zOsgvHU6H(HHP!tGfyVnM2pVQs4O#p>UN}n4fpX^4PO6I<`I5S*{cFAd3$EXLg+jK`
zc&y%Al6h>Vx}6^AFAvm_teJ;qsw?s^|9Y5iU@{L4_kW*wpa^55GD%$wQ0AeT>UM!?
zMaX~j=$a|v74KazIbfFCRc}jL6+Q){A<<KfGZ+#*&G<%~|9y})Ch0>G&i-kx)<?Ss
zd|ZB@+YOP1=4XiRWt$~usbOHm_wq$D__FZDKljKU%s|E~8Ub19F%2(LrG1m#XK+QS
z%HIh4n)1Iv=ib43qcHO@Pm8-RJ<zuvs1Kmb<2=oM;bDI8Fnz&f9_MN9YY+2tF^2uO
zAE3<RJk5OtNaLKv%fr_)&briY$NGcGJkFEcH-^~{%px$__dmi0WWta?sl9&>AdRUu
zqK%WoF)dx*aC<H73zuMN-!%7~38;vG#R$+YxC0xSL4f8T0;afc0kx#3+H9sy4L2G+
zW2+PIahQJ;kFvqMX-TS(WC=;MVs>JuWJnU`nCjjzOf@h|!7w-CZWpb?$^=bwTLH0b
zf;Z7UfiNwzp2T0ys?lN8|DK`mIB5RC4gR)O&4Z8fXT{XH(_Q4on0w23>XQ6vcr=Vd
zbp9%X$op*Kbk_)iZ6SeEg=VpSRh{>-PZ6Rmc{rBopb{?M8Lrm57Y}&vjsfm%cr?_%
zz;jVN!;)ir$gqC*_GJ<Gu19ux2Ci~xT%nkmuGZEb1GY^V5dQh!(r5aa?j1;@%!PkY
zxPzslUVe;?p@*5*@eKE#huP_2#)8SbjAyv*sq?=Q?e;*qfHJS+8SVow(dQl}4@~A|
zJj1;YMq96*Sf9<%@ua%`;Jf>_JmbHwu_J~FG}C<qfyV4t2x|7+d&|uthWw?r;d16H
zy*y{Q53K|L!u4jH9!N<MHV%f&<(=t1_CRlYnDJmTmv^Sy;bGqOFjK*#svDQO)QzKh
zvB3kq?}4TP%3R)=E(u0URBxh5vb40@olN{4nU|=yvd-YMO)RaAM$iO;jSy(JI`#iW
zFw5nEe-3;rChS4ruVZ7;Wc>s?9gfV~^bGd`m_ylrvNaA*PR_LDhGWX6;we7U?Ia<*
z!MF3S;xQ}OWX^IM@IP-tsw3ojg!~_4=K&v8^}PMF3B4KV3c@DzqN1P_+Xhi-Dk3&S
zO{DkU%Wi0bBBH3+&;$fQ1Z>#R#GeHfdsoB+5LE1lilDyV=ial)$>uKa-;cv4bLN?O
zW=_3#cW>0Kdyt9Ah(sy*>o$9Roa{BS*D`t)fV}^2`6ikig1%<)ZNR?)UMp(<{}X<a
ziRR(p4eLOkJ==~0m2LtKRv(1`Ciz&IZ9dkt2m3BqW%~^><^O~q&*PtQpsCdn2a87g
z-4HLiA7oOga{Ic80lIPKO~=<wfxgBj;G)W6RDx2vk>+ijWiY5jtm$|^tGz^zKZk4*
z8BRuKBD0X$$Q(pR`WV9h-WX}jYbeG;v0mxddpkZ`^Lr*f(^!c1YX9Ab<Y29=e?^Ws
zH;R72*UE)_UTwjrCC?jsxkK-FaTJDJ4;&rjhOvBni`1se=Gx%+iXFIxw;qnGRqDAv
z?|`ii_Cl~dPU`IgzXLoEU=;{Fi0E@Bk6~DYA7wZohxewj`5t)`{V~=-tzN!v|B9k|
z)oVrPTNgB&T;BD4!f|m8F7^Ze0eKuf_c=WdQ_k^qKcW|fgJ2i%`X{fv=n==Uant$v
z&CMlG7<;T~uPyjcF|UN)NfZ-&)ck~#g`f`6a2Romc>qThC`gx|zU%Z6ZRKL2e7(z2
zZ_S{d56$nnzT)}^dp?O%4aZ?P{b;OD09^5tp=2*ot}dlm$9D2Djc~{>hw^Eh(V3nz
zZt9s7+BPA>%B7m8Xlp|S6zF`~MEqxD5#Db?6y$dZ*QvvMd_%8v9xFE4JnLY)JJ`h<
zdk(mmwHQ9*fO<Nh8xt^Y0?&h?#A(+<@)Sy}%Kihf{}8vbbXn~Fsf_(STh}_Jonq>3
zc%|pYJv}#9++42Lb2!s*8jiDTbNp<7A>*y+RdVT9*^=Y1m^_P#R}EK};^;~UPl=ir
zaHf*WS<$4Dk`Lep`b)GmpyX=XMVo^EiYy_kWyo@b|Mm3g*a9Y9)qnPSW%tL)R8#NX
zS9+}6)8m1P50vZqqT_2%u`l}nW<1<l+F5ZXDQ2q4bAWq`fm}v{xh(-#lG#*~3l831
zVk_--1ySd*VpI8R;uvX4jK#7R{iW=FLssB8#A_<z_S>IiT6-z?r4~;pnOb*0*kcub
z!c=N1e*R#2k&gq2rsrJ<|LalNv4@#WsZ)<DUS7zbKcNFy>&r~QC$qc=_&ahpsQB_y
zcpKl-!A|2rcGu<jHV3hL2>me@U$+<Q0lGgB_pxH-`TZX24_gz%-&!3f;|{ZOe9)g5
zuE$}TbvXMR78Z1WAtF?f*L!)b%quTiH)2(0aSW+Exa6^7BVRdx9X>eYn7Z{jQjGBN
zhrrwis4B13kv{sHKk=*gzj35LCpGZorNxgG%eysVHsDADv;mHqOz{WkDjdJZ0DYLY
znh;f6Q`!{ZZ)7Dw?njhqMPxk^itv3<{B87!h4)g$+w?S=e*uWqF}xm&xQ{$xMU0&_
zlWW=W-JNIbOfv<`BIXh6s2RcY@MBo5?prawQT2LJaw+}<UM4X&V_FNI!!aF64~gSp
z<N*}}TXEcg<Sz-CP1fyi@Vf)IA-Tn_9`?tTrRN;ArkhM?-MT-urYaxqK(GgKkm~WO
z-ne?bW~F%D95Ksmtbd7h7pCQW(=aJ$FDN;!%`lluB4)WYKNrsrVP3&fYXYri3Dn$B
z*<f!2tFbl$?9LoN{Uc!RKwq2ENI!fGvkl!p+L%_6Gpn2@@ftnC?^!p7%<Yy)6+rhw
zq@pF#0&puNp$nZvW}0-!tZ;&kC<*GtycS<N=N5O2g2o7Xrvn<90Lt<}r7^i+?zTi1
zQs5a7t!#<5f#it^M2`^W`>9+#etTh;bypHJ#XBg^G9eh><pdv<5PV=j@O*;bYlGiL
z@Yw{fs^BZeXVAOEa*8tB6x|dt_h7Dxrk+?nxn!c<&O}>DTOHd=dPWUBWE$e`Av$T3
z9mKSIf95x`Fl6p??C*5!*ZcPVpHz$kvH(2b0Pb=C4+jK@VSm44e~)ATNPvAV_Bq(s
zfMA#*^+683t6Bda5~HCmnW^ExC|?&Qvs@g-pTMBG&;&{GI%JMXXPV6eM_tn@fqRh-
z!Q4AP$qu*qe51k4#fD-=WOIV`eH^}@9Dp?kPY;m6YxrjV@C?RU%UXkI&RS+LY=Gop
z8?=_?L1I1_P4)X>5~GK!V}IX@f6Df7)D+OeH(2wBF`tL|F*3(}M+kRD+-XyKgv3l9
z_=Oh#6!?YUkG1$Sz@AkaAN8JZU$TJYm>e7|#({dFIR*JJ$6wo0aIy%!Izjq_6iT{u
zWHsc1J7KN~v3}iT3B3%V#Sqe`Zj{hDJOj*o2uSjb=9z2=-Dn-WgM%eFsAC<ROE%|~
z&L|<F`6eCw&EVmjR)c@?-BCb%I}D}=42>B9x|wDP&NN))tdXrkBM5CJBX_hEvP;}z
ziGKm{TOeM~62Ab_J&<Vf%omuV8zW{Z4m8|46R{=3O}oSi$b$q68g7|nJN{e0``SYA
zqa1&`C=X^FdUc=le?lBx)U=>sP162bXo?m^%tV07xHiDXKG0-<dody7{KX+N0b3F7
z4xlxJwfQtf)&>%e@@<sG5i{8`K9v9;!&n<dC1jFiEd3z@OV<)vmz3}grU1fIE#c$A
zg)OcbxR(+Lhodl_VuPQ9@tXwK;$H=sjviIoIx^X02aTp%$<i^)8y}^0vjcQdGYwx_
zv=!r<iT<L!3189rtlBeaY(UA>xR*u+KM&ak+g2F8#Vh~o3MKaa4czcvyQTO607dg7
zW(Iim>t_(%O}4_fLN)-3Ps=#T6mM-@Wb&xe>~A?(iq!=KKIpJ&2&q|?l-mBVq<UIX
zZ6WiHCG{_)p1Y1Ua$d}Nh}2?J5RI59KD3TR@bMOv)y`H8nPYuNj${6Pi*qS^A#dO`
zA>-mN^XHIhw#{@WnSMj2!nHv*Ce8FllS8IiHeh#%{AJ_yRy<u;CS>DmAvc*EvKVNy
z$OOEdEVO#pK(4n=rT-qM(&>@pZyqIyOp+LElh{KN(S2kg;EBl7NwZjDvdLnQ4cLQ#
zf7^f;*?`HDxY=Zq#88_=SFmrBg!Ys)&Ho`Zpi6rWHctkQ@>sx#&I!=XGs7)G1^EGj
z1TYsPpCqja^S@wT#0&$d5&S(QzM~=no{T(`v?5DQCKVZJ1NJ1~5gSk|ssbjj$So#B
zMMi+Fq4ob<!0%8JZId;T(blQ-KjBn5UHyETGz$~UA&F5oiC;)!KS>C83i52yB$k<I
zI$4ah0ecbfUmLKm4VXNOTTQ{7h#8|SNTM6qcS%B9Yi;Bb>r?t)@G0G;$Y-UKh<dj@
zycI3Nv(@HCOdp%YA7t?(SqRu3c|K_tx0w)0TxtXMCZOT^U$}lYVDcoQ%S{eh^tD->
z2lzd*(EeT*xeWdB42SKIc`k5p&JP%f3rs&tM*e;yd(RjCc9qWF{G<MlXCET_D*$RB
z9wFe*WG~=p$O}oczun~Az_*QTzzYeOLiWO4ZUZLI{tgq(B>SrXs}Iiy{65+1kWe4F
z(mIv?ce0S~8f1537ETfcIT3TUO(F%{uOuPd8Aw6dB;wz}ywilpqQ4Dz5x{a}A)v0a
z6fk)fD@@L8zBFW$xDM<tlF$MAcw_*2?J?57HKAnZdy;#F*L}<9R2oulHEyR-fUA%#
z1Z+W8-}CAoslr??U27c<1oNTe@EIIt(k6du`^ft1Z~e8w-w)Q`e{5Pkr;%86<jtKk
zXei_y5CL2Z@M!veR4@Nj-x2UrfZ|uS8?cRWt~(do6R_1q*x{l6Mb=r^mgh97x0E-7
zc@%jJdEDZ49VL8WjPLZ=Skdt~7)>yp={h4XTjThxhQb>{<~)owKC;MaH>K5KvoX>Q
zy|R`59kP|~Y9t>YLDkE}*98_c2+SuFSpNkYU_osN;*`i_@OuQ%`GBxb)5X`z<NU)&
z{9-HQT|~|CreSH_+yLD|(*s}R>|MyE>lwUZf}evNfVJD#xs<uPHP!`31|wM)8=1)O
z3AXx1XfJG4rEF~XP$eBun;<>Wt4h+pN0p?@LS9joH2)I^nZE0Mp#=>F^qB=^TToDy
za_Q6FU^IRvlinD*RHxo^kyi;+l{oP(&F4DzVoNr9JpsOftxl)Sk&A3#=|3Q_bk`%V
zi){eeECQcmF+;$7VKD<OCWvepOgk{}jCvOXO(?Cd1I|YdCX_Y|+x8gaJ59&%ZGruN
zgV)zbAn2{8-k4Yvpu5qW>IaU<S5x)B&hy=oe-i>50-uJl`t)LqKcPxG)wV`Dpx2@z
zeR&otRAv(r=HS|qHl3!`R)IFz>oolz{<VmuRPYC|;n^b{qSx>~b<97xh9V{wS`aa(
zTjy8cppN6bDb71v=W45Ko$FLA=Q_BSv(AevfzNDwb^@nF`hfeCM099vi$u?`4%JWz
zhf?Y!Y;csows@n;;}F@!;;sZ&-{N$l6)qW(Oo*IiiChBivmEBX4xT3=XVQQ!O{db8
zj0Y0(E~QKQb4h@1X@G8-ImePykSdUqf1Qjgl}U>J_X4O)WOcR$UIp-Y3)Bf$z+_pa
zlU2ISsxP?DDX-3s?U1nbCw(>iNv8v8<v5WjKPXcMLotz}c@fjZ0{a7OXn{Id3Yd&Y
z!TgBHutY8e_XR|B{yP<EX8lQD1Ao%#Oc|{bXA(dp4<b!1@M?gKEKp}d;gS(ahe!)c
zq#w91A)+H(2c$WARB1Zi<?0!%W&8QI0Nw2Yx;srPOHM)3Ag2I2WFC8zq;lp)OiPO!
zpzFV|1?sRUV6v=2WYxxIbr}J7kd=;Rrz5ScL+NYcPrA#Hnnxj$4v`aeg{sBk8h{xV
zsB@cu$%qsHY-@>(p^RTcMCXJ~$jRty<4^j*V|?8e=zpTjgzzN37d*o6(Fg~RlN`{H
z1dxB5yJQ0qC=1N74rXWqCX0Xy<_3y^Npmp6d`#5D*=fT_1myy%1*itjCXi7fnd*?z
z1*vJBNuP}~>8?b6Ns?*dT)yk%V6OKt^nV-_l&PKqQriIyOb8T|X&6jB2Qw%EQz}v7
z`!~Y@)df_8-!G>OUy-Sf%x5CUq1PCZzBXe-IvvAn9c6G8<wQ(<i@O$FQ;SoQ!o|my
z?mv_)v#f%db0emKC6Y?pj}Xzp@@(XI^xQ{GyN!~c)XVQ*9V)jVg)zR*L_6Pk^uD+F
zCZE#DAkbIfxbN=W2Wm(=zlP;^&|Lmu{AuAcjcGucwj9S*vBt=(LdLP<HKL6m5w%sL
zO+<zva^3`q53()T>)AOW>`BTakuk?xSh>gJS9U0s*h@k>?nRJPo&ObB4PSC{LFet`
zFL0Jm0i<)A+IG+;fUXVxz<V3)nh{**Mc)67J(<U~!1RgUQ9i;oLV2+AI}+?}$8SD<
z%UQo_+oNYN%CRmI<hTV6TUf^_IDUkkuywt6JjVvd6>TQN0PiCc9Y(t%6|7U~cj8kz
z9mW4FOV(r%BcD@sWiU}?I112S3(`?spadcwC<{;}OJEA!Q3L@UFuNgD(QDz6{u)NS
zbk`w&;|up?rgnrwZWZ1Ci-WFB00m9QEI`$rK-VQ;vIwYP@iC#rDh8&SgUKp|Vg8kx
zj{pS91#}D`_2F1NHe>qW+k41YO9o{P+8UFs+M3g*^W7k1JK3hveJ3OQSUj^*A|{`=
zPE8T_eSQU7gSO#dzl~Btoq8`oexX6#p@u@bLpo*j51tYRoYx&;NFK15(Q;-n*%lMj
zh51a=-z<R<V4~kaK&Rgx$U*B&4ZCnAozDItvI!y(!`bf^GX~7}7Nd#^6GR{a=1(wT
z5}m5|PyggHRwr1M?_Dx2pHR9GgOzt}%pZVM@^J)eNy&9c9gMsOrUIDYlHbjrxrvxV
zHsCa{e-TdyRbfBS=aef32E57qWdn`})S7@gKn_7ZBw*E~0q-%{1Uzg5P6vC4fI348
z`!U#RZam^^1ti0p&EGcQ1VC*GsDt8A<a0pPlLoxkq!aKT8*m2LzX_<bqOf0rt>FeN
zj(3YW0w$`GP6Tuk0d*i8f$RWuOwxe&nW8xn^Pdek6YOCE>P#r?POxc#0dF<`+5{(o
zX-7PcYEf@v_!>~{qybl&JOUQkfU^J}A)t<HBatGobpit}H-$FfWH6@^P)D*+$R03t
zy?E50pA+Wk{U)1$yKTVPfd3(&j%5Px1yC<A;2mZc7;RQMel20M(&1(_QXl)o_rGHc
zLgpKt|Kfn94&b-|KrVnij{P#n{`dg<eC+o*_P06q4XnNQH<%#ct$Kw3_F90BRr3Mp
zbTkHO7!+W3$b9G6FLdlr2(T~2{#(a>k)FX356~z8phwf~cK|m!0U8I`hq3=2do9~h
zx@|uT65PFI`xUm{A8AXcd(`#=ZCB}oMPsqpPoFl&VNb-p2NY(<`@uTA1<*jpVGM^q
zTZd{JM4PtpambJOYl*Mk)?X@Hx!$U6()?X-D=*~U!cW2yKQa&SPk^;wk4Jt5+$w-f
z7H7owZNTMVhJev_JpuU*OzYACy*(nAQ}26VG|i`g>A)<|*>MW84@@)+D0u07z~s%2
zm=6Jm0nY*aFS9_~vcTT~P6x|N@$B&zvK8h7FpMehEHH~M(>Y)w(gyp4Sr9daoY_8h
z0COBb+p+*YqQc+iaO`7_{fPngVeCJ3?B_c6Csm;S35-kxz$XASP;MaWUnp}H>?a|A
zk?tu3NgO9xU_Q5)6<~&e(Kb98IRxfZFwR);x+b5*KhykIpql{w21wiOROB$A_5p!%
z0exvP_ktM#M%(Q)<OrA!0fAy*z5s)H+OPccll=tu5xOqggn#^gyDOkC7YFF>HecZ^
z1JZKVd%VAOZ-f9}g7F@y)*ljABk=LYhCbdA;@To-YQ*fYL=J*IFq-)vH!2}=Q-JQC
z0Nu^zYfD7VE`o^m^vw>D0g$*hfJiz-c6!*Ti5vpl2e9_%7aj0*0JDO?lOm?TX7LZ$
zznyZHByn#5;ake;qUJu6Z?llIz7WyAo$nAC2#G-fL<%PI4G*w7Om+mDo#S5_4F~%&
zdS+`x`Zhy3Y@&-=pG{IZJ<;b)@#(Gq6>;zaqUV;rYBLM*w%31wncZ483$rpnx7@s9
z6O*rgB&PGhHcLW}-Wyzj{vQ^QSq_=KZo%aNUk+G%{hJPWIJgl3*mSV3*(551t-yJ|
z>=?V>>X`o}eRfCj+P{@zXzhQ)l8~b-A))<$yF+3$1jYoAD4Gy4+rX*|bQU;77w8-?
z6N$e?(-qvgy8qV=IS<iQOC4kbVqzilmUW=5_)i>Y8=mbrI3LUfNOz=10=|&0d=orX
zPKz-HcC+dMHLH+Exqx|_HWH6S)wqX7?Sj*hu{fG&omIkEm58u_YzKg8ns&PX`uMvf
zyPDUo-7LOq%3d3levh|n6MEN@Sq6T837M#Q5;pHxHfsEVMs1-YGZ`Y&63Aph<}qtO
zAN#%7YuB8CObH+#rV@`@``fYq3486BvY)E;KQTZAz!Lz}ce<KANa?kE%|+sU*Ox$-
zBK?rd5Vz~{!9Nb3SZUfnhFnr|rtqr6UQt{f;+uRjDmy|SFk39^ML78u*4lt#$ZP^8
zc2Rgn$UJH7@525U?6vvGeh&6!lEnUL>~#`KtIp`I8aPc>cue)SnwaN8w3s=+$N2pz
zoMccb`RLCK(b?c_B)%7DfX_wZ2^Q}K%#dew{jX8D1Qy@JSUX=1GA|&*!s#LNjLq;q
z?0>^vyPoXl2V|%RH$RWP65Px`y;;oEP41xfp1~JaXkfVp8aU74K;u9Ts*s%q<zghB
z-68@nM&h_F1jqun)dqeN+$D*DX^)SWE$V;G^<o9T7Gm&%4J-%M2&~b1qZ9Z>f-Xr2
zoKJ;b0;m0ed2fDTKhT0R3waTJ{MOtX!?@Z6t6Ou?=@@Eh&BK6~uz-W-@(=&<yU9gg
zhYH-qOsGN0<!nA82Z^`eVh~^L5ZDg^Em(7qRStn|5LoIHh?<!M5rH)hfgy7h-aD9>
z2hlTly!&}=xSxlqc?g|It%Ot>q!eHwBBzO@@)-wf9UoooJ;C+pnzH{yx-fFT?CWoj
zi*Pa&Cy9U5ryS>Z5t=dJ)-wxuEeDNtIG}gZp5>o@34N^T7#|awIgy0lHJCSmKY{*_
z$UzPII7aYWkoe$Q4w2iDxT7sN$_4l^Kscwp2=1zcz#8@LSjwkMHYyNtWS>SH&6wM4
zoT&EUI|#BOAyAA!j{uAhrtj%Xt$_28&FJHU>2><@F0dL*`1iVU6Yc!!jisS;QSx61
zxfQ*-Z57>ClWxOT)ZC1v93=Kzj7*kU0-f=EJp?rH79h*fD<kPoWO<fu8KTPRbkmz?
z%WOgw39Q3S7XEGn7avPkd*gum-(iECfwO@G(LlNZx!nel{$zqkcZU<?LW10y7=)^=
z0LPlrPUGtHEIzah<~HmqLxOQ+3V4l8;(G$v>H0G)!(nsL@*nNyZW_c=`ayQI-i40D
z(jJ3MElP(^Z!e$3?s+e5Dy!OR)20RLCJ90Bw*>i2k$0Ou48eP>{ZrUS>jKcivK+Z3
z0FXZ|WL5&uIM@#0rySq;6726o-;BW0$A+^Yp?g4rANLW03jPXW2>yWebrzUGdjD7b
zzZAKj2KA{LPNCe=-HXKg^dd@gkIg3Drx6_HfK&F*gR4(6nn8C;0+NZ5ObmNf@@?$T
zolO0)^-4LPQqHr^)YuJY%IGdc89iu&)3^RR9yptey(-g{t{O~bGzf1+7NBnmDd~5!
zwn%p$axXqgtu4ea9u!(s=34^!5I7zJnx}V50<wuf;08<Jee8dRfV%N^WFdNWjr2Km
zjdXcPE`Hpu(Q^|0=cD3mu?_wKpz{eX&`JwR%-&25nMK$u%bsMppDZ;y?nmxR%+g7+
z8%f?|Nq?gIzrVm*{j~zQ(Xyt&%!Rddt0f7n<D|17eX}L~DVXk%7ADtX5=j?M37I7}
zh6e5J!>Paeca0<=i27NhwvbG3v5db2^ecqbCHEjptuyK8L0GznBniIagu@UX|D8Wz
zcYx^`g|R@ZEGUt2K8(kGm%3bK?n{{uP-e}F2ZcFGndejHi3HL@q(S&~mOlX3f~o9u
zM>H~?kIZ9;zpxC4&!v_BLdazFT8N~N*@b8ly7<7lgN!vk9!AzDWE`(vj0C1yA{r;(
zWcg#{VV_9N;?b`^5i<otvm$da&rhAskL)q+n5yQzVg9<32WBD)YG$6~=V{o8w`@i@
zyl0{pZ|OT*-ZRjNw}#f8@YWD{40!}U?u4+sITt{DwnO4wNN5JFvm~Zl60;l<BOMY^
zhr}h8#2iaP!({~|)ay?oo6C@3TzX?~a>!&mB;J99Cd39yLWd_+BFiB$${{h(Au-01
zxE>u@w$pIhpXCpwNAckfi`?iKo(X83%5b#fbdYr_{VUe#P;^=di^yyU6GRhgEiweX
zR(0vmVpW%J9TGp|^kJNBBzXLc^C~{_nQ+nJ02|r<f19i|t2WuJ@#SSb3>(y5|BsoA
zlK(VrN1~7S|6)sU1iI!B`4w>0%X|{+0Y8J!#7A6~Cw@%M`fcfRwircVG#UpCs5E``
zF8-KN{XEL*<@j2DPs6@~JuX)Hx`p?Y(IadvW3g*t3GDCBgdzAN$g>c54pCKR5#V`#
zK9+S+PQ1sA<`R4iV3l__L55OZ&A1K7CFoUN>Cd6O(mjF1%X<kv9><|u-Y^5Zk3;&2
zY5pF#4gbq3q?$`<Ye{^sR3jLrV!07{5l1grrd2S01IdD1UjWDQvnnuUq;I<k`)jaO
zb}u2y>=JZzi>XL;7Hs8>v}*63(U9LEo6m&gJMI7LvT=78a~b};>(ZJGu<y_JL6t11
z=CGR1vo3QzhqM(xYbAsu1#xvL==$LlerfXgONq~7Txw~Y_iT7r<(YbGCw5u4yh|CU
zA=<9MX_!hKubFTG1C$J(LJG)uJ{gNhWr*kRV{dzMTPfm~JJ{D9?8{&mrc!?kE@r6A
zu5v(cI-vgnia&aWfW;VPex-wX+rhj7CjR6Z7o$pCt<_6?synOG>C??*^(c}JC`q3>
zkEY8r{lTl;x==Z>ho4O8F3BrX(NroLPg<W#&|J<pu(`%&&;VyolYxjljl2pOr_9A!
ztJ?-RnCBeKYhdCJ_HnaTcV1V5iJGkrXgi?b^5&UqOF$;y!MqJ7xV(8L6O5+wH<V~t
zj=vE-PC^m%n$F+ob7`4FnY$>TMdI_hFD2Q6V|PAB*dxvb*c{*?9M)oW(;Dy$vK8Ba
z)y=$0@WP3F4BQgg2W~h-v>9zd&Qt%3i5kv@h?LJEaUz#O<f#NASzOd~1J?>%23>V0
z%eH32%g8&}20t+<*IWRGFlk=*5=&zss=g^11EmLxzhJB~^_JD@d|Ug@Wd;&|1J#^w
zi8O=A3W#XDyoP+5lt`ZG0j7mt#i%*Z-!I|wfIb3Lc8xAsq9bJ99dL8Ny4l@;E>O3<
zfP4rxSr<g;f?gJ<Tg*+ssmfb*UHI`)fVmSwrYFFbK3g-AWm-h_{)Lw5FA;4h@nu?n
zEbc;^od_N#Ig$GxvIE;P$q~O~1A=953l_`NtHqEN5BVwx>|pih?=A+bwM2t+7Hf$}
zzJy#4R+Zg}B<v5yng6TIML5s^sZH=@J%bjLRb|FNDq}zcDZc3VV`jCfx8%c4OAmB1
zr{FV8^%~;y;R}31uR`iG$grmP?@PH0gf0@>IiQUW=yO29i^?j~-oZTTV0M8CUQ<??
zQ@^v%PZhf-9MEn+!Hdc&a~gqEX1$FRrOf$cQ-BNsldQ}oGi<f#0A5wDL(H-9=jX-C
z$85~!=3c3iN|zF#<IfkHjyCHPaJG^JMC5g34`hOu?A7LU2NTV8K;HujF7Im7*#WI~
zF#Ev-mv^=4<Y3l1m><B{>PEc>mu#n;%2FAf0f_Q?w{<>Xx@lGW1i(t<GK}2kC01e2
zf10=Zrx#uBJTLJ<lQ)_V&f`bZR?~14el!KQA)2;dBmT5y{=Gr)ET+#{7NDs&8i1zV
zb_*yh2Ke^*tP5viuYT0n*)p``98uEO{>&)7!j|q@&L5$CPVuJR!ik+0E$?K`!D%a4
zx5VjPET@_(A0fX(Ab4G0WzGhpik%JSV~A_{{Q~<W6>}amvDQQjC{?-*+6D)A6I27}
zW8`4cpsP*T4{B82xy%BU_6=kt7_D@_A&HAkNoCfUCg3%J((2-1K##JE4bR~Tm<45Z
z{IPA5fwM4GYlN?53^WmW5BUQU!LwkEY35<L{uF4r13C;ScowWNO`Sk@I+!D1f@i@R
z)55{r?O^@^b7QpR^jdtZQbx@IX_3*pvI{8hn`HAAG78Yq%Ihz`51D#%+jp9OX(!VP
zKN_U<@N=Mh*(x*sA1YJkP=GE4T}#K=p#W#82{@n3I#X}fhaKk}=ok=ZUDUL(<!KDV
zJK1!U<@-o##bkZ=kZJ8;?shQczyx>QL*_&Wb6+ur{L2Fh?z@LfTL<(&F^DZ2Oz{5j
zkU1HQx=!oJBDzj%$UDe5x=!2p0i<|lX#eLr%{hXtGbdRGdaLgy9J~$qU1UL!0|)Py
zd%dYQZF$ETPjocL;zxa7A3r<JFWdLLwc<ZN)!lqjQM&uNmSsJ#Da}?QjI*g!LK%I4
zR7~1U>r5>NGsD4DD(R-u>p$PAG&LR2YzK5q(r#L3>NuE~gGo!;P3uf;Fy8Ysk0YZW
z0!DL)tP?Ad5}->(SI-hi$Je|I{O<aY>KqG!;00*CsVfZq9}#Gw1F8!scoeTU$2ox(
zJD7T4f(P+>Q{TZXaWM74M1x21deZ=q29H+9$A<cYr}R$dFU6&G{#sI6x6T}oFKuck
z;p-mu7cI@7ASYDh{1?2dtuZH9&?$iK1*A3MQ=}2BgE0@8h8EKS%za=qKtDqoC&fh9
znZ|(hZ3}h7=D<wXn?}}gM{HK&Sbh6B(j=+l^(F*H+p_NJe#W+}ZTwHB@MB2vK_-e)
zgN&1mvoAkvirAKob#OiozQlpH<U@%L0`VJ6XiUVUTD)!t?*y-shavogL!kKi`K1Xw
zV$yMtVjWzBgCZQL{OJh};y0U0<2Ra2@Z~LDH!$~rS65_!f4YP~wD@jSX$PD5u2IC4
z^BtJWaj*{uD)|>k)6!0o_}FZ6@KF&T8p!?ev5a?d-D-3H(XhI0PEbm>DL}W;RB)Uv
zFXK$OM*?sg0(1|X%JRkh_n6%o=xl8O@Ig;km6LB|0dECme(gNs@ljK+=Mx=zf7s!o
ziWeQ|P>BE;Ogm-Xe7s+|FR5Pq0kCbTVze#)oQRx+oQ$+XPC-sZ;-zYAIG)FN0_^bB
zr&q;kYknVu)4FU<a<~KOfc+H=;;WD&&P~W_5bZsiOZ<<&DtQ3x(0Qj<!B3d@kx@9R
z${lDq(VN?PsNrcgcy++9-oh6g3c|&YXw;QLb->lhY#i8jy?xwHu&0Bo0=Wl~+);cV
z1v}mUk==+KW&t)|BgddW#=}?T-Sl3*vT()w19<{$N9%)jzOfTBH5?x!@lgoA2>BH~
z_n<uv^Uv{hyU>fnH58!}uh;RaBGw>}8)osb)_Px*Jf`yUiKqR74;6DJ{5nuf@KLiH
zC!GOj10IOD)uh{<2WCEM4&Z=dr`?2ubNI9vQ15$HgUY3v-)PgoSKE7IeImkpz3E+2
zKI)N^?qhJg<d-v1U&+H4V~ytX{fL&|I^*jxQ?F00)B3hQtzX4{m!5Wz0Bxv%0z?Qd
z;(L(xkUs-a=4trpQ=5;c<YxJqd&h}K&F>DjtAjl=S{hu6?Vk>)n*%y40aKFAqvj7V
zlsIhxBs)@KRdz6`4n^F`lCW1<J<vFxMcNz!sIjvI;HVrwvv0`mFZ3$8^k4ER?|8|t
zET3u))25QE?S5NwIW8&rV_sFHD<SFYj-^ujc~FeWn9!*6M+7bx3qj;@Q!i4hL)Z2l
zx>f9UX@|cF(12<yzz%|o&|c&mvN{_%7g5=9Rvc75{&8?S9b5$aZl!S15?~6zBMz|8
z0cHX^uN2VD=yCHOIF0PvAbK2S7s-9d`Cx`aZY1KCzb4slUJM|b@np#qIr~8Se{mEh
z<9l#ajb%bkzC|?6dLqiU7T`I6*Ot#HwM-O&)iQBE*i%?0g!>NZ1uni!#25&L*r}^!
z7ag7B8-0)AZW^>ssG(sm-XuY{A1TLVISIgp{5+aWdRvEDGr~CB<v1LJKVg1AzCoX6
z{oTZxA>EIN2#v+hg*p)NMzNPwZL-?EjC%{!W|ut4HLcbudvK!bi_Q#$cSiUQe!|H`
zIN>&i;Wl-tCkNKS&j*wFr1+6##Se1L91=16@S}lzHh$Vn@dxrM;+u~c$$M$62~#z;
zrBMNXMlL2uA4Hi}M%E*th=~;pj+pO!V&TeQUWV8KONzIy^of*Gr0POy(=CjR{HvV@
zxjw;{x^VQZb<`b4@8js8b;M_^^sR(aj>yhCIEh3N*!^JjaE>Kl$I@jY{R?slq^nxe
z8+grPdHvqTse|pS#Q6iel#42^Xf(2V-PSO4PGnD-qCpY!qXk?6;3EKkdBj%i#C}LA
zBH0o1gSAb^b{n>bJX=2Agx|#pNtBMY#S~>l%+D4-1pKGq5Bsrx8dbe+)QuG))=$`K
z0N;#lg}HvhzheJB`q~sj`m4sUXQBHI=|?eGr#y)l<q>|@hV`C}dm@;(h<gNk4Xs<?
ze5=p9<j_`{_*14Ji*K?xLHj2Jy%uK*>;?@J^aBTTbpj@<42Cfp0ratDx}MqjFiiio
zOqav*j)V%=W4w**&w;&n)ERG2n>+}A<OCm(5PV>n;0&E8!9TUZw-9_I!A+`PrWFLf
zyKHc0C_Q6x2>J<lAZhBFiOf#om{rI!Cg10@)v?_+i8giFu^3-PH)J8+mkGFJ@xM&g
zpRcz#fGG~(Y70=g<M!rizS-i~Pjl=C1lWhM|H84Kp)Zrg8C@FyhyeHsKn;SP%PqgZ
zbNr>?ce2=lp3mq+q_4`6S2}$%vM+u{^0QWRjj1NQrbM(6KWhq@cwd9lHCh{RpNygZ
zT|?|Ke8TL+h)+C5WYlbw@4N=ihv2-N@f~OIn$OfPz*g(qg_Qp!);BFN`W(xp*aod{
znS)^ihJ`m=pc^xf(&s4vST1&sWlA!@oIZZeWM}cE5es-5`zNsHQ07gg2e8dU`VcgJ
z{ZtzNyvYRrvc-Q6{weSkEPgfEHKp-U?|Ibg$S%bz+Y2Tg2e0Bl-Oz)I9M16<xIYQ{
z3VL;e^wsGG>FVPAvZURx)#PRKT@g#@8wfoMp-Ps}LwdgJ`qGRN2)$@R5PHoz_!S2)
z;Gl|iumSAGqvBsO1=mH)8{l=oItAjNF;$C)0r9;9EN`urD2sJl0(4KAZ8+0-k+bHE
z7ga%Xa%{qQi7{T@vc&&^_)8G4W{Gcx^kYb==ew5f%Y2gq^EYvz0XKjF*Ny?Ft)nJR
zE>AMxiom=LM%{NY1drtSTgczwwxd_~Nq-F8hc0Rw;_Ql~{a0Xeu8Ej;0V?B;09%l;
zoYez2E+ONrYeVK8OL!!NPld4dpX0E-GLSIOcg!QeW0vt{1o#KW+C2V29<_|6KNiN)
zoq)6cNeRDfvhnwXC43gRmJk;1cyQwr2<Jz+<a*o&zaHb?2(JC$KV%Df4Vt&e<TX2J
z)J08-$H&v&_$aM=CP4R`c@kg5Pg9K3I1l6AyWonpW!-+7Mq)(Ncsh*=-UaI!ux*9m
zY+f~Divw|H$+;?Go&u--JPi0SSqj$(+%-;h;+Pa~djHR4Qk`clnGp?5)STzAX$P5S
zEEyWj`IgK%mP}hn#4MTm?2N1Du>`*}#d)slD<->N#5|7=4Zwl;I6y_U$%T;TtZ(T%
z;X9NXg+2{4kCJIRnQjHBOhW+wAyYXEBiAL(^i`8irZ3olBf)j|0`j>H8_!EdmyIed
z*?9e$;+5$&lTH>d*(?SD{+%qeY^NeGTBp)?!Krjzky%GcqUg$q$+JmR1ot0F2p7%Z
zpW#Wfc-@4^BG(2S1+b?Lc&-hY&?Qb1+f31=5wqGRF&OMYlF(j4FPl~98QjsduNXYr
z14sAsr3anSZuvrh?nSf45>${gA*cY|kVQwSNH!IDQ17FwU#kNQF~0?Dii}8FkvB}y
zfQWg>1{@8pw+(on4Vb(lZ<;(Rves5)2-rWOltkNOMP$8os)n<1D&6_W;-h4dP8RFH
zDG5CsB$XtDYmSUen#5bCfFw46i&Fi_7=RbqfEU<+$+OsQvdQ9Mo5fJDf0Bf@)XK<4
zn}qZcd`j0HxiL5i-bZFJ30)B}kJv1Z16ZCc1Z;_nPMXErCXXyO+kj)i^|1ka*nr8C
zc*mrZ#3uE>7K&kj{~`<R>s66kamtRF_A}-AAaHQL6fh9;%`!_y{<=a&zIr0J2WM|W
zQL@jwJYtsH>>C5DNcIA@M#dz~{#}z!_P5!9<G}T`0ejhi$+LgY6!ndmJHSSj{cymC
z$X<to>H=G*(s#qDbbXOKl4U^>*(9;TCeaLBWs(q1AJCqdG>P}4ChzKqxzh$553rvN
zc&QDTJc|!ZI$7KeR!PhSTV*cyEWsUv+=ZT(H0cjaDB1D0u>##o8|uj1hOGa;(<s12
z$WWNJz}bDg>gatOo6POcuyHIpwo3-T=X1cg$2wk!hr^EJ?Koa(9jon#^{Wqe$!{C*
z_glZm<M)wi4De{jSZ6to;rKqlx&^R|K{u`!)B(0dh5?FScy7Qp%(?CXY)`;emthBn
z`j=U!VjKSgYBrebk%7n{M0j0E2_L(Rzk6Y0MQ_K!_+I{*?j#&cvc|ee;u#O%D>N9Z
z#Pi6iDkWB3Ya=oADx>s&kgarmkO}x`fxS~jqa-8vnrAULfcaO?cNORo3u=RrQz8#k
z$Yt4FKspJX3+Rh+%Ch8gE9C79&j#qW2IvaR0(|K#DPOJhd{14+bwJj@+P&P0Q0Dp8
zSQi|vNBI{Ur(?S|!8VsG>l?6DmF8nxjVkGYS|3@69#us8zo?RQmmyPBCE}MUQ!f3p
z*n$=TG828!<rWlFr3jcsU^IU2qDv>xr8@O?LZ%X^DsJLknngu#vSgzR2v8liI-NE^
zZnS}={~NZ_U4cv&+W@k9OYmlkSqvrxvcg<xF+pVW`|&j!FmyxIyBKIgX>}cN2J#Sr
z+-}I{KEW**tN(QjFAVJem%YA@A2$6l@xK7wt7fSmI3izd)&DxrpM^Y{5IC2><M;Y~
zIsxN<Ss``0ZH$aV&r6#0m$6`_P?=3g2M!PuX`=@sYP*~^+3Pg@82+`GJ;|#Ea0AwW
zH={T3ru$E~bZ&wo6k#v8Dq<#B=QD6H*m2$o=aa2-wGFY(bvl-F9o3(*&Ku%9Zvk%+
zzEZ7A+;^yd#7qLOK{^rqe<T#uDK;IMY8|WLN*qfUK_16p0&DM#7lLdiWT#l%OmM?2
zF4N)?2o>Xsz)kmwM9n0Ch5@L9XcJ@_4eD#1Qr9ppRI1Yb`g(xwjR4(SW~L>l82uq9
z|2io@SDY06Z~LN{T?lqFEN~XU5f-QuuYk$2DgZaz&&o^&7a}R0C7U6$tUu`o;7_`4
z$n!e?Co69bXS=8c&IUNj0?!lhC`7U$GRG2`0xkt2IuEu$vaLVquf<<fN*yv^I0}(W
zh-6vd9Drj03a7K8a7ilyZlEPH6<jJrbfjyAT#ue^j;86nw=S@s-wx>0cLQ|qo57Zx
zf@DEX0d(m6-%*lE=gc{1FXz7o40L^*c$@|5@F-xitn$cesLg5`xN>Brqgoqeh$SNZ
z_4t!c=f78y5z+at;PQwWZh<j?6D&~YI02Ip$tHtgmdLGSQ4u0KFPw;sL|>c4q`z#8
zuj`FI7k`w!_-1yrGQT{66mlbCgaf)f0Tgrx+;C_a?O?7*z+|PA1d7KAgBewV;rU((
zprAX~5kO-B)qv<7WK@YvbyztW8DpJEe+j;%yAWBGkZGJi?23@Nq!`2e@0$P$$}|^H
zpJEWVTN5xrndXDJ)WP&iz<7yj|Bvs@UN->h3#bPA>5PT)WU3?dDad8$Nirh+AjXJv
zI+nkhY;ZBQau{3vEG`G!B#YDOTDZ%T4Xz@9S6Cw0nmQ2C0kb`FIeOjI-U6FbIcO*W
z9WJ*ZI$U10cg2iR)ia`3V%G>e;XmQiM;{STAC;KQ?;rE?)wJ`gwjeVW|N4)u+w@0Y
zheV&ke?yEF^BP3>odDtmQ@mWhA$Se;tm0mrGpVS24A^V`DCLc0T?6Mj;+=+EhcPc{
z(x-A*!JoHvzsK?YTG~)Y)K-o*+3T`k9f8|m_bIPBo1DPzs`p#hd_2QDy%#b*Gp*mH
zV5@q5VX_J1k8m8Zj@4MrI@Wc89Jj#cbL&{|A!P7-E)LHHqhW9pm}5vl=gy8uSL;jq
z3iy&vr|$LmC`B@Aon>+CKF@+~0aU|+bm|r;h=2jp%@VkqZm0|aoh3UX7oe|Am88Fr
zK`mWB<Pm%YuPuy;^BvGd380`EXD$z!9uDT>1WXqFreN{0pve;k)4c=}HGL95folt(
zUVum-?KY}2nHhp_@0oySa^mVqTVt|S+Z5V#l)DO<L$+IxBE&tTq<DAmJ~TP~SlcfI
zTmxrI0H;wx9dOS?dZR}bk-ie$A)T%XHsh>RAKEU=r-Bz-%rg117+n$w(}Ea@V<;;M
z=pswtW<bY6KnLBkkyETQ>8s#OIvw+$NJ1ctuXYx5E0~%Vqhr1>K?I_Oob%fQ3X|zQ
zfU2_?=&-8t<p6pbaZ5+C`D}USR4^*}axl{<xz3^ek-6AEg9MlSBa_KZ)6;Ccdv$NQ
z1_5;#6?i_tX9EMiW;)n_w-ay%0d+>a8d-q-)}#SHHX#Cbv;pq}dkpb(OceHpip>8P
z0|Ra|r`v#c0GdTWodpLVH(~!$(tw|s0&ZG$wgFdyO(UR=gTmenF5itud@Y5P^i9*r
z2D}r{90KadcO9}6Kta-gpPC#3o?!#t5B69B>ewgjGH@@u0gK~pH(kJZ&yHOIC`LdX
z$Fh)H0lbzp;AbY2fM?l&4}h&nK%K~hT@LQ`z<}?VGi`!*ftgD@PF+!x4d_k)+mZ(S
z+=K{tjt!U#SUZ+ZUc%l5?v22J@0qi~Xp_<jYZRN5PFL3>Z(^T?c+ZZ~uIQ~yTuY`q
zfH4l>tzv+`i^@KNeb})d=h$x#u+PH2iDN%O_EF2|?EpXwK!yeA<TMC?PE7-mcLMCQ
z_-1J{$9{-o|89VN4EtzP2QbV5ycYn-1<=B=AK}=)A7G!4eRJ&Lo3={b#)1@Iwm)OX
zZKeG`ol2^)HjUcH2O%qQ_8D>n(O%w&z5K&x*orIF^l$NTcci6t{5aT=+ysd`o{!@;
zw8^j9M$x8id@%9={=Pu|wSMdBz2yu7#BkW!g4P163mNU#Ly*-Ne~CC`JdC+AWKOi0
zr@)LMkhbfg$b;DLKuQAnaUvWY+k(-`c{iB(QKo^;ha-`@0epZ2FP&eQOzxzf3^)up
z4{&W}!KD^^FSrkpvP<;`<|Ht66p%4AfG*QHU>Nch_ECQtl#J@wCH!3o2XL(e*a;xf
zJ{S8m$3Dxk|2n`vANyJ*_EB@a11Jmt6auITK>hhJS=Xm)v<(kO9>#t*Ql@Jdcy%o1
zIWXhs8g0WPkPX=HLHw?v|KsPmECSWGpiO{|Cy=(=QOHIB-vk7TfT?FOFM^psAZ@p!
zkxkg|Ee=!?Ck&=87*0lMwWHi1uE7bYY`=XNpnD}i_mMdcXAI&<ZF~(F?{D3!Ai#Ha
zyc?(0DzU92@fi9fM<`-q>AkskQQtDD54ct!k=KfWO$78dtB(V8+e`yXM82+th_>_x
z9U?U#kye_B|K3tTuZTI`f*S(P0<8V{2?u;Ez?!AOUKR!2BjyB~L>TPxfk}K4K=@5`
z^nX24YEqopr>3DLB4^h_MEmv@he#br)D0k#4UxuRYv8LtV_|HLe?@dH*hc7a7m@yg
zp;QOmB>B|e&d?pNCm`_&gC%}cegP-B7xR}eY-UYCHxA5fdq8HN1?b*kH2UL2z6Oz)
z&Ii%0VqphLJ+eAJAhUEbOSRyZfQJCi0{4=GZ2+!e0Jh+Qh)J<Yv<2I|bP~#6r=QOQ
z7{3=^`@IKO=CeeXw<P3f7$mg+7dRvuLm(VLA_o%XzykBm0>9D)ItPr?XA9zQ$NWzD
z4<dgce<JFN_}ekLoRTYA2il5%9_nwy6L1p8{{`j{@;7oAaq%&(fGdE<JZ&{&-~fxB
zHmp^|=MR*on#wdbft?zEr%}7$7$lu+njvyl1=)%e;;a((T0peJrC#Fil3K{ruFcHu
zn%?i)sN#3))visbswESB0tbiVY-oG|GF2=YwH={NTc^mhfJo~EGKSTxy0w1@`#-SP
zt~nNI89+XtL0!$-KdtlszW}sl3eYMbKp}u*0I2V@gZ((v-@T?{A1{45aOIH-NJYdg
zeHPWK0bbS8{xQN<i!%k)QT^F0-12B2h%c2*X!Zx_7v@*XdIL`Wg0wcE$;inBOzfh3
zo^<z%wSOLaGsWM0WZw?^;wmb>*QWpuSeETbwN2@1Vo7{Zs$aGFxWu)w{Du$h1@bY1
z8KQH=%Se2RpF;2sNIXHlqh>b)rpQ6dcr%RuCPVFfQ;^dDBxV?c@bA|CCG1nkP`jS&
zPshGELz`eO_J7(0Kfr!~-au#lDK)CyQLcgI{;q-Z2M#n2<e(<mX;98V;@Nc}@EJ%P
zw}s$_{y$^`e+n+!4{YO7e+@7J;&;b1*WKSWcmJ}1<)Ai!HCT0Yj|V=Jpl2rp&LZQ(
z;A-F-Q&W4c798HmMgB(5=`Bs`kNJ2Qm)wAL+ikjjXYu$41AYw)I1Zrx<9BleeH|+B
zJTsvYT_k7I5#bYWyM-bCpF^M)ezagsME<n|l!W;R0uf7KCcle7kwaj-CBRHDh3FYP
z-hI6DU@ua;(TP+(q{5I=fSHJVCX&jc+Pg}8M9l!4$niHQn}WH94nEx1-yUb<<T#w@
z{I28pI->EbY0UKiFXnLLm1h?4s;4gQ0k1i}gDx6Ws*l-V=!*E5*ou8&@$UqeB8Flg
z4m7yrpe}u^xjG+-_wji|J|BrY+5#>DZZEjw30wqjctYS@8z?@uv>1J7<H){#lt7v?
zH#mVVAV`mdKw%tx3ot&IYSJZI0jDAR(R+glLLUy}vKp+$64mg!@_X9()f-C_!0A6#
z{73Xr_1mlTTL%3mXLAubNbI*T{q}<;FaYM`a{Pfe1N)!Rt8&u!V0o5q0iw$3aPuV5
zy4r+vi~j{Nz~4{0{)-PLJ;qb4-nT)n#o2g*Xdul(-m^iZ??n*lZbafiwg5giF$h)r
z5FF}u8dtw&@j*#2-B5@~V_p9h@I$x<-xI)4Ws>1Ur%WHC7fI>oSdt&1!%x~rkZD5Y
zz<HmrpT_R_DQ&8%+M3d)1uBM|MbJK$An&SscjEJ-7xITHEZ}nh%>igmE<!Q`09n|7
zj=jdg&)C<6sGR2@pP_F?VCm-%_jR|UzXU(m@tTEb@GAH#NG`sjUs`8bfF@9G_5VEN
z3-sz!=`W<*(k(;c<$j8C_qN%@`?L^;Ux8EhJHfRi8O@-ZknRD=gfFE2I{?t0_6vYs
z0OF-wL@B?v&eYbMHf4D;qKq!L!F4Lyg2V{E6MI!=u)2_BGzb?W1?aW5NS}JCue%lf
zg_Y>ffW^fd6IxXAErHz-Xbk~P)1^qSfNTnThs^&hf#0#ON7twu7b7pDSJz0tfUc45
zPNZ)XM?rlPA)8li@PmLZBDg@e2^5fhZm*De1$$*Vnk=i5rDn%+<Ra{oB^lMTTwk}W
z4-u$7tkqvjkk>40=@-IUy1S6el8`o#e#4Uf8%!Ta3v-9Xl<Bfw^#3*+M5FdDf~bG*
zL9R#$qJGw>jZx;eEa88_90y@_$t}p6)|d1*;7hvukgF1W#b-c%Z{B}-+cN$SP+u4e
zw8DZC8D~-E?bxf#lPPm8%B)#&H`4DYW!9ry-vx(BI}O4=v$)F&=1TQ{WwK&N^9Ehv
zd3-t^LuNr(hLgxn|Iut7L$8HM`g`m`^eDRcz`C1^H9j6du1?4}UcE30JYk7woSg7a
zmOn@aBoN8%#)sZlWZqr9NpuVL>@V$bYjzCdqlf%(DdyVdsh1hT&rf0_&a#>8aDE!S
zI7@%2<@^*nan`W92hJKEYmsa5<IV-kIcf@fhRm}Li7z3c+0);W(4k2ro^eP_aY#Jx
zkQix6JZDL0sN4$)_4s-uy9@~c-nfIrR)@qFkkDkf#*)y%NhDryNKAD|yyTErWJ$b;
z4oEu<rPOSH7+sIT9TAb}xjfg<I+fux$7!y0D*aCDbTv9Hg5}UnViDA2x))i69#us8
zt60&cTZzQaH_tG}t|NH-eDf+kvY2eqH2~xN|M@U~MrC2B{?{2pSwDzPitYdD4#9`e
z$NPV^CAbz{bBO4ERh!Fv68Gb41b(zm={&2Go~jTgevDVMcJ);6F|LIS;Pp7bCQV<u
zi$BIyKd-ZTIlgWx2H?`|QE~mjDc)mT%T-7<8?b9(2~@qBk2B&d4;cx8QHX-;crluv
zy~nuX#CwcugmOF#Smmuikja!+vn?0dh#pl$`l~6gbnB3Kd7s6{ARM~o&1XzK;*frF
zn!o>T!+$PBH`C@l##I@OVU&vHDr6jv#vo#<Y4QeQAh!v?vHU3er6YaY)z}|{t+E@B
zD6{xC!!oGOog0{%mw)Iy#`R0SieIx?-&32JTk+>T#`QED9`4~^H#V`L#^dTZPec9E
z(9edp;%80qUBEKB6m-jQs*}2WhI$tJEXJjl#>F4wy2I4#^>Wh-cQq~JG-P&|<v0yf
z{<CnpvpaX~$#MfSk&JZ)5xJ%if2WYjbb7NC@!K41k%OHCw)iowfQuO_vpXEn9tSiT
zkhXmruo$DvZ+9^J9Ly9j+N~2Ys>F&YAoZ#4s@_VUK8WEOL@P&<J~b?A2EO?$*yc{~
zD);H&|0wA~huDrX6-@;fC!^0@S!Z9GyKM$%<7~<WK9NTtGZivUnTwr^z+Dbzx`UYp
zMyIoQpzSt}y7S%=Ow`PBKyv^Emv@J`rvzl84n~`7aCvu_m0&cTD=DEIe<OMbQum=p
z6{(~@?{UpgKJ$ul6B3`#&r#o@ICkfAA(s&k0MzQH``GPS-LwX5M8;wpu)2AVagBB7
z)83Xy6>w7_qRr@GBuD)(CTh42B2sQf;zXW@$gl(=1|l(Vt-zfJ?*3u^Y<L=63|p;u
zD*HQ#+r9I^XsoDvHxf%WQH}f(qtb%~68T(hntG>Q-=yQ*CLW;l8t|2wYl(En=>rha
zczGUKoRr88vj9vBzlu>)i#1v$d=yy-sO%c;t@GXzT}UbC16CL4M)w(Xfx7K6WC7S@
zT~K%dpANS;-CjNyoT~h|ZqeLu6kr74LVzuOwr1>bpXd{SwM^fPxXW}d1Q*-vM6dzL
ziQF^DQf$j4M||yK{sshikp+uo6TpuHd=|MS2<%{UA$k*7ttA?qx3iXr<daBku&V4b
zBw>GWALII!xe*5%ARTeAHgGZds>~QjWqfEL#m9v|W_Fr-r(`r~-?fRk8J}UQcMd*>
zU*HpZ4pKKlhBd{1jH|o+YQQaVK%*ScP09pt*+u0mbBlu+>tJpN6TGH;WtQsWr*U=@
z9MBzrf)|yq%=mqNne`UZ3d+0%LQf&}MGgrnbIA<bX~uz9l{<m28a?07CSE><LOz$e
zrAjJYN`NjE-9(%zfe6l?@9q<M0Wx<&BzVc*X(l+B=!*{MK0v|c-DxH}paKW85=?M;
zcbZ8K<`oBXKNwrxsP^sR4MiDE1;i-!Zs~l_bknM~2*Ab2R%|tR;wPMDEQE(=b5HB~
z(Vf;Y2vZ{36~8u_T_a`+ep*r)O~Y~c(G=ACVVbtLBL1{x{=H%NkEu7^0yOo;1JJZ9
zuz=f&0ls|}(`_2|>PL<24MR%?Wl3NAGo$nhTe@pme?l-WH1*oNoN?l=3^Nm_tzg{?
zr@6!YY~O&yS_lNM>tC4}U{tYeFh4?E%kL8GlT^&CBi|ISQbi`3-IWi`?@L{-pcmmF
zkDwYrZz2yR4Z73J@`D<c_a0_}O8X*m0vN4y4<d<+O-W@6P1LVmG_5lZVu7=uFklvx
z)umFsIXDZGKqkIwa_AJ1SCMs)2%ZInCfmbAeNY_-v=LD7EGRTtPN4b@W)qm;Sx{&O
zI+z9yW;2-L$GDcB1%>8%Kw8W7j_gXx`x4pYBaHwZt-Sv7yUWyTF*l>tsthw2KN_T6
z@Y99h?ozFFh4H_ujFOx9WppX%206|m0nSnra6Xycre1nRMw6}?WqgLr?x-1R%hMHx
z{c)-+UqiN#X>i}|GD959H4f$}Fu`57%M5ogS;ZLge;QD5-|aHP9MHgG5Q_<z;Qe8j
z83{&Rr*$NkuG1RwKO_T;w(&Jc@yyWv&vlw}1lw&!SO<EWZ#52H2K)-rG01^~_shM<
z)H`8UdgGJ~Ga5hY`!n!UE?|bGnBD$kT;0tl6=mWe;WIU6l&!?MIE!`nGkP6T&q6A=
zn|7PA4rZQ%c@B*2rqb&_1Hz1PKnopE0ifV++HEdzFpC|`%V2`LX}9SE#(RwGnPjv(
zpgjC7_cE2>ov1RpRCJeG0^RU6-~zw9wo#q`K_Iv~drV(p=>Ld7*EyhVfPzQy9&?!!
z=z0h92AJUL>@ocu%wPxeCYa*KxRzdk_LwUGY4B)CtvW1dck`Fx(z~0#Zj{#THkac|
z`^zQxT7@qy&2J&^R^t2@ysH(Ot1PG=pw)o1CTvIEgLN=wm$}knt^o5O7!A<3k@u5g
zqPxx2fOrcpP2I45Xi%nmOn>XRKQ?P{tiF8*`8cWLJ?0uP+Lm=k_ZPNhZR6{)ABYqm
zWTM>T(RAY^<Lt}dn4GQ=Gr&3+j)Ma@(3ZR*(Lo@7ugL>{oyF@`@E_n+@(;idb_f(d
z#<etoeWm~h*IEbTac~F+D*uNG4xFQylfbv82z;i+>jvf#@al?>zz;7WQ1V2u(geOU
zp>rerbrXLnosNV5aG;XEi+obriF+P&NPKV7@o_FbG>~WG;~w6@b*s(&M;#cyPf$wt
zU4ZUe)75ddvWzp~_66Yf2I#&q=gAlI-(!{==&UFJ__e2N&dK*)_J-cyH1MABS1uMJ
z_<mFG=TPHc(;FYCcp$TJHv(iZ?UZ@T@qXprqk8ejxPC<yqdWQMYorh<LUtj$kv&Mf
zRE-Vyh+;hWrN(dJ3vk++-*3QaLkP&>`^aAGUt$m!AV>H;J}z27^x->s^QBMclE*Vv
z`yhNie!|3$+=8QY?m)}Q2S~4q6wwB63Ap2OZk@kyeyQg(_5fU+%$9>awYQJ^5bU?$
zszB~RB$CN@QLxkfAK6XIVen$}kz%0N;~AIh{!imxzVc%l+~+g?k4(O^4)xT+!U%uT
z#c?<thr)b<T!@|<(jKAP&hd4hq8Fi8@$)^euk)&kwj!y1bM!GYp1@0dzqE%78u!MD
zN_#(SyHHv=_zc+(_zl2sA#Q0I1I{yD_nV8sW1hAN{3U(-o6Vc>H?CZ&xrjE+0=1nz
z&L<+gz9Qh=|4U);h@F%lTU|M;YYpS=^SC*6O(kcr{b1@H%x?T=PUAxr59KvJg#c}+
zfCBU-xQKs_d;|F(5&3R|pPJQpP1zcMrfW1`qqdy`?&pAiDh)O++4c_RatHHs0>(||
zR4`OHZ5<@fqQd+27V+C;{4U~_mW-X!{$Nf6sDZQ@;EWtUvoFZ51A1Q4q~FJ<y;+UD
zubgF-Pc<EBQ_0o#+gQH>@>`O`52ifvjg%!UQKD;0skV)xe{%YE{)pfG`Te7*_f2-A
zeL0Q3tN2}Bqtgk{fRZb~w*(iVFOgrVz%R%_#QG`@N@tkP4leq=1N;rZA4z~I06RIr
z9~|5PFn=cCN)r0foB>V)dkaJxQ+AQuf&8WY|9!Im5OK?YBH8c1ozEm5{<mb&*qzy^
z3y#8MycI_otQm6h6{2Z&1W~ppfnB<WeYe@DQftOpV6|oxfNjs3A>7VX>i-YG_@WU$
zlRwXbq54{eGjn9b3W=P9UW<nG8NGa6A$oVs_?Sfgwf?kb<ew2TXFL98;ZK+%BprPk
z@jZukpW`cc(I7&f;^;qKKj&3holI6OSqTqs>*Qyp<@oVa8iaA8i;V$P<KGc1-{5y6
zdVg&n*zXYcX@TwV)9eODf1fWpt-~xDitzNGE)mlNN8r-3anzoV`)MSvLOx?8XV6v?
zqH60vn*!`bLIg=clxbCDJtBX3og=21Pb|C`Ohx*95B4JUC4{~zA+_S?E!E3MVdd@H
z0vAE1sSUUka2Em!ybmdtBw+gK5!1p3oB^gB0l%>Uzs7e_NkFffZoHf>k*H~I14aRN
zBA~$EAr+DY%p}8BU~1W_3}ig0B0nRkU{nps79@Nc-)FJ5N@xhSO6FIjyl2Y?4&uhK
zQ$nVV&159Tp^=Pk3{B+~_dm_kB3<U`RI0TlJ`UT{AuijWkSa-tXG8o%OGe2KhKv&Z
z4XKzwybvyJEtj^~{x*W?`aOo~$NiM_$Aq$CAG`TNKD{&b`_39%DG{BBelj@(JsC%u
zRTnj8_RM8R#Ls@@B=mKuqV!ET1W8w+9N%dmM*P8^_%%(yrqEM3{Mm$Xcq~ADGVgSN
zKaKHcN!$>7?0)k4I<8Cc$oyi`!KQ(&Pf%e`WR+FWzmQ`HN^B!c2CwFHzR!X!Gc4SR
z3LK0w#eTq075EvWUlDhD38>6@$!8HZH32K@e#(xlh5Zq!gI-xn-;AuKt4QRUfmzGf
zucqj<h{>*PnV$*pB$x~MH<FgXJeNUN4_ggTjg3L{v&O|iq$;)=ZxRjA!VY|A1zUbh
zi>?UuO0Bn|@Y(9+qQ{{F+75`>HOwE}mB8xbhm+CO<>&g?kYIQ)d}im6GR6(iu|`Iu
z8^{Hfbmd8o`xD+L1}lS?pE!pX@D)2XWR7>@eLak^rWsP%24pAo0-oR)6+%h_x0<z?
zX>A%}qb}<P`@iV1Kd@1c9iUqdB0)Wt;th}AOhJc;Y3$*nCfo-HhjH+ab?_(PLq~OR
zz~teektML5GJFC4KZ`#M_MZeiiO1F~;Uxdav0X|g&F{Q5h@+5o^cjx6!cj=0lT7}@
zNr*DKnc&E|_&I2@!L!hK-7}o7n3F>VAd-qs-6P#lx<|VDPWPOEUscq5Sg`s?{gcbK
znqtZ90bK~0R7)lmK>4GR`NL#Fro1IHlE6{O{7&F<mW*`6AtT*!KAEUDSQ_D2W569O
zg`5v60*nvNBF2FRb{*_AIHyBcgR=rQ{2Hdo%$UnR{6~>YWuJ)YPtd05H9r1CDxt4S
zWu$L0I_fKH!=^sY$}UElmVcUpQzND-Kn=KGsLXCEQ^{7QBGIcN$-C(<lTKx-So{(2
zd%;%`UQY(84)~a(IyhuPIH+zNRH7u`;h>r&P!nwJqvHQIMeQS|n#Jo|hd+R?0iM6G
zL>U3sKdOVnCJzV4SO;Mo{EUM%OW=604UdXHVnX0+fajc(rmi|a=5-VEAEj2$o9T~}
z=K4$YAuN7D&r6#04JP_$l}*^~;6mpBZM@GAQCmaWWZxMw$C6A7Y?|>ZZta<Nmki;C
z`m^i2CrD(mVE$(P-U_zK#Au1*LL48oj@8)II__c}x4~fx>zKtfwtNnk3PY~;9EXYY
zJ6I*M6zs_)q80lv@~8DFeItBIcP2h-i!AY7w%&$R$o~I_1>FXyxdokNK?y`WOfHy1
zK7sI1Ffpc{4%>gB(>fsCV5XjQ$KxlyOh?n`H%%#b>SeLY90sTho2~$V8xyqi{#I(|
zWy<BV%y>F2QcXzA*Zpq(#+P<t`8t{XOw+I?B-&PH{P9}66~`#!KYkV_3*%;#QOokb
z$Px6Kx6-#_H<hj);Nu`4JZ=9mc}&~?Z1C>j+7Vp1I^a(9x-wc4yb#*|+Tf!xKAzy-
znvQW1dT?pdFW{nxqIs8q5f09Uv^9X3+7{BLj5;7E5ug>np319E_l&*R=4YAr8+UYW
z!tbF7hw|vj5wjZy%B%toYLS^7G(e6A7|$$T(iCq8``2WW*)B_JGPpY&Qr#f+jU}bF
zJ1wcxEvdF5V@X9h{KP(HPwl_9tH;U@@Sh2_iI_b&(TE;{lLk~Hgo6}hA9{_A9jpU|
zc5K86`h9*dU>y7rFkXI+n!T2(f-F)N1ULif2*Xwccgp7vD#})HQp9`*PF1T0t`1cb
z?gXR(xZtXpP&!rn*2bFx?ygBuA9%J6=$3}c*aN9Xb|_TPI%4(%Rz=1FZb(J6JC;Mf
zw@#(MktC!$2kCT_Br-|j$EeMsCct`RAz(UkLeeZ!LItPr_aneHCHrAvGQN}n2Bry8
z51c+A(+AO;-<2TSkuK)NY#p+%_`5bggV&UdUPRziM)}9_#yIGV173=!WIn_8CydqK
zI>`P$E@&<IHK4!$3eX)g@7uugb?PYQzfQzWk<7%vtOf63tf@5><9lpbBiO2}$-62w
zlu1{82v%J+4(thZmA323$OqP^^h?zLQf4AuaoP$%(16Q3F=9Rjr~y|W;Bj=7aLti0
zxZnX-E|g7IePjbp19u-u2-nrdYfJp%xmbF@l@Eo;;!~T&c)*RwLWhQ`$S2mR^h<Fn
z-MNyZB#})LpW7rFfonh#Q2|>b=}EJw5XvNr&uqZy0PnW}yV-!ryP{&KpjE_tX_J@$
zwlPWQ;8Gp=Vjtfg(!lzih2-17h2-ymL3B7kcf@>U2`b1k2r9sNNbjRmq@ZmyVs_Y)
zGzEA9B@wVSl999`l|t!M<ZByn2Dn@s@O&FEc||IRvZ=^UTak&nH5;ZRI*T2H6j-Md
z(cgws>AE8q9wm#SHf#zuiPqp6lZ0?>ktRu#s1nL<7ctTQ*?=<vuCf7p*nr8is2VD2
z88I*0EGB_XCkdSgk40X!KBd1MpVD23TvRp*&i~8+ZG}0E_E&5c?c|Uw1UwaKnly`Q
zp`w!`=5-rz7Ptp(z>93a<VjQy<+P;#U$a@<0=OMn=wwn0c>}!;nbL<R`nn6ypA4CV
zL#B5G%i?zKHj7yfrlrO7w3wh{TQ~~nEkK$Dx+3`_aC!aH8}ad*9R3#jPrxkr*Sv`@
zt?TmDeiVmHf}e!+hcqw6%X)<K;@jAIPo`dm@hN1kV^LjXyUjxS46=}}7jjCH%yYTt
z|E|T{4yLum^tPCw%p+jl(fk)%o#{HjR_D2P$khpKbGbA67y+7MJ1xw63zzy=v!`L(
z5Zizq&lHfYz8dwY#l^t22d4|qjz}X97cH|@dO&^K=LriO2e1i19h*BLZH^M~6>yJ(
z)5bHKq?%Jz9pxG#ThOyBr%AttL7IZiCgPj!oY$VDQ9GR)*U~6}4v_;0rV%`dR~;fd
zv6o*ljTP?wnZ<|5bY{TQ*7*YPTjI{K?~U)LtZ%hFWqs=`DBsam7!9$`JLCM)D+zSs
zwqobmIDZxg%4iM_T9A>BS&fiq(ChA;j$n__eIi);9B4vibt$3eEg}3x%{my#na+H}
zAd--6@%m77Qp7xGfeQ)vjK%5bCR{=y#kfoo*lH8F1Kg=3po2~rd0`(P4OO40p$((f
z>l3or!?oZ`fHeQr)|NH_bdeb$m`3y{UisB@p=upgQ=Ns?ylcT2C#Dy{%J1D^+dF>0
z#c!VVtF{hN>sS|=a;$}AjCIW03b6^@c$=@z*Pi1DC!$=NiS~__WReap16hq;<45|3
zsf=_wfDXr3vWjL?(da4*yaC{I78tR>R*-SVMsZ1V0ImVY;j-PZFejR9|L}PZ!T4VX
zPHz^)_|Wr%*iqH6>}En(|Iut7LeH3rnmY;Ho(7!(n;~myP}!wFiRwtF%jIr(PAI##
z>}p9{Z!!0PIo)D(wJc0RMLkSDtKT{>dU{Nnn9Yj$tTy<pObkb@Af@wN3*=$+TB3FK
z+cYX@q?ZdA=@kNWl|mcwrLIzpNA&b41;|7u!!}u86>xTY!~z!qe8J+nT3jni8q`(k
zbk$~?R1Ub-B&FkCD`XRT?=MrRVG|Ce)FE=pQHbO~<W@^$F}N2kPG>>kk`W0(WSJ#0
z7hD^NaLVE@yaQZr9jf6`97?CN+ti~FDQe5#Ww5v#!R1?=j$pziBa%r1cUU6x^wxY^
znCPf<B62$oTG4cjdMvP?s|56Ewa^MnWFEN3AtGNow#_+80wEH(Q`dhQ3^xIM*#dR^
z5-?c?c>wRW8LR-;fdsV2pN!mvUb9E~cFY9nbZEH-e?hY+8sh+RuLa!;sFMZh)FM#O
z^ofAE#}c>;%xMtNwtWh+()yDARD4ONvqM%e0p9E3AaI`r-3O?P1?li0P!NGEFb`M)
z^TC`50i6@uBln|MMWug|ib|)$r|z_ton2A?@R`TqGsgm#0DQ#)b%+$ORg&qI(<WkK
z;I!7S1otYq6l|YC`mpuRqYZULZLiU$E&WoYJw68`i{xBeByUN_o?C+A&SSeY;rA5p
zK2MEMIsxY5L`$tsK6O}Xb?V6m$N$AkZRCWIS%4kE!?K%2Mf9JR+WF|ikXQqWCR}N0
z?-z;FaS$EKKbC;MzB7}!?`3WPgg#BqHpYBM3oVftx&{!@G3t8o{9jC@umN9=u|#Ay
z+aa>Z`jq~ynCCNUZn92w%sK|AI${k>aH=m?-)No6ZjR&hX7nomMzTADu|oyC^8dhA
zyM(rMGFRJ~v}yak7-^3G(a6%uQGX9Vi}D{ckfrh98gHx3VynHyI^GI+;)D{%i*UTu
zI#$~x+O&Q5L0aN;EV2yYS93Og@4SGIAMe`bpAFb}#_#dR#Qk8mY5mv9rytS<&<JD^
z;&w@jHx`Zw6){N10aiwj0Y00Iv@>3Uv;s2`S&HzhAt7(43-d0@L@??~Rc=+x@5%|*
zPqY9>Q^-KuWnZK<4kjT^2L3jSqsf5q5m^hSDFaDIFA+Zz`;mxCn)vyI3QYoodE^29
zd6Q&yD7g$d3;S`p{~MfbS}2=rr`muI0d7V>9ZUp15AgWFfT^J=HefE8?F7`$dO31F
z_S2Bk@uKmmc5En}fYWWjb%2`_P}{4(-2qMy3|Kxi%?8W^^9}*EgI<C3z<x%Vfbn=W
zLq)A3W@gj|To1Sf0kw?^+za5$z<?D)Gi<<BVBRC3cFL=ei?N@TG+?bz4gqJ|cn^bZ
zNxZ1GOMx!|I6E+4<<KlZ8gyF#MHqD2P_C3<KNm?Hbh&)`O^L~THUE47N$rN$Abqiq
zA+{T$TKT+%C&e4(wL>9T&ao^v0B!|K?T7;R1DJ!99cWcU*_Pz1V5TvkwSo0VuEKsE
zk~p9vkj&b{`Cpkm3#coZX{WjxxfZ~DBr&t_|0C`^z@w<T|38yPdI$tUC-g2I#1NQ}
z%p@d`goGFoRJtHYFH&bIf<TZeE7FS~0)ikS2r_gO=^&zXsfq|FNYDRscW1&9;_rQ*
z=l`zoIG&tyPx+p6%iNiroyDkw8R%PV1`|k}sM>J6f&RO>1TuKQvAtQ1#n>FB8sdc1
zmLpxP|6@3w9+lQ0DIhQ4HcKb78Y?iaOf|$wNf>8fH<shsY6Qt0u@M_qvGR3q@nlWd
zEMd#yhPharczYm!*jSZ>Kr9-97mLQ5I1tO~XE^X2;9UI{(q?d0>r5LbDMhq-(I%GE
z*EyB|W*J8SdVw6mmqPYkO<&)jAK>C^hqtFLbupcZ#y}dyGAf)c#nJK{&f4Ovo2kDB
zeINA2>h}i6C%FVa%Y&(Jp)V>S7CK+E5+ZW}PO7Fm-xOyJLgWTF;~aaur)62v+h*Wx
zI0`1PSS>~1&vF4O0kk{%r71wk2)dxR^OQRdeeuy-0ZO|HyVWk||LljOww6_-o+j!t
z7}x5NTOWty0#eRJmRIg!0ujD?07NX=6HFkwK(ZDjUvO9h=@W^STjAHn0Mfz)QUT-o
zxdQRbC1_|aGH*G>1R{L(0tidFE=_bP$vQx+&jrZ2GK!RD8dt%%55{6ye&5CRE9^Gp
zVmkucR5OX{*fx493EgozG`Q=R3*tO7{<CL=Sk=-^5W-PEK!~M!stbrs0N9)hi0D#^
z)Dqh=jE679KxF)g6||ks|GvQ>=d`ggax+ZSwF%bbDbyC|{JDVkbAYx+xLD<0HbDzN
z0{|_S`wvXej{e^YfNi;;J{O@#txVLrK%E!VVo7bsu^s)K({nzcUok<86}4xJrxzZv
zdSdEWc*OK$WpsWu`01$@Cbu?&Zp4W{LB*2Xo?}-ol~M@$s;U1S`uWfo%dybkoeRje
z=)Y!ytRJCsHp{iN0dme&%#{}lQ5&3y`CXr~zC&5X32GjPHNW=|d@o1NS+#(wzJYCV
zY)^^%HAOHMD_#c|<L@!t$6*C^JYz`E*G=T-vFr8rlXg+gvt(xJJ61RJt>6qimL199
z3PO%Hg9{&eF@lR#tfNctg9JX5Be;Qc#S9)v#)-LBE$FGL*>V7y4<CaMQ8nRXaBt>+
zTLLd|34WNsM>#Cy)?yS**_xQhFN3@Uz{No9<YIdayB~4@Kg+}JDt6+s{u*}gJXJLv
zC^=BJg^x7@34;g%iNVy_CC~|i{FEb*L7+J7JPC9LyDB_B=Rw&Qat!6HsnJ$|aH6d$
zZKAv16!ZT8j!v4+I8!;FCj~@P)8GOIFJK_Ls4K^jT=Z=Ok2m$NqF)1jQJHQWKjxwz
zfqo+Tj<*M1$DkGlBEz>ie$K_9EnnS9z#tfa_tCF667=YH;b@M$9GMCEdtIHfKFCku
z=MBoymO~6ok=zariR7ALgILe|0HZX5TcygRRLL|Jz@cbsNt+1L({y+qr`9N6hp+i$
zl4JtQLcc!1M3vs+sA&Qd@^JtYvc4Rrvw`VPBWsJZ8fI`Qf|Crvg;6ilC}(g5{n}=5
zFZ5p|xF~sdj#_4LAs<h0A?wd^CObH>iT|W5IIC+0_r~ZIf(xVGrqMIO`3hbgGq@l6
z4G1o}uLnncGq{jXAh?hX;5h3VoY>;kK#o3%pnvO`!Tm9MmEgjtpJ|jccm%;4m_bCp
z&nAfIqJbR0<Om}6f;%`2V!UYDh%TAaRch@h?I4UU{)3SXqe#<8lyWYCL`8;hT>b|e
zg)h6lbisc53O|n&g)N)r8Vok+N*DGH*qy_!F9LFue&_Fo=rH~wgpN)X#XnX{sWEbI
zrn8U_vN>gqT>M?epBPUUOn-yW6aGp)@uy2R7l#op4u_Zy%OESqU>99AudB1-!*O~Q
zN3OHSs-k!cM`;%jmjEHg)g=?e5EDc>2M8W;73fE}fGFz%;(ZfD1s8`GaVW;h71N;@
zA|k8u4u?*EBVGJea`E?}>8~O(u~dZU5d<IM92f84v<eO4%ol>Z|C=nw$cAxPXUzx?
zo~9v~HN|)0L}3V2HXVjyHxhqh$PDJFX8IHIf%p?Ln~Oia6b^sO;ZVY1RnuX93`gNm
zjHjU-)lG*&J_v_GHp0bWY5ZNw;jb;<?tTHg@?aI){ls)<QX|C%_YaiC2b6<2#3MPd
zQe#=NL8SBf%R1hPW@9#mnt%#KaP0{oF$ToxA+HHY$cF$($i{G36)S_!>p1`^0Lt$G
z#Jdl~b~_D3qAa60ZsZQg@gAf346b};Ce^WPjh)DTG{?>V!%n>4rT}*2m{;7#v|j4A
zQHKdL8$@Z^ns0Se%nA&hxADcQ5|@5Ho>Dk^70eP=X-z*9^j(fy1ir%|syCDffAe=J
zHu!M9F^K9FG;Kw)wXqe6j^)t*`VYq9-6VxgW07$r#v<GIIBx$3+qPs~$R)XTd@>70
zX%EM05-NIu54bCu8TIMv%tl-TKEUx^4v~>R%HML#2S^c|i9saxlpQF&=;tXMf1;bq
z7;<!3+Xz`Ij-7}jvhUl~nYp;IdLV9a-pdilppT201|s8r7>I1eg^U1pKZkw<S(Zd!
zlw5S-l5U($(H+mR23^rT4>?5FSl<YZrh6k7v#-_1H*|R~T|qfKtVL+S-l0+n9I~+F
zRdD#X>fG)%F^vYbc=x662}buCJbmD}$e}I!4KnohGcpfkvWw40rcd3~&@+c2Q-}cJ
za(M8~ym>=I9eYk!eZF473j&-sJ>vnI9Ouk>I+@G#+h{WO251bx|GY*8r_OhIEnPv`
zQ}jP}iZD)fG4{ZC%oF3MGV^c=nC@cZiOqYt*oe%%U2I-)vGMY!|E-rJfHFIfsDu|r
zqEE{JZW(<l?g?aad`VyaL!Y|5!XjMGgT3g&Xzc4X&F;eN7#>v5oA{DzNtxwO^_{Z|
z3d!M*Z-hvpc_kk+lcqRrLM8%`ox}U-a%Tq--RI|Gqq^AmV58?==5X29#VFpzDDWT4
z94-gA*d)5x1YwhVmBV$}A0ts_F*jFGW-%`(acrc_VsU!Rkz?w(l(~=`WZGB3eiioO
zrpIKCum6L+v$A?&IsE?WkcS6Dq(B^r{%V4w8&9TOb}3oQp6VJg{XBA!c_IrjE0KV&
zx>Q0W^dUzum2jCA()XhOo%6Vm9PDEBqKi=hjB@X$Lh?-)n@AU%g4pCf8w$xcTx|44
zE=J)P<=#()<UcVI)rlday$R2bX7=gNREIOiGcpfkx1RXoyRW%8^UPuB?5iSj_>H_F
zw`zpsvJe%KZ<|>q0<d9I=NOnt$%~QHQzg%?cwzZ3af2IuMHn@9F)EHx?j<iQ|Lqc}
ziHl7MY;rGoVfl`Wjp|}kQpYIwk{6cmVN?Nd;%pbgX;+--KH_-f_kW-74bFIXu@$pQ
zoWRyFE5ylb8pjrF#c4Z>L(Hn|cVBQua+y{|<Oetq{T+*gtGUbu52?sAed}q;#7fGi
z?Aa%%`)2md@O7f86UZ#UMEJA+%e`-k$d6o%PP!PC`p2#*B0qGoIpbne@gKWJFCu5T
z7@c=9s`QUtQ$&7@O(nV}g^ZST;lT*9nZr>To4MvmXg?Kjt-?8MM3>aYW*uE3?lH~~
zyVV_jPxpz-Cc)vrIr@vr;nxlZU%wG7oyC#p8Wl&G@wrXKqR(`V%rCu*Q?cv0#siuD
zi;J_Vxj18&<}iG^Pn{>Jr)7oY^Y{Xj(wzTQWL=p-a<e&VQkC4hxu`B*bTO*pVpI#G
z+^1ks`GSkh3obUbvB`Z37L_l%*woCnVg0RxQSMW)sC)?{(a&jQw3L1pr`ow3b+O64
zpTp&=rp?RPe1eTw_ZM;0%gu)P1`lh>S1=NNnuyWWH=TX@ba|ai>C-aj6nt7%RQ}Bb
z&<bD603cSu#T-$&0N?{20idw_D@GN~#ioB(=Rz|NBeCNxkD=>U+{Oa)yBVYnK|Upj
zn9oZ&8s!qi8Ka1N&9r#~o8{OrHh91hqsISeR8;;0Bc@G_E*N>d<(y{K(eDGWa$Fy5
zWV%^1CYFt+$9Ssu=Sasl`#ltqTrv4v5k<Og`st1zZ~TaEj1>F7e{kgTJ<{T`TvU+`
zIUPu1IsS_IE2=Y}V?RK|k{k`F+%_uC>03e$ztk)E%H&|_AbvzwYxvowC*(GNOFT1I
zWqu;^+4);SKH}nRPcFVZbJ#h-l$68I*9g9l66}2Und_p!wTyfir+`zE_+L?`ipcU~
zjyN*ReIA#PkGa^CaIuNUCiiJvLOyEk|3#ouE=El;%6%S}kdM0rD(hmCfKBexxP<%z
zHe%@&ch$e9>lTpALXP9&`tSehJLkwPDWAZB=(9K+e3kHQmw8aNoZaJT%ATH{CFCE?
z1d{RfNRM|SSpu3wGRVEWCFP%8jHISdY?@({dwEOBKe^a=y4a**V^vqYdp~=JE3!D{
zV&vmul!j66<t-_n#70cfUS#C^mUFdhi2g)oMJ`j+WoAX_aw(?P8Ph>O90cG%tX9f@
z<)D-tjs0otD`C<e`xPuKVz4gbXo-*9m+2DnR%{&az7zd$?oDT(KHXQ&K4npHL0nqi
zN<h)qPs_{53y>QokWTnq2@uh(pK`SMCm=c{-)?6A4o1ObFP70J7>xgq<tr<<Bj`5M
zekk^#*ozf4=D)HpC+j0{u+wy4!$BAhMCn@NAoKqK;G7v{<Q-01X*9Os*othIbG-3i
z*_YPk-8d-B)OrsGOZf1~wY1J4<s4JXDz{TsHWyhbd6$c?Wx4u_$YJMrnMUy(Lgz{2
zTbz|<Pzhj_dpomQL1xyg!(Jo9*Eu?JbmHjD@g_$Xj;<V5A|)ku!Zu!2b3_T+gU`zF
zQ}J1vg=mG?|E=VB3xf)r%_?y`bUCA`oY5vQkfv|x^kTN7LlHrHafA=$BmP&vgww}o
z_z~~T>1GDku=SclHx?Y+z+vk+)yvX%*al%c7hAnfGak{va1}>)4Eax5%yG6b-+)0e
zn7@VMyCa-BtI_)&d5GztTq5`KkbTbK`WB!nB=WZD4_(QIzXLA*#^4VdU0Q?Tej4(c
z4(B)KiUrwP4gshdj(R|=Lq$pB$g1ENHYmNkoLPy|8hHah$46z_mjPCxw8H)ij-J@o
z#I`nvYh~l{({7kf#$AiKyk4rj9R5X0z?Z!OzRL5}<bc&U6@zmFPWSY50>DrzNg(Gy
zEbkaQt}+kliR_rE=Y?!7zUWzpuLdd3JE4v*xU}{1l-AH#1>hpkOEe1P>o|IYz8{Ck
zyeWw1({VR@Ep?6sM}L-=*SXj>bg}L8v~4zM0q_gcMi&7axfu1$VdR>@dW<TQ!CGu$
zD6uGQUCLTNM`;rPy2!>?1<RIkscZ!~eB<hXO$P(M$@9&PfG_dI*wUqqI2+4g5ZQjo
zv4I9rav`5#mK=xK=*7pQq8hF;4;R@Jz!l`L@Fhwvd>!MtGH^P+m)?>OQNO0GDDf51
z_ydh1_<D~1WHpFmD2E7?C<e-8R>T#f9RtqGCL3Je$l;%^_Fr+(|Fb-w-SFS&aOU*&
zql>Qr_!#`Ouk1uT@UgkOi{Vc$h66DglEd&xW)<a4*olwq1^Aif$qS$^FKnyms8Tb)
zzcquq=|}!-(A+Nj1n1YiB@ggdk}G_)+V8{3eq(z1w#_T~@`WB;RwiEqm*wbPQH!rQ
zY`|zh<sx(RGCw_M>o`x6=6_rA$wd5Qvi~>u!G*Gi|A`j{2_G9c#AF^RuJ}b!lQH(2
z$)dS)##3w40*uAl^gYIPS(=35MvhS!^PgjBGQwnOK6+wl5_<13dW6o`9E*^PrAf#u
zCOT!CkUv?P?AN`5ee}_$Ke03^_*>}WZyf%F&1Q~|k>@r2EoEgAvTry9p2ig9U1&5^
z<V3BVohw$Pxi|b4V_Oy5Ltx39>a^Q}?HGUQbpSaHQYb{;ypn>Bq;S~s%hFN|eN4mg
z-MMLlvcWNyU{;_O!Y<TlC-Y&KuqD1BdUlDuxleGoz6>{&*_IuTAT}K;T#Uel3f>rw
zar|vvIi=#x#%E8f_LY4p&2-JF1eeSo{CY|rHaS!g9DBSx(kB-@r{Oz7_}+@|t)}lp
z{5Ey^e)3vVoJ%-$I?4CM$#MV|B2VGOMxMgSc8>7?gNWa)m0Aig)JK33Kx}^eLXnHz
z2p78vPua1w8z`?tS<Dm>WN5D^1#FHp{xdu%m{n9u=7ns9vw9A@9c1`E&i|ou*~x7U
zev;fO-RZzlqTM*yMoPkoONm^||HMhi6DOa!WIB>eg^!&alb_0z%8=+37u`|l>X!19
z9LfsHlTj8cqv%iT!$|S>H00IHE*Z<ZBD&*S4z3%e>G;XynCRj!-L-q@cgahZrsAVA
z@g1f6j?5=v@SjR2`#;J01E&K=<@RB}0|3Iuf2y1u@WjV-ackEaT%#$6aI%|Yh6{QI
zmt^yiB|8Bv-FIDd?KyM}bZ59!XdD$1-QilHsn|>tc8ovA2&P2r!(?eDhT_EL811<X
zL>~qnz8p3$i?Z@WnPb$OWp!zD`1{aVC5OL*RB0ALU8|gZYxk+?m*+`|W;q==%6tR|
zyNDp1{Kqo;KJoDpKCH@(p>o2<9*((Up|=o=GE0<hj*IeG7v*`PTxRA%S3!3+y4FCP
zK$%6?xt94uY-YOHi7HFpeD5jSj`~l;P@r)&oR86h9ELUw=VB;k#Hk3*8vUJng@yQ>
z&0b*yZKCT=()M}c6R>oXBd7&l@WPkii-`U&$|05+8xCF|G2wg@&IO?FIA#N6u?dDn
zN^&^#kfl*B`ro+d&&fsKhW<zweZ$n(MSzcUF_1j@GL(zKHkSZPa?!V;KN@|}9U_6^
zU7X<e;@|p#@=vf^#_=h~at@lxNFt*Qb0lNutSQwPi!->qMJw1ps~A|24GAYjduX2E
zGOv%Lsk-?%(w!7OYQoV{{H^4m2c*xS@1cW5gjlaejpTp)6CE)DIV-tiO)E0ODI1TB
zeL`NLQ*^}ok3zghdfSH<@h)cn&_6iL<CDVcr>yn)!*~7swLZW2jI~wvUTP~66695~
zMbL|OS{{0}lIog&4XPH>z$sf+IKTOq=fJ||Ulq1Sn13ztE^YoLUfJxl5HDhN{v|HP
zoPUX}kK?bUHq}R7YHQ--xzrZzBd@pB5ApHz?~u`o5wy41JHOc{zZi1IZ$&;yVMF!S
zO#7svK7We6k8~mG=V4qBjoNwBKDlAY-H;k-!BWN1S-Nb?-=xcZ58?c(-PS(DKd2CL
z*|ut6Sd;r7csb<jZH+^GJwt9s2S}#|V<Pp4s!`4|p(!}{l|r=0Hr2mp)SMOebCr1|
zB2_wK7ly&wmxZ%i)01<whCJY&s^#^@d(MD%TVjZh|HR>4>a(3JHuQ#la&*Yu;Kx~c
zq^Kbx{@kc5Cvx}+j$K)hR@wHh-@K^i+j6vqT*)Ys-8v`8+e}xW+d3&E+iSZ{u66>0
zr1WI?HQLqfp?z{XWosW6D1A0bl<kMOc{^PF-0oJC7TLC_?eVy`cDni@O2`Mk#j;!b
z1Xx*$7Lk?ZvF9+WMZ})0|6PwOxK9_{GV?DUAWdv8^50cHvsR7>!9@y((kk07cHCWl
z^j|qzL;m=@NOr5OunX>x;az;Qy-q7*^(yh#k4X>l@Q}~h$7O_Mh2*<YKq^8P@0}17
zwXVDcQzWqQP#`U$B!N+-etOEmt05k8$dHf#>C?WV{b+-xS+17biWa9uwk`kgSc8KZ
zuAtI}70GV765(nyYmtApx0{t+qWOQ&=9BU~kGkdKJC1UNc$Ud0{W?&@{VTFXV^<&j
z78a&OwpE#YJ96(M%ZJD%|Dq=?v#z?dI0j#~w=J&arBp$`^*W!-;|GmD@XE>x5rgY@
z$Iy3lZnsxOm({KI>Jb-zA(IQ2lDfSr+bSj2tM;InE4mwPi)FXeZ{*q%H8PkMr?<dr
zR*Q3(2IS)*;fdQHIEU#8wm=_e3e8k);69hi22Z{fPK#_?cPUj{b=ox#@(<oKGGBJ<
z=4U36u{a>xt9_f5jfn5jt`$?Y_~Z1O>nHaJxhscwa$7znti<SAyKV)AC{C#BZM{Q$
zWjVin+`B&a3W!CChbOlBNaaF14`1IYEYG3gPM|equ|nIjvZM%TgptMd4gYwNnq+J2
z_>)EyxE_~en`Dz@`vTSHfWz$5jhHDYMcA^<QkRc^%IszJ8MC>gpN)29$AG8I+7BV-
z)6pW=JsYjfxDTE(+nMl8w9>hleYWXov(wL-y}ka~Xk*Jyc`Dkp_-CS>824;$OPhCp
z%B;eGXL9Q~^;xqwO0|B<Y|7iun0=6oS?mw5K4sSJS+j^q&qf<+w0p|zEF0k*9gy27
zd7Py`WtPh*nYH=ZN`CY0=%>s&HF2$Ejei=GS^p5tzIv?BQFcLI&H<bK|E#QBNAW=0
zK<6l4Z?pS&dHCEql9jb4%|3pH&wZcAd$Y1CHE{ktZd+DX(<aWpqt0e!{SoW@yU9;k
zS&gbY|E{|`D=Wa%Uw9@ft3{ksZs60FwtjqO$bAo=&ResxermLS{3MualX$-1!qLZB
z*(jx~tRFw0`cyqyHI?MA2bJvXfO*JnTkGR(-&`PY$h0e!!|bb7pY9IPdPJ9H`;EGu
zVWL6omCibLO#bXz=7)Idt(G*=Ce_NP74WI8?e)x~eHU3$oBX|(*7AN?ZPbktTKCz7
zwa!z@YPSmI*N)!{)Xo+ts6BqAh<5aoB3hsEvi8~iT3U-=>S?386wrFT;;U7=^^Lmm
zxTlttP*S_TBt*LyQbv3API2vegs(RDaB)oy3D>@QFTXbNqX4bqlrp+Dq*-aLUUCJk
z^}IkWI_odBdac^p<5oqrqpy2v3zA;Yu8%9EefvQk?bWjRwe?f$Y5i*z)n3W-w_2@x
zDeZAwE$#KV@><^yD{5<hEUwLXD^z<Y^sZX!h>td|WvDj!Zjd%0GE1EtlULi&D4!PB
zyrlN9d@1c4t%z2ocv0=QVmd!8weW}Mcj(VEII(q+ec4It)FVl+sQ$ASs2f&KQm=e`
zMQw0(l6qwM1ND^-SJl0jd#Hn!XR0IP2dc@jBh{f-HmL2}%vK+d`$(-mq=Pzk@W<+;
zjvLjG?}N10zxqb~{MgUx&n0y=;O`IA5x3u0BX*Ba6Hbp*Bgaipm#z9pZ4o|B?HxZ`
zEpY4;UETCXy81=gdFs0>Ppk8zd#Gibty0@9UaKDI&{0i!yQ_Nt{a)&tsKsiK*GRR?
zmowF}euk<h%vbMzxLh?>tyT-hdul`8wyV9pXQ+q%n57!M534IWPEdQlq^qBn-=Wsq
z)LOlB_Jlg5ilYA5^$qp>z`5$umAd-<txwf!9Xx+gORgWb)yGrTYwj(hom_BRHIgc6
zuTA&SmTxSojn4W`J#fC1Hv9Kr?Nqx<>Y<DXt=r1HTK-qPw0X^gwV>VsnwCFHT~ThE
z`d4IiZQ6SUw78`n+M1Z_>b{F#tB0CgQ|H$V($XIvSFd&|q)q8uP^(wOTkF8+Kl4j~
z)~{}mHaBl!?Va~e>gwk91vR5lh&H@-er@;p<7(=i7%lkd9ktTFvf8ZoOKAn#l+r3!
z&8M{(URLW@po&)JH81Ve)Eny0B`RuZ$4hCGTNT#|{^qYOU6EJ&?uAfo>xDCFpDmtR
zo@M}>C~Nk}OR86;8)`S-;@W^gUfQZVfm+SqytQ_(m(oUG)PpqX?5*c_=+84av3c9=
zt@Zj2AK63C=a*k=`1Uz9=sfF`aYenc&PVI`#d`J155d~@=~vZx<&UWYd<tsC`u?ph
z-6Crf*ZOF)8vLf7i@&a3UGS+|=S?pyy=`9YK{YR}=B~@?k3*)bUpN0z?b!H{+OWeu
zwaG|X>)$b0YhB@IRcm)m9m)`nE)%Ma``k~<4Ea)5YuP-sg4aB>##{U}`;0^CQY~0J
zIANRmzEVI_&zw}>E##&3{L4f0@(t8}X4K?O`cn;l@2J}2K#(@h-&>oT>ZzSQdrqD0
zeNjDCIY8T2>|3?L-CJsAQ6H^z`$Ou;UHjGHvsSCaZvCMKc9ON|_I_G~{y;4^vyj&B
z3;n(-1>e)2+o3<t;KcSM^Y!{3AK6Pk`0?**@n$>K^|Mc@dq)4HZclxr);o1lU2^)Q
zI>-AvwaD(>YIw|Xb+2u^8aDluy1l{{)%V1H^=#+uYSruCs0Yh^st(bARn_J%YDu@R
zt4m95P;a%|q<Y@?R$amOCcm1pSsh>DtQ!CRZFO<PIkm#s73$bfPc8YNr`D;#PF+1x
z%}^Ki%cD)}?WOI>ETOeMyjLyVb+MY&_PTm+#agw^tnbv<tAD4?$@{C?<=i^ex9UbU
za_>R)r6!xyhc~XPTEm^{^r8FI<KD~EBDW5z>a^Xe|NHmUflYo_>&`i*j&FNft?Rcy
zop<+~`toWIZRE}UYB}E>>hMl`)VEIPn^dV{{<!CM=+84av2~HT{706$FzBMX^k`}A
ztMz%b8I4M5A3i*<M%)Y5t{%9dJ{WmbU7GMn9o~vJzrP)<?Hep<e<xj3^V@>8*>5jX
z@8$8+N_6wqdN#_h-BrD{LNiCG_9_8dhT^ByAL^xbTYg_X^ia~;9Prn^tN)u?^7OCj
z&AfhE_0B%JR;%iE^>i$=yNskYjeo3Gy&9m6s}ZJs*!7fJu)DwZo6oOmyVJ$AJck0c
znFA&5-G@J`)o%Z)hHpKi+JCyJ)-I8yu6-P&l|6b(-P!LCwda{U+T0~a)$(`BXnz+d
ztWAFGr(G;`N=>VMTV1m=NRy2)t)Y*kooahXy*AhLwi>a1*a4qBvfguReKoyNH`V)m
zn7XKEoGPyf)Bcp(saw;UshtBxstddas>eN()aaYd)#=yTse@jBN&TteRCU{fRqFg@
z!`0pIeX0)2tf~&`n4+GqSyT0XW2~BXvW=SDV4eEtQWsUb&`?#Y%~7v!8KPeKx~N(r
zvZeY{=W6QEgU8g+p(AzmQAjzpZ>FjqsPw)%d|(Il>ovcq1NT=}JAS`VZSOlm{jGX$
zwWog}wO6HfYT~O6)btO^s1*-Ks?(Aa)x&-IsLQtZRgEufYJ+RVRR1n>)n@Bjt6gVh
zs(S_vR4e{4TOGZ#pZascJL<p#Hg$d5H`PuHJ=7lcW7KLRJE_uJ_K}Bt9_ybsRVw}=
z+g^W8RDWK9&A9q=%iIZoTSw2e0{V08dy%;QJO>{D^yfPIU;kWxzSHmZ?D;<_(KsV}
zN*Vt$)&K2)IpXR6dd5!!(Et6CKLtR44t?$lKz|;C&IN$}Tt@v90Q&QpcG&>*ob&x`
zQ)Amu58Lbi64n2$K>rYj^Z&n;Y3_0Kf3MWJ0?_}(zRnGR{%;PpTmb0*>S%u!fd21J
zw`Tz8|1|o$CMk(4BwM<hsqXFo|G~fR_6+}T0J{52{C@(_|AjvOzX0g}#vuDY0O<e9
zDF0^w`oA;H{t<xwpXT?fcw-K~#hU6Ss=Leaf9Y3uTW0?q0QUd?ouZrcbT{_xf0LN*
z&cXFx0MOk!y8kl(-M!Q6KLXJI+j-qGT3NSTvNil4(@OufL-7EPP5&Q<|F5H)api^%
z7)xzed_44R(Z<hRc@N)nqH;ZFNuE=9D{Tu#`zFStMA_EpIZ?Thvl`DSyp^^EqgoSX
zM5w2vJ|}9gS<|zEcx2{P{W&49GJk?(tnG%+dfSaWg|2u-Y0AX)vw!rxEXzUbNB`s-
z0<};7+4ovtu{dSoTe1}OiKYat&phdyNA?T4;weRisY;N2_9@>B`l5Jcq6cq}Xs9X^
zm(ucjtP;fUY(*VORDw<+AD^TI@e|4JqV}xQzL~ls!Uhn8gP<LlYq~#j)TTh^!&4EV
z>;2FAzAt-)0>D1|9Ayl$k3R4F&}%>}bshbSua~TU%b&L>%dTi;;&woGpiF$5H>zZU
zGVuUl-A1!`?u|E-<hkfONT1w9nb?!yOXHNF*9lrQMwwU;P;LwUbaM8ri@uqmL6JnB
z)l>;;2%Xeal<bo41+QR=w|?|x-#}T<(3G%zo>G$!RAqW=52?wI%@pyg+is()9y@9z
zW!!PrD&#7)I@v@CD-H-hRSA1jc3<$E$*JLvQMEqLuf9V)<mvX&*L<`5rgL6v(nV3i
z)@T0i8!oq+9tJXI*#Df`A71wztq+V<!uUdVc7$bdN*F&B)}%c(VKimS-GPA2>{)*>
zP8PrDDaB+qH_A5gkYZLgGs@a$-|)ShZ}Div7t@Tg6Zo29t7gXHruI2Ed^^}z-|+RF
zam#m6tHpd@B<AlF<9|-=4{!NS(ia!N{nxmEm-y}olN<cPcby}2a+clyFW)g<nMssm
z`d=*2r}>L7SjQ9>1<Jhb`!Ki$C1U}OiK9lFB>UVuzU}R+@A&$D`M0l4)@Md5>AgK{
z4d~MJwVt*HGvNK<--cJAFC-}GKFHlf<pwkE`P%eavC6S`q$g3PXv(p2M71IJke|ri
zhvGgcb`Uj{oPSPKj{QzlBUVX24T>eIl71Dsv$>KUhulq+T08(nH*#(U3M<OI1m&0~
zQ8$660X4GZZla)g2nrExD^ZIjD(PPnbu_5vLTkn=>Aj$*n<~e4g3?VC6pujhDN&D@
zqcDP~S3tGheNl5}1B%C>IH)P<h3NkQ^h6hGvXF7|0(1p6P(U>=SxFxbDmPJ3WPxH!
zoN}xoQEw?qdKPGYWmNtNbyL)we&q(!{cO4&5HApPAH!fSDW9N&wm{P;$*ah}Yo;6%
z!^TaNQplhv&7yRR!hZ%DZ=!90U+li9IkN$U2PkGT7=6fnF2mvj&_u?tlv0Ckpo#@e
z6;K_4cM}DLCn!d-P`4oZaE8TfqHR-^V_$*FO;L0D1%($VdNU3Rk#hi3WhH3FK@Wpw
zKQ%fIy~r{$&wUgW-k|72Eyj_%6|F9Ob<o_SHl;vk$=yUj;RA{R^uSJ{b|mMz(4(AE
zFOd5P#>s2YGfA9cLE|P0iaemG&FK4s=vI`f^iXAL(=kRl_BYg$yNQCr7Ze2&j2>&)
z*K6@ck4n&damMvMTw%0}H?Fg<jhdfiTz}tvRF0bA2Z|6-{6N$-u||(DqVcJIkFoBH
znlmd<_=BR9YFw|()m8=7=)un%M|}$le%~?503AO#8CANOah>1ta2Ewd04Pc(8rNSW
zcPm<PqF;iq+r;R>ccioBR{Q^$f+7$Ua~KbYi8`7dxk1$C)aEL=XMt`i^hlD?gRe=s
zi-IBu6c>ng6BJgogP`IYW>Hh)j2>HE<W~Ftn1Ui06e&b)MXnP;(+hfvnjEJlt2Cnr
zkN-rylx$r8l3N3AqM!%?#b9RS9ddq=-2I@x(IX$ZKf09_DDwJE2tC_Ywl!|o!Z<tH
z)7JPt)IK|}-{pd5Z@_w`8g1DaHy#Py0AImAGt}?09+hFVJ;m2T7qm3mMq#}=%{coD
zH0x#K>^k>TdsaTb5qg0%FcGX-3!`mqu$>_Ihk&Y^VVuo`x*3c;E5F}}!gUBP>XwvZ
zwA}=l##AN(nqr?B=6AW+7V73t^ViTF)NwG)^T1XTYM))e?}EM-2b@0}S4L8k7C*9j
z0F8xKKyEX$aTB#?74#depWOyzE5dF%wLAzGfvgtTExEhs^{aM|LVm;b+nUn<l7}tw
zK~v>6``yTq&?e$X7!s7*ouThESNey#kIIoV3Nt4=616WVHfhRjc9@YfRpoXg(6~9e
z<sTG9Kv5)G>HjIwOT;Vv!=WpqmD@Ji7TE+;?D-?vY4$$}?<NY0qM&%tQ0d={XjzHM
z?P;J`(@?p+#Qo8&tUyr=6ea0_pU8E4ymGr6^qQ*le+V5;E%-`(<dsCFe`!#;iGrdy
zC{7UVgiExA^h_IS@=~mFyS$6seJDzR;(VNPyBN7XZmRV6Vw7%#SK$5WAqh0wl9m3R
z$lXM#MM+Tfi&Of~AgUF`gC6+^UZxglE^@2=e@sCU0g9`!O8?R1T92p?psS%`989Mc
zC7|u8#cpciCJKsDpeVw6@e2LV9{+Y(qJGRO7UurwR#u=W4T=ZMjb`Lpj&&gzG_x5O
z+#`rQ++4ZsgZx&qayt~>O%xPmK+%^u!3#bj?^1&y)F2&HOWhYWXEvZH3yR9D7pKX&
z0BE{{rb-i~e=I1<FoP$<M^THqpmP%iMLAH^AbL5XwIFIcXc()MKd9UkHK$)tln2Eb
z#sjxkBcm9lL&^C#XfDDV(9O{GjF0l}qoAk&im9Bzrx0ZyQN>Cg2AW0G!p+ew|DdP{
ziW_mpTt&7;RgX31a=#>MMyyd{Gjw{KQ6k>M7Uhv(%w^qh7iG6y2^2jwV{SMotZ2nm
zqXaj3qkhti5(A)?+)We|l|d0pH16U>6(#yva=)V*bMwK!mS~hH28sy@Mu|bx!c7zu
zRX|Y;6g7$ZEj=*@nhHJVzNk5~0Yz0%EN5J_BIn=e|Lsr-o@<_{Nzh@S`mVV#x3v2x
zD5`<tm!?LEZ-{P1t3;2CpcW^fZC&J6`~R4N;ssD#rbk{P_krZTndrYTS9rTs)SX0Q
z?o8;&=0=H+K;tF~it3=47;ls);}WGWC=x(5mhmynMeaTnH9+Cr!l*hxmZJBk8Y@ol
z4%sqk#)>xZ*WstbpQ1h5(`{6ao>3DN%Uc*L-X&_E6l29(9#XVQ)IfMQN4NZgq82FL
zOfps+BG->VGn?FZr5ROQL06_2RnLQFUxu-w1H79kC~AYk^8O*USVJuu(i7W=elgXk
zI>&v{t*k&%2NWgAxew9OsljHV*N49kf0yxb+C^@)|Bops>Vo1tIrEOZ=&qnx0j=B2
zsLGwI=rquj1l9jIYEFNks0WG;M4L?X4;dE`(9Mh!g`QcSWK<0Y%>vrF-|8+3iu#~<
zhjFls==Z5f9Mp>19r<t<-je^1DJWh9#oZ>xx-?IzTHZvXE>WxX)r`93WvN<^1fwo@
z`Ko=JWYjI~J}Ot6(Et?TvBtVB9#5j(jWg=@1J#G1S?#{)R(7Cx2^2MnHj$|BPz#as
z>I7q5Lu#=s-dMLCbd%DIbvz&DE((fBP`pC)-bA&cy_#sOTLLOiYVwEsqFeDm(GV27
zG-KTXat<W-1E7haHoPdc+J!h{-D{wk)Xb>+jzHrc3W_LDw51k&29X`5F+CCnniZg%
zCu-!L=$2nlGy+8$m0v_HS~NB4^4UYR3~Dk4RBx!px<&A-nj7m{!n=urqA@5=H8s}p
znL~CIJ^@+xH>irxGd2Dd6bdMQjWX({5|v$V-FA$F&5BVsAJIqCBdtMqINqo`3Avjn
zwTK2qJt}{SsJ)_%x?6~Ph8lQ-%1u#o`UOQ3P>iAm-w<^q<6;D8;>dj(Xd1AZU7!Z{
zlZ<sAA$Jo6MGPn&llwiQc5h<T-AlB)peYOQrl>jnf+7|aB|-5HIXB|GA=ZnQ@Q<j?
z9@dLg<oOw`m)%D}p@KqUj<h59XvJ8!nrM77woZ%-H%GVpgF*wv>Bh#oKZ&}Y=&w?X
zHk@O{s@0Kk5rw=pJ(LVOH&IZ;f#PfC#z<<>A;wr&ifBqBV_hNlMa`KFDB?j85vv^6
zJ#BG2<CTo-k}d9{reyr?VT((SSB`U!FRpPjC1aoas2n$=sX&pa91oOjahp`-IL~>+
zO#@YTP`Np}<sTFYpt!Co8D&5*B38-Z9&g+%C=V^hO;1oVUW7l?OgWz7J_?FNP%Nbu
z`G}G)PC0%JS^*U8sfC-PTmC`O3=~u7fmKAG3X0Dd56eK2N-a7tE_Orxl9Y@Z$lXLi
z(Hs<c7>(6kqLheJG7i8WZ=z&e_*YOQf#SVrC4*<6;(nzjv#7}pP;ti~ZWT2-0h+q>
z7@uIdi&BeZPz=+YmA9hJh7X_y`>Dl7s3mt3mE*ol@w55rzeF3UFYx*~xv7!LOZl~(
znsN9CcrH*<`6V2!W0G+=kNc>s&1gk2B8mEo$CGGBRpapcMC}W04=VSgcoh^~<BU`u
z=+I`x8Hc^7!Gm}s^-rSz9&a3;0-6KK#^D&`ZlctpH7FwJ34W|1J4$G*k=hY{gkl{2
z40LXaZutd88&IrFG!Ap?KpO~}mE;~qZCVk%t!AX=0nHTTJKaY?@fs-hP<b2C3o#Bp
zgL;5s1}NMd-SQ8L*FiBc+BkfY=)Dq*)V|atfS%xzQELhse!@a~CD}+lh}=yS6mNiH
zDK+Rvl<oAu45Dv{H4gJIl)Iwl^y{Iuw##k(bUlXb8`1NjWWR&#&&3$2$<UHi=VR!o
zBqMb@_}oOPMmwr;Mm19FfFhNZBb6%Xpg8aT=vG#sXb*~&L}%-%d4T2`l#fbM`H?lP
z3@gVRYA}|O!EXk+ix$=TieFFR-)U|Zh=1pQbAAD(M40$mXJ@@qf^nGZ1731%97aVO
z4#0OQ+ZdKIHv+Q~rc50v(_ErY2Srnc$Ta9+PA>dXk@g<wMnn1J;PB6s%}vxXbUQJ0
z?=odnvhPE@5yazZ*TdXla97-%eyK)hs=*VTjwQ4&OXw=HzYMBQP(Ix`90MIhmG*(s
zP1IkT>E#?Ot?hnq`VG_zF?9-31+Lms#c6IUd_St=ruaGiQk5=ZjHt%0U=Le)UaYmN
z7c@53(0a?Z^4;SNtt#|jb3^Oz={72tpV1W*H;Jwhtq5q=K=VLj-4``yR-kwb6n~KW
zeWJIAP9yr!7-JXT9VkC5!PwOTx(AfMBX<)8MK@4XOEk3ir6*AWKyw6CGc{wE;lAir
zR-ot(igv1@?IG&c1ViI#rt<!A^bDx(G&Qs_$j3J~c0Go769vWFpg7ai*u}GT*->tR
zsv-Oq(7f*=cOQx#ptw)8QA9gQE%+R`d@p*UJ~iQGRJ*=|_GDaeZRsvbEqa3D8YsR6
zg%xc-Xn1O+{6uIi7rE8`Kc=AQ<u|JE)J`6@n72}msp*m}W=acVY7F$%X2w+e>|TDC
zL#NJ?Z81k+`=GYCnUT*vy|>?Jk6M0?^P=6>$1gyiIsu*E(u}D=m@Y^&@>PTW2>-ID
z`{*tn;uimw-nRSq^&6x+f?a5COkGdV9puv<5QUPAsXQ#_b})gyc8`8MCO!2G!DNEZ
zrXmwljC{wSFHwmlfU$UY(fbebI5#aH+x`0c4c4ay5_EN{k?&9FUBK`?$CwEz#?(6h
z0+=@+*gXb_if9CTrG=62E_4+j-h_^4jQkGeQ~s$_sF1s;B!xH!$xr4t-h0|Z2a3vw
zpuf>gC80MNBg5!6cY(0{&U5^FB|j<8fPKerfL<kD2^=IzDX%3enS3;n!uMbUxgJP)
z8(Kw{QU*0wGWXCvjP{!FJjfNe7W%SiNB;nPQ`)~yQUU|vSF}(vZ$f{KS2B;|XGBva
z^Ah~9=1QOsdi}8Do6{-5_~Qfpd?`2nb`BS3iTG(qd*~p)a5>ywX^>xrJ}6lUT!h)8
zBqfvk@+n)Pg9zV*uzhg9F;NMe1$`Hu7d)gC1;!}!79>02HY5D!@KMc_z?W%1i~Uu?
z|3v%G$lG9l2fi!qJ>cu$r#t)w^m@v`<BoyjpGAMShYt3Ole^km4fd;`4=0Onfis&h
zM==XaP%?SZLW)0C<%8jrS<RG8KiWg!e}J|G-WA%9Q-wP4i_qtb_9-i&+^b0W0vM%{
z9|P8J&_L`*A^#HpgP~J_$A{}F#h@Qcz}p(+gxBz@vx-aYp+o#C$UE!}hWORjo85Fu
zr`&c@55k`(d=;u%26t&>$!(#O1%&6P+*3|Kxd2Kz2wc9gld=k)FW#rT56_Q+rEvEu
zurxg1OU>kBF=Zuic|IoP1=<C?0IJH92`P6-udjy`nxZj!G7;P$4?ccNnK9H)k<;zb
zL;XtXUCnRThnwK;m`mw`M^3QZr%B>bj~GtYN&R{IfFJyy@EHll{$jiZAR@^~3WPpN
zH1^j+Zci|hHbN(%zXAO!@HP)A=4Ml4|1qePY9!r2e+_;-J*6Ts<8d?t#WKx|Bz_e$
zrUnL6QQTy|H`H&e{%NwYKLT-H`~@Q}+RWI$5C7MR%LkS*?<N{aqu~=V4}<TIyc+b=
zG$Sb=^aOFaii|1N+}K|Nej`-8Az%xB>C>2zgfpPs@Y5a`V~Fz*emwq4dGI=hVJuvn
zZTHVYIETiD(8^%`8fW84!UJamFs=+0{QI>34qq5&d7v`9C!pfd--11#B*jc5_+|J-
z1g{1kOH$nYh=~U5VECVi$IH-SKBt{mE!eY$1ENm6vA;gTYGkk*<JUpB9m<F6N$+9&
znD!gc42%~MR8*)2?QA9XZ$f?t{U71?!jFQtqgMpIJMj0RB>=Gw`5W-X;EN?0`vc&+
z;um{+mJW!q$x3;4$&I!pDdi(2Tcf+pmGV5}+b9b;_j?<)gN~$qVKb%t4CLP=DNEji
zkAwz5AJcvsdMQy^G7GvhMOpF}de!ik5Be+gICNoiWy!D5@%Z7n`$jB~OJ4D`>5blC
z`4aykJe>4xCy`hU9sh25n*x)xHpllpQ-7nm`XBamDvUH2K8t@bk5g{Zn&!oS*H9tX
zC8-J-Hw4)$P`<Cys6wi;WHaxYDh^`4@!F_dva)0sm3W6tIny_Klg#g-cPK?E9|hfq
z-KWr7sY>}mv{Rw-3*lFg>0xLG{A@-3B77#iH}qZnPfk<H57!aA@X$FkH#6<TzlY}X
za@L%&ADbD({Aud_?xgRUmFZ{dO>@2-N3!(undhX&=9O%(V5k2~vvLPmA;_N}LI)ww
z?>6e$Oj&XbK-I}G41|GDU--391Nt(ViA*+wyd?BG`g@_np&ik`L<JhdN3@`C;V)7#
zA)iYHM^VWw*ssOU_f%jn@&ieV?kMR2Cc5}{`Mxu=Pn_?rk;eVy<nx#retE;m?=aUi
zt8!(`iuL-#*=~&yE7KNcWjmW=#2RB|@;dUDq$O0KJ_zZfCCxzQ1HyRtB_QT0&PFN7
z1EBTDXglp<3~;^O=nv$A_h1Z&(aw3Yd>nSG;fo;u3%(8f=`>{tTkl45h?4?8h6-&H
z733BD<tHI%Z?4uB@&e{4-eZn-Ym_(7G1{%ZuY1p#`2v$#6|($k=4={njtQ%x`Vh0^
z6?ykTBUws}Pg0Jts3g89UJv0RC331cwhaC+UK3G^_TK1Cf?ni%;A}(^=cOsf=#9i}
zw2wosU>^ZLEtPjHKu5vz9qGhz*m>cv1$LX@J>fS<QhH)B6bjM6FOO!4F>t|L556}0
z)Z*8eqko}U8B2fa1E-xueW{SRd<oO*nij8_C-J#vEeD&mcn!$Sh+LKw7Xzsl(7q;x
zFQ6(sr{VMp(DBL2vChz*=ruxrZ<>-m1pY_(x#;(&{UH1$Qfdm%={lV=n4VZ3!5|E7
zBAAB4T55WdnrX<{o+R$R?u35DoX8gKY?ffWIqWUKJPYUe7nRpZdtY$c9}IBvR>H43
zrUyftt(@auS+iy*&E9uP`5|h6Yy(IKzHZM$BZ)Q1WIaHq@(!5^0R4zcR3)QERN`0K
zKPKaEXg?z=guFERZ$KG5=}Vz9eR&t!4!>iG?<nPEsCe1Pr#O6(O3Z>Ef&(#Xu9-_y
zpqXi2b1Ag=A03%UQblt?uqtNp56w*GIo~rS4KSCs9{6=kZ7~|GYz4JS+u)W{UJ*b%
zK9|@K#GOD+m!;<=(^@IKbOHH#_`1+{$#f?P{Dt0Y=p{h+A}@mda`@%YBG3)-%CWBK
z^+;6obUqGA+{9~luAo=|1-dnnv&ykL1n7d|cIZrg_)-9~hIO=gQDgP_5;OA~W>;Ez
z+s&oU8Y4&fT+;Pl#9iI#c!oK_t;|Jf#6M4deN)h-sm9JRl2qsG=Ely~ph1boPWE|q
z%D|tKr8<A589Ui));R({$3v=970OnqPIc%4_;X3dPR2qV34S#4wrPgi3;CG}NruW(
zn{^K0pd1d0G&j^Z=zSCzhjmspGj>iuamF}qmyE;u2Fwd6b$~9wxI0RL7&pYp<YZ&#
zE6@={OQ(HZs-eC|`}r1z+8nwWdw$ZsPCR;yz&c@2w%m1iMT5$muG2c%Q0?&hk@HAU
z9d1FXL!sMK4D|p%pI&Ficu;hwbv9a)fynhhodtMZhgJ>b*PyA*47C!`I$#|PpC1%;
z;1i(?>pG{1#`Q>@Fz9^zd_;<UpbMKiv)e&7Tn^RQPy1N(Qt`hE8jjvFPk~|rC|Z)^
zmw3Mc?Mv%5qO*govzzEYBd>>5b9fbRL-BSI{w(r3^kiZ5UjzME<T73bnqJUw5e3u?
z#X9rw$3r)DQt&q!`MnflCp+*u6+y))lIx7=o$t%u`A2nWs_-qJ$WLPw2Q6vR@qD%=
z<g&>(H)$kPeABkN$-iUKCW>PJ{xAQMvSPpXm%m+Cc-3p<GSf>FlXf=g?`R9}%S~D=
zT2Z3VZ(;IZH*rR<XwtrBlv1XBACrD&((Yz^8&kh=8@tCH|1tW<X8S19uP^j_;w&=t
z@0oJDNeclh8eDd>eX=RfH2r*Q(i>)+PtEqGrhY+_?q4P*9j`F_+ka>~N2U|Zmcgb6
z5IE|4!1Q3XuQxs0%=T&~A}g~D)6YuNZY6s53xE4B*27JU_9m@jYF;quXQt<u%=UCs
zj=lroXjATG(!wSUGuy3P`<Q<IG7<Rab)0GKdG7fS3PZ_})*(7ne6ex}#`fv={L99c
zG2I25^ktK-F)c2e)ar^ACV#*TAA-L9;XVIVc*gNQv-zrt;-X1!M>_!=HQP6uVaAy4
zF9<-_?)%#dJKRtV2OVoRdqC~8ANV&ctC@x;&6awmW+#(=Asjt?;6FMr51IvlG*2MR
zeCS`yYreu?D<1k+D5daDpUBZ*3W6RZ)&)l2P19vtFxvlm=s(Gcg~SZL(iX{JH7cQ=
zS2WLl<X@qI;tP9_=3>w*c*#PoFU>+;3O+yd9olO^J&BeVdP=}~_{e`%3q=Hfon`^1
zAL8QB_n-nyCGt22{RXp&$R#L`M@6=Satk}M0(PO$M(9_8UU9g6>^~`5xrtgIP?td*
z1{%Kg6{%rX8Co2-2Wihku6!mDY0cn@$RlZ|7KZ)%Ea!TW<v+X4cR1iNy~y{WqJw9k
zm>2#R;zht)Q&5Zt;UE^d(E9{l&<SQyD+8b|{3Fwk4|*S52w0)dZ>Si*@tsb}s{D=P
znWo`z(~+22yfQ7avMHx9jsS;DM`f{VKszJK5wM!sKGo#MnE-m4{FLR6H9)Fwt_2|;
z0i#<eqJ$^V<jO1Z5}wOK*WyWh_cb4m1p;d(vKBiKh#7bZ!~v#fB0A(}@MFK=5iq-D
zf+<;GYP^akp683)fQIngkD`cg-HTjBL~9OHR7WmgcZ@9%7=E<3M9+TIGhmhOXttKu
zblmY}%*d1|#W=<nRD)5kPnay+*rDSZDN0G?k;sd~-$rjD`Xw4G#xCUJ7%ZodCpK1;
zt;k<OKbS7r*HBTo@QQp7{kzb7P(E^utiR0e;T<re&T)95H=8cphP)`WF?tsoDaPN>
zZlw1Le&54S2K+HmnD4y<N`@-qQC~}#)|hw1%>K|jV6^@XhTPAH+y$VoFziPDzrrg3
z;~F+{4ef;mC9o%SF_sxnfAslwVPp{!Tuz`Sc;1WrFzub8oWG3Yw6_8X?^uZ(g#AHi
z_b5eq8#)*MSJ`II@(GyI@`z9dUJ}(ChS3)|tB=#(P!(E?s)<)?6d+g$^rjKKEP6L^
zB3>tO8M}S(g9&D*y`AW^3wZ)&>qCIw4~<cHN{6cOyPzME-Ya-q2^?Nk5}65t=6HCU
z`UT*jGj-#`w8(HgHm6-D;zIatAV`I_!tMm}<M<thpJgEU9C>B@AB8W2KUpF@-+<Ar
z6yEb9ax#iTe`K81MsookOF<u#!&>MlA}@l@q<uQHAccGcl1fB+30joM?;x*BZhXl!
za;NColYRlkycXfv{?L!k9L8VMX}f|iu>@VlM<6lB5~~-sd!gf?f8dK-GLZ+UYZ2PT
z840wJ%di)nc?QZYpvdFsiOt9avh0B#XC7q`{9{4UW5c8r#ZM{p=`VYhf53=3U&B;J
zxf90KP>#nr_W~k2QF6X59XS->8vGA{TneT075f(f0o4mDE%9{;-=aQKaPu<i8RTGp
z7!Z)@brgs8%)o$>EfnsoMT+I`H599(SdVxCB-0Q1Z}{zsqs<iVG4i3r-2fGXL=^Up
z*>yWeDgt?PF=!qJ229eGF*L8oT_xO%L9Gqc2lJEAV#o(VN5c2P>>c{817SV{ei`i6
zkfu1BgyQZJdSTS&E_?&<i&Z*5cE3ZT@H-s)o7n5<)gJ9!9JhD1`v(V%D5Y%1*$#|*
zLPN>vN0ggj{04NhsMU(#fa>fmPX-6<4s}4;iYxA6MmA^A*}n)0s1T~`g#CivP9|bk
zA-)z8z)tiLKEz~t0lI*Mu0e+Z@I`1K>fED=yi1uyzf6MOhnB=#yw8Sm8K<DTz%~K5
z6|h?_?6Ko*N|Ed*jKU%;imjouMZ&ZX2Hjrx*0t;&p#kGSo*63oQ1sOveD1+#Ws344
zx_=OB0rIb~9SpymzSL<i0A^AA6U6EZFZL1DXum)`cx_bVN&MI!h6ZHT5fiT<k`tiZ
zfMz(J7GhZ!6q#6b0i{fPEc_PeL3sP7d;z-)D^pPuD@Pl|K?JG?-6NW3=MN|ryN?hx
za5xypVi<i01TjbQqcH;d6I5(I7{A7R_&0zib{qQ`c4BRs?tqw|S-L$<=@TW};<vO=
zZnc8eNLBiTdD!B=Pf>0ilWg%PGn76d&@Z4t&@#v?(r%<Gw|KlDzAbj6pi9z~KFr$q
z7U+FQ`zqSUKu@7hk>Y<(QTq6L+Tvr0HyQn6FDtjENH#q_?4<KjIMmBI^s1QG((ea3
z`D>S)?SGqhDLyLelw10j%$siAy`A!Tp-!4=-qLDk-scIf>XcVI@AUVF<JOffNoPRD
zsvBPfq<+wLFDrexNQ(CbDI1gc>twPX#E;M$1Yd}ZbuyFDYX@C`oYz9c2a|ba?B2(Y
zRXx54dRw3c@w*;*(H2UdQRt0;avq64i~T^xQ~U(;)>y20XK9$96KOf~Cc#?smUG+{
zC-R?5I@>)9IH{!{Zr-%}%qf-zo^k5`XjXKTc{juIcZ+!BFG<7b^ZX#{1G0_K0m;fO
zCR6-1c=mAd_sQe}^d_0Ha>t)dQ*I@rcN{tq-bRLBKu1!kLdcgRFNOU~^qI8r>ob&F
zZ$Ss)?=*hO<F7J$8L5hX>vIGP&6|XS&0EgV=558bm7S>!<g+pHuYVpVt!Un`?Oe#o
z+e4i6(XVEO%s#d%VvQ+lY*=HeySF2MN&1Kiut|^qu%*)H4oHK^q&pepA+yIIJwf|D
z+9x1qM#e`$7bAZKxv0RvbmdkkbSN2f6F>eVGQ9v@K_zCQ|0#aF@n=K+1@fkheoi~_
zn{^zlLSd^p3(g&IW^gXhnb8LG-kLSqUoYsCTNSd#Kv(nT{Kw`kH>;9X#rB!EW35?X
zjTyZN_r=7&pHme70kRY~r-kAl4BeEhj9=s-#WhJ$#@CdjxZl#0@%ND5gYxKIoF{rJ
z@?TSx@yFp;qZdKD)Iu2_gM3^|W&F?30q7q<zA;ttuPE%1w@2;=UsIO+;x1yi62Xk7
z0X97vX$;<W;QbY}Q!<=rwt!B^spI(3$hYA26h@QM6#r`Qdoz^rqu>W)mj^wKC{!x0
z8hj^DDee;}H~!*+K=%^-4*W!67lr-1=t(UVf364k;zI%`!qA#UYe%Atp!IFEQ{eBB
z>xWQo$NO_HC@u!C`DovbmsgRWqn+FLac^O-LGxqZ9eK}mWju2*t~7oM!vBfhSnNf%
zZ9(OYoLa;&%KX1|phyG-W6YnmE-n)<z0vMP4URz>tK<3n&7s{As}0Q+|GLQM)6U1A
zajZN3qM+5VACI1h@;!QY@pBOV8_-sVKLamnGZVD);rRe*{3vJw{9;+uq8TU_r5PC=
zJ#4WXQjO!BSz;%r7{^)7ViQ^z8B?MCQjCm?v=4#ym2I(Gp`D<Iu;aWE+p?vRkq6oa
zdoDm@AJTru(-ylg%{U$n?*qRWKSEv(`Az(`NB%Z(UY2Zf?2JyJXoWXE3X0v0)fHO%
zqRqA|wjEkWuvVdkk<TXzXPeln=pCp1%M9aqD)K7ucK8wK@sUw%kru{rHYu^2;F;gC
zt>DW-Uw|J*d+SsqqYi#j(v6HN4iue1v6<G<XjKCZD}3xl_$0LE!Z(Gc;-x0mgOIbD
z#O@?|H1eLvo5P<4g@SztymvvKoMB{mK`)cdC}<M7e@FWd=ruzBcl<6x&SOu<+5J(A
zH$kxx?_bi|A1`cMV!uX<i=^1$$XT;uGvGJS-V1NsF^GK=y-?(H@WO9`$4+YrO5{AR
zl<_xo40bz_d%?5z#oj}2D}MjN-Vc5T{siiYpjs^p6kP&r%_gK8{qIOp!hsgX?Z?p2
zG^77)=$kDJUfm`oe4J+7_JMMT`*vAbN?4p?+`bCUMDGD~E_%E)HlZha_U~;0CH-zU
zggu^N^tb<I3&_+DqBs`^CkQYf$Je1zX#7OrRqzXOoIv{+^r}K%!r?$1UubFYzAP!>
zcLFlG5<G+-c%62JbwUCB_&|pdcrt#vpeOVh0=Fa4?~-BM9w?p?7{Tu7_IE_Y!)=zd
zgz7{-gk~KqI0YvhfmVeVDRIj;q1?+x|8h`o^lRXW4(K04I~{iW0(3iw(~y4w{S7@H
zs_6d{d<7yOgKwB_+^zyGgMKV=0|V?HqXNe0o0=Q_@4|0Oab`+I`uBn#3eTdKP#Ql2
z@xxC^^e>4#AO6e32c{eSKc`((E`j(<(F=rD$N$^VO!T<Hp1^xY`u_ya({ugLz;}eN
zgxz%K&dHu<big1zJw>VYv1Ci`l%~`g;bBYe4&|mvGHc|TI`H+Owc)3>P-?vnEt#sU
zVdf>jpQ_Ya3XMj;33}=9Txus@&roW81^+pEFF<>|tkhyuCrio7nx8#w$=?x=nV3Ag
zg|g-cacMp~V1&MhAZ$*PKOjg0l-|VIdw^mguEoY9`6AA)z#mId)-;C?CulqL>fuBL
zokIKD$VWg+igxVSfh8Y+XG4<Q6}>OeyMdf3lUxURH{>Ufvvsp)&0$Qed4PpX3sW<>
z5Qt<nH7FbL<aWppLtlemgr`BIbqM|eNGIc|J&4LdyA$CY?dS2#l|k}us0~^Sdk@-+
zU_TC?#k|&c=+#8N9{ElmyT`nM;ramd*^(vir%d~xzqO=X=r@9&4?hvRNzi65lNs_O
z@Jrx7p`DBD<hk%%2qb&Z?hmCq*K9?;2D%x&BxqgYT?Q|UNOJoOWler)MeN6lTM6^%
z4!)#tiF=hXBjSwCi+RqEhkh@e;kiE@ei=LuBX#CtEv7vDUHG&_qjOIlc7252CHT)&
zqjODYJ>;yDG3iZ>&I_QPv~xQwrggm0`9t^}#3_MZi+JM_#fa&t8J*dY>oJARPaON1
zJLo2|V=p7EG-;Z-8>(e~1Z%Y)cdALp%xjD+bGK#beQ91(SnVCn{iv^bjUhXBQ|ujk
zgP7qEvy)7dK(-mWoecXxH-eZ~GF<wE_V!RNQ)50SV{QY)sPLze{~BjpYKeXg=wjNF
zXlEeCEH(XAiZwdlL_b(HF71cPRAx4I<%t`_2`Xl{xu-8;?ug$u_jwlLie_fr%#4TG
zYZeR$*YBF2OI9*3lJ@&L+Y`*)U`g|tkYVPwvbCqPy(c}%@0p0_&pI=aV!j1uYjEF5
zHZE;~o}wo`pcTN$S0!Q^qyIj18u^|B?nd;#<H4htMI^>aCgwZMxYP@W8*#w+ikT2=
zTzZ6jE?s>YM-LeSqChvyp|IGzHnMu`CG#_DtEVSe53`t8iw#WuA?7tuX-DGp+#lx9
zdDkqQmAOT&A!H4$kIg{81DHE!G4CZAm+}Dk5S3^NZP?Vf)D3!!48Mf(YVOY7P%jcX
zMIyIo-vp2TrH49#0T}usC`2W=WQh5!nbCOwd=)Z}gx^95AE8$lUV`36?+e=Jnq&EI
z^IB+{*~3;>T0L%!ac9OnX=Q%szK$+7uX>z)YL(O)^j0aYA!ALNE#~Fr3ji0hu_}O*
z0rV<B%fr7;A8P=w2~e)1V{VbT7$dwi=hC;(l~j(sR!pCGL%$S;;SU(_&F+|@WGJTp
zHK+z3!C>bCJLV@Q#ozE_kaH^`rU9kpCRX;W_cLd`mAN%YYnpcwV$7?_%jQ@(Yw8~h
zGV`|t5oRf^F|o@W?baCRYfk%7=4EDUfbrZ=%rJm;1h~YgzY4I$WO@Rg9}T#~mttaO
z(bpy5^HHf^p}#gaF0pRKoa48_FU>>1W1F3M4_(YOrpa^+n&Ge^{6l6#K4<`r>mr|v
z!xb3*LFv~Zw*Y@*u5TZh)7SzUYG!QB4r|O<3F(#tYgAhe)&gji&|2`U09Gltkx>Rf
zc!aa_Q0RFQ>IlFe=~8y{F(1%XC86DEk3(Jsc^Zk#Cc#_W^@)imGj8t2L{kCogvIP)
zaO{R|MPUzy%i!6t$J{3KCGZ~5pP=26jn1J^F^a`MYjCzP*FkHvTbWyf_%m}XoOX02
z{qJu+iD->>tIw@L^qB7=ihow=QUjd46*v&!P08#VGM@tQiez{cV27cXq0x+yEaZHl
zu(Nm;z9F;{^cIP15%Zr1M=!NPk!jI6ABr2H>!2HOypYU4g^I!CO$jMkOelOG<on?R
zv9|`RHLsoUB*|LwtcqA;!Aevt^^WmxO&_aItubIN9F~D~u5gy_$;#P_%<}^5B$@so
zx{`zr!Vd>%aRBn%Md#!28<2~^evt}@bHHn2{;%baqx5xWGLEKXqECx5J=nX%*ce=5
z7Ia5m40?c*A|F}^!%Ok-i3DbE6l3*mKJz4HRpewzXDzIRZkS8_MDwIq)61OyKbj{?
zD^sh42h7vGHJEai(Asv4Aj59}#!vlT;+i+c8^EGVD^r1Rcz>urbO7xE$Zs;J7f`7H
zol@{5VoYZo^h3dihW<3<LGe$hK_&KMaGcVK690jto5+W=v=+iHmi2<W*fGw1h$LCl
zHRm+7rg>iT?03nex>X9RWY(y+GPUNtRYHr84tCb4hIy0q0l>w6<^qZEc|}Y;=r8cq
z0bT>1r(G`bC}zwM^yUEA+$ZSPAOGLf+UHnZHrrhLt$mPn-rsCaUnhL~O4Wx8LO}We
z%8wHjyZo3;y6-w*M^J690%A%)+d%^vk9-F4|Bcn!B$>2YjP-1!)jd|PSSK`VW{N$C
zc&f6c{#Alfo59<7Ahd)mHNMtVSv%N6YTO_}S^FpQ%JE9=<H+;CpN3x)udHnc-wHaz
z*}nDwbPDY}h17U^qO!IE_RZk;qBjHn2JK$xUq{}AIDf$NU60ylc&f7T409J@4HWBy
zVU09v8MPJ-Yk*nnv9-~(rk1sDwFaPd&bH?4Xmgw9Y$x@mAlU|@iy*uP;uRqO8>H`m
zyfU;K6?qxj37)Hf#ym<}djWdh*xg0`HFknOihK$*m<)M}vN5k8TDt>%UW8tII+P05
z_Cr1z{tMw3{;nv!HOH(o%2jjqu}0h??mLQq!QoD7jXZ0lwl}xh)^^HT#lp?ah1Ka+
zg{=*!uy?FmqAz>Uce_E<5M-@D*b$_0WS$JbFfv_8J1@;%8-iS>oxW?_h>Em_7DX=-
zdII??*r%bt6FLa~D)ybQ<7-Ea3&VFnZ)u#ewlw@lJXN{2Hv$W>HB7C{EV;Eab>=Tg
z)-{SXN3F48EiP6i#+aXPSaaB_h^6;ImhQYcUYE?5fhZ<Xsm+az#+#_fP3Sk!5Hk6K
zj8te68H^{RH_5yW`ZKVbjUL}rTFY6kaT0tOdJFK=069OqRr?^62dQg&pkE35;m~R5
zA4MLoV{i%sYgMr_v-;i|?ba-?W{WjTtcwflD#gmo8s*k*#;T|_Ypl(OZmq)!Ny=I=
zN>u=T0-&;F_&a<$eOn%+LqL8Ax#;Ul@H}H&yC9kPlF1!-M*Z3;wDXi<?T_Fyu-}V4
zKg6+iCiDt+8=;4=djzeH-Wm1{jptHY?hQ50x$Cgzuhq9!->)!F7}nUZ=C?HqtTAL=
z&smkQ<kskSR+8~=ZF!4;coWEX(N{~M{NPFLiDdW~zA2gQPFB`(wre~aemeYcGU<ul
zcj(<m&ey$a*MS~E&Xco^|6gn89w+7a{_$<M^T?1QiMEq;pzWJcOlF^X9A<ah-Dx^d
zkwda{jufJ{+73!JMHv!CIYeP8i4I0Wec9HQD3ug#rNmdLzD2+HJ#&A&UOzAY{P^`p
zpS`a8y6@+@uKT*r_w&p|EFVaEcjTV{x023gShj;FOe=a7oKnmm5Anxb3RL@4U2DE2
zPjs<+2$IvZO6^-craYrMSAU^NhCE_0+5XkM-oyjvp%5?gEH(W=TP$e=odeC;NH!nK
zU%D6m7HDqmW;<B;A^hLScNTKf;E#i*|5(a~X!iN!pNiZO(DVbd@KiqhuB20)*@N?F
z|DV8EO-3#uXKVSSiJ?59>C`~GyP|e4&uAH?_AY}I?TQ_(WHqC`;-qc=y*aov{|;l(
zNfi19g@&WyYU<AP*=cBOXzqb8^`hum;8XBBAvXuy3f%`wt5e70z*gV`pt00M{dLzm
zODB+T0OdD@Z~7P0f4IeDTtoqfz`5o@_MZlDS-z7~A@fgBEXy=cQw*qGYa*&>*E~&p
zdEk%suZiDKG&Vihf6<iltn9hqzo1u8cejC?3!J62!0hT_r-Nw?zrg3dcs6gsF6Fc@
zyC=8`xqSsr2Qyl}L|?xQOaB9ZD13gSwZj9*<>8@kGHCxNV3_vb0at%bf;5wbLiLEE
zrLe|zEvppeTDi(Is$w<wRC&a|x=ig>48sG<QRo3Q+yt&d^A>2{jw))1g0nzlagaJR
zGn+jq{yy}DAg9XNFCc#h79R!6!6(3(@Yhow#|hc5kv|y|<bS@Hf~O)d4}ps)pb!B?
zz4~rhpdM1~{z1DY*OFW9Uo#=akXn`aGT1EFlI{NyyE2vQn8_^t2*v0TJ1jsU-eT_X
zFt`E>7QnATl}(3UhXvO|&(EiwL;nrD>@PE_?xc{$Z}n(b#G176++2Jwdg7$)!RXl?
zyc0c*K6P>E%iwPGoB8+j`b)L`WM7-?gneU2HKd+b9@dO*dJQw#zOFcu?_{*%D|>Di
zCxc%d3AHVBN@9HXj<+^54A#wcO1S<QngKn5uk$gqlw1q_339_oZx?n-PJpDBJVyE?
z%Bu;TUg%`}2zJeLO0tpPm+zEp=c|7Gk>kR0=yb|qufI4{ZnyTD+s%u4w%@B`w|kWS
zs@<FH=Sg)td+bKn&35v<$xczl*IUXlyYJQ8UL=`gx06z@uM=!9p&_Rcp+JF?u@wup
z;fXEKz0lNwj-V+E;Ly~9l*NPKuf>8O9{HN|X5<?Tw!kB-V?*1p{1EsR@@$TV7=$x;
zn=|w|e6CT1e0a!9{!-{89L$7nw1encJCx*DeBJKIh3%4lmR;i4v+GiM>;t=J*~!X(
zY<KqFw9}G&J6UIrl>GOc-Bw~Pl^Se|(QrE&J%mLe6sm%T>_deXK<@?PSi*fjb44J7
zH+VyzQI}PbXHYKT2qbin{F}fd$gztXVnLO`$~{zx92fCIzvH2n<ZDg(Soj;@b6+sT
zyZpr&+ex@TrC8cvt6@g16yprLj96*+D)grLZ+0V0a!u?$7cWXD{~bCjh4S1E8#ANq
zwz6Votu{&N2s8K<q!8~CW^h6kx`#T>L({L({8IQX8dpc-ozyX#ouQAqI3*kgg?c0R
zGW;ocj7vbFyOHYzJr_QAb3=DQKLS5axvk;H;J*i5!SgzU>BR_4<PX)^0lRsjC{^^T
z&UB^a16!x<?e0!(>k)Z!!1*Z;jIbBOWU*rA0^2HzbziCu8bz^q9?@<rqaBy9nGxEJ
z2RH%^Euya3l?fd|j>`q1a*(aR&>2|7QD|sBb>0>EUy<7ZJ&9J7PI?ykIr<E>0>@(L
zuicysHq1i2u~>4pY5zClIu7kZihdK7=HFs#nG{TM)6x_RvRqNFbhU;T?NU=7P~8u=
z9-flyUugfza5EY;LXqiM;xm?0XPh2}E~Cz00UKlSI{0Xw!Igs0>$y(GPAuF_|4{>c
z4!NJ<$4NKz5Td;n<yBG+S9wDl!7%g|<jzN~4<31}n1log4}%SD&!q1tsLn36J>M4F
zPpF4e^vhDkkUXi@qE@8X+GKk|<GJKut6-RlBBsyhBNZ7d(ePzr;3X`68BMtk89EpG
z4AOaRHN!;p8(7YsW#|+Z-3UFOe4KZMcH@C%(7d5v!lr2GW@3uFrlFSPzmEJb;Tf)i
zgx=1l{cnRY9@mcqSJ;ggwP&>gSs)LoCzD6k+a6LeAP=>-SLa;Yf5<~JxN?V`Y`3)Y
zmea9}li<+9Xm~N$6Z{E_c>6jTq?cm}dzYcJvE(Y!H_#8v#4?8YlF8r*EI0CFB2Ec6
zcSFrE>~-kslw-z(xA71+wL{PHoL4Y-Rxt&e;qn&*XCj~;OYKx+fPz$`pDa<GO8&T=
zly9@6qDDP^tVMi{j>T>4U?2fLBU8fhb%?JqWW0((!>vb#5&cKe{5}+}fIka9$JC(>
zSoR?DeQf#%q_eLWI!Zs#8+i|{$;<<8g#T~yJxWY`i2NNqhp#*ohuTokKoVZXrHe^W
z-AZ%Stzu!9eJ!=O9q&`^I4w;c>1pQ^YBd^N6%&e)lsxgT!Q^Q><zy2rBZTIB0w#lN
zjiGMPyiXB22hE>E(H8I%)&tE*pNR*|X!$g;z<rI-E-YOMe<f(flCc<e8Tch~qrn}N
z^B{N$<veA^|Ep+rtHCG&AA{0xjGfA<M^qhaoL6jUv{O~8|G32VfNSh_vFcvIew-_}
z$@Z^ckYEX#-+&??qdDi&p&(Ij=7=NU-v*zrHIzJvg>RD1HI_f3T~Ysk!0co*`Ev%f
zbB%JUJB@z&@=Rrw)Fk$D6q|=)><}e`(I^&yW*E;f9nDOz4u<e@c*Y&zpP%;+={A21
z-%r^|tg1*=Czs!xo>G{xBFvGY|JkrpzMo6*d|oJ=2|cF3DWA>N^>u|#`FQBGe5ag)
zb$_>fCwu_89mr=x(>lY?L!UstD#&*=!gIN6zmjt5lHMfjggIsQ`@vn5mq$JhXZ?#Q
zm#Z7aezh9SFEqPW>}d9_{za`w`IN36STiq;Me3)Nu11y`&51gO{je8}2cQ{KK>yun
z#+=f>9X@kIe@&1>760$xYgl-7ffGIkosWDY==<=@+vGDIc@~^O`W@g%%9(*&fP90I
zdmOo5pa;)z4)5=ZN1D>={E8*bsx{wOY{v|ZzKT8dC!#f0sZ}1bGcD7qObgZ+qkMW^
z4jaJ9R@VrPxzgzW2#u$LVKij^;itz9uSDbF@GnQvPSo)%a1IzVieu4J$W6fVpTXJS
zeE8KsZk6~SB3~M~8hQE~|0d))TlK$1`Se%*(K$|Wc}E_oI#Lg-nUv~MnrmnnVQ1Bv
zP1ie=vQ+d8yDm`=tX8V>i|n<PIltRcw_A57%v{jF98a(UOGYb`VgEsBu0;9!pt<qP
zE7VzEG+%;6i$P{oVR}~oB52M?{og<{&+yYfl{19+i}A$O$Xx@P@>$u1+rhty@|siL
z(qdfHi9giwsb|#Kqgk{p(A=@-zLdq9kH|A_W~$FtzG-&dqL`>*M`f+VinUnf8%X7Q
z(9{Q-S6fz~*muwkv7{?>V`8B#n#Zv44X`&Bj3)g(=rQ0@<i3Lc9C#C0N;;=`{;SAm
zUKhI;%Q>R-=fi&#WGo7ES*_Ur1sRy4`L(W4hb51weXHFon$;gDHZ;b_GL@qaPpIeo
z(f&0#4Wm&L6zPcvZl_9C;{irf|7R%3T+`17L&`aJ_P2q)6Fi9}{~>({=s?p7!sn8Y
zkGzy~h0^q(;mbh21`}?L+&=is!2HJ3UqLTN{yEeB+fpFQcE2>1LTaa7?5bLxP%NnK
z)uU;DLY~v8EL!Un&0kE6B-{Tk+Z?*0;TtHj6^)rW`T3$x*jUc~K-kn-CK|I6Gci#9
zB7D}A;d1CbSh|7qMctipFUUHj{8#W(JhKmcgZ$k<KAlp27<p!9;qS>myuc|oLEvYR
zTHb{Oc|cKqiG9IDGcEOms(aO`)*p(ESq)RIK>uq#B3f%y_k?Z#8s<%(eJ=_zDkkfE
zG#Xch?v4fAq43Xy&urSC0hV9^tMqUKunaj~>G99UvH_%@2et*jz++}ic%3S4N^G=7
zZXO67X6MDf%C!Gx2waC?H40FVr8<&js>>#JPfC`^6Pk0Xt|c!yjS<?{&~)IBPv~j5
z<b{)PZ4|1C#{1FmTI#qOSQm}2Lcym|xD6Jhqi}WPmUVH;xk;4tP%YA@Q<Zt}zajrF
z^1TJU9~uvq-vIp)9(Wh}H*gb|O8s|IKwk=I$pcJt(bP{>B9CZ2p;fr}Y9;FNw9=A=
z@|b#7jWwwlApRww#cDq+HotnzE?T%d3bD!#pTM$PQP9j>Ognx6oQs7f`ftP%?ojzz
zV}^}oXMm@Hk6`(9@B`9wC}%CW0-8Om@=w5dl-rG~Nq**rVV3j$cSzt4w_ojCbt}y^
z<I%oDz^+j>Q&L^46{#HcC+i!hB%a%y8avEbkZk|zLe)%~qj@fh96>SD(|wG^CqQ;k
z!gIl=u+YqGo&?8&8a@9829v|?pEJr5MXgrNn&BumjD@M28Y}|KS-1OHxrB$}OH+wu
zAn(YP_r%#zOu7eT(>L7opQ-#;8n^L}vjeLJZiT6)vl(^?^<4GTyqoV)#2G)`Te~}%
zguUA_&&k}8G0siTcjAmP?x=hx^8?Ud;AFCQ<6hat$?Qq`J<#+xZY6jY_!4qlesW(3
zJDI_>ac(d2UkJ@tr!vn3p9Oi{(tR4agV0wa7hvA)%Eyn}J)Lqp+iYt0eEZlv+=F)g
z9<V#1pId%6D=)g0UG3g#*LWlC9BrrFA3Sa!SSQ(o5qCq->^w9+0DcR$EOg>V!?U5U
zfW80=e@GwaE`a|I{D-<ayf-z@-3~niiw<B3D-gFA@<HU9hMhQbGIwH*6W;-S1s>(v
zsLO}F;yuVW7Eg06%w>-#ZrYtY&F*rg*{YL;FW5btX|}H&W<4NF+uPl*VRqfW*6w&N
zv-^1}x3w)N%kKG#-phENY<ID)PUa*u;_HZ+mEiMOI0tMMaWYvfy6n|t-i{>=u#ow<
zTaCKquCn_|ffN52Iv-1T^~jw7W+Hz#9{C-dMm}bv?$fzWCVjSh7|%3?zL)210z<qA
znt2g_ykP5WrQMAdz25H0=G%RK#YlI%MRU6K+$+`-!j(2gvTb?K*xfF9;3vDQKB`L6
z{<Jg>N;7{%!?9SHOI>{rj)DFLOWA;MSD^V#U|X;wd{!Wt^tCQ?vv@l!{~VgRmzxc~
z55GE5&PJxoN+bRlNOusw9{ynDS*5xz$jrn&#Kb)LU}>m2Qyt3kJMG*{t>AHclA#z8
z%7cm(jaKqxnq8BroZ>OIIrK#%-n@62O=eC)<3(tG4p=+aiGK#R!g3Sk&vc_7K#tKQ
z^9uMq=?@NK(NXBvKvt#kJ>WT%$DWbf8UsDxMAB!I-v_@T$QDnWl})kRga>^HW47IJ
z>|=MiWr6Ctgg(~js`mSg?H^R9qH9<=8OC{c@}J60wR=vKlQBd6%jMsqDSNf<WmwPx
zh1i{rGq-X*&;{_<6Sbz@-%D)V0slulatZW2>fBg#6UYq3Wo;H`1>=q;>e+#CZ-bu?
z&FB^XH|1OhZ9JHRN8+aae~AY;6mq5Udv>jKo~>i`3)SsvQvJhH`+D@Z_KZh8p85r~
zmOE{_<kU}?m`cXKf~FH1okBBSea~e6?AFB6NoZV;I^KdMyc`^7jb+A$%n4u*dbXe8
zbGRPo^$wSv(D+gKeK2ScxCiutm*81;G~9!f$I8RKh5QeLbGTX-=U~AdMgoUTuIg6N
z`Gt)Ic|`n1cC1irs5Qt^c|fh`72ES|_~w7d&`w<mu5W{H0(H|Fh4!GaY4?0XG1Ii)
zi>SM8@avOq#)vZ+HQAkVpTNR7ASalaW8uGm+(zWSG5N_itD6&N7U~wkKL$Pw|0m=h
zfsTV_-JO|327)g>9mY;OYHhGRo-9_6^`IRi%54uQ#npdYV`E32P+ixvJD=(iRc`T6
zyXI0yae%sh8qN6tXM7?GG55+Gje>))&{*05i`!$#Xe>ArECbnPa<4~jCHc-GUw`Cx
zf|r>5q?<9L9{E;)2jN#D$IeA&6Lz~@wl*@^d2pGaXD*~5^;AdfuC_FlXB5R61>*-&
zb*lcNneFQpJ0-T&s9bp@6>G%*HFnt4FuM^=H=}7CEH<Or9aK>f7V(MF_{H%5g`$(e
zY~<LZaXHhCKLDPOoT*zp7JnCmIKqkF3VsEjw-?=xl(&lXL9~)x;Cj-pX5H=X?e4@`
zz`F@13~Wcas#9sKy6tGkb+rarEDuReqoR6DS*$Tao;CA^Wc$~YNnPhcEM17kEQOMm
zUWf%|bYonPo4%ag?#u{y7I+B7HHZ8gn4O$_{yC$nL>AY#r`cod=%x0*-d@=lic8th
zbJ^U<ya2_ssiKb1JD>-FHNYd_Bqlf!@XycthxEm+f>BkXNvFDObws9FrxptI7+x+O
zy61v?9Kfvsa-Gj*x7SNsbva{+66)^bVaLmLT{o}5i5fX}1fmgedZFXxQ_JoE<a40e
z^@$D!kCQ%#{6z(h_ik_zd}c%L5Aer<4JgOqO1G;%Qlp7_oMrYFNqf7qXJ(0J0M#(5
zXww`^J&uB>yWQ}w@qH?vMxn3lDls+A6L)*DpgkHN$#<e$vo*6y?+G;IHi>r+bdYr3
z;C8oR!DH}QX?O>Kh8_(vafogNIVg-SM6LstJ&VV<n-Q&mu8BO?|J)YFVmvjiixXW+
zdR@w0#g$H1QKY_6^Jw)P>RIybEL4_job6;i^<Nv6YG?9@<TUdT-}GR~cE{+!J;gM4
zB^JL5_Q-L(Q_*lPmi0ry!MTq2H)wWgq6ug|&J$%e;}(%`HuOVSxEb`|fsf$V!m@Kf
z#$hj)S>0eaC(3x@mXUt~G%*%!2Oi=Z;L$U1iRRhraWsF{noE7IW}&KMjnkqPJBlgE
ztL`;UYt)+UP1>K9#tkCxr)XG#W|Ps3-JB?&)NlvF-voY&XR6@=c73C}kz0!;lTrL4
z(mRqqomRrdMYlhebFJ5{jb}Q8#UN4cjsUkKzZ<LyU5j#BAm55{H)@)XEKzjJ6B-UR
zd(>+5RvX3glm-o1CXcG`)>=z_v7S?`VBofxm$&!b-@q(1zZ^@JfI~sPb702PWc0p|
z=AXeQW}-{5#MHg%FD`&KmMw(dO!~PP#&uqoGY{|C;M>rv;WPR~x%_HYvtEyB|4h`Q
z7r@YrK<!?AZEJfiU-N1W5{hQEbB!wMCo~Gla?M!OT2e7W{cE~QyN;Tk@Jcj2fTl;S
z1ry<a4}BY!@KG4|Dd<7qP(0NJjA7B~;9FSqA-EJz{R`ZNd}ok*INshMlgnrm%3<y0
z{SNvr%6Z?k|Nazk4GC)3$z=lnX$Bz+71f$Os>jr{S)IQ|E%hIom8vIHKVm$ZjDIzY
z_tBht#qRAW)EvwtTG<zIuSC-#V#Bn8mhjo}_wqrSWZicooi$eUE9mEuUkg2-a&n<Z
z!8bi8F_3KL?4w6ZNM}#f%XH6e9dU}itN4Q>f~bq26jAica#^CNSGv|v>Os}>X>O+3
zr}Am6&~y46vHRU}8K*Q}Q@4zY(NhGahq^M++Jg4Y1!T7n{z4S)OCLTEK0Er+XTZLA
zWElBsV;SGxaGwN!gXW9(-Ui?k&^^e%0r?w9e~0I6fPVw)ZZ99da0ikwhl15U)ox{h
zEEcMrYbK%|PODdWLK6Xv0h$0Pc8c{NWn777&Cv8Db;#9gw;ENm53InF&%wJ;cmj&j
z&Xc1YJNVxH(2tVNn!sfjH)_U`R`6d#o|TN3V@r1p{1woBOjVQ49gFB|#Gh+|q7cSM
zBs9l0T4hQhMYZZss2v2&t3S2ly*#2SRV&h{D$7$nDe<r7qymScc>qnn1x-}n56%G1
z42b<9w-bt*o{V{TluPPnZzZ~j^wU5iHx;?1q~Acg4?GUv%#^m^G1EHOdGJmLN8y?G
zdA>UchH+tCT(l2YX)I8?Rvl_St-f77pt^941*&U}nra=bE=ct(Y6XAHyGPk2j=Idv
zXl_QoMkr)PJ2SK4RV=p&8eau&f;K(n7LXShOgm0C<G+Eyzs#uiE=|=VYK%}_8;_)>
zu`F!8J5i|?Q<Ycn;Rq_>C1}FTyBrgq0~>?9u^8pe!+)akUuk^tA5$1@1I@MzU4^l_
z98;+&mA}hR6nv6C&UbrGqM%pCINu|=iGt4H@o=K->9ldaX5mBu_qu(2guj40e7=@>
ziGn`J*C<F7)T=Viw}pI9!QVu_U&*%|nsuEofZU7F-QaU-;fqrqhl;)#d5JPUV&&^W
zxeLtKWbWiMM`fGfB)TNZjwA6wVWNO@P2X4~!l0)xQO3@aZwNWNQQ|uIPeA{UJiCv+
zjrpm3H}M2c`FzhKcO1Ei;7PC#a=#-tigMFQKa6Jkp!?g+fya)=Y7gY&K4sHLEXhrj
z-AtyDn8)QV-vKf;hW|_s?ts6#OQN7Y^iS|zFc<k6@cZG$j>xws-v)3v@{Qs9F@F?v
zJ(Tzbnm6DJ+VSw~F`_p4^HK6O(uY=yr4QnhHm}3ahTjxpZUM8A|1a_vfh7hhm+f@l
zD#{%}dDAJE&qVm{#z`!*eRFvJ`Q+<|Uj3oJr2K2ZBGSKxe@#K6jMcU831}a9l;>=x
zyp_n+G}SSPPujdm9o|i*H6&6;zFH)5_U=n3^GPzjg~xk={JvJ%BCrk*eHFQTDU^E|
zzC*}A2lmBDi^1NM_d2)==RH9=L6qaf+1CrB*6{EvF=hh%DEZkLi=7&5Vxke8WoU3M
zl7lEQ0ewI2G49%cWM}XIIWys3N;<m=zC&n|%ais)8#f*1iLa7A1`Rl!@U6uH?#%c$
z!!Lo(rP>BUcOZQWILU6IFZ(f8d#LCBDrK>WLp?1cWqh5%#9M(2_TU*!kiCaztmhfD
zl!AuPi%CB~dNw)@M3+gV_kiw(m0h4s3os$_F;y5lIn*<yxNIcJD=~%Dxo<G4{D<W0
zsnJp}jbgYw=Nkc52Rozs?eMST@mvG&T>$?uxRv7Hq4)-1OVBtm4gPkV%wC%BI?83W
z@0&=zZD2o4eUkDf{T@pnPQ<k!-_OXMK+ZI9BBfv$^v|fzmbY&W&usvI4f)vS@bSWU
z!D{eE%HIu+!0b<v--13DfsM$w5&nG2%OKxE^4|vDhnv_=^<7T+J;5E=nUg_f4=3Wb
zQ7Ctse9I`5?Jr*kgkuPqUa1iN1Mth>pM-D1xFPbR;2)$AZXo)+(CtXS7MiV7-zp;0
z#6clSYz3d;p&P*|DES4>*@%*1@Kd9l`LNbqp7B-dP)Kaz-Ja%E7vZbelDj>hwKV}X
z&1B%gdnlkO*1inwpcn@M$?k3>3h;Tdve=Rlp3jP#=Qx!$(#HkgEO3grW)b94b5W$q
zxZrx|n$SNMI+b*+!APD{^aXOq3Z0_&pxJ6HqT>y+C@Z344RVRPs1n?j?^NcK-Wg=`
zG|2E**_3>23{<`hy%{+gTaY)KDyfgyse3#VFE8TiTyPgpqPYh9B7qZw?LjUF1iu44
z;5bUC0y4lB%_Y4T8pc+Q^fYSTmlR?p7(mjLz#^jPS9Ey{dMNZ7Fm`ezAzXx)gWN_c
zdX{8foGrS62Y-mFouJ3U=gwY`9q-CDJb>9-<uAzJMRAuwx1*Q@>A&XT67n6xw3ejv
z-LRtBV0{$p0*)bHE9AeVT=t`bRmjg7L(qiqy`HIO7F`N4lFSQ1-ux<x&A->vIA}h)
z6#RlRet_MZXHk(scJGQlrp&7;a{=@jxlU1#XRm<I_G7Re+4*cuusd|@<h^`@y2yiA
z9mI|0t#h5qOGzGt%H<$?YDH_{*C+jL@Ox^3K_|#{qRRKlw-f#l_)J!c=%|7{pl^mI
zU*!VuQS#45&38dg%7YPb3%JFe0S>Glt9?J8zpIBl!)$OR=|=70s5zeU-sCxtL(jmx
zHl!~{ZDZzn$Qymv!Dst6I0x7514p3;yQ;yXl+%p#v7|2q=Rjwp$2!X6BXYst>}2P@
z^yD8g?`Y3m#k^K%K3Nu=Or{xRdY3}q1;_BP3~&|3y+`4*q2B@LQ&<%W8$#jhNN?Je
zrT{)f{s8zr`3E9rj2>zbd3wko)1*qeir_^#PBM^xhHhe7>1fZy0Y%20A5elZcNL9>
zPmBeZVK&2TusIr>&l5S5sjNymcO#0p6%xDyYz1yWZYTIK8n#C6TFN;}zSntj9V3rh
z883po?NS*#^nmBZ_C?Q-+>va2sw8*~Xj;s0iXQ^MfMPhU3l5<ew)KO&4pbRi_n@ck
zX{KJrBH7PyPCn?F-<$*V$_tTUX;U<lltDa#=}Yid%E|y|Qg&?B7+T+j<oyweX5`HV
zjr-<-EQ~8RBRQKGjGY|gnb`IyoQ;&}NAeORcz3swVKVpvLA@XRf-;Ty{dsn5$ym?6
zbBnk!5?n<Pm^N?>RUU-?5HH4-6w`T~k)J5#k%2lni7fmYU~W|!2A32jO55>ka&z+%
zSyw{86;5QaCmwi&^p?;JE?F26Sd*W~3L$rx^kd+1(s{=%z^9V3YJp=w!ZI+H@>YT!
z3KLluAU6>HOjE-XJyYw2$i&<zFd>{My%2ny#Mt~vo<^N>3KFGT5no90BY0eQvOfdv
z$e}p+d&rpyzK@z5!v>B~!aJ0Z$Kzr@Px3?uWbysMKqstghSrnOnv=P}izL%H_{GRX
z={cmI2i{J$&O9ax-4_LT4JGgnk7Irl=#3f=KzrdY0%yagaRyF<Pq+l?8-))|_AI&a
zU|yn>IYHoxE+`0kcp}3^;A|8vBFEb(N~8x)fYZr|$yp7M3nNGP1jb?5tKbimcn<gz
z{Km*N24g=@p?AvS;%ner>^zfVyP*ItTLr3MKoQ0C0?VMA<t9p9(oaD%7YO7dw-kPd
z+(gzW<Z^fnN0))a6#E(cmdN>$>kZb0Zi8A+K-c0qA5+c&)ZvOlAnlh}`c&o+%`uOQ
zvw@#U=cTPc7_1A<ru@V3`GN1O<>dby>_NWINl%A=E#;;oR|7tC7W4elcF5(z-vgb8
zTX;1x&=dKqp!cF@L+C8*VHO?efEziQjh&jx93p_SOe_Lh5q=Bdn)u$t=&cl934a$#
zwt)UO519}BJo1Iey-Paxrvr1L>B<AQk$)-3k6dLjU<ZzMLrHK4#>_G0A$KvfdETew
zcj1pU1Mf7?)c#B`OS#b$I34D8gj-N}d+75hY!1TvvFsN3CZ=1!??>pEhA;|cnUDq!
zL+?kf6LQ8QFM%8=2F{|~@4#b(ZtT#*p7~ezgOiQK^Z0;eQeZDRUFeaN{0dL_x@#)-
zxAEi#@W+E3PX$hq?`G&5kdJ*bo!=qJ;wCY_3z;Z2$n>!^c5*tO_`C%Uok!qVR4|p&
z6sPB*;)BqwC}1|}^d5mq3R+9LX|AO__--C#`VbQ=0n!&zVh%VE6R#qFT@<d3+!f?E
zeL++5Gv^CjkK(2k8C+tPA~QTwi+7WWJ}>YYg_(x3pTx@$W=T=H8SIZT3?+Y9E35x0
zPEMVX$7)1b8PfVnPvc>%MoMd7$xCQ#db37U2Dd9sZ<e(Les`2Ht@xil{7?B~r=qNf
zBHvd@a~c*TBCXQW9Kt8U?vL|+VIop9J<ZvgpQvCH$5~dGsJI`zG%pe1(+AE;(mDHZ
zo<#nFj5OzS()l2{Q#G83up8`5Cf{~&d`=?rU()&cxCn<TP9&VD_yD=db{dd-B3660
zXH+pAX~iM<FQEq?iE-$7BD0{IAkQh3vy1dc!5qq2Kzf|=S!O%Ux+EfeSiqUib2yc9
z_>sIw0sJ)dVh7T>p&(HafPR7ee5S`)Lb@09lb!`;L9;CvJ2jhz9_o^)a46(@9@2@#
z+eu_g%V`45M+uxCauO96l1@NIT0*m?uBd|?%hd=w+)j5M_APW}UZR2%C#Mp69rVj6
z`wi&?c!VQ1=ODD<vjGuFL#__x95Ww9egq}HB{7XemR}LJ%bZ_HWH;KGg$2elQ^>Ru
zoQJ0dQ&?9XG81|+h1aKMrgcq3&V_$~d~brRY$Lajel7W$kzSMXIg)X9kj@UJa~z`v
zK;K9CgN!F0MTwaddO4Xo!^}pg0iH3IH^#F1U@@6rhyNW)@P!PgHy)^td<HlYJ_B21
zJlKYGQ}{ZRm<v4#x*9ZnNku;Lb8{0F%<!DEQH}#8=Unn}Y!W*)2PLlLA?F}`Cdy1g
zcpE|$2r+l4*aE$pLOGLl9)ZsdaOVo}V(@B|<#RmFm*nRY?#}PXac1MZ0zLpvg8u{j
z{*+^4no&9OIr6l&3Vw**8Bh9rQyp_rVgXFPbm1_9RB+VgFhi(d3*BM=B=RMhShQE1
z4qlGte2l`GOCT0QkHFGAa8hm}avbzg&MDG|AU}@$5A#qCkew;Wnd+t=PkLZI<(QbF
zx+7anbv%X=jM^1F5nhbOd^#x^XzZ~%k0Nvv!UsV<PUFm=kQd=Ep^#g!gh8Mp5B@o%
z?}0x8<@<r1koykY2lgaAi*)nQ<y1#6%Adt^vY}fb&uxg<sTfLpNMS7Noa!jEgv=lD
z5YsvygU>dq@leDx(E*_8N=$XE0U7Hf^~qNR22rY<hup+N*wKnKCw(&cIdFE^)TuB{
zl31!RP4^4R|2O<Xl%NZUoq8N4YVq*3B+^q>cqnWbG@oK~euF*>e2+|zfcuGRJ}~6`
z%0oHrc9xU=9QoMnFahRNAZG&BG<847AXc##+{eTAB0mEq`B}`Qkr(ltHdO0*raGQL
ziD78Sp1w1fOjE!n6lyxFF(8M54r6O1gTk;V(iT*c{C~pOsd*^Dl(gaq5B-&g=7RM>
z&gY$_SauWiaMDLW-vQ0``~SBRZ=IT0Z+x{E<9ysGHtuClgY+NUyfu4i^&e`!1?y<_
F{{eXQhc5sC

diff --git a/examples/qualcomm/oss_scripts/llama/decoder_runtime_evaluator.py b/examples/qualcomm/oss_scripts/llama/decoder_runtime_evaluator.py
index 7bebf513658..a75e67933e5 100644
--- a/examples/qualcomm/oss_scripts/llama/decoder_runtime_evaluator.py
+++ b/examples/qualcomm/oss_scripts/llama/decoder_runtime_evaluator.py
@@ -133,7 +133,7 @@ def _init_runner_base_cmd(self):
             base_cmd = " ".join(
                 [
                     f"export LD_LIBRARY_PATH={self.qnn_sdk}/lib/x86_64-linux-clang/:{args.build_folder}/lib &&",
-                    f"./{args.build_folder}/examples/qualcomm/oss_scripts/llama/{self.runner}",
+                    f"{args.build_folder}/examples/qualcomm/oss_scripts/llama/{self.runner}",
                     f"--decoder_model_version {DECODER_MODEL_VERSION[args.decoder_model]}",
                     f"--tokenizer_path {self.runtime_tokenizer_path}",
                     f"--output_path {self.device_output_response_path}",
diff --git a/examples/qualcomm/oss_scripts/llama/decoder_utils.py b/examples/qualcomm/oss_scripts/llama/decoder_utils.py
index 5380ff5220d..184eb857661 100644
--- a/examples/qualcomm/oss_scripts/llama/decoder_utils.py
+++ b/examples/qualcomm/oss_scripts/llama/decoder_utils.py
@@ -317,13 +317,9 @@ def retrieve_info_from_pte(pte_path: str) -> dict:
         pte_max_context_len = pte_max_seq_len
 
     # FP has no scale/zero_point, use following values, which is equivalent to not performing dequantize.
-    if kv_io_bit_width == 32:
+    if kv_io_bit_width == 32 or (logits_scale is None or logits_zero_point is None):
         logits_scale = 1
         logits_zero_point = 0
-    elif logits_scale is None or logits_zero_point is None:
-        raise RuntimeError(
-            "Unable to find scale/offset. The .pte file might be deprecated. Please generate a new .pte file"
-        )
     assert output_vocab_size is not None, "Couldn't find the vocab size"
     assert pte_max_seq_len is not None, "Couldn't find the max_seq_len from pte"
     meta_info = {
diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py
index a8e28f96b71..ce0b7a80cfc 100755
--- a/examples/qualcomm/oss_scripts/llama/llama.py
+++ b/examples/qualcomm/oss_scripts/llama/llama.py
@@ -21,6 +21,7 @@
 )
 
 from executorch.backends.qualcomm.utils.utils import (
+    generate_gpu_compiler_spec,
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
     get_soc_to_chipset_map,
@@ -119,9 +120,15 @@ def compile(
             # because the encoder is quite sensitive and quantization can make it harder for the model to distinguish
             # between images within the same conversation.
             to_skip = len(args.image_path) > 1
-            backend_options = generate_htp_compiler_spec(
-                use_fp16=to_skip,
-            )
+            if args.backend == "htp":
+                backend_options = generate_htp_compiler_spec(
+                    use_fp16=to_skip,
+                )
+            elif args.backend == "gpu":
+                backend_options = generate_gpu_compiler_spec()
+            else:
+                raise ValueError(f"Unsupported backend {args.backend}")
+
             encoder_compile_specs = generate_qnn_executorch_compiler_spec(
                 soc_model=get_soc_to_chipset_map()[args.soc_model],
                 backend_options=backend_options,
@@ -131,27 +138,40 @@ def compile(
             skip_quantize[modality] = to_skip
             compile_specs[modality] = encoder_compile_specs
         elif is_multimodal and modality == TOK_EMBEDDING:
-            backend_options = generate_htp_compiler_spec(
-                use_fp16=False,
-                # x86 emulator does not support weight sharing
-                use_weight_sharing=not args.enable_x86_64,
-            )
+            if args.backend == "htp":
+                backend_options = generate_htp_compiler_spec(
+                    use_fp16=False,
+                    # x86 emulator does not support weight sharing
+                    use_weight_sharing=not args.enable_x86_64,
+                )
+            elif args.backend == "gpu":
+                backend_options = generate_gpu_compiler_spec()
+            else:
+                raise ValueError(f"Unsupported backend {args.backend}")
+
             compile_specs[modality] = [
                 generate_qnn_executorch_compiler_spec(
                     soc_model=get_soc_to_chipset_map()[args.soc_model],
                     backend_options=backend_options,
                     # x86 emulator does not support shared buffer
                     shared_buffer=not args.enable_x86_64,
+                    online_prepare=args.online_prepare,
                 )
             ] * len(TOK_EMBEDDING_GRAPH_NAMES)
         elif modality == TEXT_DECODER:
             # compile spec for text decoder
-            backend_options = generate_htp_compiler_spec(
-                use_fp16=False,
-                use_multi_contexts=decoder_model_config.num_sharding > 1,
-                # x86 emulator does not support weight sharing
-                use_weight_sharing=not args.enable_x86_64,
-            )
+            if args.backend == "htp":
+                backend_options = generate_htp_compiler_spec(
+                    use_fp16=args.use_fp16,
+                    use_multi_contexts=decoder_model_config.num_sharding > 1,
+                    # x86 emulator does not support weight sharing
+                    use_weight_sharing=not args.enable_x86_64,
+                )
+            elif args.backend == "gpu":
+                backend_options = generate_gpu_compiler_spec()
+            else:
+                raise ValueError(f"Unsupported backend {args.backend}")
+            skip_quantize[modality] = args.use_fp16
             compile_specs[modality] = [
                 generate_qnn_executorch_compiler_spec(
                     soc_model=get_soc_to_chipset_map()[args.soc_model],
@@ -159,6 +179,7 @@ def compile(
                     # x86 emulator does not support shared buffer
                     shared_buffer=not args.enable_x86_64,
                     use_mha2sha=True,
+                    online_prepare=args.online_prepare,
                 )
             ] * len(DECODER_GRAPH_NAMES)
 
@@ -172,7 +193,11 @@ def compile(
     )
 
     # perform compilation
-    multi_modal_mgr.compile(compile_specs=compile_specs, pte_filenames=pte_filenames)
+    multi_modal_mgr.compile(
+        compile_specs=compile_specs,
+        pte_filenames=pte_filenames,
+        skip_quantize=skip_quantize,
+    )
 
 
 def inference(
@@ -529,6 +554,14 @@ def _build_parser():
         help="Number of examples in few-shot context",
     )
 
+    parser.add_argument(
+        "-F",
+        "--use_fp16",
+        help="If specified, will run in fp16 precision and discard ptq setting",
+        action="store_true",
+        default=False,
+    )
+
     parser.add_argument("-v", "--verbose", action="store_true")
 
     parser.add_argument(
@@ -592,6 +625,12 @@ def export_llama(args) -> None:
         pte_filename = "lookahead_llama_qnn"
     else:
         raise RuntimeError(f"Unknown model_mode: {args.model_mode}.")
+
+    if args.model_mode == "hybrid" and args.online_prepare:
+        raise RuntimeError(
+            "Currently hybrid mode is not compatible with online_prepare."
+        )
+
     if args.decoder_model == "stories260k":
         pte_filename = f"{args.decoder_model}_" + pte_filename
     pte_filenames = {
@@ -740,6 +779,7 @@ def export_llama(args) -> None:
 def main():
     parser = _build_parser()
     args = parser.parse_args()
+    args.build_folder = os.path.realpath(args.build_folder)
     try:
         export_llama(args)
     except Exception as e:
diff --git a/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp b/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp
index d8d82fece33..9b8cdd7999e 100644
--- a/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp
@@ -210,7 +210,6 @@ std::string get_formatted_prompt(
   return formatted_prompt;
 }
 
-template <typename T>
 void start_runner(
     std::unique_ptr<executorch::extension::Module> module,
     std::vector<std::string>& prompts,
@@ -219,7 +218,7 @@ void start_runner(
       gflags::GetCommandLineFlagInfoOrDie("tokenized_prompt").is_default ? false
                                                                          : true;
   // create llama runner
-  example::Runner<T> runner(
+  example::Runner runner(
       std::move(module),
       FLAGS_decoder_model_version.c_str(),
       FLAGS_model_path.c_str(),
@@ -298,26 +297,8 @@ int main(int argc, char** argv) {
             FLAGS_attention_sink_rope_path.c_str(),
             executorch::extension::Module::LoadMode::MmapUseMlockIgnoreErrors);
   }
-  // Using 8bit as default since this meta is introduced with 16bit kv io
-  // support and older models only have 8bit kv io.
-  example::KvBitWidth kv_bitwidth = example::KvBitWidth::kWidth8;
-  if (module->method_names()->count("get_kv_io_bit_width") > 0) {
-    kv_bitwidth = static_cast<example::KvBitWidth>(
-        module->get("get_kv_io_bit_width").get().toScalar().to<int64_t>());
-  }
-
-  if (kv_bitwidth == example::KvBitWidth::kWidth8) {
-    start_runner<uint8_t>(
-        std::move(module), prompts, std::move(attention_sink_rope_module));
-  } else if (kv_bitwidth == example::KvBitWidth::kWidth16) {
-    start_runner<uint16_t>(
-        std::move(module), prompts, std::move(attention_sink_rope_module));
-  } else {
-    ET_CHECK_MSG(
-        false,
-        "Unsupported kv bitwidth: %ld",
-        static_cast<int64_t>(kv_bitwidth));
-  }
+  start_runner(
+      std::move(module), prompts, std::move(attention_sink_rope_module));
 
   return 0;
 }
diff --git a/examples/qualcomm/oss_scripts/llama/qnn_multimodal_runner.cpp b/examples/qualcomm/oss_scripts/llama/qnn_multimodal_runner.cpp
index 29b6b9d7ddc..c9c2bd19940 100644
--- a/examples/qualcomm/oss_scripts/llama/qnn_multimodal_runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama/qnn_multimodal_runner.cpp
@@ -137,7 +137,6 @@ std::vector<std::string> CollectPrompts(int argc, char** argv) {
   return prompts;
 }
 
-template <typename T>
 void start_multimodal_runner(
     std::unique_ptr<executorch::extension::Module> encoder,
     std::unique_ptr<executorch::extension::Module> tok_embedding,
@@ -150,7 +149,7 @@ void start_multimodal_runner(
                                                                          : true;
 
   // Create multimodal runner
-  example::QNNMultimodalRunner<T> runner(
+  example::QNNMultimodalRunner runner(
       std::move(encoder),
       std::move(tok_embedding),
       std::move(text_decoder),
@@ -289,35 +288,12 @@ int main(int argc, char** argv) {
           FLAGS_decoder_path.c_str(),
           executorch::extension::Module::LoadMode::MmapUseMlockIgnoreErrors);
 
-  // Using 8bit as default since this meta is introduced with 16bit kv io
-  // support and older models only have 8bit kv io.
-  example::KvBitWidth kv_bitwidth = example::KvBitWidth::kWidth8;
-  if (text_decoder->method_names()->count("get_kv_io_bit_width") > 0) {
-    kv_bitwidth = static_cast<example::KvBitWidth>(
-        text_decoder->get("get_kv_io_bit_width")
-            .get()
-            .toScalar()
-            .to<int64_t>());
-  }
-  // Start runner with appropriate KV bitwidth
-  if (kv_bitwidth == example::KvBitWidth::kWidth8) {
-    start_multimodal_runner<uint8_t>(
-        std::move(encoder),
-        std::move(tok_embedding),
-        std::move(text_decoder),
-        prompts);
-  } else if (kv_bitwidth == example::KvBitWidth::kWidth16) {
-    start_multimodal_runner<uint16_t>(
-        std::move(encoder),
-        std::move(tok_embedding),
-        std::move(text_decoder),
-        prompts);
-  } else {
-    ET_CHECK_MSG(
-        false,
-        "Unsupported kv bitwidth: %ld",
-        static_cast<int64_t>(kv_bitwidth));
-  }
+  // Start runner
+  start_multimodal_runner(
+      std::move(encoder),
+      std::move(tok_embedding),
+      std::move(text_decoder),
+      prompts);
 
   return 0;
 }
diff --git a/examples/qualcomm/oss_scripts/llama/runner/decoder_runner.h b/examples/qualcomm/oss_scripts/llama/runner/decoder_runner.h
index 888e9acd421..b714f737de3 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/decoder_runner.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/decoder_runner.h
@@ -8,6 +8,7 @@
 
 #pragma once
 
+#include <executorch/examples/qualcomm/oss_scripts/llama/runner/utils.h>
 #include <executorch/extension/llm/sampler/sampler.h>
 #include <executorch/extension/module/module.h>
 #include <executorch/extension/tensor/tensor.h>
@@ -56,19 +57,36 @@ class DecoderRunner {
   inline int32_t logits_to_token(
       const executorch::aten::Tensor& logits_tensor,
       int64_t pos) {
-    auto* logits = logits_tensor.mutable_data_ptr<uint16_t>();
+    std::byte* logits = logits_tensor.mutable_data_ptr<std::byte>();
     auto num_tokens = logits_tensor.size(1);
     auto vocab_size = logits_tensor.size(2);
     static std::vector<float> logits_f(vocab_size);
-    auto* logits_last = logits;
+    std::byte* logits_last = logits;
     // offset to the meaningful logit we want for prefill model.
+    executorch::aten::ScalarType logits_dtype = logits_tensor.scalar_type();
+    size_t logits_nbytes = getDtypeSize(logits_dtype);
     if (num_tokens > 1) {
-      logits_last += pos * vocab_size;
+      logits_last += pos * vocab_size * logits_nbytes;
     }
-    // Discard dequantization (converting uint16_t to float) because the
+    // Discard dequantization (converting std::byte to float) because the
     // relative order of elements remains the same without conversion
     for (int i = 0; i < vocab_size; i++) {
-      logits_f[i] = logits_last[i];
+      switch (logits_dtype) {
+        case executorch::aten::ScalarType::UInt16:
+          logits_f[i] = reinterpret_cast<uint16_t*>(logits_last)[i];
+          break;
+        case executorch::aten::ScalarType::Byte:
+          logits_f[i] = reinterpret_cast<uint8_t*>(logits_last)[i];
+          break;
+        case executorch::aten::ScalarType::Float:
+          logits_f[i] = reinterpret_cast<float*>(logits_last)[i];
+          break;
+        default:
+          ET_CHECK_MSG(
+              false,
+              "The scalar_type %s of logits is not supported",
+              executorch::runtime::toString(logits_dtype));
+      }
     }
     return sampler_->sample(logits_f.data());
   }
diff --git a/examples/qualcomm/oss_scripts/llama/runner/kv_manager.cpp b/examples/qualcomm/oss_scripts/llama/runner/kv_manager.cpp
index e5c12068bab..7288ca5fbd1 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/kv_manager.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/kv_manager.cpp
@@ -7,24 +7,105 @@
  */
 
 #include <executorch/examples/qualcomm/oss_scripts/llama/runner/kv_manager.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 #include <executorch/runtime/platform/assert.h>
+
+using executorch::runtime::MethodMeta;
+using executorch::runtime::Result;
+using executorch::runtime::TensorInfo;
 namespace example {
-template <typename T>
-KVManager<T>::KVManager(Metadata metadata) : metadata_(metadata) {
+
+namespace {
+void fill_mask(
+    executorch::aten::ScalarType scalar_type,
+    std::byte* buf,
+    size_t size,
+    bool use_pos_value) {
+  if (use_pos_value) {
+    switch (scalar_type) {
+      case executorch::aten::ScalarType::UInt16:
+        std::fill_n(reinterpret_cast<uint16_t*>(buf), size, 65535u);
+        break;
+      case executorch::aten::ScalarType::Byte:
+        std::fill_n(reinterpret_cast<uint8_t*>(buf), size, 255u);
+        break;
+      case executorch::aten::ScalarType::Float:
+        std::fill_n(reinterpret_cast<float*>(buf), size, 0.0);
+        break;
+      default:
+        ET_CHECK_MSG(
+            false,
+            "Unsupported scalar type %s",
+            executorch::runtime::toString(scalar_type));
+        break;
+    }
+  } else {
+    switch (scalar_type) {
+      case executorch::aten::ScalarType::UInt16:
+        std::fill_n(reinterpret_cast<uint16_t*>(buf), size, 0u);
+        break;
+      case executorch::aten::ScalarType::Byte:
+        std::fill_n(reinterpret_cast<uint8_t*>(buf), size, 0u);
+        break;
+      // -65535 acts as the additive "very negative" attention-mask value;
+      // chosen as a large finite negative so masked positions effectively
+      // zero out after softmax without relying on -inf.
+      case executorch::aten::ScalarType::Float:
+        std::fill_n(reinterpret_cast<float*>(buf), size, -65535.0);
+        break;
+      default:
+        ET_CHECK_MSG(
+            false,
+            "Unsupported scalar type %s",
+            executorch::runtime::toString(scalar_type));
+        break;
+    }
+  }
+}
+} // namespace
+
+KVManager::KVManager(Metadata metadata, std::unique_ptr<MethodMeta> method_meta)
+    : metadata_(metadata) {
+  Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(1);
+  attention_mask_dtype_ = attention_mask->scalar_type();
+
+  // inputs are [input_tokens, attention_mask, (sliding window attention_mask),
+  // (input_pos), kv_caches] search kv_cache in inputs
+  for (int i = 2; i < method_meta->num_inputs(); i++) {
+    Result<TensorInfo> tensor_meta = method_meta->input_tensor_meta(i);
+    // k_cache: [1, n_heads, head_dim, seq_len]
+    size_t tensor_nbytes = tensor_meta->nbytes();
+    size_t expected_tensor_nbytes = metadata_.head_dim * metadata_.num_heads *
+        metadata_.max_cache_len * getDtypeSize(tensor_meta->scalar_type());
+    if (tensor_nbytes != expected_tensor_nbytes) {
+      // Not a kv_cache tensor (e.g. input_pos, sliding window attention mask).
+      continue;
+    }
+    if (kv_cache_dtype_ == executorch::aten::ScalarType::Undefined) {
+      kv_cache_dtype_ = tensor_meta->scalar_type();
+    } else {
+      ET_CHECK_MSG(
+          tensor_meta->scalar_type() == kv_cache_dtype_,
+          "Currently mixed scalar type of kv_cache is not allowed");
+    }
+  }
+  ET_CHECK_MSG(
+      kv_cache_dtype_ != executorch::aten::ScalarType::Undefined,
+      "kv_cache_dtype was not detected from method inputs");
   k_cache_.resize(metadata_.num_layers);
   v_cache_.resize(metadata_.num_layers);
 
   // Calculate cache size
   size_t cache_in_bytes = metadata_.num_layers * metadata_.num_heads *
-      metadata_.head_dim * metadata_.max_cache_len * sizeof(T);
+      metadata_.head_dim * metadata_.max_cache_len *
+      getDtypeSize(kv_cache_dtype_);
   size_t cache_out_bytes = metadata_.num_layers * metadata_.num_heads *
-      metadata_.head_dim * metadata_.max_ar_len * sizeof(T);
+      metadata_.head_dim * metadata_.max_ar_len * getDtypeSize(kv_cache_dtype_);
   total_cache_size_ = 2 * (cache_in_bytes + cache_out_bytes);
 };
 
-template <typename T>
-void KVManager<T>::init_attention_mask(
-    uint16_t* attention_mask,
+void KVManager::init_attention_mask(
+    std::byte* attention_mask,
     const std::vector<int32_t>& attention_map,
     int32_t ar_len,
     int32_t n_past) {
@@ -33,38 +114,51 @@ void KVManager<T>::init_attention_mask(
       "The size of attention_map (%zu) doesn't match with ar_len (%d)",
       attention_map.size(),
       ar_len);
-  uint16_t neg_val = 0;
-  uint16_t pos_val = 65535;
   // Clear the attention mask
-  std::fill_n(attention_mask, ar_len * metadata_.context_len, neg_val);
+  fill_mask(
+      attention_mask_dtype_,
+      attention_mask,
+      ar_len * metadata_.context_len,
+      /*use_pos_value=*/false);
 
   // SMART_MASK requires special handling of attention mask
-  uint16_t* past_ptr = attention_mask;
-  uint16_t* new_ptr = attention_mask + (metadata_.context_len - ar_len);
+  std::byte* past_ptr = attention_mask;
+  std::byte* new_ptr = attention_mask +
+      (metadata_.context_len - ar_len) * getDtypeSize(attention_mask_dtype_);
   // All inputs will necessarily attend to n_past and itself
   for (int i = 0; i < ar_len; i++) {
     // Iterate across ar_len
     if (attention_map[i] < 0) {
       // If negative, attend to only past tokens
-      std::fill_n(past_ptr, n_past, pos_val);
+      fill_mask(
+          attention_mask_dtype_,
+          past_ptr,
+          n_past,
+          /*use_pos_value=*/true);
     } else {
       // If positive, copy attention map from (relative to 0th input) parent
       // Parent token index
       const int32_t pidx = attention_map[i];
-      uint16_t* parent_ptr = attention_mask + pidx * metadata_.context_len;
+      std::byte* parent_ptr = attention_mask +
+          pidx * metadata_.context_len * getDtypeSize(attention_mask_dtype_);
       std::memcpy(
-          past_ptr, parent_ptr, metadata_.context_len * sizeof(uint16_t));
+          past_ptr,
+          parent_ptr,
+          metadata_.context_len * getDtypeSize(attention_mask_dtype_));
     }
     // Attend to itself
-    new_ptr[i] = pos_val;
-    past_ptr += metadata_.context_len;
-    new_ptr += metadata_.context_len;
+    fill_mask(
+        attention_mask_dtype_,
+        new_ptr + i * getDtypeSize(attention_mask_dtype_),
+        1,
+        /*use_pos_value=*/true);
+    past_ptr += metadata_.context_len * getDtypeSize(attention_mask_dtype_);
+    new_ptr += metadata_.context_len * getDtypeSize(attention_mask_dtype_);
   }
 }
 
-template <typename T>
-void KVManager<T>::init_attention_mask(
-    uint16_t* attention_mask,
+void KVManager::init_attention_mask(
+    std::byte* attention_mask,
     const std::vector<int32_t>& attention_map,
     int32_t ar_len,
     int32_t n_past,
@@ -75,30 +169,44 @@ void KVManager<T>::init_attention_mask(
       "The size of attention_map (%zu) doesn't match with ar_len (%d)",
       attention_map.size(),
       ar_len);
-  uint16_t neg_val = 0;
-  uint16_t pos_val = 65535;
   // Clear the attention mask
-  std::fill_n(attention_mask, ar_len * metadata_.context_len, neg_val);
+  fill_mask(
+      attention_mask_dtype_,
+      attention_mask,
+      ar_len * metadata_.context_len,
+      /*use_pos_value=*/false);
 
   // SMART_MASK requires special handling of attention mask
-  uint16_t* past_ptr = attention_mask;
-  uint16_t* new_ptr = attention_mask + (metadata_.context_len - ar_len);
+  std::byte* past_ptr = attention_mask;
+  std::byte* new_ptr = attention_mask +
+      (metadata_.context_len - ar_len) * getDtypeSize(attention_mask_dtype_);
   // All inputs will necessarily attend to n_past and itself
   for (int i = 0; i < ar_len; i++) {
     // Iterate across ar_len
     if (attention_map[i] < 0) {
       // If negative, attend to only past tokens
-      std::fill_n(past_ptr, n_past, pos_val);
+      fill_mask(
+          attention_mask_dtype_,
+          past_ptr,
+          n_past,
+          /*use_pos_value=*/true);
     } else {
       // If positive, copy attention map from (relative to 0th input) parent
       // Parent token index
       const int32_t pidx = attention_map[i];
-      uint16_t* parent_ptr = attention_mask + pidx * metadata_.context_len;
+      std::byte* parent_ptr = attention_mask +
+          pidx * metadata_.context_len * getDtypeSize(attention_mask_dtype_);
       std::memcpy(
-          past_ptr, parent_ptr, metadata_.context_len * sizeof(uint16_t));
+          past_ptr,
+          parent_ptr,
+          metadata_.context_len * getDtypeSize(attention_mask_dtype_));
     }
     // Attend to itself
-    new_ptr[i] = pos_val;
+    fill_mask(
+        attention_mask_dtype_,
+        new_ptr + i * getDtypeSize(attention_mask_dtype_),
+        1,
+        /*use_pos_value=*/true);
 
     // mask by limitation of sliding_window
     int32_t available_context_len = position_offset.empty()
@@ -107,87 +215,73 @@ void KVManager<T>::init_attention_mask(
     // if available_context_len is less than 0, it means we need to mask some
     // tokens in the past to avoid exceeding the sliding window
     if (available_context_len < 0) {
-      std::fill_n(past_ptr, -available_context_len, neg_val);
+      fill_mask(
+          attention_mask_dtype_,
+          past_ptr,
+          -available_context_len,
+          /*use_pos_value=*/false);
     }
 
-    past_ptr += metadata_.context_len;
-    new_ptr += metadata_.context_len;
+    past_ptr += metadata_.context_len * getDtypeSize(attention_mask_dtype_);
+    new_ptr += metadata_.context_len * getDtypeSize(attention_mask_dtype_);
   }
 }
 
-template <typename T>
-void KVManager<T>::update_attention_mask(
-    uint16_t* attention_mask,
+void KVManager::update_attention_mask(
+    std::byte* attention_mask,
     int32_t ar_len,
     int32_t n_past,
     int32_t n_update) {
-  uint16_t pos_val = 65535;
-  uint16_t* cur_ptr = attention_mask;
-  cur_ptr += n_past;
+  std::byte* cur_ptr =
+      attention_mask + n_past * getDtypeSize(attention_mask_dtype_);
 
   for (int i = 0; i < ar_len; i++) {
-    std::fill_n(cur_ptr, n_update, pos_val);
-    cur_ptr += metadata_.context_len;
+    fill_mask(attention_mask_dtype_, cur_ptr, n_update, /*use_pos_value=*/true);
+    cur_ptr += metadata_.context_len * getDtypeSize(attention_mask_dtype_);
   }
 }
 
-template <typename T>
-void KVManager<T>::update_attention_mask(
-    uint16_t* attention_mask,
+void KVManager::update_attention_mask(
+    std::byte* attention_mask,
     int32_t ar_len,
     int32_t n_past,
     int32_t n_update,
     int32_t sliding_window,
     const std::vector<int32_t>& position_offset) {
-  uint16_t pos_val = 65535;
-  uint16_t neg_val = 0;
-  uint16_t* cur_ptr = attention_mask;
-  cur_ptr += n_past;
+  std::byte* cur_ptr =
+      attention_mask + n_past * getDtypeSize(attention_mask_dtype_);
 
   for (int i = 0; i < ar_len; i++) {
-    std::fill_n(cur_ptr, n_update, pos_val);
+    fill_mask(attention_mask_dtype_, cur_ptr, n_update, /*use_pos_value=*/true);
     int32_t available_cache_len = position_offset.empty()
         ? sliding_window - (i + 1)
         : sliding_window - (position_offset[i] + 1);
     if (n_past + n_update > available_cache_len) {
-      std::fill_n(
-          cur_ptr - n_past, n_past + n_update - available_cache_len, neg_val);
+      fill_mask(
+          attention_mask_dtype_,
+          cur_ptr - n_past * getDtypeSize(attention_mask_dtype_),
+          n_past + n_update,
+          /*use_pos_value=*/false);
     }
-    cur_ptr += metadata_.context_len;
+    cur_ptr += metadata_.context_len * getDtypeSize(attention_mask_dtype_);
   }
 }
 
-template <typename T>
-void KVManager<T>::init_cache(IMemAlloc* buffer_manager, int32_t ar_len) {
+void KVManager::init_cache(IMemAlloc* buffer_manager, int32_t ar_len) {
   cur_ar_len_ = ar_len;
-  const size_t max_in_cache_block_in_bytes =
-      metadata_.max_cache_len * sizeof(T);
-  const size_t max_out_cache_block_in_bytes = metadata_.max_ar_len * sizeof(T);
-
-  const size_t cache_in_bytes =
-      metadata_.num_heads * metadata_.head_dim * max_in_cache_block_in_bytes;
-  const size_t cache_out_bytes =
-      metadata_.num_heads * metadata_.head_dim * max_out_cache_block_in_bytes;
+  const size_t cache_in_bytes = metadata_.num_heads * metadata_.head_dim *
+      metadata_.max_cache_len * getDtypeSize(kv_cache_dtype_);
+  const size_t cache_out_bytes = metadata_.num_heads * metadata_.head_dim *
+      metadata_.max_ar_len * getDtypeSize(kv_cache_dtype_);
   for (int layer = 0; layer < metadata_.num_layers; ++layer) {
-    // Allocate buffer for key cache and value cache
-    T* single_layer_k_cache_in =
-        reinterpret_cast<T*>(buffer_manager->allocate(cache_in_bytes));
-    T* single_layer_k_cache_out =
-        reinterpret_cast<T*>(buffer_manager->allocate(cache_out_bytes));
-    T* single_layer_v_cache_in =
-        reinterpret_cast<T*>(buffer_manager->allocate(cache_in_bytes));
-    T* single_layer_v_cache_out =
-        reinterpret_cast<T*>(buffer_manager->allocate(cache_out_bytes));
-
-    k_cache_[layer].buffer = single_layer_k_cache_in;
-    k_cache_[layer].output_buffer = single_layer_k_cache_out;
-    v_cache_[layer].buffer = single_layer_v_cache_in;
-    v_cache_[layer].output_buffer = single_layer_v_cache_out;
+    k_cache_[layer].buffer = buffer_manager->allocate(cache_in_bytes);
+    k_cache_[layer].output_buffer = buffer_manager->allocate(cache_out_bytes);
+    v_cache_[layer].buffer = buffer_manager->allocate(cache_in_bytes);
+    v_cache_[layer].output_buffer = buffer_manager->allocate(cache_out_bytes);
   }
 }
 
-template <typename T>
-void KVManager<T>::rearrange_cache(int32_t ar_len_dst) {
+void KVManager::rearrange_cache(int32_t ar_len_dst) {
   // Don't need to rearrange if cur_ar_len_ is equal to target ar_len
   if (cur_ar_len_ == ar_len_dst)
     return;
@@ -199,75 +293,73 @@ void KVManager<T>::rearrange_cache(int32_t ar_len_dst) {
   cur_ar_len_ = ar_len_dst;
 }
 
-template <typename T>
-void KVManager<T>::rearrange_key(KVCache<T>& k_cache, int32_t ar_len_dst) {
+void KVManager::rearrange_key(KVCache& k_cache, int32_t ar_len_dst) {
   const int32_t src_cache_num = (cur_ar_len_ == metadata_.context_len)
       ? metadata_.context_len
       : metadata_.context_len - cur_ar_len_;
   const int32_t dst_cache_num = metadata_.context_len - ar_len_dst;
-  T* k_cache_in_read_ptr = k_cache.buffer;
-  T* k_cache_in_write_ptr = k_cache.buffer;
-
+  std::byte* k_cache_in_read_ptr = k_cache.buffer;
+  std::byte* k_cache_in_write_ptr = k_cache.buffer;
+  size_t src_cache_nbytes = src_cache_num * getDtypeSize(kv_cache_dtype_);
+  size_t dst_cache_nbytes = dst_cache_num * getDtypeSize(kv_cache_dtype_);
   if (src_cache_num > dst_cache_num) {
     // copy from first dimension
     for (int i = 0; i < metadata_.head_dim * metadata_.num_heads; i++) {
-      std::memmove(
-          k_cache_in_write_ptr, k_cache_in_read_ptr, dst_cache_num * sizeof(T));
-      k_cache_in_read_ptr += src_cache_num;
-      k_cache_in_write_ptr += dst_cache_num;
+      std::memmove(k_cache_in_write_ptr, k_cache_in_read_ptr, dst_cache_nbytes);
+      k_cache_in_read_ptr += src_cache_nbytes;
+      k_cache_in_write_ptr += dst_cache_nbytes;
     }
   } else {
     k_cache_in_read_ptr +=
-        (metadata_.head_dim * metadata_.num_heads - 1) * src_cache_num;
+        (metadata_.head_dim * metadata_.num_heads - 1) * src_cache_nbytes;
     k_cache_in_write_ptr +=
-        (metadata_.head_dim * metadata_.num_heads - 1) * dst_cache_num;
+        (metadata_.head_dim * metadata_.num_heads - 1) * dst_cache_nbytes;
     // copy from last dimension
     for (int i = 0; i < metadata_.head_dim * metadata_.num_heads; i++) {
-      std::memmove(
-          k_cache_in_write_ptr, k_cache_in_read_ptr, src_cache_num * sizeof(T));
-      k_cache_in_read_ptr -= src_cache_num;
-      k_cache_in_write_ptr -= dst_cache_num;
+      std::memmove(k_cache_in_write_ptr, k_cache_in_read_ptr, src_cache_nbytes);
+      k_cache_in_read_ptr -= src_cache_nbytes;
+      k_cache_in_write_ptr -= dst_cache_nbytes;
     }
   }
 }
 
-template <typename T>
-void KVManager<T>::rearrange_value(KVCache<T>& v_cache, int32_t ar_len_dst) {
+void KVManager::rearrange_value(KVCache& v_cache, int32_t ar_len_dst) {
   const int32_t src_cache_num = (cur_ar_len_ == metadata_.context_len)
       ? metadata_.context_len
       : metadata_.context_len - cur_ar_len_;
   const int32_t dst_cache_num = metadata_.context_len - ar_len_dst;
-  T* v_cache_in_read_ptr = v_cache.buffer;
-  T* v_cache_in_write_ptr = v_cache.buffer;
+  std::byte* v_cache_in_read_ptr = v_cache.buffer;
+  std::byte* v_cache_in_write_ptr = v_cache.buffer;
+  size_t src_cache_nbytes = src_cache_num * getDtypeSize(kv_cache_dtype_);
+  size_t dst_cache_nbytes = dst_cache_num * getDtypeSize(kv_cache_dtype_);
   if (src_cache_num > dst_cache_num) {
     // copy from first dimension
     for (int i = 0; i < metadata_.num_heads; i++) {
       std::memmove(
           v_cache_in_write_ptr,
           v_cache_in_read_ptr,
-          dst_cache_num * metadata_.head_dim * sizeof(T));
-      v_cache_in_read_ptr += src_cache_num * metadata_.head_dim;
-      v_cache_in_write_ptr += dst_cache_num * metadata_.head_dim;
+          dst_cache_nbytes * metadata_.head_dim);
+      v_cache_in_read_ptr += src_cache_nbytes * metadata_.head_dim;
+      v_cache_in_write_ptr += dst_cache_nbytes * metadata_.head_dim;
     }
   } else {
     v_cache_in_read_ptr +=
-        metadata_.head_dim * (metadata_.num_heads - 1) * src_cache_num;
+        metadata_.head_dim * (metadata_.num_heads - 1) * src_cache_nbytes;
     v_cache_in_write_ptr +=
-        metadata_.head_dim * (metadata_.num_heads - 1) * dst_cache_num;
+        metadata_.head_dim * (metadata_.num_heads - 1) * dst_cache_nbytes;
     // copy from last dimension
     for (int i = 0; i < metadata_.num_heads; i++) {
       std::memmove(
           v_cache_in_write_ptr,
           v_cache_in_read_ptr,
-          src_cache_num * metadata_.head_dim * sizeof(T));
-      v_cache_in_read_ptr -= src_cache_num * metadata_.head_dim;
-      v_cache_in_write_ptr -= dst_cache_num * metadata_.head_dim;
+          src_cache_nbytes * metadata_.head_dim);
+      v_cache_in_read_ptr -= src_cache_nbytes * metadata_.head_dim;
+      v_cache_in_write_ptr -= dst_cache_nbytes * metadata_.head_dim;
     }
   }
 }
 
-template <typename T>
-void KVManager<T>::update_cache(
+void KVManager::update_cache(
     int32_t ar_len,
     int32_t n_past,
     int32_t n_update,
@@ -283,20 +375,19 @@ void KVManager<T>::update_cache(
   }
 }
 
-template <typename T>
-void KVManager<T>::update_key(
-    KVCache<T>& k_cache,
+void KVManager::update_key(
+    KVCache& k_cache,
     int32_t n_past,
     int32_t n_update,
     const std::vector<bool>& selected) {
-  T* write_ptr = k_cache.buffer;
-  T* read_ptr = k_cache.output_buffer;
-  const int32_t copy_size = n_update * sizeof(T);
+  std::byte* write_ptr = k_cache.buffer;
+  std::byte* read_ptr = k_cache.output_buffer;
+  const int32_t copy_size = n_update * getDtypeSize(kv_cache_dtype_);
   const int32_t iter_size = (cur_ar_len_ == metadata_.context_len)
-      ? metadata_.context_len
-      : metadata_.context_len - cur_ar_len_;
-  const int32_t out_size = cur_ar_len_;
-  const int32_t past_size = n_past;
+      ? metadata_.context_len * getDtypeSize(kv_cache_dtype_)
+      : (metadata_.context_len - cur_ar_len_) * getDtypeSize(kv_cache_dtype_);
+  const int32_t out_size = cur_ar_len_ * getDtypeSize(kv_cache_dtype_);
+  const int32_t past_size = n_past * getDtypeSize(kv_cache_dtype_);
   const int32_t n_iter = metadata_.head_dim * metadata_.num_heads;
 
   write_ptr += past_size;
@@ -316,7 +407,11 @@ void KVManager<T>::update_key(
     for (int i = 0; i < n_iter; ++i) {
       auto wp = write_ptr, rp = read_ptr;
       for (auto ind : true_indices) {
-        *wp++ = rp[ind];
+        std::memmove(
+            wp,
+            rp + ind * getDtypeSize(kv_cache_dtype_),
+            getDtypeSize(kv_cache_dtype_));
+        wp += getDtypeSize(kv_cache_dtype_);
       }
       write_ptr += iter_size;
       read_ptr += out_size;
@@ -324,21 +419,25 @@ void KVManager<T>::update_key(
   }
 }
 
-template <typename T>
-void KVManager<T>::update_value(
-    KVCache<T>& v_cache,
+void KVManager::update_value(
+    KVCache& v_cache,
     int32_t n_past,
     int32_t n_update,
     const std::vector<bool>& selected) {
-  T* write_ptr = v_cache.buffer;
-  T* read_ptr = v_cache.output_buffer;
-  const int32_t copy_size = n_update * metadata_.head_dim * sizeof(T);
-  const int32_t past_size = n_past * metadata_.head_dim;
+  std::byte* write_ptr = v_cache.buffer;
+  std::byte* read_ptr = v_cache.output_buffer;
+  const int32_t copy_size =
+      n_update * metadata_.head_dim * getDtypeSize(kv_cache_dtype_);
+  const int32_t past_size =
+      n_past * metadata_.head_dim * getDtypeSize(kv_cache_dtype_);
   const int32_t n_iter = metadata_.num_heads;
   const int32_t iter_size = (cur_ar_len_ == metadata_.context_len)
-      ? metadata_.context_len * metadata_.head_dim
-      : (metadata_.context_len - cur_ar_len_) * metadata_.head_dim;
-  const int32_t out_size = cur_ar_len_ * metadata_.head_dim;
+      ? metadata_.context_len * metadata_.head_dim *
+          getDtypeSize(kv_cache_dtype_)
+      : (metadata_.context_len - cur_ar_len_) * metadata_.head_dim *
+          getDtypeSize(kv_cache_dtype_);
+  const int32_t out_size =
+      cur_ar_len_ * metadata_.head_dim * getDtypeSize(kv_cache_dtype_);
 
   write_ptr += past_size;
 
@@ -354,13 +453,14 @@ void KVManager<T>::update_value(
       auto wp = write_ptr, rp = read_ptr;
       for (auto sel : selected) {
         if (sel) {
-          std::memcpy(wp, rp, metadata_.head_dim * sizeof(T));
-          wp += metadata_.head_dim;
+          std::memcpy(
+              wp, rp, metadata_.head_dim * getDtypeSize(kv_cache_dtype_));
+          wp += metadata_.head_dim * getDtypeSize(kv_cache_dtype_);
           update_times--;
           if (update_times == 0)
             break;
         }
-        rp += metadata_.head_dim;
+        rp += metadata_.head_dim * getDtypeSize(kv_cache_dtype_);
       }
       write_ptr += iter_size;
       read_ptr += out_size;
@@ -368,8 +468,4 @@ void KVManager<T>::update_value(
   }
 }
 
-// Explicit instantiations
-template class KVManager<uint16_t>;
-template class KVManager<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/kv_manager.h b/examples/qualcomm/oss_scripts/llama/runner/kv_manager.h
index 06fe88517a7..3b8e67dd38d 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/kv_manager.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/kv_manager.h
@@ -8,6 +8,7 @@
 
 #pragma once
 #include <executorch/examples/qualcomm/oss_scripts/llama/runner/imem_alloc.h>
+#include <executorch/examples/qualcomm/oss_scripts/llama/runner/utils.h>
 #include <cstdint>
 #include <memory>
 #include <vector>
@@ -15,17 +16,15 @@
 namespace example {
 
 // Structure to hold key-value cache buffers
-template <typename T>
 struct KVCache {
-  T* buffer;
-  T* output_buffer;
+  std::byte* buffer;
+  std::byte* output_buffer;
 };
 
 /**
  * @class KVManager
  * @brief Class for kv cache update, rearrangement, and buffer allocatation.
  */
-template <typename T>
 class KVManager {
  public:
   struct Metadata {
@@ -36,7 +35,9 @@ class KVManager {
     int64_t num_heads;
     int64_t num_layers;
   };
-  KVManager(Metadata metadata);
+  KVManager(
+      Metadata metadata,
+      std::unique_ptr<executorch::runtime::MethodMeta> method_meta);
 
   /**
    * @brief Allocate buffer for KV cache and set the cur_ar_len_.
@@ -71,7 +72,7 @@ class KVManager {
    * @param n_past Number of past elements in the cache.
    */
   void init_attention_mask(
-      uint16_t* attention_mask,
+      std::byte* attention_mask,
       const std::vector<int32_t>& attention_map,
       int32_t ar_len,
       int32_t n_past);
@@ -98,7 +99,7 @@ class KVManager {
    * @param position_offset (optional) attention mask position offset of
    */
   void init_attention_mask(
-      uint16_t* attention_mask,
+      std::byte* attention_mask,
       const std::vector<int32_t>& attention_map,
       int32_t ar_len,
       int32_t n_past,
@@ -114,7 +115,7 @@ class KVManager {
    * @param n_update Number of elements to be updated.
    */
   void update_attention_mask(
-      uint16_t* attention_mask,
+      std::byte* attention_mask,
       int32_t ar_len,
       int32_t n_past,
       int32_t n_update);
@@ -132,7 +133,7 @@ class KVManager {
    * lookahead decoder
    */
   void update_attention_mask(
-      uint16_t* attention_mask,
+      std::byte* attention_mask,
       int32_t ar_len,
       int32_t n_past,
       int32_t n_update,
@@ -152,10 +153,10 @@ class KVManager {
       int32_t n_update,
       const std::vector<bool>& selected);
 
-  const std::vector<KVCache<T>>& get_k_cache_() const {
+  const std::vector<KVCache>& get_k_cache_() const {
     return k_cache_;
   }
-  const std::vector<KVCache<T>>& get_v_cache_() const {
+  const std::vector<KVCache>& get_v_cache_() const {
     return v_cache_;
   }
 
@@ -169,15 +170,19 @@ class KVManager {
 
  private:
   // Helper functions to rearrange and update key and value caches
-  void rearrange_key(KVCache<T>& k_cache, int32_t ar_len_dst);
-  void rearrange_value(KVCache<T>& v_cache, int32_t ar_len_dst);
+
+  void rearrange_key(KVCache& k_cache, int32_t ar_len_dst);
+
+  void rearrange_value(KVCache& v_cache, int32_t ar_len_dst);
+
   void update_key(
-      KVCache<T>& k_cache,
+      KVCache& k_cache,
       int32_t n_past,
       int32_t n_update,
       const std::vector<bool>& selected);
+
   void update_value(
-      KVCache<T>& v_cache,
+      KVCache& v_cache,
       int32_t n_past,
       int32_t n_update,
       const std::vector<bool>& selected);
@@ -186,10 +191,14 @@ class KVManager {
   Metadata metadata_;
   size_t total_cache_size_;
   int32_t cur_ar_len_;
+  executorch::aten::ScalarType attention_mask_dtype_ =
+      executorch::aten::ScalarType::Undefined;
+  executorch::aten::ScalarType kv_cache_dtype_ =
+      executorch::aten::ScalarType::Undefined;
   // Store start pointer of k and v cache for input and output
   // input: layer -> head * head_dim * max_cache_len
   // output: layer -> head * head_dim * max_ar_len
-  std::vector<KVCache<T>> k_cache_;
-  std::vector<KVCache<T>> v_cache_;
+  std::vector<KVCache> k_cache_;
+  std::vector<KVCache> v_cache_;
 };
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp
index f7e44292f26..298fc1ac9ff 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp
@@ -13,20 +13,19 @@ using executorch::runtime::Result;
 
 namespace example {
 
-template <typename T>
-void LhdTokenGenerator<T>::prepare_io(
+void LhdTokenGenerator::prepare_io(
     std::vector<uint64_t> input_tokens,
     std::vector<int32_t> input_pos) {
   for (int i = 0; i < metadata_.ar_len; i++) {
     if (i < input_tokens.size()) {
       // Prepare pos data
-      this->input_pos_.data[i] = input_pos[i];
+      reinterpret_cast<int32_t*>(this->input_pos_.data)[i] = input_pos[i];
 
       // Support CPU 4-bit embedding, which requires int64 input.
       // However, for QNN embedding, only int32 input is needed.
       // Therefore, we need to cast to the correct type to write the data.
       if (metadata_.use_int64_token) {
-        this->input_toks_.data[i] = input_tokens[i];
+        reinterpret_cast<int64_t*>(this->input_toks_.data)[i] = input_tokens[i];
       } else {
         int32_t* input_toks_ptr =
             reinterpret_cast<int32_t*>(this->input_toks_.data);
@@ -36,8 +35,7 @@ void LhdTokenGenerator<T>::prepare_io(
   }
 }
 
-template <typename T>
-void LhdTokenGenerator<T>::init_attention_mask(int32_t n_past) {
+void LhdTokenGenerator::init_attention_mask(int32_t n_past) {
   std::vector<int32_t> attention_map;
   attention_map.reserve(metadata_.ar_len);
   // Initialize attention mask with current position
@@ -73,8 +71,7 @@ void LhdTokenGenerator<T>::init_attention_mask(int32_t n_past) {
   }
 }
 
-template <typename T>
-void LhdTokenGenerator<T>::init_lookahead_branch(
+void LhdTokenGenerator::init_lookahead_branch(
     const std::vector<uint64_t>& tokens) {
   for (int i = 0; i < metadata_.ngram - 1; ++i) {
     for (int j = 0; j < metadata_.window; ++j) {
@@ -91,8 +88,7 @@ void LhdTokenGenerator<T>::init_lookahead_branch(
   is_lhd_branch_initialized_ = true;
 }
 
-template <typename T>
-void LhdTokenGenerator<T>::init_verification_branch(uint64_t cur_token) {
+void LhdTokenGenerator::init_verification_branch(uint64_t cur_token) {
   const int g_cur = ngrams_pool_.cnt[cur_token];
 
   v_branch_.resize(g_cur);
@@ -116,8 +112,7 @@ void LhdTokenGenerator<T>::init_verification_branch(uint64_t cur_token) {
   }
 }
 
-template <typename T>
-void LhdTokenGenerator<T>::update_ngrams_pool() {
+void LhdTokenGenerator::update_ngrams_pool() {
   std::vector<int32_t> ngram(metadata_.ngram - 1);
   // n-gram pool generation
   for (int f = 0; f < metadata_.window; ++f) {
@@ -170,8 +165,7 @@ void LhdTokenGenerator<T>::update_ngrams_pool() {
   }
 }
 
-template <typename T>
-void LhdTokenGenerator<T>::update_lookahead_branch(
+void LhdTokenGenerator::update_lookahead_branch(
     const executorch::aten::Tensor& logits_tensor) {
   for (int i = 0; i < metadata_.window; i++) {
     lhd_branch_prev_[i] = lhd_branch_[0][i];
@@ -189,8 +183,7 @@ void LhdTokenGenerator<T>::update_lookahead_branch(
   }
 }
 
-template <typename T>
-Result<int64_t> LhdTokenGenerator<T>::generate(
+Result<int64_t> LhdTokenGenerator::generate(
     std::vector<uint64_t> tokens,
     int64_t start_pos,
     int32_t seq_len,
@@ -427,8 +420,4 @@ Result<int64_t> LhdTokenGenerator<T>::generate(
   return pos - start_pos;
 }
 
-// Explicit instantiations
-template class LhdTokenGenerator<uint16_t>;
-template class LhdTokenGenerator<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h
index 796dde88014..8fdffb8af72 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h
@@ -15,8 +15,8 @@ namespace example {
  * @brief Class for generating the token using decoder and key-value manager
  * with lookahead decoding.
  */
-template <typename T>
-class LhdTokenGenerator : public TokenGenerator<T> {
+
+class LhdTokenGenerator : public TokenGenerator {
  public:
   struct Metadata {
     int32_t context_len;
@@ -34,18 +34,19 @@ class LhdTokenGenerator : public TokenGenerator<T> {
   LhdTokenGenerator(
       tokenizers::Tokenizer* tokenizer,
       DecoderRunner* decoder_runner,
-      KVManager<T>* kv_manager,
+      KVManager* kv_manager,
       const std::string& forward_name,
       std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
       Metadata metadata,
-      executorch::llm::Stats* stats)
-      : TokenGenerator<T>(
+      executorch::llm::Stats* stats,
+      std::unique_ptr<executorch::runtime::MethodMeta> method_meta)
+      : TokenGenerator(
             tokenizer,
             decoder_runner,
             kv_manager,
             forward_name,
             std::move(eos_ids),
-            typename TokenGenerator<T>::Metadata{
+            TokenGenerator::Metadata{
                 metadata.context_len,
                 metadata.num_heads,
                 metadata.num_layers,
@@ -54,7 +55,8 @@ class LhdTokenGenerator : public TokenGenerator<T> {
                 metadata.use_int64_token,
                 metadata.sliding_window,
                 metadata.cache_mode},
-            stats),
+            stats,
+            std::move(method_meta)),
         metadata_(metadata),
         lhd_branch_(metadata.ngram - 1, std::vector<int32_t>(metadata.window)),
         lhd_branch_prev_(metadata.window),
@@ -104,7 +106,7 @@ class LhdTokenGenerator : public TokenGenerator<T> {
  private:
   // Bring base class's virtual prepare_io into scope so the overload below
   // does not hide it (-Woverloaded-virtual).
-  using TokenGenerator<T>::prepare_io;
+  using TokenGenerator::prepare_io;
   /**
    * @brief Fill in I/O buffers with prompt token and position.
    * @param cur_token Current token.
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp
index 14a93104e1a..de8d1bea0fe 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp
@@ -13,8 +13,7 @@ using executorch::runtime::Result;
 
 namespace example {
 
-template <typename T>
-void MultimodalLhdTokenGenerator<T>::prepare_io(
+void MultimodalLhdTokenGenerator::prepare_io(
     std::vector<uint64_t> input_tokens,
     std::vector<int32_t> input_pos) {
   for (int i = 0; i < metadata_.ar_len; i++) {
@@ -51,8 +50,7 @@ void MultimodalLhdTokenGenerator<T>::prepare_io(
   }
 }
 
-template <typename T>
-void MultimodalLhdTokenGenerator<T>::init_attention_mask(int32_t n_past) {
+void MultimodalLhdTokenGenerator::init_attention_mask(int32_t n_past) {
   std::vector<int32_t> attention_map;
   attention_map.reserve(metadata_.ar_len);
   // Initialize attention mask with current position
@@ -88,8 +86,7 @@ void MultimodalLhdTokenGenerator<T>::init_attention_mask(int32_t n_past) {
   }
 }
 
-template <typename T>
-void MultimodalLhdTokenGenerator<T>::init_lookahead_branch(
+void MultimodalLhdTokenGenerator::init_lookahead_branch(
     const std::vector<uint64_t>& tokens) {
   for (int i = 0; i < metadata_.ngram - 1; ++i) {
     for (int j = 0; j < metadata_.window; ++j) {
@@ -106,9 +103,7 @@ void MultimodalLhdTokenGenerator<T>::init_lookahead_branch(
   is_lhd_branch_initialized_ = true;
 }
 
-template <typename T>
-void MultimodalLhdTokenGenerator<T>::init_verification_branch(
-    uint64_t cur_token) {
+void MultimodalLhdTokenGenerator::init_verification_branch(uint64_t cur_token) {
   const int g_cur = ngrams_pool_.cnt[cur_token];
 
   v_branch_.resize(g_cur);
@@ -132,8 +127,7 @@ void MultimodalLhdTokenGenerator<T>::init_verification_branch(
   }
 }
 
-template <typename T>
-void MultimodalLhdTokenGenerator<T>::update_ngrams_pool() {
+void MultimodalLhdTokenGenerator::update_ngrams_pool() {
   std::vector<int32_t> ngram(metadata_.ngram - 1);
   // n-gram pool generation
   for (int f = 0; f < metadata_.window; ++f) {
@@ -186,8 +180,7 @@ void MultimodalLhdTokenGenerator<T>::update_ngrams_pool() {
   }
 }
 
-template <typename T>
-void MultimodalLhdTokenGenerator<T>::update_lookahead_branch(
+void MultimodalLhdTokenGenerator::update_lookahead_branch(
     const executorch::aten::Tensor& logits_tensor) {
   for (int i = 0; i < metadata_.window; i++) {
     lhd_branch_prev_[i] = lhd_branch_[0][i];
@@ -205,8 +198,7 @@ void MultimodalLhdTokenGenerator<T>::update_lookahead_branch(
   }
 }
 
-template <typename T>
-Result<int64_t> MultimodalLhdTokenGenerator<T>::generate(
+Result<int64_t> MultimodalLhdTokenGenerator::generate(
     std::vector<uint64_t> tokens,
     int64_t start_pos,
     int32_t seq_len,
@@ -412,8 +404,4 @@ Result<int64_t> MultimodalLhdTokenGenerator<T>::generate(
   return pos - start_pos;
 }
 
-// Explicit instantiations
-template class MultimodalLhdTokenGenerator<uint16_t>;
-template class MultimodalLhdTokenGenerator<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h
index 7494afec6da..6ffe285e536 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h
@@ -15,9 +15,7 @@ namespace example {
  * @class MultimodalLhdTokenGenerator
  * @brief Extended LhdTokenGenerator with multimodal embedding support
  */
-template <typename T>
-class MultimodalLhdTokenGenerator
-    : public example::MultimodalTokenGenerator<T> {
+class MultimodalLhdTokenGenerator : public example::MultimodalTokenGenerator {
  public:
   struct Metadata {
     int32_t context_len;
@@ -37,19 +35,20 @@ class MultimodalLhdTokenGenerator
       tokenizers::Tokenizer* tokenizer,
       TokenEmbeddingProcessor* embedding_runner,
       DecoderRunner* decoder_runner,
-      KVManager<T>* kv_manager,
+      KVManager* kv_manager,
       const std::string& forward_name,
       std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
       Metadata metadata,
-      executorch::llm::Stats* stats)
-      : MultimodalTokenGenerator<T>(
+      executorch::llm::Stats* stats,
+      std::unique_ptr<executorch::extension::MethodMeta> method_meta)
+      : MultimodalTokenGenerator(
             tokenizer,
             embedding_runner,
             decoder_runner,
             kv_manager,
             forward_name,
             std::move(eos_ids),
-            typename MultimodalTokenGenerator<T>::Metadata{
+            MultimodalTokenGenerator::Metadata{
                 metadata.context_len,
                 metadata.num_heads,
                 metadata.num_layers,
@@ -59,7 +58,8 @@ class MultimodalLhdTokenGenerator
                 metadata.sliding_window,
                 metadata.cache_mode,
                 metadata.embedding_dim},
-            stats),
+            stats,
+            std::move(method_meta)),
         tok_embedding_runner_(embedding_runner),
         metadata_(metadata),
         lhd_branch_(metadata.ngram - 1, std::vector<int32_t>(metadata.window)),
@@ -110,7 +110,7 @@ class MultimodalLhdTokenGenerator
  private:
   // Bring base class's virtual prepare_io into scope so the overload below
   // does not hide it (-Woverloaded-virtual).
-  using TokenGenerator<T>::prepare_io;
+  using TokenGenerator::prepare_io;
   /**
    * @brief Fill in I/O buffers with prompt token and position.
    * @param cur_token Current token.
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.cpp b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.cpp
index 2859e16a42a..f63a431791b 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.cpp
@@ -16,13 +16,13 @@ using executorch::runtime::TensorInfo;
 
 namespace example {
 
-template <typename T>
-MultimodalPromptProcessor<T>::MultimodalPromptProcessor(
+MultimodalPromptProcessor::MultimodalPromptProcessor(
     DecoderRunner* decoder_runner,
-    KVManager<T>* kv_manager,
+    KVManager* kv_manager,
     const std::string& method_name,
-    Metadata metadata)
-    : PromptProcessor<T>(
+    Metadata metadata,
+    std::unique_ptr<MethodMeta> method_meta)
+    : PromptProcessor(
           decoder_runner,
           kv_manager,
           method_name,
@@ -33,7 +33,8 @@ MultimodalPromptProcessor<T>::MultimodalPromptProcessor(
            metadata.vocab_size,
            metadata.use_int64_token,
            metadata.sliding_window,
-           metadata.cache_mode}),
+           metadata.cache_mode},
+          std::move(method_meta)),
       metadata_(metadata) {
   // Set input_toks_.size to 0 since we use embeddings instead
   input_toks_.size = 0;
@@ -41,8 +42,7 @@ MultimodalPromptProcessor<T>::MultimodalPromptProcessor(
       metadata_.ar_len * metadata_.embedding_dim * sizeof(float);
 };
 
-template <typename T>
-void MultimodalPromptProcessor<T>::init_io(
+void MultimodalPromptProcessor::init_io(
     IMemAlloc* buffer_manager,
     Result<MethodMeta> method_meta) {
   size_t idx = 0;
@@ -66,8 +66,7 @@ void MultimodalPromptProcessor<T>::init_io(
 
   // [I]: attention_mask
   Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(idx++);
-  attention_mask_.data = reinterpret_cast<uint16_t*>(
-      buffer_manager->allocate(attention_mask_.size));
+  attention_mask_.data = buffer_manager->allocate(attention_mask_.size);
   attention_mask_.tensor = std::make_unique<TensorImpl>(
       attention_mask->scalar_type(),
       attention_mask->sizes().size(),
@@ -83,8 +82,8 @@ void MultimodalPromptProcessor<T>::init_io(
   if (metadata_.cache_mode == CacheMode::HybridCache) {
     Result<TensorInfo> window_attention_mask =
         method_meta->input_tensor_meta(idx++);
-    window_attention_mask_.data = reinterpret_cast<uint16_t*>(
-        buffer_manager->allocate(window_attention_mask_.size));
+    window_attention_mask_.data =
+        buffer_manager->allocate(window_attention_mask_.size);
     window_attention_mask_.tensor = std::make_unique<TensorImpl>(
         window_attention_mask->scalar_type(),
         window_attention_mask->sizes().size(),
@@ -120,32 +119,29 @@ void MultimodalPromptProcessor<T>::init_io(
     for (int cache_group = 0; cache_group < 2; ++cache_group) {
       std::vector<std::unique_ptr<TensorImpl>>& cache =
           (cache_group == 0 ? k_cache_in_ : v_cache_in_);
-      std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+      std::vector<KVCache> cache_ptrs = (cache_group == 0)
           ? kv_manager_->get_k_cache_()
           : kv_manager_->get_v_cache_();
       for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
         Result<TensorInfo> kv_cache = method_meta->input_tensor_meta(index);
 
-        T* cache_ptr = cache_ptrs[layer].buffer;
-
         cache[layer] = std::make_unique<TensorImpl>(
             kv_cache->scalar_type(),
             kv_cache->sizes().size(),
             const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-            cache_ptr,
+            cache_ptrs[layer].buffer,
             const_cast<TensorImpl::DimOrderType*>(
                 kv_cache->dim_order().data()));
         input_tensors_.emplace_back(cache[layer].get());
         buffer_manager->add_memory_info(
-            cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+            cache_ptrs[layer].buffer, cache[layer]->nbytes(), kv_cache.get());
       }
     }
   }
 
   // [O]: logits
   Result<TensorInfo> logits = method_meta->output_tensor_meta(0);
-  logits_.data =
-      reinterpret_cast<uint16_t*>(buffer_manager->allocate(logits_.size));
+  logits_.data = buffer_manager->allocate(logits_.size);
   logits_.tensor = std::make_unique<TensorImpl>(
       logits->scalar_type(),
       logits->sizes().size(),
@@ -160,21 +156,22 @@ void MultimodalPromptProcessor<T>::init_io(
   for (int cache_group = 0; cache_group < 2; ++cache_group) {
     std::vector<std::unique_ptr<TensorImpl>>& cache =
         (cache_group == 0 ? k_cache_out_ : v_cache_out_);
-    std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+    std::vector<KVCache> cache_ptrs = (cache_group == 0)
         ? kv_manager_->get_k_cache_()
         : kv_manager_->get_v_cache_();
     for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
       Result<TensorInfo> kv_cache = method_meta->output_tensor_meta(index);
-      T* cache_ptr = cache_ptrs[layer].output_buffer;
       cache[layer] = std::make_unique<TensorImpl>(
           kv_cache->scalar_type(),
           kv_cache->sizes().size(),
           const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-          cache_ptr,
+          cache_ptrs[layer].output_buffer,
           const_cast<TensorImpl::DimOrderType*>(kv_cache->dim_order().data()));
       output_tensors_.emplace_back(cache[layer].get());
       buffer_manager->add_memory_info(
-          cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+          cache_ptrs[layer].output_buffer,
+          cache[layer]->nbytes(),
+          kv_cache.get());
     }
   }
 
@@ -186,8 +183,7 @@ void MultimodalPromptProcessor<T>::init_io(
 }
 
 // prepare embedding
-template <typename T>
-void MultimodalPromptProcessor<T>::prepare_io(
+void MultimodalPromptProcessor::prepare_io(
     const TensorStruct<float>& prompt_embedding,
     int32_t num_prompt_tokens,
     int64_t prompt_pos,
@@ -208,8 +204,7 @@ void MultimodalPromptProcessor<T>::prepare_io(
   }
 }
 
-template <typename T>
-Result<uint64_t> MultimodalPromptProcessor<T>::prefill(
+Result<uint64_t> MultimodalPromptProcessor::prefill(
     const TensorStruct<float>& prompt_embedding,
     int64_t start_pos,
     bool dump_logits,
@@ -301,8 +296,4 @@ Result<uint64_t> MultimodalPromptProcessor<T>::prefill(
   return cur_token;
 }
 
-// Explicit instantiations
-template class MultimodalPromptProcessor<uint16_t>;
-template class MultimodalPromptProcessor<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.h b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.h
index fcfc07c9590..c2769ed9f50 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_prompt_processor.h
@@ -16,8 +16,7 @@ namespace example {
  * @class MultimodalPromptProcessor
  * @brief Extended PromptProcessor with multimodal embedding support
  */
-template <typename T>
-class MultimodalPromptProcessor : public example::PromptProcessor<T> {
+class MultimodalPromptProcessor : public example::PromptProcessor {
  public:
   struct Metadata {
     int32_t context_len;
@@ -33,9 +32,10 @@ class MultimodalPromptProcessor : public example::PromptProcessor<T> {
 
   MultimodalPromptProcessor(
       DecoderRunner* decoder_runner,
-      KVManager<T>* kv_manager,
+      KVManager* kv_manager,
       const std::string& method_name,
-      Metadata metadata);
+      Metadata metadata,
+      std::unique_ptr<executorch::extension::MethodMeta> method_meta);
 
   int64_t get_num_heads() const {
     return metadata_.num_heads;
@@ -74,34 +74,29 @@ class MultimodalPromptProcessor : public example::PromptProcessor<T> {
    * @return Total I/O size in bytes.
    */
   inline const size_t total_prompt_processor_io_size_in_bytes() const {
-    if (metadata_.cache_mode == CacheMode::HybridCache) {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          window_attention_mask_.size + logits_.size + input_embedding_.size;
-    } else {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          logits_.size + input_embedding_.size;
-    }
+    return input_toks_.size + input_pos_.size + attention_mask_.size +
+        window_attention_mask_.size + logits_.size + input_embedding_.size;
   }
 
  private:
   // Reuse members from token_generator
-  using PromptProcessor<T>::decoder_runner_;
-  using PromptProcessor<T>::kv_manager_;
-  using PromptProcessor<T>::method_name_;
-  using PromptProcessor<T>::k_cache_in_;
-  using PromptProcessor<T>::v_cache_in_;
-  using PromptProcessor<T>::k_cache_out_;
-  using PromptProcessor<T>::v_cache_out_;
-  using PromptProcessor<T>::input_toks_;
-  using PromptProcessor<T>::input_pos_;
-  using PromptProcessor<T>::attention_mask_;
-  using PromptProcessor<T>::window_attention_mask_;
-  using PromptProcessor<T>::logits_;
-  using PromptProcessor<T>::inputs_;
-  using PromptProcessor<T>::input_tensors_;
-  using PromptProcessor<T>::output_tensors_;
-  using PromptProcessor<T>::prompt_all_logits_;
-  using PromptProcessor<T>::is_bert;
+  using PromptProcessor::attention_mask_;
+  using PromptProcessor::decoder_runner_;
+  using PromptProcessor::input_pos_;
+  using PromptProcessor::input_tensors_;
+  using PromptProcessor::input_toks_;
+  using PromptProcessor::inputs_;
+  using PromptProcessor::is_bert;
+  using PromptProcessor::k_cache_in_;
+  using PromptProcessor::k_cache_out_;
+  using PromptProcessor::kv_manager_;
+  using PromptProcessor::logits_;
+  using PromptProcessor::method_name_;
+  using PromptProcessor::output_tensors_;
+  using PromptProcessor::prompt_all_logits_;
+  using PromptProcessor::v_cache_in_;
+  using PromptProcessor::v_cache_out_;
+  using PromptProcessor::window_attention_mask_;
 
   /**
    * @brief Fill in I/O buffers with embedding data and position.
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp
index 32e3baf27a9..32575994222 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp
@@ -74,17 +74,17 @@ void print_performance_report(
 
 void save_logits(
     const std::string& dump_logits_path,
-    const std::vector<uint16_t>& prefill_logits,
-    const std::vector<uint16_t>& decode_logits) {
+    const std::vector<std::byte>& prefill_logits,
+    const std::vector<std::byte>& decode_logits) {
   std::ofstream outFile(dump_logits_path.c_str(), std::ios::binary);
   if (outFile.is_open()) {
     outFile.write(
         reinterpret_cast<const char*>(prefill_logits.data()),
-        prefill_logits.size() * sizeof(uint16_t));
+        prefill_logits.size());
 
     outFile.write(
         reinterpret_cast<const char*>(decode_logits.data()),
-        decode_logits.size() * sizeof(uint16_t));
+        decode_logits.size());
     outFile.close();
   } else {
     ET_CHECK_MSG(false, "Error saving the dump logits file");
@@ -93,8 +93,7 @@ void save_logits(
 
 } // namespace
 
-template <typename T>
-QNNMultimodalRunner<T>::QNNMultimodalRunner(
+QNNMultimodalRunner::QNNMultimodalRunner(
     std::unique_ptr<executorch::extension::Module> encoder,
     std::unique_ptr<executorch::extension::Module> tok_embedding,
     std::unique_ptr<executorch::extension::Module> text_decoder,
@@ -148,16 +147,14 @@ QNNMultimodalRunner<T>::QNNMultimodalRunner(
   ET_LOG(Info, "eval mode=%d", eval_mode_);
 }
 
-template <typename T>
-bool QNNMultimodalRunner<T>::is_loaded() const {
+bool QNNMultimodalRunner::is_loaded() const {
   return encoder_->is_loaded() && tok_embedding_->is_loaded() &&
       text_decoder_->is_loaded() && embedding_merger_ && tokenizer_ &&
       decoder_runner_ && prompt_processor_ && token_generator_ && kv_manager_ &&
       buffer_manager_;
 }
 
-template <typename T>
-Error QNNMultimodalRunner<T>::load() {
+Error QNNMultimodalRunner::load() {
   if (is_loaded()) {
     return Error::Ok;
   }
@@ -298,19 +295,22 @@ Error QNNMultimodalRunner<T>::load() {
     sliding_window =
         ET_UNWRAP(text_decoder_->get("get_sliding_window")).toInt();
   }
-  kv_manager_ = std::make_unique<KVManager<T>>(typename KVManager<T>::Metadata{
-      context_len_,
-      head_dim,
-      max_ar_len,
-      max_cache_len,
-      num_heads,
-      num_layers});
-
-  prompt_processor_ = std::make_unique<MultimodalPromptProcessor<T>>(
+  kv_manager_ = std::make_unique<KVManager>(
+      KVManager::Metadata{
+          context_len_,
+          head_dim,
+          max_ar_len,
+          max_cache_len,
+          num_heads,
+          num_layers},
+      std::make_unique<MethodMeta>(std::move(
+          text_decoder_->method_meta(token_generator_method_name).get())));
+
+  prompt_processor_ = std::make_unique<MultimodalPromptProcessor>(
       decoder_runner_.get(),
       kv_manager_.get(),
       prompt_processor_method_name,
-      typename MultimodalPromptProcessor<T>::Metadata{
+      MultimodalPromptProcessor::Metadata{
           context_len_,
           num_heads,
           num_layers,
@@ -319,7 +319,9 @@ Error QNNMultimodalRunner<T>::load() {
           use_int64_token,
           sliding_window,
           cache_mode_,
-          static_cast<int32_t>(dim)});
+          static_cast<int32_t>(dim)},
+      std::make_unique<MethodMeta>(std::move(
+          text_decoder_->method_meta(prompt_processor_method_name).get())));
 
   // Initialize EmbeddingGenerator
   tok_embedding_generator_ = std::make_unique<TokenEmbeddingProcessor>(
@@ -333,14 +335,14 @@ Error QNNMultimodalRunner<T>::load() {
           static_cast<int32_t>(dim)});
   if (eval_mode_ == EvalMode::kLookaheadDecoding) {
     // Initialize TokenGenerator
-    token_generator_ = std::make_unique<MultimodalLhdTokenGenerator<T>>(
+    token_generator_ = std::make_unique<MultimodalLhdTokenGenerator>(
         tokenizer_.get(),
         tok_embedding_generator_.get(),
         decoder_runner_.get(),
         kv_manager_.get(),
         token_generator_method_name,
         std::move(eos_ids),
-        typename MultimodalLhdTokenGenerator<T>::Metadata{
+        MultimodalLhdTokenGenerator::Metadata{
             context_len_,
             num_heads,
             num_layers,
@@ -353,16 +355,18 @@ Error QNNMultimodalRunner<T>::load() {
             sliding_window,
             cache_mode_,
             static_cast<int32_t>(dim)},
-        &stats_);
+        &stats_,
+        std::make_unique<MethodMeta>(std::move(
+            text_decoder_->method_meta(token_generator_method_name).get())));
   } else {
-    token_generator_ = std::make_unique<MultimodalTokenGenerator<T>>(
+    token_generator_ = std::make_unique<MultimodalTokenGenerator>(
         tokenizer_.get(),
         tok_embedding_generator_.get(),
         decoder_runner_.get(),
         kv_manager_.get(),
         token_generator_method_name,
         std::move(eos_ids),
-        typename MultimodalTokenGenerator<T>::Metadata{
+        MultimodalTokenGenerator::Metadata{
             context_len_,
             num_heads,
             num_layers,
@@ -372,7 +376,9 @@ Error QNNMultimodalRunner<T>::load() {
             sliding_window,
             cache_mode_,
             static_cast<int32_t>(dim)},
-        &stats_);
+        &stats_,
+        std::make_unique<MethodMeta>(std::move(
+            text_decoder_->method_meta(token_generator_method_name).get())));
   }
 
   buffer_manager_ = std::make_unique<ClientMem>();
@@ -409,8 +415,7 @@ Error QNNMultimodalRunner<T>::load() {
   return Error::Ok;
 }
 
-template <typename T>
-executorch::runtime::Error QNNMultimodalRunner<T>::generate(
+executorch::runtime::Error QNNMultimodalRunner::generate(
     const std::vector<MultimodalInput>& inputs,
     const llm::GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
@@ -561,8 +566,7 @@ executorch::runtime::Error QNNMultimodalRunner<T>::generate(
   return Error::Ok;
 }
 
-template <typename T>
-Result<ModelVersion> QNNMultimodalRunner<T>::get_model_version() {
+Result<ModelVersion> QNNMultimodalRunner::get_model_version() {
   if (!is_loaded()) {
     stats_.model_load_start_ms = time_in_ms();
     ET_CHECK_OK_OR_RETURN_ERROR(load());
@@ -571,16 +575,11 @@ Result<ModelVersion> QNNMultimodalRunner<T>::get_model_version() {
   return model_version_;
 }
 
-template <typename T>
-Result<MethodMeta> QNNMultimodalRunner<T>::get_encoder_method_meta() {
+Result<MethodMeta> QNNMultimodalRunner::get_encoder_method_meta() {
   if (!is_loaded()) {
     ET_CHECK_OK_OR_RETURN_ERROR(load());
   }
   return encoder_->method_meta(kEncoderForwardName);
 }
 
-// Explicit instantiations
-template class QNNMultimodalRunner<uint16_t>;
-template class QNNMultimodalRunner<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.h b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.h
index 5407d5712b7..363ded0f055 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.h
@@ -66,12 +66,6 @@ inline Modality modality_of(const ModelVersion& model_version) {
       [](const auto& model) { return modality_of(model); }, model_version);
 }
 
-enum KvBitWidth {
-  kWidth8 = 8,
-  kWidth16 = 16,
-};
-
-template <typename T>
 class QNNMultimodalRunner
     : public executorch::extension::llm::MultimodalRunner {
  public:
@@ -139,11 +133,11 @@ class QNNMultimodalRunner
 
   ModelVersion model_version_;
   std::unique_ptr<IMemAlloc> buffer_manager_;
-  std::unique_ptr<KVManager<T>> kv_manager_;
+  std::unique_ptr<KVManager> kv_manager_;
   std::unique_ptr<tokenizers::Tokenizer> tokenizer_;
   std::unique_ptr<DecoderRunner> decoder_runner_;
-  std::unique_ptr<MultimodalPromptProcessor<T>> prompt_processor_;
-  std::unique_ptr<MultimodalTokenGenerator<T>> token_generator_;
+  std::unique_ptr<MultimodalPromptProcessor> prompt_processor_;
+  std::unique_ptr<MultimodalTokenGenerator> token_generator_;
   std::unique_ptr<EncoderRunner> encoder_runner_;
   std::unique_ptr<TokenEmbeddingRunner> tok_embedding_runner_;
   std::unique_ptr<TokenEmbeddingProcessor> tok_embedding_processor_;
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.cpp
index 2ed8ae51f1d..e3f6f8e214e 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.cpp
@@ -15,17 +15,17 @@ using executorch::runtime::TensorInfo;
 
 namespace example {
 // Constructor with embedding runner support
-template <typename T>
-MultimodalTokenGenerator<T>::MultimodalTokenGenerator(
+MultimodalTokenGenerator::MultimodalTokenGenerator(
     tokenizers::Tokenizer* tokenizer,
     TokenEmbeddingProcessor* tok_embedding_runner,
     DecoderRunner* decoder_runner,
-    KVManager<T>* kv_manager,
+    KVManager* kv_manager,
     const std::string& method_name,
     std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
     Metadata metadata,
-    executorch::llm::Stats* stats)
-    : TokenGenerator<T>(
+    executorch::llm::Stats* stats,
+    std::unique_ptr<executorch::extension::MethodMeta> method_meta)
+    : TokenGenerator(
           tokenizer,
           decoder_runner,
           kv_manager,
@@ -39,7 +39,8 @@ MultimodalTokenGenerator<T>::MultimodalTokenGenerator(
            metadata.use_int64_token,
            metadata.sliding_window,
            metadata.cache_mode},
-          stats),
+          stats,
+          std::move(method_meta)),
       tok_embedding_runner_(tok_embedding_runner),
       metadata_(metadata) {
   // Set input_toks_.size to 0 since we use embeddings instead
@@ -48,8 +49,7 @@ MultimodalTokenGenerator<T>::MultimodalTokenGenerator(
       metadata_.ar_len * metadata_.embedding_dim * sizeof(float);
 }
 
-template <typename T>
-void MultimodalTokenGenerator<T>::init_io(
+void MultimodalTokenGenerator::init_io(
     IMemAlloc* buffer_manager,
     Result<MethodMeta> method_meta) {
   size_t idx = 0;
@@ -73,8 +73,7 @@ void MultimodalTokenGenerator<T>::init_io(
 
   // [I]: attention_mask
   Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(idx++);
-  attention_mask_.data = reinterpret_cast<uint16_t*>(
-      buffer_manager->allocate(attention_mask_.size));
+  attention_mask_.data = buffer_manager->allocate(attention_mask_.size);
   attention_mask_.tensor = std::make_unique<TensorImpl>(
       attention_mask->scalar_type(),
       attention_mask->sizes().size(),
@@ -90,8 +89,8 @@ void MultimodalTokenGenerator<T>::init_io(
   if (metadata_.cache_mode == CacheMode::HybridCache) {
     Result<TensorInfo> window_attention_mask =
         method_meta->input_tensor_meta(idx++);
-    window_attention_mask_.data = reinterpret_cast<uint16_t*>(
-        buffer_manager->allocate(window_attention_mask_.size));
+    window_attention_mask_.data =
+        buffer_manager->allocate(window_attention_mask_.size);
     window_attention_mask_.tensor = std::make_unique<TensorImpl>(
         window_attention_mask->scalar_type(),
         window_attention_mask->sizes().size(),
@@ -126,30 +125,27 @@ void MultimodalTokenGenerator<T>::init_io(
   for (int cache_group = 0; cache_group < 2; ++cache_group) {
     std::vector<std::unique_ptr<TensorImpl>>& cache =
         (cache_group == 0 ? k_cache_in_ : v_cache_in_);
-    std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+    std::vector<KVCache> cache_ptrs = (cache_group == 0)
         ? kv_manager_->get_k_cache_()
         : kv_manager_->get_v_cache_();
     for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
       Result<TensorInfo> kv_cache = method_meta->input_tensor_meta(index);
 
-      T* cache_ptr = cache_ptrs[layer].buffer;
-
       cache[layer] = std::make_unique<TensorImpl>(
           kv_cache->scalar_type(),
           kv_cache->sizes().size(),
           const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-          cache_ptr,
+          cache_ptrs[layer].buffer,
           const_cast<TensorImpl::DimOrderType*>(kv_cache->dim_order().data()));
       input_tensors_.emplace_back(cache[layer].get());
       buffer_manager->add_memory_info(
-          cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+          cache_ptrs[layer].buffer, cache[layer]->nbytes(), kv_cache.get());
     }
   }
 
   // [O]: logits
   Result<TensorInfo> logits = method_meta->output_tensor_meta(0);
-  logits_.data =
-      reinterpret_cast<uint16_t*>(buffer_manager->allocate(logits_.size));
+  logits_.data = buffer_manager->allocate(logits_.size);
   logits_.tensor = std::make_unique<TensorImpl>(
       logits->scalar_type(),
       logits->sizes().size(),
@@ -164,21 +160,22 @@ void MultimodalTokenGenerator<T>::init_io(
   for (int cache_group = 0; cache_group < 2; ++cache_group) {
     std::vector<std::unique_ptr<TensorImpl>>& cache =
         (cache_group == 0 ? k_cache_out_ : v_cache_out_);
-    std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+    std::vector<KVCache> cache_ptrs = (cache_group == 0)
         ? kv_manager_->get_k_cache_()
         : kv_manager_->get_v_cache_();
     for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
       Result<TensorInfo> kv_cache = method_meta->output_tensor_meta(index);
-      T* cache_ptr = cache_ptrs[layer].output_buffer;
       cache[layer] = std::make_unique<TensorImpl>(
           kv_cache->scalar_type(),
           kv_cache->sizes().size(),
           const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-          cache_ptr,
+          cache_ptrs[layer].output_buffer,
           const_cast<TensorImpl::DimOrderType*>(kv_cache->dim_order().data()));
       output_tensors_.emplace_back(cache[layer].get());
       buffer_manager->add_memory_info(
-          cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+          cache_ptrs[layer].output_buffer,
+          cache[layer]->nbytes(),
+          kv_cache.get());
     }
   }
 
@@ -190,8 +187,7 @@ void MultimodalTokenGenerator<T>::init_io(
 }
 
 // This function only considers the case where token_generator_ar_len equals 1.
-template <typename T>
-void MultimodalTokenGenerator<T>::prepare_io(
+void MultimodalTokenGenerator::prepare_io(
     uint64_t cur_token,
     int64_t start_pos) {
   // Generate embedding for current token using embedding runner
@@ -209,8 +205,4 @@ void MultimodalTokenGenerator<T>::prepare_io(
   *input_pos_.data = static_cast<int32_t>(start_pos);
 }
 
-// Explicit instantiations
-template class MultimodalTokenGenerator<uint16_t>;
-template class MultimodalTokenGenerator<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.h
index 9eb9c79aaa4..2d0bf9385b4 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_token_generator.h
@@ -16,8 +16,7 @@ namespace example {
  * @class MultimodalTokenGenerator
  * @brief Extended TokenGenerator with multimodal embedding support
  */
-template <typename T>
-class MultimodalTokenGenerator : public example::TokenGenerator<T> {
+class MultimodalTokenGenerator : public example::TokenGenerator {
  public:
   struct Metadata {
     int32_t context_len;
@@ -36,11 +35,12 @@ class MultimodalTokenGenerator : public example::TokenGenerator<T> {
       tokenizers::Tokenizer* tokenizer,
       TokenEmbeddingProcessor* tok_embedding_runner,
       DecoderRunner* decoder_runner,
-      KVManager<T>* kv_manager,
+      KVManager* kv_manager,
       const std::string& method_name,
       std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
       Metadata metadata,
-      executorch::llm::Stats* stats);
+      executorch::llm::Stats* stats,
+      std::unique_ptr<executorch::extension::MethodMeta> method_meta);
 
   virtual ~MultimodalTokenGenerator() = default;
 
@@ -54,36 +54,31 @@ class MultimodalTokenGenerator : public example::TokenGenerator<T> {
       override;
 
   inline const size_t total_token_generator_io_size_in_bytes() const {
-    if (metadata_.cache_mode == CacheMode::HybridCache) {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          window_attention_mask_.size + logits_.size + input_embedding_.size;
-    } else {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          logits_.size + input_embedding_.size;
-    }
+    return input_toks_.size + input_pos_.size + attention_mask_.size +
+        window_attention_mask_.size + logits_.size + input_embedding_.size;
   }
 
  protected:
   // Reuse members from token_generator
-  using TokenGenerator<T>::kv_manager_;
-  using TokenGenerator<T>::input_pos_;
-  using TokenGenerator<T>::attention_mask_;
-  using TokenGenerator<T>::window_attention_mask_;
-  using TokenGenerator<T>::inputs_;
-  using TokenGenerator<T>::input_tensors_;
-  using TokenGenerator<T>::output_tensors_;
+  using TokenGenerator::attention_mask_;
+  using TokenGenerator::input_pos_;
+  using TokenGenerator::input_tensors_;
+  using TokenGenerator::inputs_;
+  using TokenGenerator::kv_manager_;
+  using TokenGenerator::output_tensors_;
+  using TokenGenerator::window_attention_mask_;
 
   // Additional members specific to multimodal
   TensorStruct<float> input_embedding_;
 
  private:
   // Reuse members from token_generator
-  using TokenGenerator<T>::input_toks_;
-  using TokenGenerator<T>::logits_;
-  using TokenGenerator<T>::k_cache_in_;
-  using TokenGenerator<T>::v_cache_in_;
-  using TokenGenerator<T>::k_cache_out_;
-  using TokenGenerator<T>::v_cache_out_;
+  using TokenGenerator::input_toks_;
+  using TokenGenerator::k_cache_in_;
+  using TokenGenerator::k_cache_out_;
+  using TokenGenerator::logits_;
+  using TokenGenerator::v_cache_in_;
+  using TokenGenerator::v_cache_out_;
 
   // Additional members specific to multimodal
   TokenEmbeddingProcessor* tok_embedding_runner_;
diff --git a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.cpp b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.cpp
index 59744d488bd..0cb52246a39 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.cpp
@@ -17,12 +17,12 @@ using executorch::runtime::Span;
 using executorch::runtime::TensorInfo;
 namespace example {
 
-template <typename T>
-PromptProcessor<T>::PromptProcessor(
+PromptProcessor::PromptProcessor(
     DecoderRunner* decoder_runner,
-    KVManager<T>* kv_manager,
+    KVManager* kv_manager,
     const std::string& method_name,
-    Metadata metadata)
+    Metadata metadata,
+    std::unique_ptr<MethodMeta> method_meta)
     : decoder_runner_(decoder_runner),
       kv_manager_(kv_manager),
       method_name_(method_name),
@@ -32,33 +32,41 @@ PromptProcessor<T>::PromptProcessor(
   k_cache_out_.resize(metadata_.num_layers);
   v_cache_out_.resize(metadata_.num_layers);
   // Calculate I/O size
+  Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(1);
+  Result<TensorInfo> logits = method_meta->output_tensor_meta(0);
   input_toks_.size = metadata_.ar_len * sizeof(int64_t);
-  if (is_bert())
+  if (is_bert()) {
     input_pos_.size = 0;
-  else
+  } else {
     input_pos_.size = metadata_.ar_len * sizeof(int32_t);
+  }
 
+  attention_mask_.dtype = attention_mask->scalar_type();
+  attention_mask_.size = metadata_.ar_len * metadata_.context_len *
+      attention_mask_.getElementSize();
   switch (metadata_.cache_mode) {
     case CacheMode::StaticCahce:
-      attention_mask_.size =
-          metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
       window_attention_mask_.size = 0;
       break;
-    case CacheMode::HybridCache:
-      attention_mask_.size =
-          metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
-      window_attention_mask_.size =
-          metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
+    case CacheMode::HybridCache: {
+      Result<TensorInfo> window_attention_mask =
+          method_meta->input_tensor_meta(2);
+      window_attention_mask_.dtype = window_attention_mask->scalar_type();
+      window_attention_mask_.size = metadata_.ar_len * metadata_.context_len *
+          window_attention_mask_.getElementSize();
       break;
+    }
     default:
       ET_CHECK_MSG(false, "Unsupported llama cache mode");
       break;
   }
 
-  logits_.size = metadata_.ar_len * metadata_.vocab_size * sizeof(uint16_t);
+  logits_.dtype = logits->scalar_type();
+  logits_.size =
+      metadata_.ar_len * metadata_.vocab_size * logits_.getElementSize();
 };
-template <typename T>
-void PromptProcessor<T>::init_io(
+
+void PromptProcessor::init_io(
     IMemAlloc* buffer_manager,
     Result<MethodMeta> method_meta) {
   size_t idx = 0;
@@ -80,8 +88,7 @@ void PromptProcessor<T>::init_io(
 
   // [I]: attention_mask
   Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(idx++);
-  attention_mask_.data = reinterpret_cast<uint16_t*>(
-      buffer_manager->allocate(attention_mask_.size));
+  attention_mask_.data = buffer_manager->allocate(attention_mask_.size);
   attention_mask_.tensor = std::make_unique<TensorImpl>(
       attention_mask->scalar_type(),
       attention_mask->sizes().size(),
@@ -97,8 +104,8 @@ void PromptProcessor<T>::init_io(
   if (metadata_.cache_mode == CacheMode::HybridCache) {
     Result<TensorInfo> window_attention_mask =
         method_meta->input_tensor_meta(idx++);
-    window_attention_mask_.data = reinterpret_cast<uint16_t*>(
-        buffer_manager->allocate(window_attention_mask_.size));
+    window_attention_mask_.data =
+        buffer_manager->allocate(window_attention_mask_.size);
     window_attention_mask_.tensor = std::make_unique<TensorImpl>(
         window_attention_mask->scalar_type(),
         window_attention_mask->sizes().size(),
@@ -136,33 +143,30 @@ void PromptProcessor<T>::init_io(
     for (int cache_group = 0; cache_group < 2; ++cache_group) {
       std::vector<std::unique_ptr<TensorImpl>>& cache =
           (cache_group == 0 ? k_cache_in_ : v_cache_in_);
-      std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+      std::vector<KVCache> cache_ptrs = (cache_group == 0)
           ? kv_manager_->get_k_cache_()
           : kv_manager_->get_v_cache_();
       for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
         Result<TensorInfo> kv_cache = method_meta->input_tensor_meta(index);
 
-        T* cache_ptr = cache_ptrs[layer].buffer;
-
         cache[layer] = std::make_unique<TensorImpl>(
             kv_cache->scalar_type(),
             kv_cache->sizes().size(),
             const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-            cache_ptr,
+            cache_ptrs[layer].buffer,
             const_cast<TensorImpl::DimOrderType*>(
                 kv_cache->dim_order().data()));
         input_tensors_.emplace_back(cache[layer].get());
         cache_inputs_.emplace_back(input_tensors_.back());
         buffer_manager->add_memory_info(
-            cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+            cache_ptrs[layer].buffer, cache[layer]->nbytes(), kv_cache.get());
       }
     }
   }
 
   // [O]: logits
   Result<TensorInfo> logits = method_meta->output_tensor_meta(0);
-  logits_.data =
-      reinterpret_cast<uint16_t*>(buffer_manager->allocate(logits_.size));
+  logits_.data = buffer_manager->allocate(logits_.size);
   logits_.tensor = std::make_unique<TensorImpl>(
       logits->scalar_type(),
       logits->sizes().size(),
@@ -177,21 +181,22 @@ void PromptProcessor<T>::init_io(
   for (int cache_group = 0; cache_group < 2; ++cache_group) {
     std::vector<std::unique_ptr<TensorImpl>>& cache =
         (cache_group == 0 ? k_cache_out_ : v_cache_out_);
-    std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+    std::vector<KVCache> cache_ptrs = (cache_group == 0)
         ? kv_manager_->get_k_cache_()
         : kv_manager_->get_v_cache_();
     for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
       Result<TensorInfo> kv_cache = method_meta->output_tensor_meta(index);
-      T* cache_ptr = cache_ptrs[layer].output_buffer;
       cache[layer] = std::make_unique<TensorImpl>(
           kv_cache->scalar_type(),
           kv_cache->sizes().size(),
           const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-          cache_ptr,
+          cache_ptrs[layer].output_buffer,
           const_cast<TensorImpl::DimOrderType*>(kv_cache->dim_order().data()));
       output_tensors_.emplace_back(cache[layer].get());
       buffer_manager->add_memory_info(
-          cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+          cache_ptrs[layer].output_buffer,
+          cache[layer]->nbytes(),
+          kv_cache.get());
     }
   }
   // Prepare the vector of EValue to run inference
@@ -201,13 +206,11 @@ void PromptProcessor<T>::init_io(
   }
 }
 
-template <typename T>
-const std::vector<uint16_t>& PromptProcessor<T>::get_all_logits() {
+const std::vector<std::byte>& PromptProcessor::get_all_logits() {
   return prompt_all_logits_;
 }
 
-template <typename T>
-void PromptProcessor<T>::prepare_io(
+void PromptProcessor::prepare_io(
     const std::vector<uint64_t>& prompt_tokens,
     int64_t prompt_pos,
     int64_t start_pos) {
@@ -232,8 +235,7 @@ void PromptProcessor<T>::prepare_io(
   }
 }
 
-template <typename T>
-Result<uint64_t> PromptProcessor<T>::prefill(
+Result<uint64_t> PromptProcessor::prefill(
     std::vector<uint64_t> prompt_tokens,
     int64_t start_pos,
     bool dump_logits,
@@ -339,7 +341,9 @@ Result<uint64_t> PromptProcessor<T>::prefill(
       prompt_all_logits_.insert(
           prompt_all_logits_.end(),
           logits_.data,
-          logits_.data + metadata_.ar_len * metadata_.vocab_size);
+          logits_.data +
+              metadata_.ar_len * metadata_.vocab_size *
+                  logits_.getElementSize());
     }
     // In the last run, offset to the meaningful logits.
     if (i == num_iters - 1) {
@@ -369,8 +373,4 @@ Result<uint64_t> PromptProcessor<T>::prefill(
   return cur_token;
 }
 
-// Explicit instantiations
-template class PromptProcessor<uint16_t>;
-template class PromptProcessor<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h
index 599f7050d83..5317a8a77e1 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h
@@ -21,7 +21,7 @@ namespace example {
  * @class PromptProcessor
  * @brief Class for processing prompts using decoder and key-value manager.
  */
-template <typename T>
+
 class PromptProcessor {
  public:
   struct Metadata {
@@ -36,9 +36,10 @@ class PromptProcessor {
   };
   PromptProcessor(
       DecoderRunner* decoder_runner,
-      KVManager<T>* kv_manager,
+      KVManager* kv_manager,
       const std::string& method_name,
-      Metadata metadata);
+      Metadata metadata,
+      std::unique_ptr<executorch::extension::MethodMeta> method_meta);
 
   virtual ~PromptProcessor() = default;
 
@@ -55,9 +56,9 @@ class PromptProcessor {
   /**
    * @brief Get the all logits generated
    *
-   * @return std::vector<uint16_t>& all the logits generated
+   * @return std::vector<std::byte>& all the logits generated
    */
-  virtual const std::vector<uint16_t>& get_all_logits();
+  virtual const std::vector<std::byte>& get_all_logits();
 
   /**
    * Prefill an LLM Module with the given text input.
@@ -79,13 +80,8 @@ class PromptProcessor {
    * @return Total I/O size in bytes.
    */
   inline const size_t total_prompt_processor_io_size_in_bytes() const {
-    if (metadata_.cache_mode == CacheMode::HybridCache) {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          window_attention_mask_.size + logits_.size;
-    } else {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          logits_.size;
-    }
+    return input_toks_.size + input_pos_.size + attention_mask_.size +
+        window_attention_mask_.size + logits_.size;
   }
 
  protected:
@@ -105,7 +101,7 @@ class PromptProcessor {
       int64_t prompt_pos,
       int64_t start_pos);
   DecoderRunner* decoder_runner_;
-  KVManager<T>* kv_manager_;
+  KVManager* kv_manager_;
   std::string method_name_;
 
   // metadata
@@ -114,9 +110,9 @@ class PromptProcessor {
   // inputs and outputs
   TensorStruct<int64_t> input_toks_;
   TensorStruct<int32_t> input_pos_;
-  TensorStruct<uint16_t> attention_mask_;
-  TensorStruct<uint16_t> window_attention_mask_;
-  TensorStruct<uint16_t> logits_;
+  TensorStructRaw attention_mask_;
+  TensorStructRaw window_attention_mask_;
+  TensorStructRaw logits_;
 
   // layer -> TensorImpl
   std::vector<std::unique_ptr<executorch::aten::TensorImpl>> k_cache_in_;
@@ -131,6 +127,6 @@ class PromptProcessor {
   std::vector<executorch::runtime::EValue> cache_inputs_;
 
   // Unused by default, only used when dump_logits_path is provided.
-  std::vector<uint16_t> prompt_all_logits_;
+  std::vector<std::byte> prompt_all_logits_;
 };
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
index 0a4a8b9abb5..7257e869dcc 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
@@ -66,17 +66,17 @@ void print_performance_report(
 
 void save_logits(
     const std::string& dump_logits_path,
-    const std::vector<uint16_t>& prefill_logits,
-    const std::vector<uint16_t>& decode_logits) {
+    const std::vector<std::byte>& prefill_logits,
+    const std::vector<std::byte>& decode_logits) {
   std::ofstream outFile(dump_logits_path.c_str(), std::ios::binary);
   if (outFile.is_open()) {
     outFile.write(
         reinterpret_cast<const char*>(prefill_logits.data()),
-        prefill_logits.size() * sizeof(uint16_t));
+        prefill_logits.size());
 
     outFile.write(
         reinterpret_cast<const char*>(decode_logits.data()),
-        decode_logits.size() * sizeof(uint16_t));
+        decode_logits.size());
     outFile.close();
   } else {
     ET_CHECK_MSG(false, "Error saving the dump logits file");
@@ -85,8 +85,7 @@ void save_logits(
 
 } // namespace
 
-template <typename T>
-Runner<T>::Runner(
+Runner::Runner(
     std::unique_ptr<executorch::extension::Module> module,
     const std::string& decoder_model_version,
     const std::string& model_path,
@@ -152,14 +151,12 @@ Runner<T>::Runner(
   ET_LOG(Info, "eval mode=%d", eval_mode_);
 }
 
-template <typename T>
-bool Runner<T>::is_loaded() const {
+bool Runner::is_loaded() const {
   return module_->is_loaded() && tokenizer_ && decoder_runner_ &&
       prompt_processor_ && token_generator_ && kv_manager_ && buffer_manager_;
 }
 
-template <typename T>
-Error Runner<T>::load() {
+Error Runner::load() {
   if (is_loaded()) {
     return Error::Ok;
   }
@@ -275,13 +272,16 @@ Error Runner<T>::load() {
   if (module_->method_names()->count("get_sliding_window") > 0) {
     sliding_window = ET_UNWRAP(module_->get("get_sliding_window")).toInt();
   }
-  kv_manager_ = std::make_unique<KVManager<T>>(typename KVManager<T>::Metadata{
-      context_len_,
-      head_dim,
-      max_ar_len,
-      max_cache_len,
-      num_heads,
-      num_layers});
+  kv_manager_ = std::make_unique<KVManager>(
+      KVManager::Metadata{
+          context_len_,
+          head_dim,
+          max_ar_len,
+          max_cache_len,
+          num_heads,
+          num_layers},
+      std::make_unique<MethodMeta>(
+          std::move(module_->method_meta(token_generator_method_name).get())));
 
   if (attention_sink_rope_module_ != nullptr) {
     attention_sink_rope_runner_ = std::make_unique<AttentionSinkRopeRunner>(
@@ -290,11 +290,11 @@ Error Runner<T>::load() {
         attention_sink_rope_runner_->load(method_names));
   }
 
-  prompt_processor_ = std::make_unique<PromptProcessor<T>>(
+  prompt_processor_ = std::make_unique<PromptProcessor>(
       decoder_runner_.get(),
       kv_manager_.get(),
       prompt_processor_method_name,
-      typename PromptProcessor<T>::Metadata{
+      PromptProcessor::Metadata{
           context_len_,
           num_heads,
           num_layers,
@@ -302,15 +302,17 @@ Error Runner<T>::load() {
           vocab_size,
           use_int64_token,
           sliding_window,
-          cache_mode_});
+          cache_mode_},
+      std::make_unique<MethodMeta>(
+          std::move(module_->method_meta(prompt_processor_method_name).get())));
   if (eval_mode_ == EvalMode::kLookaheadDecoding) {
-    token_generator_ = std::make_unique<LhdTokenGenerator<T>>(
+    token_generator_ = std::make_unique<LhdTokenGenerator>(
         tokenizer_.get(),
         decoder_runner_.get(),
         kv_manager_.get(),
         token_generator_method_name,
         std::move(eos_ids),
-        typename LhdTokenGenerator<T>::Metadata{
+        LhdTokenGenerator::Metadata{
             context_len_,
             num_heads,
             num_layers,
@@ -322,15 +324,17 @@ Error Runner<T>::load() {
             gcap_,
             sliding_window,
             cache_mode_},
-        &stats_);
+        &stats_,
+        std::make_unique<MethodMeta>(std::move(
+            module_->method_meta(token_generator_method_name).get())));
   } else {
-    token_generator_ = std::make_unique<TokenGenerator<T>>(
+    token_generator_ = std::make_unique<TokenGenerator>(
         tokenizer_.get(),
         decoder_runner_.get(),
         kv_manager_.get(),
         token_generator_method_name,
         std::move(eos_ids),
-        typename TokenGenerator<T>::Metadata{
+        TokenGenerator::Metadata{
             context_len_,
             num_heads,
             num_layers,
@@ -339,7 +343,9 @@ Error Runner<T>::load() {
             use_int64_token,
             sliding_window,
             cache_mode_},
-        &stats_);
+        &stats_,
+        std::make_unique<MethodMeta>(std::move(
+            module_->method_meta(token_generator_method_name).get())));
   }
 
   buffer_manager_ = std::make_unique<ClientMem>();
@@ -360,8 +366,7 @@ Error Runner<T>::load() {
   return Error::Ok;
 }
 
-template <typename T>
-Error Runner<T>::generate(
+Error Runner::generate(
     const std::string& prompt,
     const llm::GenerationConfig& config,
     std::function<void(const std::string&)> token_callback,
@@ -370,8 +375,7 @@ Error Runner<T>::generate(
       prompt, false, config, token_callback, stats_callback);
 }
 
-template <typename T>
-Error Runner<T>::generate_from_prompt_or_file(
+Error Runner::generate_from_prompt_or_file(
     const std::string& prompt,
     bool tokenized_prompt,
     const llm::GenerationConfig& config,
@@ -500,8 +504,7 @@ Error Runner<T>::generate_from_prompt_or_file(
   return Error::Ok;
 }
 
-template <typename T>
-Result<DecoderModelVersion> Runner<T>::get_decoder_model_version() {
+Result<DecoderModelVersion> Runner::get_decoder_model_version() {
   if (!is_loaded()) {
     stats_.model_load_start_ms = time_in_ms();
     ET_CHECK_OK_OR_RETURN_ERROR(load());
@@ -510,8 +513,4 @@ Result<DecoderModelVersion> Runner<T>::get_decoder_model_version() {
   return decoder_model_version_;
 }
 
-// Explicit instantiations
-template class Runner<uint16_t>;
-template class Runner<uint8_t>;
-
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.h b/examples/qualcomm/oss_scripts/llama/runner/runner.h
index 39ce62c2d9f..5d03a12f61a 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/runner.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/runner.h
@@ -46,12 +46,6 @@ enum DecoderModelVersion {
   kGemma2,
 };
 
-enum KvBitWidth {
-  kWidth8 = 8,
-  kWidth16 = 16,
-};
-
-template <typename T>
 class Runner : public executorch::extension::llm::IRunner {
  public:
   explicit Runner(
@@ -121,14 +115,15 @@ class Runner : public executorch::extension::llm::IRunner {
 
   DecoderModelVersion decoder_model_version_;
   std::unique_ptr<IMemAlloc> buffer_manager_;
-  std::unique_ptr<KVManager<T>> kv_manager_;
+  std::unique_ptr<KVManager> kv_manager_;
   std::unique_ptr<tokenizers::Tokenizer> tokenizer_;
   std::unique_ptr<DecoderRunner> decoder_runner_;
   std::unique_ptr<AttentionSinkRopeRunner> attention_sink_rope_runner_;
-  std::unique_ptr<PromptProcessor<T>> prompt_processor_;
-  std::unique_ptr<TokenGenerator<T>> token_generator_;
+  std::unique_ptr<PromptProcessor> prompt_processor_;
+  std::unique_ptr<TokenGenerator> token_generator_;
 
   // stats
   executorch::llm::Stats stats_;
 };
+
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp
index 8ab82d932e1..098fcf9efa6 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp
@@ -17,15 +17,15 @@ using executorch::runtime::Span;
 using executorch::runtime::TensorInfo;
 
 namespace example {
-template <typename T>
-TokenGenerator<T>::TokenGenerator(
+TokenGenerator::TokenGenerator(
     tokenizers::Tokenizer* tokenizer,
     DecoderRunner* decoder_runner,
-    KVManager<T>* kv_manager,
+    KVManager* kv_manager,
     const std::string& method_name,
     std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
     Metadata metadata,
-    executorch::llm::Stats* stats)
+    executorch::llm::Stats* stats,
+    std::unique_ptr<MethodMeta> method_meta)
     : tokenizer_(tokenizer),
       decoder_runner_(decoder_runner),
       kv_manager_(kv_manager),
@@ -39,32 +39,37 @@ TokenGenerator<T>::TokenGenerator(
   v_cache_out_.resize(metadata_.num_layers);
 
   // Calculate I/O size
+  Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(1);
+  Result<TensorInfo> logits = method_meta->output_tensor_meta(0);
+
   input_toks_.size = metadata_.ar_len * sizeof(int64_t);
   input_pos_.size = metadata_.ar_len * sizeof(int32_t);
-  attention_mask_.size =
-      metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
+  attention_mask_.dtype = attention_mask->scalar_type();
+  attention_mask_.size = metadata_.ar_len * metadata_.context_len *
+      attention_mask_.getElementSize();
 
   switch (metadata_.cache_mode) {
     case CacheMode::StaticCahce:
-      attention_mask_.size =
-          metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
       window_attention_mask_.size = 0;
       break;
-    case CacheMode::HybridCache:
-      attention_mask_.size =
-          metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
-      window_attention_mask_.size =
-          metadata_.ar_len * metadata_.context_len * sizeof(uint16_t);
+    case CacheMode::HybridCache: {
+      Result<TensorInfo> window_attention_mask =
+          method_meta->input_tensor_meta(2);
+      window_attention_mask_.dtype = window_attention_mask->scalar_type();
+      window_attention_mask_.size = metadata_.ar_len * metadata_.context_len *
+          window_attention_mask_.getElementSize();
       break;
+    }
     default:
       ET_CHECK_MSG(false, "Unsupported llama cache mode");
       break;
   }
 
-  logits_.size = metadata_.ar_len * metadata_.vocab_size * sizeof(uint16_t);
+  logits_.dtype = logits->scalar_type();
+  logits_.size =
+      metadata_.ar_len * metadata_.vocab_size * logits_.getElementSize();
 }
-template <typename T>
-void TokenGenerator<T>::init_io(
+void TokenGenerator::init_io(
     IMemAlloc* buffer_manager,
     Result<MethodMeta> method_meta) {
   size_t idx = 0;
@@ -86,8 +91,7 @@ void TokenGenerator<T>::init_io(
 
   // [I]: attention_mask
   Result<TensorInfo> attention_mask = method_meta->input_tensor_meta(idx++);
-  attention_mask_.data = reinterpret_cast<uint16_t*>(
-      buffer_manager->allocate(attention_mask_.size));
+  attention_mask_.data = buffer_manager->allocate(attention_mask_.size);
   attention_mask_.tensor = std::make_unique<TensorImpl>(
       attention_mask->scalar_type(),
       attention_mask->sizes().size(),
@@ -103,8 +107,8 @@ void TokenGenerator<T>::init_io(
   if (metadata_.cache_mode == CacheMode::HybridCache) {
     Result<TensorInfo> window_attention_mask =
         method_meta->input_tensor_meta(idx++);
-    window_attention_mask_.data = reinterpret_cast<uint16_t*>(
-        buffer_manager->allocate(window_attention_mask_.size));
+    window_attention_mask_.data =
+        buffer_manager->allocate(window_attention_mask_.size);
     window_attention_mask_.tensor = std::make_unique<TensorImpl>(
         window_attention_mask->scalar_type(),
         window_attention_mask->sizes().size(),
@@ -141,31 +145,28 @@ void TokenGenerator<T>::init_io(
   for (int cache_group = 0; cache_group < 2; ++cache_group) {
     std::vector<std::unique_ptr<TensorImpl>>& cache =
         (cache_group == 0 ? k_cache_in_ : v_cache_in_);
-    std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+    std::vector<KVCache> cache_ptrs = (cache_group == 0)
         ? kv_manager_->get_k_cache_()
         : kv_manager_->get_v_cache_();
     for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
       Result<TensorInfo> kv_cache = method_meta->input_tensor_meta(index);
 
-      T* cache_ptr = cache_ptrs[layer].buffer;
-
       cache[layer] = std::make_unique<TensorImpl>(
           kv_cache->scalar_type(),
           kv_cache->sizes().size(),
           const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-          cache_ptr,
+          cache_ptrs[layer].buffer,
           const_cast<TensorImpl::DimOrderType*>(kv_cache->dim_order().data()));
       input_tensors_.emplace_back(cache[layer].get());
       cache_inputs_.emplace_back(input_tensors_.back());
       buffer_manager->add_memory_info(
-          cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+          cache_ptrs[layer].buffer, cache[layer]->nbytes(), kv_cache.get());
     }
   }
 
   // [O]: logits
   Result<TensorInfo> logits = method_meta->output_tensor_meta(0);
-  logits_.data =
-      reinterpret_cast<uint16_t*>(buffer_manager->allocate(logits_.size));
+  logits_.data = buffer_manager->allocate(logits_.size);
   logits_.tensor = std::make_unique<TensorImpl>(
       logits->scalar_type(),
       logits->sizes().size(),
@@ -180,21 +181,22 @@ void TokenGenerator<T>::init_io(
   for (int cache_group = 0; cache_group < 2; ++cache_group) {
     std::vector<std::unique_ptr<TensorImpl>>& cache =
         (cache_group == 0 ? k_cache_out_ : v_cache_out_);
-    std::vector<KVCache<T>> cache_ptrs = (cache_group == 0)
+    std::vector<KVCache> cache_ptrs = (cache_group == 0)
         ? kv_manager_->get_k_cache_()
         : kv_manager_->get_v_cache_();
     for (int layer = 0; layer < metadata_.num_layers; ++layer, ++index) {
       Result<TensorInfo> kv_cache = method_meta->output_tensor_meta(index);
-      T* cache_ptr = cache_ptrs[layer].output_buffer;
       cache[layer] = std::make_unique<TensorImpl>(
           kv_cache->scalar_type(),
           kv_cache->sizes().size(),
           const_cast<TensorImpl::SizesType*>(kv_cache->sizes().data()),
-          cache_ptr,
+          cache_ptrs[layer].output_buffer,
           const_cast<TensorImpl::DimOrderType*>(kv_cache->dim_order().data()));
       output_tensors_.emplace_back(cache[layer].get());
       buffer_manager->add_memory_info(
-          cache_ptr, cache[layer]->nbytes(), kv_cache.get());
+          cache_ptrs[layer].output_buffer,
+          cache[layer]->nbytes(),
+          kv_cache.get());
     }
   }
   // Prepare the vector of EValue to run inference
@@ -204,14 +206,12 @@ void TokenGenerator<T>::init_io(
   }
 }
 
-template <typename T>
-const std::vector<uint16_t>& TokenGenerator<T>::get_all_logits() {
+const std::vector<std::byte>& TokenGenerator::get_all_logits() {
   return token_all_logits_;
 }
 
 // This function only considers the case where token_generator_ar_len equals 1.
-template <typename T>
-void TokenGenerator<T>::prepare_io(uint64_t cur_token, int64_t start_pos) {
+void TokenGenerator::prepare_io(uint64_t cur_token, int64_t start_pos) {
   // update input_tok
   *input_toks_.data =
       metadata_.use_int64_token ? cur_token : static_cast<int32_t>(cur_token);
@@ -219,8 +219,7 @@ void TokenGenerator<T>::prepare_io(uint64_t cur_token, int64_t start_pos) {
   *input_pos_.data = static_cast<int32_t>(start_pos);
 }
 
-template <typename T>
-Result<int64_t> TokenGenerator<T>::generate(
+Result<int64_t> TokenGenerator::generate(
     std::vector<uint64_t> tokens,
     int64_t start_pos,
     int32_t seq_len,
@@ -306,7 +305,9 @@ Result<int64_t> TokenGenerator<T>::generate(
       token_all_logits_.insert(
           token_all_logits_.end(),
           logits_.data,
-          logits_.data + metadata_.ar_len * metadata_.vocab_size);
+          logits_.data +
+              metadata_.ar_len * metadata_.vocab_size *
+                  logits_.getElementSize());
     }
     ET_CHECK_OK_OR_RETURN_ERROR(logits_res.error());
     executorch::aten::Tensor& logits_tensor = logits_res.get();
@@ -374,8 +375,5 @@ Result<int64_t> TokenGenerator<T>::generate(
 
   return pos - start_pos;
 }
-// Explicit instantiations
-template class TokenGenerator<uint16_t>;
-template class TokenGenerator<uint8_t>;
 
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/token_generator.h
index 7f9264b1102..6945d907a76 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/token_generator.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/token_generator.h
@@ -22,7 +22,7 @@ namespace example {
  * @class TokenGenerator
  * @brief Class for generating the token using decoder and key-value manager.
  */
-template <typename T>
+
 class TokenGenerator {
  public:
   struct Metadata {
@@ -38,11 +38,12 @@ class TokenGenerator {
   TokenGenerator(
       tokenizers::Tokenizer* tokenizer,
       DecoderRunner* decoder_runner,
-      KVManager<T>* kv_manager,
+      KVManager* kv_manager,
       const std::string& method_name,
       std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
       Metadata metadata,
-      executorch::llm::Stats* stats);
+      executorch::llm::Stats* stats,
+      std::unique_ptr<executorch::extension::MethodMeta> method_meta);
 
   virtual ~TokenGenerator() = default;
   /**
@@ -58,9 +59,9 @@ class TokenGenerator {
   /**
    * @brief Get the all logits generated
    *
-   * @return std::vector<uint16_t>& all the logits generated
+   * @return std::vector<std::byte>& all the logits generated
    */
-  virtual const std::vector<uint16_t>& get_all_logits();
+  virtual const std::vector<std::byte>& get_all_logits();
 
   /**
      * @brief Generate tokens.
@@ -78,28 +79,23 @@ class TokenGenerator {
       bool dump_logits,
       AttentionSinkRopeRunner* attention_sink_rope_runner);
   inline const size_t total_token_generator_io_size_in_bytes() const {
-    if (metadata_.cache_mode == CacheMode::HybridCache) {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          window_attention_mask_.size + logits_.size;
-    } else {
-      return input_toks_.size + input_pos_.size + attention_mask_.size +
-          logits_.size;
-    }
+    return input_toks_.size + input_pos_.size + attention_mask_.size +
+        window_attention_mask_.size + logits_.size;
   }
 
  protected:
   tokenizers::Tokenizer* tokenizer_;
   DecoderRunner* decoder_runner_;
-  KVManager<T>* kv_manager_;
+  KVManager* kv_manager_;
   std::string method_name_;
   std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
 
   // inputs and outputs
   TensorStruct<int64_t> input_toks_;
   TensorStruct<int32_t> input_pos_;
-  TensorStruct<uint16_t> attention_mask_;
-  TensorStruct<uint16_t> window_attention_mask_;
-  TensorStruct<uint16_t> logits_;
+  TensorStructRaw attention_mask_;
+  TensorStructRaw window_attention_mask_;
+  TensorStructRaw logits_;
 
   // layer -> TensorImpl
   std::vector<std::unique_ptr<executorch::aten::TensorImpl>> k_cache_in_;
@@ -128,6 +124,6 @@ class TokenGenerator {
   Metadata metadata_;
 
   // Unused by default, only used when dump_logits_path is provided.
-  std::vector<uint16_t> token_all_logits_;
+  std::vector<std::byte> token_all_logits_;
 };
 } // namespace example
diff --git a/examples/qualcomm/oss_scripts/llama/runner/utils.h b/examples/qualcomm/oss_scripts/llama/runner/utils.h
index bef6b1a2017..df6dddfdc6e 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/utils.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/utils.h
@@ -8,10 +8,16 @@
 
 #pragma once
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 #include <cstddef>
 #include <memory>
 
 // Template struct to hold tensor data and tensor
+
+// TODO: Refactor these struct to use TensorPtr
+// see https://docs.pytorch.org/executorch/stable/extension-tensor.html
+
+// TensorStruct whose dtype known in compile time
 template <typename T>
 struct TensorStruct {
   std::unique_ptr<executorch::aten::TensorImpl> tensor;
@@ -20,3 +26,38 @@ struct TensorStruct {
   // data size in bytes
   size_t size;
 };
+
+inline size_t getDtypeSize(executorch::aten::ScalarType dtype) {
+  switch (dtype) {
+    case executorch::aten::ScalarType::Float:
+      return sizeof(float);
+    case executorch::aten::ScalarType::Double:
+      return sizeof(double);
+    case executorch::aten::ScalarType::Int:
+      return sizeof(int32_t);
+    case executorch::aten::ScalarType::Long:
+      return sizeof(int64_t);
+    case executorch::aten::ScalarType::Byte:
+      return sizeof(uint8_t);
+    case executorch::aten::ScalarType::UInt16:
+      return sizeof(uint16_t);
+    default:
+      ET_CHECK_MSG(
+          false,
+          "Unsupported scalar type %s",
+          executorch::runtime::toString(dtype));
+      break;
+  }
+}
+
+// TensorStruct whose dtype known in runtime, and raw file is used
+struct TensorStructRaw {
+  std::unique_ptr<executorch::aten::TensorImpl> tensor;
+  std::byte* data;
+  // data size in bytes
+  size_t size;
+  executorch::aten::ScalarType dtype;
+  size_t getElementSize() const {
+    return getDtypeSize(dtype);
+  }
+};
diff --git a/examples/qualcomm/oss_scripts/llama/wrappers/attention_sink_wrappers.py b/examples/qualcomm/oss_scripts/llama/wrappers/attention_sink_wrappers.py
index 48386f181d8..de857dfc17c 100644
--- a/examples/qualcomm/oss_scripts/llama/wrappers/attention_sink_wrappers.py
+++ b/examples/qualcomm/oss_scripts/llama/wrappers/attention_sink_wrappers.py
@@ -13,6 +13,7 @@
 
 import torch
 from executorch.backends.qualcomm._passes import TagQuantIO
+from executorch.backends.qualcomm._passes.build_quant_io import BuildQuantIo
 from executorch.backends.qualcomm._passes.qnn_pass_manager import (
     get_capture_program_passes,
 )
@@ -460,6 +461,7 @@ def compile(self, attention_sink_evictor_pte_path: str):
                 alloc_graph_input=False,
                 alloc_graph_output=False,
             ),
+            passes=[BuildQuantIo()],
             extract_delegate_segments=True,
         )
         exec_prog_mgr = edge_prog_mgr.to_executorch(executorch_config)
diff --git a/examples/qualcomm/oss_scripts/llama/wrappers/llm_wrappers.py b/examples/qualcomm/oss_scripts/llama/wrappers/llm_wrappers.py
index ef72e0765fd..0d5052c89bd 100644
--- a/examples/qualcomm/oss_scripts/llama/wrappers/llm_wrappers.py
+++ b/examples/qualcomm/oss_scripts/llama/wrappers/llm_wrappers.py
@@ -19,6 +19,7 @@
 import torch
 
 from executorch.backends.qualcomm._passes import FoldQDQ, I64toI32, TagQuantIO
+from executorch.backends.qualcomm._passes.build_quant_io import BuildQuantIo
 from executorch.backends.qualcomm._passes.qnn_pass_manager import (
     get_capture_program_passes,
 )
@@ -607,23 +608,28 @@ def quantize(self, request: Request):  # noqa: C901
         ):
             return
 
+        data = request.method_data[TEXT_DECODER]
         # check bit width graph io
         fixed_point_type = {"kv_type": torch.float32, "io_type": torch.float32}
-        if self.quant_recipe.get_kv_io_bit_width() == 8:
-            fixed_point_type["kv_type"] = torch.uint8
-        elif self.quant_recipe.get_kv_io_bit_width() == 16:
-            fixed_point_type["kv_type"] = torch.uint16
+        if data.skip_quantize:
+            # already init as float32
+            return
         else:
-            raise RuntimeError(
-                f"unknown kv io bit width {self.quant_recipe.get_kv_io_bit_width()}"
-            )
+            if self.quant_recipe.get_kv_io_bit_width() == 8:
+                fixed_point_type["kv_type"] = torch.uint8
+            elif self.quant_recipe.get_kv_io_bit_width() == 16:
+                fixed_point_type["kv_type"] = torch.uint16
+            else:
+                raise RuntimeError(
+                    f"unknown kv io bit width {self.quant_recipe.get_kv_io_bit_width()}"
+                )
 
-        if self.quant_recipe.get_logits_output_bit_width() == 16:
-            fixed_point_type["io_type"] = torch.uint16
-        else:
-            raise RuntimeError(
-                f"unknown logits io bit width {self.quant_recipe.get_logits_output_bit_width()}"
-            )
+            if self.quant_recipe.get_logits_output_bit_width() == 16:
+                fixed_point_type["io_type"] = torch.uint16
+            else:
+                raise RuntimeError(
+                    f"unknown logits io bit width {self.quant_recipe.get_logits_output_bit_width()}"
+                )
 
         data = request.method_data[TEXT_DECODER]
         audio_turns = request.method_data[
@@ -906,7 +912,11 @@ def compile(self, request: Request):  # noqa: C901
         # here we use a mechanism to make sure the encoding align correctly and
         # save AoT quantization time as well.
         # ---
-        if self.prefill.decoder is not None and self.prefill.model_args.use_kv_cache:
+        if (
+            self.prefill.decoder is not None
+            and self.prefill.model_args.use_kv_cache
+            and not request.method_data[TEXT_DECODER].skip_quantize
+        ):
             self._encoding_override(
                 decode_model=self.decode.decoder,
                 prefill_model=self.prefill.decoder,
@@ -973,6 +983,7 @@ def compile(self, request: Request):  # noqa: C901
                     alloc_graph_input=False,
                     alloc_graph_output=False,
                 ),
+                passes=[BuildQuantIo()],
             )
             tok_embedding_exec_prog_mgr = tok_embedding_edge_prog_mgr.to_executorch(
                 executorch_config
@@ -1009,6 +1020,7 @@ def compile(self, request: Request):  # noqa: C901
                 alloc_graph_input=False,
                 alloc_graph_output=False,
             ),
+            passes=[BuildQuantIo()],
         )
         exec_prog_mgr = edge_prog_mgr.to_executorch(executorch_config)
         data = request.method_data[TEXT_DECODER]
@@ -1127,7 +1139,9 @@ def compile(self, request: Request):
         if self.control_args.verbose:
             print_delegation_info(edge_prog_mgr.exported_program().graph_module)
 
-        exec_prog_mgr = edge_prog_mgr.to_executorch(ExecutorchBackendConfig())
+        exec_prog_mgr = edge_prog_mgr.to_executorch(
+            ExecutorchBackendConfig(passes=[BuildQuantIo()])
+        )
         data = request.method_data[self.modality]
         with open(
             f"{self.control_args.artifact}/{data.pte_filename}.pte", "wb"
@@ -1223,6 +1237,7 @@ def compile(
         self,
         compile_specs: Dict[str, List[CompileSpec]],
         pte_filenames: Dict[str, str],
+        skip_quantize: Dict[str, bool],
     ):
         compile_request = Request(
             inspect.currentframe().f_code.co_name,
@@ -1230,6 +1245,7 @@ def compile(
                 m: Request.Data(
                     compile_spec=compile_specs[m],
                     pte_filename=pte_filenames[m],
+                    skip_quantize=skip_quantize[m] if m in skip_quantize else False,
                 )
                 for m in self._modalities
             },
diff --git a/exir/passes/spec_prop_pass.py b/exir/passes/spec_prop_pass.py
index 9adbf65dd90..73f943e55e0 100644
--- a/exir/passes/spec_prop_pass.py
+++ b/exir/passes/spec_prop_pass.py
@@ -11,6 +11,7 @@
 
 import torch
 from executorch.exir.delegate import executorch_call_delegate
+from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, ProxyValue
 from executorch.exir.tensor import TensorSpec
 from torch.export.exported_program import ExportGraphSignature
@@ -18,6 +19,14 @@
 from torch.fx.passes.infra.pass_base import PassResult
 from torch.utils import _pytree as pytree
 
+# register llama.fallback (optional — only needed for QNN/llama sharding paths)
+try:
+    import executorch.extension.llm.custom_ops.op_fallback  # noqa: F401
+
+    _llama_fallback_default = exir_ops.edge.llama.fallback.default
+except (ImportError, AttributeError):
+    _llama_fallback_default = None
+
 
 # pyre-ignore
 def make_spec(x):
@@ -75,9 +84,9 @@ def get_spec(x):
                     elif node.op == "call_function" and node.target == operator.getitem:
                         value_spec = pytree.tree_map(get_spec, node.args[0])
                         node.meta["spec"] = value_spec[node.args[1]]
-                    elif (
-                        node.op == "call_function"
-                        and node.target == executorch_call_delegate
+                    elif node.op == "call_function" and node.target in (
+                        executorch_call_delegate,
+                        _llama_fallback_default,
                     ):
                         # Note: We currently rely on delegate node specs not being regenerated,
                         # as the spec is set somewhat manually when adding the call delegate node.
diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp
index e072694f913..b9215f978bc 100644
--- a/extension/android/jni/jni_layer_llama.cpp
+++ b/extension/android/jni/jni_layer_llama.cpp
@@ -206,41 +206,14 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
                 data_files_vector,
                 cpp_load_mode);
         std::string decoder_model = "llama3"; // use llama3 for now
-        // Using 8bit as default since this meta is introduced with 16bit kv io
-        // support and older models only have 8bit kv io.
-        example::KvBitWidth kv_bitwidth = example::KvBitWidth::kWidth8;
-        if (module->method_names()->count("get_kv_io_bit_width") > 0) {
-          kv_bitwidth = static_cast<example::KvBitWidth>(
-              module->get("get_kv_io_bit_width")
-                  .get()
-                  .toScalar()
-                  .to<int64_t>());
-        }
-
-        if (kv_bitwidth == example::KvBitWidth::kWidth8) {
-          runner_ = std::make_unique<example::Runner<uint8_t>>(
-              std::move(module),
-              decoder_model.c_str(),
-              model_path->toStdString().c_str(),
-              tokenizer_path->toStdString().c_str(),
-              "",
-              "",
-              temperature_);
-        } else if (kv_bitwidth == example::KvBitWidth::kWidth16) {
-          runner_ = std::make_unique<example::Runner<uint16_t>>(
-              std::move(module),
-              decoder_model.c_str(),
-              model_path->toStdString().c_str(),
-              tokenizer_path->toStdString().c_str(),
-              "",
-              "",
-              temperature_);
-        } else {
-          ET_CHECK_MSG(
-              false,
-              "Unsupported kv bitwidth: %ld",
-              static_cast<int64_t>(kv_bitwidth));
-        }
+        runner_ = std::make_unique<example::Runner>(
+            std::move(module),
+            decoder_model.c_str(),
+            model_path->toStdString().c_str(),
+            tokenizer_path->toStdString().c_str(),
+            "",
+            "",
+            temperature_);
         model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
 #endif
 #if defined(EXECUTORCH_BUILD_MEDIATEK)
diff --git a/extension/llm/custom_ops/model_sharding.py b/extension/llm/custom_ops/model_sharding.py
index 6838b0958a2..916b13a90b8 100644
--- a/extension/llm/custom_ops/model_sharding.py
+++ b/extension/llm/custom_ops/model_sharding.py
@@ -7,8 +7,9 @@
 import re
 from typing import List
 
-import torch
+import executorch.extension.llm.custom_ops.op_fallback  # noqa: F401
 
+import torch
 from executorch.backends.qualcomm.utils.constants import (
     QCOM_PASS_ACTIVATE_KEY,
     QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY,
@@ -17,27 +18,6 @@
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.export.exported_program import ExportedProgram
-from torch.library import impl, Library
-
-
-fallback_op_lib = Library("llama", "DEF")
-# registering an operator.
-fallback_op_lib.define("fallback(Tensor input) -> Tensor")
-
-
-@impl(fallback_op_lib, "fallback")
-def fallback_impl(a: torch.Tensor) -> torch.Tensor:
-    return a
-
-
-# registering the out variant.
-fallback_op_lib.define("fallback.out(Tensor input, *, Tensor(a!) output) -> Tensor(a!)")
-
-
-@impl(fallback_op_lib, "fallback.out")
-def fallback_out_impl(a: torch.Tensor, *, out: torch.Tensor) -> torch.Tensor:
-    out.copy_(a)
-    return out
 
 
 class SplitGraph(ExportPass):
diff --git a/extension/llm/custom_ops/op_fallback.py b/extension/llm/custom_ops/op_fallback.py
new file mode 100644
index 00000000000..e94c81db51a
--- /dev/null
+++ b/extension/llm/custom_ops/op_fallback.py
@@ -0,0 +1,29 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# pyre-ignore-all-errors
+
+import torch
+
+from torch.library import impl, Library
+
+fallback_op_lib = Library("llama", "DEF")
+# registering an operator.
+fallback_op_lib.define("fallback(Tensor input) -> Tensor")
+
+
+@impl(fallback_op_lib, "fallback")
+def fallback_impl(a: torch.Tensor) -> torch.Tensor:
+    return a
+
+
+# registering the out variant.
+fallback_op_lib.define("fallback.out(Tensor input, *, Tensor(a!) output) -> Tensor(a!)")
+
+
+@impl(fallback_op_lib, "fallback.out")
+def fallback_out_impl(a: torch.Tensor, *, out: torch.Tensor) -> torch.Tensor:
+    out.copy_(a)
+    return out

From 75fb249849b905c79f243f5f1ed2efe6620f6876 Mon Sep 17 00:00:00 2001
From: Gasoonjia <gasoonjia@icloud.com>
Date: Tue, 26 May 2026 02:09:16 -0700
Subject: [PATCH 016/103] add cuda allocator to cmake target (#19764) (#19764)

Summary: Pull Request resolved:
https://github.com/pytorch/executorch/pull/19764

Reviewed By: kirklandsign

Differential Revision: D106332819
---
 backends/cuda/CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/backends/cuda/CMakeLists.txt b/backends/cuda/CMakeLists.txt
index 217c893efe5..d56e994eab4 100644
--- a/backends/cuda/CMakeLists.txt
+++ b/backends/cuda/CMakeLists.txt
@@ -103,7 +103,7 @@ install(
 )
 
 # CUDA-specific AOTI shim symbols (dynamically linked)
-set(_aoti_cuda_shim_sources runtime/shims/memory.cpp
+set(_aoti_cuda_shim_sources runtime/cuda_allocator.cpp runtime/shims/memory.cpp
                             runtime/shims/cuda_guard.cpp
 )
 
@@ -180,8 +180,12 @@ install(
 
 # CUDA backend implementation
 set(_aoti_cuda_backend_sources runtime/cuda_backend.cpp)
+if(_cuda_is_msvc_toolchain)
+  # MSVC links aoti_cuda_backend into portable_lib without relying on C++
+  # symbols exported from aoti_cuda_shims.dll.
+  list(APPEND _aoti_cuda_backend_sources runtime/cuda_allocator.cpp)
+endif()
 
-# CUDA backend implementation
 add_library(aoti_cuda_backend STATIC ${_aoti_cuda_backend_sources})
 
 target_include_directories(

From c5e3e2bb0e8d8591b316d9d9b26ddc3967ae3a6c Mon Sep 17 00:00:00 2001
From: Erik Lundell <erik.lundell@arm.com>
Date: Tue, 26 May 2026 14:50:16 +0200
Subject: [PATCH 017/103] Arm backend: Fix missing init in VGFSetup (#19765)

As documented at
https://vkdoc.net/man/VkDataGraphPipelineSessionBindPointRequirementARM
.stype of VkDataGraphPipelineSessionBindPointRequirementARM should alway
be set to
VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_REQUIREMENT_ARM

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Erik Lundell <erik.lundell@arm.com>
---
 backends/arm/runtime/VGFSetup.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/backends/arm/runtime/VGFSetup.cpp b/backends/arm/runtime/VGFSetup.cpp
index b62a6b2ec23..307d0ab266e 100644
--- a/backends/arm/runtime/VGFSetup.cpp
+++ b/backends/arm/runtime/VGFSetup.cpp
@@ -793,9 +793,14 @@ bool VgfRepr::process_vgf(
     return false;
   }
 
-  vector<VkDataGraphPipelineSessionBindPointRequirementARM>
-      bind_point_requirements;
-  bind_point_requirements.resize(bind_point_count);
+  vector<VkDataGraphPipelineSessionBindPointRequirementARM> bind_point_requirements(
+      bind_point_count,
+      {
+          .sType =
+              VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_REQUIREMENT_ARM,
+          .pNext = nullptr,
+      });
+
   result = vkGetDataGraphPipelineSessionBindPointRequirementsARM(
       vk_device,
       &bind_point_requirements_info,

From a89f1b4b2ed977caea66376daa023d0b9bdfb461 Mon Sep 17 00:00:00 2001
From: Per Held <per.held@arm.com>
Date: Fri, 8 May 2026 15:00:45 +0200
Subject: [PATCH 018/103] Arm backend: Enable CPPCHECK for Cortex-M

Enable CPPCHECK for Cortex-M sources and headers. The Cortex-M kernels
are registered through generated wrappers, so cppcheck cannot see
direct call sites for the exported *_out entry points and reports them
as unused. Keep narrow unusedFunction suppressions for those
registration-visible functions.

The scratch buffer context header is linted as a standalone header but
currently exposes helper API without in-tree call sites, so suppress
unusedFunction at file scope there instead of dropping Cortex-M header
coverage.

Keep the quantize and dequantize context parameters non-const to match
the generated kernel ABI; changing them to const changes the mangled
symbols used by registration.

Signed-off-by: Per Held <per.held@arm.com>

Change-Id: I3bcb6e5d3f125ae400005d1b033b24a07eb7924f
---
 .lintrunner.toml                                        | 2 ++
 backends/cortex_m/ops/cmsis_scratch_buffer_context.h    | 1 +
 backends/cortex_m/ops/cortex_m_ops_common.h             | 4 ++--
 backends/cortex_m/ops/op_dequantize_per_tensor.cpp      | 1 +
 backends/cortex_m/ops/op_maximum.cpp                    | 3 ++-
 backends/cortex_m/ops/op_minimum.cpp                    | 3 ++-
 backends/cortex_m/ops/op_pad.cpp                        | 1 +
 backends/cortex_m/ops/op_quantize_per_tensor.cpp        | 1 +
 backends/cortex_m/ops/op_quantized_add.cpp              | 4 ++--
 backends/cortex_m/ops/op_quantized_avg_pool2d.cpp       | 1 +
 backends/cortex_m/ops/op_quantized_batch_matmul.cpp     | 1 +
 backends/cortex_m/ops/op_quantized_conv2d.cpp           | 1 +
 backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp | 1 +
 backends/cortex_m/ops/op_quantized_linear.cpp           | 1 +
 backends/cortex_m/ops/op_quantized_max_pool2d.cpp       | 1 +
 backends/cortex_m/ops/op_quantized_mul.cpp              | 4 ++--
 backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp | 1 +
 backends/cortex_m/ops/op_softmax.cpp                    | 1 +
 backends/cortex_m/ops/op_transpose.cpp                  | 1 +
 19 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/.lintrunner.toml b/.lintrunner.toml
index 3ee436f61e8..02380ce1356 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -112,6 +112,8 @@ include_patterns = [
     'backends/arm/**/*.cpp',
     'backends/arm/**/*.h',
     'backends/arm/**/*.hpp',
+    'backends/cortex_m/**/*.cpp',
+    'backends/cortex_m/**/*.h',
     'examples/arm/**/*.cpp',
     'examples/arm/**/*.h',
     'examples/arm/**/*.hpp',
diff --git a/backends/cortex_m/ops/cmsis_scratch_buffer_context.h b/backends/cortex_m/ops/cmsis_scratch_buffer_context.h
index 4672f05e777..656309abcee 100644
--- a/backends/cortex_m/ops/cmsis_scratch_buffer_context.h
+++ b/backends/cortex_m/ops/cmsis_scratch_buffer_context.h
@@ -1,3 +1,4 @@
+// cppcheck-suppress-file unusedFunction
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
diff --git a/backends/cortex_m/ops/cortex_m_ops_common.h b/backends/cortex_m/ops/cortex_m_ops_common.h
index 4c0f83d6eb6..2e3f49dd861 100644
--- a/backends/cortex_m/ops/cortex_m_ops_common.h
+++ b/backends/cortex_m/ops/cortex_m_ops_common.h
@@ -113,8 +113,7 @@ inline void validate_quantization_params(
     const int64_t shift2,
     const int64_t output_zero_point,
     const int64_t output_multiplier,
-    const int64_t output_shift,
-    Tensor& output) {
+    const int64_t output_shift) {
   validate_single_quant_params(
       zero_point1, multiplier1, shift1, "Single quant Input1");
   validate_single_quant_params(
@@ -346,6 +345,7 @@ inline bool prepare_cmsis_pool2d_config(
 // https://github.com/ARM-software/CMSIS-NN/blob/main/Include/arm_nnsupportfunctions.h#L1625
 // multiplier: Range {ARM_NN_Q31_MIN + 1, Q32_MAX}
 // shift     : Range {-31, 30}
+// cppcheck-suppress unusedFunction
 inline bool validate_per_channel_quant_params(
     const Int64ArrayRef multipliers,
     const Int64ArrayRef shifts,
diff --git a/backends/cortex_m/ops/op_dequantize_per_tensor.cpp b/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
index ca648f74695..136bce297b0 100644
--- a/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
+++ b/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
@@ -100,6 +100,7 @@ F dequantize_val(float scale, int32_t zero_point, Q qvalue) {
 } // namespace
 
 Tensor& dequantize_per_tensor_out(
+    // cppcheck-suppress constParameterReference
     KernelRuntimeContext& context,
     const Tensor& input,
     double scale,
diff --git a/backends/cortex_m/ops/op_maximum.cpp b/backends/cortex_m/ops/op_maximum.cpp
index fc76f5c8c48..936ef273684 100644
--- a/backends/cortex_m/ops/op_maximum.cpp
+++ b/backends/cortex_m/ops/op_maximum.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2025 Arm Limited and/or its affiliates.
+ * Copyright 2025-2026 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -12,6 +12,7 @@ namespace native {
 
 using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 
+// cppcheck-suppress unusedFunction
 Tensor& maximum_out(
     KernelRuntimeContext& context,
     const Tensor& input1,
diff --git a/backends/cortex_m/ops/op_minimum.cpp b/backends/cortex_m/ops/op_minimum.cpp
index 5a75cb8a1dc..3324a4e39d7 100644
--- a/backends/cortex_m/ops/op_minimum.cpp
+++ b/backends/cortex_m/ops/op_minimum.cpp
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
- * Copyright 2025 Arm Limited and/or its affiliates.
+ * Copyright 2025-2026 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -14,6 +14,7 @@ namespace native {
 
 using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 
+// cppcheck-suppress unusedFunction
 Tensor& minimum_out(
     KernelRuntimeContext& context,
     const Tensor& input1,
diff --git a/backends/cortex_m/ops/op_pad.cpp b/backends/cortex_m/ops/op_pad.cpp
index e59f986c37d..57b5257873e 100644
--- a/backends/cortex_m/ops/op_pad.cpp
+++ b/backends/cortex_m/ops/op_pad.cpp
@@ -19,6 +19,7 @@ constexpr size_t kMaxSupportedDims = 4;
 
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& pad_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_quantize_per_tensor.cpp b/backends/cortex_m/ops/op_quantize_per_tensor.cpp
index 7809db379c7..d8bb34c6eb4 100644
--- a/backends/cortex_m/ops/op_quantize_per_tensor.cpp
+++ b/backends/cortex_m/ops/op_quantize_per_tensor.cpp
@@ -97,6 +97,7 @@ Q quantize_val(
 } // namespace
 
 Tensor& quantize_per_tensor_out(
+    // cppcheck-suppress constParameterReference
     KernelRuntimeContext& context,
     const Tensor& input,
     double scale,
diff --git a/backends/cortex_m/ops/op_quantized_add.cpp b/backends/cortex_m/ops/op_quantized_add.cpp
index f607977aa48..f93bb6c1be9 100644
--- a/backends/cortex_m/ops/op_quantized_add.cpp
+++ b/backends/cortex_m/ops/op_quantized_add.cpp
@@ -13,6 +13,7 @@ namespace cortex_m {
 namespace native {
 using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_add_out(
     KernelRuntimeContext& context,
     const Tensor& input1_int8,
@@ -49,8 +50,7 @@ Tensor& quantized_add_out(
       input2_shift,
       output_zero_point,
       output_multiplier,
-      output_shift,
-      out);
+      output_shift);
 
   ET_LOG(
       Debug,
diff --git a/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp b/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp
index fc04edcc82b..0d22971f89b 100644
--- a/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_avg_pool2d.cpp
@@ -12,6 +12,7 @@ namespace native {
 
 using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_avg_pool2d_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_quantized_batch_matmul.cpp b/backends/cortex_m/ops/op_quantized_batch_matmul.cpp
index 345753ca8fc..fd0859e8b00 100644
--- a/backends/cortex_m/ops/op_quantized_batch_matmul.cpp
+++ b/backends/cortex_m/ops/op_quantized_batch_matmul.cpp
@@ -63,6 +63,7 @@ bool validate_batch_matmul_arguments(
 
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_batch_matmul_out(
     KernelRuntimeContext& context,
     const Tensor& lhs,
diff --git a/backends/cortex_m/ops/op_quantized_conv2d.cpp b/backends/cortex_m/ops/op_quantized_conv2d.cpp
index 8af374c03f8..3d4f19e10d0 100644
--- a/backends/cortex_m/ops/op_quantized_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_conv2d.cpp
@@ -98,6 +98,7 @@ bool validate_conv2d_arguments(
 }
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_conv2d_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp b/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
index 21d4f257501..a8e1fc21ed7 100644
--- a/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp
@@ -135,6 +135,7 @@ bool validate_depthwise_conv2d_arguments(
 }
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_depthwise_conv2d_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_quantized_linear.cpp b/backends/cortex_m/ops/op_quantized_linear.cpp
index 5d018cbc0c4..7448058de8e 100644
--- a/backends/cortex_m/ops/op_quantized_linear.cpp
+++ b/backends/cortex_m/ops/op_quantized_linear.cpp
@@ -13,6 +13,7 @@ namespace cortex_m {
 namespace native {
 using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_linear_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_quantized_max_pool2d.cpp b/backends/cortex_m/ops/op_quantized_max_pool2d.cpp
index 181a29c1b65..ca1b00ff340 100644
--- a/backends/cortex_m/ops/op_quantized_max_pool2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_max_pool2d.cpp
@@ -10,6 +10,7 @@
 namespace cortex_m {
 namespace native {
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_max_pool2d_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_quantized_mul.cpp b/backends/cortex_m/ops/op_quantized_mul.cpp
index 524e74a6b9f..93ce2303d64 100644
--- a/backends/cortex_m/ops/op_quantized_mul.cpp
+++ b/backends/cortex_m/ops/op_quantized_mul.cpp
@@ -18,6 +18,7 @@ constexpr int32_t kInt8ActivationMax = std::numeric_limits<int8_t>::max();
 
 using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_mul_out(
     KernelRuntimeContext& context,
     const Tensor& input1_int8,
@@ -50,8 +51,7 @@ Tensor& quantized_mul_out(
       kZeroShift,
       output_zero_point,
       output_multiplier,
-      output_shift,
-      out);
+      output_shift);
 
   // Extract quantization parameters
   int8_t* input1_ptr = input1_int8.data_ptr<int8_t>();
diff --git a/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp b/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
index d2b66b18802..e7ecbc7c7b4 100644
--- a/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
+++ b/backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp
@@ -83,6 +83,7 @@ bool validate_transpose_conv2d_arguments(
 }
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& quantized_transpose_conv2d_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_softmax.cpp b/backends/cortex_m/ops/op_softmax.cpp
index c07a538db84..97d78d07a05 100644
--- a/backends/cortex_m/ops/op_softmax.cpp
+++ b/backends/cortex_m/ops/op_softmax.cpp
@@ -36,6 +36,7 @@ inline int64_t normalize_dim(const Tensor& tensor, int64_t dim) {
 
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& softmax_out(
     KernelRuntimeContext& context,
     const Tensor& input,
diff --git a/backends/cortex_m/ops/op_transpose.cpp b/backends/cortex_m/ops/op_transpose.cpp
index 7fcbc034283..9ef144296b7 100644
--- a/backends/cortex_m/ops/op_transpose.cpp
+++ b/backends/cortex_m/ops/op_transpose.cpp
@@ -22,6 +22,7 @@ constexpr size_t kMaxSupportedDims = 4;
 
 } // namespace
 
+// cppcheck-suppress unusedFunction
 Tensor& transpose_out(
     KernelRuntimeContext& context,
     const Tensor& input,

From 0bf018f3cce25add0608e6fdd44773bf10cd4209 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 26 May 2026 18:14:17 +0200
Subject: [PATCH 019/103] Add Yolo26 to matrix of tested models on RISC-V
 (#19741)

### Summary

It relates to https://github.com/pytorch/executorch/issues/18833. It
doesn't add Yolo on baremetal, but it at least makes sure that it works
using Portable Kernels and XNNPACK backends.

### Test plan

It's only adding a model to CI, so the CI is the test plan.
---
 .github/workflows/riscv64.yml   | 31 ++++++++++++++++---------------
 examples/riscv/aot_riscv.py     | 33 +++++++++++++++++++++++++++++++++
 examples/riscv/requirements.txt |  1 +
 examples/riscv/setup.sh         |  5 ++++-
 4 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index 14b9ad62047..a7a5273e2b0 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -28,21 +28,22 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - { model: add,        xnnpack: false, quantize: false }
-          - { model: add,        xnnpack: true,  quantize: false }
-          - { model: mv2,        xnnpack: false, quantize: false }
-          - { model: mv2,        xnnpack: true,  quantize: false }
-          - { model: mv2,        xnnpack: true,  quantize: true }
-          - { model: mobilebert, xnnpack: false, quantize: false }
-          - { model: mobilebert, xnnpack: true,  quantize: false }
-          - { model: mobilebert, xnnpack: true,  quantize: true }
-          - { model: llama2,     xnnpack: false, quantize: false }
-          - { model: llama2,     xnnpack: true,  quantize: false }
-          - { model: llama2,     xnnpack: true,  quantize: true }
-          - { model: resnet18,   xnnpack: false, quantize: false }
-          - { model: resnet18,   xnnpack: true,  quantize: false }
-          - { model: resnet18,   xnnpack: true,  quantize: true }
+        model:
+          - add
+          - mv2
+          - mobilebert
+          - llama2
+          - resnet18
+          - yolo26
+        xnnpack: [true, false]
+        quantize: [true, false]
+        exclude:
+          # We only enable quantization with XNNPACK
+          - xnnpack: false
+            quantize: true
+          # We don't test quantization for Yolo26
+          - model: yolo26
+            quantize: true
     permissions:
       id-token: write
       contents: read
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 529e2b1e767..edc30c2653b 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -114,12 +114,45 @@ def build_resnet18():
     return model, example_inputs, test_inputs, False
 
 
+def build_yolo26():
+    # Mirrors examples/models/yolo26/export_and_validate.py: predict() once
+    # to materialise the predictor state Ultralytics expects pre-export.
+    import numpy as np
+    from ultralytics import YOLO
+
+    input_h, input_w = 320, 320
+    yolo = YOLO("yolo26n")
+    yolo.predict(
+        np.ones((input_h, input_w, 3)),
+        imgsz=(input_h, input_w),
+        device="cpu",
+    )
+
+    class Wrapper(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.model = yolo.model.to(torch.device("cpu")).eval()
+
+        def forward(self, x):
+            # yolo.model emits (predictions, feature_maps) in eval; keep the
+            # predictions tensor so BundledIO sees a single tensor output.
+            out = self.model(x)
+            return out[0] if isinstance(out, (tuple, list)) else out
+
+    model = Wrapper().eval()
+    torch.manual_seed(0)
+    example_inputs = (torch.randn(1, 3, input_h, input_w),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
 MODELS = {
     "add": build_add,
     "mv2": build_mv2,
     "mobilebert": build_mobilebert,
     "llama2": build_llama2,
     "resnet18": build_resnet18,
+    "yolo26": build_yolo26,
 }
 
 
diff --git a/examples/riscv/requirements.txt b/examples/riscv/requirements.txt
index 273e7156a1d..649696ae65c 100644
--- a/examples/riscv/requirements.txt
+++ b/examples/riscv/requirements.txt
@@ -1,2 +1,3 @@
 torchvision
 transformers
+ultralytics
diff --git a/examples/riscv/setup.sh b/examples/riscv/setup.sh
index 955c8ca3386..48d5ed27642 100755
--- a/examples/riscv/setup.sh
+++ b/examples/riscv/setup.sh
@@ -33,7 +33,10 @@ ${SUDO} apt-get install -y --no-install-recommends \
     cmake \
     file \
     ca-certificates \
-    qemu-user-static
+    qemu-user-static \
+    libglib2.0-0t64 \
+    libxcb1 \
+    libgl1
 
 if [[ -n "${GCC_VERSION+x}" ]]; then
     ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-gcc riscv64-linux-gnu-gcc /usr/bin/riscv64-linux-gnu-gcc${GCC_VERSION:+-${GCC_VERSION}} 100

From 6128a45130a0e6504c48b8bbdf01259f28ad964c Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Tue, 26 May 2026 09:29:07 -0700
Subject: [PATCH 020/103] Convert minibench Java files to Kotlin (#19760)

Convert BenchmarkActivity, BenchmarkMetric, LlmBenchmark,
LlmModelRunner, and ModelRunner from Java to Kotlin.

Differential Revision: D106195816
---
 .../pytorch/minibench/BenchmarkActivity.java  | 136 ------------------
 .../pytorch/minibench/BenchmarkActivity.kt    | 116 +++++++++++++++
 .../pytorch/minibench/BenchmarkMetric.java    |  74 ----------
 .../org/pytorch/minibench/BenchmarkMetric.kt  |  54 +++++++
 .../org/pytorch/minibench/LlmBenchmark.java   | 123 ----------------
 .../org/pytorch/minibench/LlmBenchmark.kt     |  91 ++++++++++++
 .../org/pytorch/minibench/LlmModelRunner.java | 110 --------------
 .../org/pytorch/minibench/LlmModelRunner.kt   |  91 ++++++++++++
 .../org/pytorch/minibench/ModelRunner.java    |  99 -------------
 .../java/org/pytorch/minibench/ModelRunner.kt |  90 ++++++++++++
 ...xampleUnitTest.java => ExampleUnitTest.kt} |  15 +-
 11 files changed, 449 insertions(+), 550 deletions(-)
 delete mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java
 create mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.kt
 delete mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java
 create mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.kt
 delete mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.java
 create mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.kt
 delete mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.java
 create mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.kt
 delete mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java
 create mode 100644 extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.kt
 rename extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/{ExampleUnitTest.java => ExampleUnitTest.kt} (55%)

diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java
deleted file mode 100644
index 5e1dd48926b..00000000000
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.minibench;
-
-import android.app.Activity;
-import android.content.Intent;
-import android.os.Bundle;
-import android.os.Handler;
-import android.os.HandlerThread;
-import android.os.Looper;
-import android.system.ErrnoException;
-import android.system.Os;
-import com.google.gson.Gson;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-public class BenchmarkActivity extends Activity {
-
-  File mModel;
-  int mNumIter;
-  int mNumWarmupIter;
-  String mTokenizerPath;
-  float mTemperature;
-  String mPrompt;
-
-  HandlerThread mHandlerThread;
-  BenchmarkHandler mHandler;
-
-  List<BenchmarkMetric> mResult;
-
-  @Override
-  protected void onCreate(Bundle savedInstanceState) {
-    super.onCreate(savedInstanceState);
-
-    try {
-      Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true);
-    } catch (ErrnoException e) {
-      finish();
-    }
-
-    Intent intent = getIntent();
-    File modelDir = new File(intent.getStringExtra("model_dir"));
-    File model =
-        Arrays.stream(modelDir.listFiles())
-            .filter(file -> file.getName().endsWith(".pte"))
-            .findFirst()
-            .get();
-
-    int numIter = intent.getIntExtra("num_iter", 50);
-    int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10);
-    String tokenizerPath = intent.getStringExtra("tokenizer_path");
-    float temperature = intent.getFloatExtra("temperature", 0.8f);
-    String prompt = intent.getStringExtra("prompt");
-
-    mModel = model;
-    mNumIter = numIter;
-    mNumWarmupIter = numWarmupIter;
-    mTokenizerPath = tokenizerPath;
-    mTemperature = temperature;
-    mPrompt = prompt;
-    if (mPrompt == null) {
-      mPrompt = "The ultimate answer";
-    }
-    mResult = new ArrayList<>();
-
-    mHandlerThread = new HandlerThread("ModelRunner");
-    mHandlerThread.start();
-    mHandler = new BenchmarkHandler(mHandlerThread.getLooper(), this);
-
-    mHandler.sendEmptyMessage(BenchmarkHandler.MESSAGE_RUN_BENCHMARK);
-  }
-
-  void writeResult() {
-    try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) {
-      Gson gson = new Gson();
-      writer.write(gson.toJson(mResult));
-    } catch (IOException e) {
-      e.printStackTrace();
-    } finally {
-      finish();
-    }
-  }
-}
-
-class BenchmarkHandler extends Handler {
-  public static int MESSAGE_RUN_BENCHMARK = 1;
-  public static int MESSAGE_LLM_RUN_BENCHMARK = 2;
-
-  ModelRunner mModelRunner;
-  BenchmarkActivity mBenchmarkActivity;
-
-  LlmModelRunner mLlmModelRunner;
-  LlmBenchmark mLlmBenchmark;
-
-  public BenchmarkHandler(Looper looper, BenchmarkActivity benchmarkActivity) {
-    super(looper);
-    mModelRunner = new ModelRunner();
-    mBenchmarkActivity = benchmarkActivity;
-  }
-
-  @Override
-  public void handleMessage(android.os.Message msg) {
-    if (msg.what == MESSAGE_RUN_BENCHMARK) {
-      mModelRunner.runBenchmark(
-          mBenchmarkActivity.mModel,
-          mBenchmarkActivity.mNumWarmupIter,
-          mBenchmarkActivity.mNumIter,
-          mBenchmarkActivity.mResult);
-
-      if (mBenchmarkActivity.mTokenizerPath == null) {
-        mBenchmarkActivity.writeResult();
-      } else {
-        this.sendEmptyMessage(MESSAGE_LLM_RUN_BENCHMARK);
-      }
-    } else if (msg.what == MESSAGE_LLM_RUN_BENCHMARK) {
-      mLlmBenchmark =
-          new LlmBenchmark(
-              mBenchmarkActivity,
-              mBenchmarkActivity.mModel.getPath(),
-              mBenchmarkActivity.mTokenizerPath,
-              mBenchmarkActivity.mPrompt,
-              mBenchmarkActivity.mTemperature,
-              mBenchmarkActivity.mResult);
-    }
-  }
-}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.kt b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.kt
new file mode 100644
index 00000000000..b1d69c5f24f
--- /dev/null
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.kt
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.minibench
+
+import android.app.Activity
+import android.os.Bundle
+import android.os.Handler
+import android.os.HandlerThread
+import android.os.Looper
+import android.os.Message
+import android.system.Os
+import com.google.gson.Gson
+import java.io.File
+import java.io.FileWriter
+import java.io.IOException
+
+class BenchmarkActivity : Activity() {
+
+  lateinit var model: File
+  var numIter: Int = 0
+  var numWarmupIter: Int = 0
+  var tokenizerPath: String? = null
+  var temperature: Float = 0.8f
+  var prompt: String = "The ultimate answer"
+
+  private lateinit var handlerThread: HandlerThread
+  private lateinit var handler: BenchmarkHandler
+
+  val results: MutableList<BenchmarkMetric> = mutableListOf()
+
+  override fun onCreate(savedInstanceState: Bundle?) {
+    super.onCreate(savedInstanceState)
+
+    try {
+      Os.setenv("ADSP_LIBRARY_PATH", applicationInfo.nativeLibraryDir, true)
+    } catch (e: android.system.ErrnoException) {
+      finish()
+      return
+    }
+
+    val intent = intent
+    val modelDir = File(intent.getStringExtra("model_dir")!!)
+    model = modelDir.listFiles()!!.first { it.name.endsWith(".pte") }
+
+    numIter = intent.getIntExtra("num_iter", 50)
+    numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10)
+    tokenizerPath = intent.getStringExtra("tokenizer_path")
+    temperature = intent.getFloatExtra("temperature", 0.8f)
+    prompt = intent.getStringExtra("prompt") ?: "The ultimate answer"
+
+    handlerThread = HandlerThread("ModelRunner")
+    handlerThread.start()
+    handler = BenchmarkHandler(handlerThread.looper, this)
+
+    handler.sendEmptyMessage(BenchmarkHandler.MESSAGE_RUN_BENCHMARK)
+  }
+
+  fun writeResult() {
+    try {
+      FileWriter("${filesDir}/benchmark_results.json").use { writer ->
+        writer.write(Gson().toJson(results))
+      }
+    } catch (e: IOException) {
+      e.printStackTrace()
+    } finally {
+      finish()
+    }
+  }
+}
+
+private class BenchmarkHandler(
+    looper: Looper,
+    private val activity: BenchmarkActivity,
+) : Handler(looper) {
+
+  private val modelRunner = ModelRunner()
+
+  override fun handleMessage(msg: Message) {
+    when (msg.what) {
+      MESSAGE_RUN_BENCHMARK -> {
+        modelRunner.runBenchmark(
+            activity.model,
+            activity.numWarmupIter,
+            activity.numIter,
+            activity.results,
+        )
+        if (activity.tokenizerPath == null) {
+          activity.writeResult()
+        } else {
+          sendEmptyMessage(MESSAGE_LLM_RUN_BENCHMARK)
+        }
+      }
+      MESSAGE_LLM_RUN_BENCHMARK -> {
+        LlmBenchmark(
+            activity,
+            activity.model.path,
+            activity.tokenizerPath!!,
+            activity.prompt,
+            activity.temperature,
+            activity.results,
+        )
+      }
+    }
+  }
+
+  companion object {
+    const val MESSAGE_RUN_BENCHMARK = 1
+    const val MESSAGE_LLM_RUN_BENCHMARK = 2
+  }
+}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java
deleted file mode 100644
index 66ab50550a4..00000000000
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.minibench;
-
-import android.app.ActivityManager;
-import android.os.Build;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-class BenchmarkMetric {
-  public static class BenchmarkModel {
-    // The model name, i.e. stories110M
-    String name;
-    String backend;
-    String quantization;
-
-    public BenchmarkModel(final String name, final String backend, final String quantization) {
-      this.name = name;
-      this.backend = backend;
-      this.quantization = quantization;
-    }
-  }
-
-  BenchmarkModel benchmarkModel;
-
-  // The metric name, i.e. TPS
-  String metric;
-
-  // The actual value and the option target value
-  double actualValue;
-  double targetValue;
-
-  public static class DeviceInfo {
-    // Let's see which information we want to include here
-    final String device = Build.BRAND;
-    // The phone model and Android release version
-    final String arch = Build.MODEL;
-    final String os = "Android " + Build.VERSION.RELEASE;
-    final long totalMem = new ActivityManager.MemoryInfo().totalMem;
-    final long availMem = new ActivityManager.MemoryInfo().availMem;
-  }
-
-  DeviceInfo deviceInfo = new DeviceInfo();
-
-  public BenchmarkMetric(
-      final BenchmarkModel benchmarkModel,
-      final String metric,
-      final double actualValue,
-      final double targetValue) {
-    this.benchmarkModel = benchmarkModel;
-    this.metric = metric;
-    this.actualValue = actualValue;
-    this.targetValue = targetValue;
-  }
-
-  // TODO (huydhn): Figure out a way to extract the backend and quantization information from
-  // the .pte model itself instead of parsing its name
-  public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) {
-    final Matcher m =
-        Pattern.compile("(?<name>\\w+)_(?<backend>[\\w\\+]+)_(?<quantization>\\w+)").matcher(model);
-    if (m.matches()) {
-      return new BenchmarkMetric.BenchmarkModel(
-          m.group("name"), m.group("backend"), m.group("quantization"));
-    } else {
-      return new BenchmarkMetric.BenchmarkModel(model, "", "");
-    }
-  }
-}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.kt b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.kt
new file mode 100644
index 00000000000..7bed1ab05c0
--- /dev/null
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.kt
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.minibench
+
+import android.app.ActivityManager
+import android.os.Build
+
+class BenchmarkMetric(
+    val benchmarkModel: BenchmarkModel,
+    val metric: String,
+    val actualValue: Double,
+    val targetValue: Double,
+) {
+  data class BenchmarkModel(
+      val name: String,
+      val backend: String,
+      val quantization: String,
+  )
+
+  class DeviceInfo {
+    val device: String = Build.BRAND
+    val arch: String = Build.MODEL
+    val os: String = "Android ${Build.VERSION.RELEASE}"
+    val totalMem: Long = ActivityManager.MemoryInfo().totalMem
+    val availMem: Long = ActivityManager.MemoryInfo().availMem
+  }
+
+  val deviceInfo: DeviceInfo = DeviceInfo()
+
+  companion object {
+    // TODO (huydhn): Figure out a way to extract the backend and quantization information from
+    // the .pte model itself instead of parsing its name
+    @JvmStatic
+    fun extractBackendAndQuantization(model: String): BenchmarkModel {
+      val pattern = Regex("(?<name>\\w+)_(?<backend>[\\w+]+)_(?<quantization>\\w+)")
+      val match = pattern.matchEntire(model)
+      return if (match != null) {
+        BenchmarkModel(
+            match.groups["name"]!!.value,
+            match.groups["backend"]!!.value,
+            match.groups["quantization"]!!.value,
+        )
+      } else {
+        BenchmarkModel(model, "", "")
+      }
+    }
+  }
+}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.java
deleted file mode 100644
index 0c0436d2676..00000000000
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.minibench;
-
-import android.util.Log;
-import java.util.List;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class LlmBenchmark implements LlmModelRunnerCallback {
-  LlmModelRunner mLlmModelRunner;
-
-  String mPrompt;
-  StatsInfo mStatsInfo;
-
-  List<BenchmarkMetric> mResults;
-  BenchmarkActivity mActivity;
-
-  LlmBenchmark(
-      BenchmarkActivity activity,
-      String modelFile,
-      String tokenizerPath,
-      String prompt,
-      float temperature,
-      List<BenchmarkMetric> results) {
-    mResults = results;
-    mActivity = activity;
-    mStatsInfo = new StatsInfo();
-    mStatsInfo.modelName = modelFile.substring(modelFile.lastIndexOf('/') + 1).replace(".pte", "");
-    mPrompt = prompt;
-    mLlmModelRunner = new LlmModelRunner(modelFile, tokenizerPath, temperature, this);
-    mStatsInfo.loadStart = System.nanoTime();
-  }
-
-  @Override
-  public void onModelLoaded(int status) {
-    mStatsInfo.loadEnd = System.nanoTime();
-    mStatsInfo.loadStatus = status;
-    if (status != 0) {
-      Log.e("LlmBenchmarkRunner", "Loaded failed: " + status);
-      onGenerationStopped();
-      return;
-    }
-    mStatsInfo.generateStart = System.nanoTime();
-    mLlmModelRunner.generate(mPrompt);
-  }
-
-  @Override
-  public void onTokenGenerated(String token) {}
-
-  @Override
-  public void onStats(String stats) {
-    float tps = 0;
-    try {
-      JSONObject jsonObject = new JSONObject(stats);
-      int numGeneratedTokens = jsonObject.getInt("generated_tokens");
-      int inferenceEndMs = jsonObject.getInt("inference_end_ms");
-      int promptEvalEndMs = jsonObject.getInt("prompt_eval_end_ms");
-      tps = (float) numGeneratedTokens / (inferenceEndMs - promptEvalEndMs) * 1000;
-      mStatsInfo.tps = tps;
-    } catch (JSONException e) {
-      Log.e("LLM", "Error parsing JSON: " + e.getMessage());
-    }
-  }
-
-  @Override
-  public void onGenerationStopped() {
-    mStatsInfo.generateEnd = System.nanoTime();
-
-    final BenchmarkMetric.BenchmarkModel benchmarkModel =
-        BenchmarkMetric.extractBackendAndQuantization(mStatsInfo.modelName);
-    // The list of metrics we have atm includes:
-    // Load status
-    mResults.add(new BenchmarkMetric(benchmarkModel, "load_status", mStatsInfo.loadStatus, 0));
-    // Model load time
-    mResults.add(
-        new BenchmarkMetric(
-            benchmarkModel,
-            "llm_model_load_time(ms)",
-            (mStatsInfo.loadEnd - mStatsInfo.loadStart) * 1e-6,
-            0.0f));
-    // LLM generate time
-    mResults.add(
-        new BenchmarkMetric(
-            benchmarkModel,
-            "generate_time(ms)",
-            (mStatsInfo.generateEnd - mStatsInfo.generateStart) * 1e-6,
-            0.0f));
-    // Token per second
-    mResults.add(new BenchmarkMetric(benchmarkModel, "token_per_sec", mStatsInfo.tps, 0.0f));
-    mActivity.writeResult();
-  }
-}
-
-class StatsInfo {
-  int loadStatus;
-  long loadStart;
-  long loadEnd;
-  long generateStart;
-  long generateEnd;
-  float tps;
-  String modelName;
-
-  @Override
-  public String toString() {
-    return "loadStart: "
-        + loadStart
-        + "\nloadEnd: "
-        + loadEnd
-        + "\ngenerateStart: "
-        + generateStart
-        + "\ngenerateEnd: "
-        + generateEnd
-        + "\n"
-        + tps;
-  }
-}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.kt b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.kt
new file mode 100644
index 00000000000..5c75519f870
--- /dev/null
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmark.kt
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.minibench
+
+import android.util.Log
+import org.json.JSONException
+import org.json.JSONObject
+
+class LlmBenchmark(
+    private val activity: BenchmarkActivity,
+    modelFile: String,
+    tokenizerPath: String,
+    private val prompt: String,
+    temperature: Float,
+    private val results: MutableList<BenchmarkMetric>,
+) : LlmModelRunnerCallback {
+
+  private val runner: LlmModelRunner
+  private val statsInfo = StatsInfo()
+
+  init {
+    statsInfo.modelName = modelFile.substringAfterLast('/').removeSuffix(".pte")
+    runner = LlmModelRunner(modelFile, tokenizerPath, temperature, this)
+    statsInfo.loadStart = System.nanoTime()
+  }
+
+  override fun onModelLoaded(status: Int) {
+    statsInfo.loadEnd = System.nanoTime()
+    statsInfo.loadStatus = status
+    if (status != 0) {
+      Log.e("LlmBenchmarkRunner", "Loaded failed: $status")
+      onGenerationStopped()
+      return
+    }
+    statsInfo.generateStart = System.nanoTime()
+    runner.generate(prompt)
+  }
+
+  override fun onTokenGenerated(token: String) {}
+
+  override fun onStats(stats: String) {
+    try {
+      val json = JSONObject(stats)
+      val numGeneratedTokens = json.getInt("generated_tokens")
+      val inferenceEndMs = json.getInt("inference_end_ms")
+      val promptEvalEndMs = json.getInt("prompt_eval_end_ms")
+      statsInfo.tps = numGeneratedTokens.toFloat() / (inferenceEndMs - promptEvalEndMs) * 1000
+    } catch (e: JSONException) {
+      Log.e("LLM", "Error parsing JSON: ${e.message}")
+    }
+  }
+
+  override fun onGenerationStopped() {
+    statsInfo.generateEnd = System.nanoTime()
+
+    val benchmarkModel = BenchmarkMetric.extractBackendAndQuantization(statsInfo.modelName)
+    results.add(BenchmarkMetric(benchmarkModel, "load_status", statsInfo.loadStatus.toDouble(), 0.0))
+    results.add(
+        BenchmarkMetric(
+            benchmarkModel,
+            "llm_model_load_time(ms)",
+            (statsInfo.loadEnd - statsInfo.loadStart) * 1e-6,
+            0.0,
+        ))
+    results.add(
+        BenchmarkMetric(
+            benchmarkModel,
+            "generate_time(ms)",
+            (statsInfo.generateEnd - statsInfo.generateStart) * 1e-6,
+            0.0,
+        ))
+    results.add(BenchmarkMetric(benchmarkModel, "token_per_sec", statsInfo.tps.toDouble(), 0.0))
+    activity.writeResult()
+  }
+}
+
+private class StatsInfo {
+  var loadStatus: Int = 0
+  var loadStart: Long = 0
+  var loadEnd: Long = 0
+  var generateStart: Long = 0
+  var generateEnd: Long = 0
+  var tps: Float = 0f
+  var modelName: String = ""
+}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.java
deleted file mode 100644
index 3a345d3465b..00000000000
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.minibench;
-
-import android.os.Handler;
-import android.os.HandlerThread;
-import android.os.Looper;
-import android.os.Message;
-import android.util.Log;
-import org.pytorch.executorch.extension.llm.LlmCallback;
-import org.pytorch.executorch.extension.llm.LlmModule;
-
-/** A helper class to handle all model running logic within this class. */
-public class LlmModelRunner implements LlmCallback {
-  LlmModule mModule = null;
-
-  String mModelFilePath = "";
-  String mTokenizerFilePath = "";
-
-  LlmModelRunnerCallback mCallback = null;
-
-  HandlerThread mHandlerThread = null;
-  Handler mHandler = null;
-
-  /**
-   * ] Helper class to separate between UI logic and model runner logic. Automatically handle
-   * generate() request on worker thread.
-   *
-   * @param modelFilePath
-   * @param tokenizerFilePath
-   * @param callback
-   */
-  LlmModelRunner(
-      String modelFilePath,
-      String tokenizerFilePath,
-      float temperature,
-      LlmModelRunnerCallback callback) {
-    mModelFilePath = modelFilePath;
-    mTokenizerFilePath = tokenizerFilePath;
-    mCallback = callback;
-
-    mModule = new LlmModule(mModelFilePath, mTokenizerFilePath, 0.8f);
-    mHandlerThread = new HandlerThread("LlmModelRunner");
-    mHandlerThread.start();
-    mHandler = new LlmModelRunnerHandler(mHandlerThread.getLooper(), this);
-
-    mHandler.sendEmptyMessage(LlmModelRunnerHandler.MESSAGE_LOAD_MODEL);
-  }
-
-  int generate(String prompt) {
-    Message msg = Message.obtain(mHandler, LlmModelRunnerHandler.MESSAGE_GENERATE, prompt);
-    msg.sendToTarget();
-    return 0;
-  }
-
-  void stop() {
-    mModule.stop();
-  }
-
-  @Override
-  public void onResult(String result) {
-    mCallback.onTokenGenerated(result);
-  }
-
-  @Override
-  public void onStats(String result) {
-    mCallback.onStats(result);
-  }
-}
-
-class LlmModelRunnerHandler extends Handler {
-  public static int MESSAGE_LOAD_MODEL = 1;
-  public static int MESSAGE_GENERATE = 2;
-
-  private final LlmModelRunner mLlmModelRunner;
-
-  public LlmModelRunnerHandler(Looper looper, LlmModelRunner llmModelRunner) {
-    super(looper);
-    mLlmModelRunner = llmModelRunner;
-  }
-
-  @Override
-  public void handleMessage(android.os.Message msg) {
-    if (msg.what == MESSAGE_LOAD_MODEL) {
-      int status = 0;
-      try {
-        mLlmModelRunner.mModule.load();
-      } catch (Exception e) {
-        status =
-            (e instanceof org.pytorch.executorch.ExecutorchRuntimeException)
-                ? ((org.pytorch.executorch.ExecutorchRuntimeException) e).getErrorCode()
-                : -1;
-      }
-      mLlmModelRunner.mCallback.onModelLoaded(status);
-    } else if (msg.what == MESSAGE_GENERATE) {
-      try {
-        mLlmModelRunner.mModule.generate((String) msg.obj, mLlmModelRunner);
-      } catch (Exception e) {
-        Log.e("LlmModelRunner", "generate() failed", e);
-      }
-      mLlmModelRunner.mCallback.onGenerationStopped();
-    }
-  }
-}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.kt b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.kt
new file mode 100644
index 00000000000..29b9b177fb6
--- /dev/null
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmModelRunner.kt
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.minibench
+
+import android.os.Handler
+import android.os.HandlerThread
+import android.os.Looper
+import android.os.Message
+import android.util.Log
+import org.pytorch.executorch.ExecutorchRuntimeException
+import org.pytorch.executorch.extension.llm.LlmCallback
+import org.pytorch.executorch.extension.llm.LlmModule
+
+/** A helper class to handle all model running logic within this class. */
+class LlmModelRunner(
+    modelFilePath: String,
+    tokenizerFilePath: String,
+    temperature: Float,
+    val callback: LlmModelRunnerCallback,
+) : LlmCallback {
+
+  val module: LlmModule = LlmModule(modelFilePath, tokenizerFilePath, temperature)
+  private val handlerThread: HandlerThread = HandlerThread("LlmModelRunner")
+  private val handler: Handler
+
+  init {
+    handlerThread.start()
+    handler = LlmModelRunnerHandler(handlerThread.looper, this)
+    handler.sendEmptyMessage(LlmModelRunnerHandler.MESSAGE_LOAD_MODEL)
+  }
+
+  fun generate(prompt: String): Int {
+    val msg = Message.obtain(handler, LlmModelRunnerHandler.MESSAGE_GENERATE, prompt)
+    msg.sendToTarget()
+    return 0
+  }
+
+  fun stop() {
+    module.stop()
+  }
+
+  override fun onResult(result: String) {
+    callback.onTokenGenerated(result)
+  }
+
+  override fun onStats(stats: String) {
+    callback.onStats(stats)
+  }
+}
+
+private class LlmModelRunnerHandler(
+    looper: Looper,
+    private val runner: LlmModelRunner,
+) : Handler(looper) {
+
+  override fun handleMessage(msg: Message) {
+    when (msg.what) {
+      MESSAGE_LOAD_MODEL -> {
+        val status =
+            try {
+              runner.module.load()
+              0
+            } catch (e: ExecutorchRuntimeException) {
+              e.errorCode
+            } catch (e: Exception) {
+              -1
+            }
+        runner.callback.onModelLoaded(status)
+      }
+      MESSAGE_GENERATE -> {
+        try {
+          runner.module.generate(msg.obj as String, runner)
+        } catch (e: Exception) {
+          Log.e("LlmModelRunner", "generate() failed", e)
+        }
+        runner.callback.onGenerationStopped()
+      }
+    }
+  }
+
+  companion object {
+    const val MESSAGE_LOAD_MODEL = 1
+    const val MESSAGE_GENERATE = 2
+  }
+}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java
deleted file mode 100644
index 915496a25af..00000000000
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.minibench;
-
-import android.os.Debug;
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import org.pytorch.executorch.Module;
-
-public class ModelRunner {
-  /**
-   * @return list of #BenchmarkMetric
-   */
-  public void runBenchmark(
-      File model, int numWarmupIter, int numIter, List<BenchmarkMetric> results) {
-    long pssIdle = Debug.getPss();
-
-    List<Double> latency = new ArrayList<>();
-
-    long loadStart = System.nanoTime();
-    Module module = Module.load(model.getPath());
-    int errorCode = 0;
-    try {
-      module.loadMethod("forward");
-    } catch (Exception e) {
-      errorCode =
-          (e instanceof org.pytorch.executorch.ExecutorchRuntimeException)
-              ? ((org.pytorch.executorch.ExecutorchRuntimeException) e).getErrorCode()
-              : -1;
-    }
-    long loadEnd = System.nanoTime();
-
-    final BenchmarkMetric.BenchmarkModel benchmarkModel =
-        BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", ""));
-
-    if (errorCode != 0) {
-      results.add(
-          new BenchmarkMetric(
-              benchmarkModel, "model_load_time(ms)", (loadEnd - loadStart) * 1e-6, 0.0f));
-      results.add(new BenchmarkMetric(benchmarkModel, "load_status", errorCode, 0));
-      module.destroy();
-      return;
-    }
-
-    try {
-      for (int i = 0; i < numWarmupIter; i++) {
-        module.forward();
-      }
-
-      for (int i = 0; i < numIter; i++) {
-        long start = System.nanoTime();
-        module.forward();
-        double forwardMs = (System.nanoTime() - start) * 1e-6;
-        latency.add(forwardMs);
-      }
-
-      module.etdump();
-
-      // Currently the result has large variance from outliers, so only use
-      // 80% samples in the middle (trimmean 0.2)
-      Collections.sort(latency);
-      int resultSize = latency.size();
-      List<Double> usedLatencyResults = latency.subList(resultSize / 10, resultSize * 9 / 10);
-
-      results.add(
-          new BenchmarkMetric(
-              benchmarkModel,
-              "avg_inference_latency(ms)",
-              latency.stream().mapToDouble(l -> l).average().orElse(0.0f),
-              0.0f));
-      results.add(
-          new BenchmarkMetric(
-              benchmarkModel,
-              "trimmean_inference_latency(ms)",
-              usedLatencyResults.stream().mapToDouble(l -> l).average().orElse(0.0f),
-              0.0f));
-      // Model load time
-      results.add(
-          new BenchmarkMetric(
-              benchmarkModel, "model_load_time(ms)", (loadEnd - loadStart) * 1e-6, 0.0f));
-      // Load status
-      results.add(new BenchmarkMetric(benchmarkModel, "load_status", errorCode, 0));
-      // RAM PSS usage
-      results.add(
-          new BenchmarkMetric(
-              benchmarkModel, "ram_pss_usage(mb)", (Debug.getPss() - pssIdle) / 1024, 0));
-    } finally {
-      module.destroy();
-    }
-  }
-}
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.kt b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.kt
new file mode 100644
index 00000000000..0f292b0d900
--- /dev/null
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/ModelRunner.kt
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.minibench
+
+import android.os.Debug
+import java.io.File
+import org.pytorch.executorch.ExecutorchRuntimeException
+import org.pytorch.executorch.Module
+
+class ModelRunner {
+
+  fun runBenchmark(
+      model: File,
+      numWarmupIter: Int,
+      numIter: Int,
+      results: MutableList<BenchmarkMetric>,
+  ) {
+    val pssIdle = Debug.getPss()
+    val latency = mutableListOf<Double>()
+
+    val loadStart = System.nanoTime()
+    val module = Module.load(model.path)
+    var errorCode = 0
+    try {
+      module.loadMethod("forward")
+    } catch (e: ExecutorchRuntimeException) {
+      errorCode = e.errorCode
+    } catch (e: Exception) {
+      errorCode = -1
+    }
+    val loadEnd = System.nanoTime()
+
+    val benchmarkModel =
+        BenchmarkMetric.extractBackendAndQuantization(model.name.removeSuffix(".pte"))
+
+    if (errorCode != 0) {
+      results.add(
+          BenchmarkMetric(benchmarkModel, "model_load_time(ms)", (loadEnd - loadStart) * 1e-6, 0.0))
+      results.add(BenchmarkMetric(benchmarkModel, "load_status", errorCode.toDouble(), 0.0))
+      module.destroy()
+      return
+    }
+
+    try {
+      repeat(numWarmupIter) { module.forward() }
+
+      repeat(numIter) {
+        val start = System.nanoTime()
+        module.forward()
+        latency.add((System.nanoTime() - start) * 1e-6)
+      }
+
+      module.etdump()
+
+      // Currently the result has large variance from outliers, so only use
+      // 80% samples in the middle (trimmean 0.2)
+      latency.sort()
+      val trimmed = latency.subList(latency.size / 10, latency.size * 9 / 10)
+
+      results.add(
+          BenchmarkMetric(
+              benchmarkModel,
+              "avg_inference_latency(ms)",
+              latency.average(),
+              0.0,
+          ))
+      results.add(
+          BenchmarkMetric(
+              benchmarkModel,
+              "trimmean_inference_latency(ms)",
+              trimmed.average(),
+              0.0,
+          ))
+      results.add(
+          BenchmarkMetric(benchmarkModel, "model_load_time(ms)", (loadEnd - loadStart) * 1e-6, 0.0))
+      results.add(BenchmarkMetric(benchmarkModel, "load_status", errorCode.toDouble(), 0.0))
+      results.add(
+          BenchmarkMetric(
+              benchmarkModel, "ram_pss_usage(mb)", (Debug.getPss() - pssIdle) / 1024.0, 0.0))
+    } finally {
+      module.destroy()
+    }
+  }
+}
diff --git a/extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java b/extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.kt
similarity index 55%
rename from extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java
rename to extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.kt
index c6a6a76a4d8..b98a49e4bf9 100644
--- a/extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java
+++ b/extension/benchmark/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.kt
@@ -6,20 +6,19 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-package org.pytorch.minibench;
+package org.pytorch.minibench
 
-import static org.junit.Assert.*;
-
-import org.junit.Test;
+import org.junit.Assert.assertEquals
+import org.junit.Test
 
 /**
  * Example local unit test, which will execute on the development machine (host).
  *
- * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ * @see [Testing documentation](http://d.android.com/tools/testing)
  */
-public class ExampleUnitTest {
+class ExampleUnitTest {
   @Test
-  public void addition_isCorrect() {
-    assertEquals(4, 2 + 2);
+  fun addition_isCorrect() {
+    assertEquals(4, 2 + 2)
   }
 }

From 043c404bf8146391dbc8ff89e732d2479f8c7bb9 Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Tue, 26 May 2026 10:21:55 -0700
Subject: [PATCH 021/103] Cortex-M backend: enable Cortex-M0+ builds against
 Corstone-300 (#19731)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Summary
Extend the Cortex-M cross-CPU build pipeline to Armv6-M by patching two
upstream issues that block the Corstone-300 target source and the CMSIS
Cortex DFP from building for `cortex-m0plus`:

* `core_platform/0003-*.patch` guards the `HardFault_Handler` in
`targets/corstone-300/target.cpp`. The handler uses an `ite eq` IT-block
in inline asm and dereferences the SCB CFSR/BFAR/MMFAR fault-status
registers; both are Armv7-M / Armv8-M Mainline only. The patch wraps the
rich handler in `__ARM_ARCH_7M__ / 7EM / 8M_MAIN / 8_1M_MAIN` and falls
back to a minimal stub on Armv6-M / Armv8-M Baseline (M0/M0+/M23).

* `core_software/0002-*.patch` fixes `cmsis.cmake`'s handling of the M0+
device. The Cortex DFP names the device directory and headers
`ARMCM0plus` (lowercase suffix), while the device sources
(`startup_ARMCM0plus.c`, `system_ARMCM0plus.c`) gate their
implementations on the `ARMCM0P` preprocessor macro — three different
spellings. The previous `string(TOUPPER ...)` produced `ARMCM0PLUS`: the
include path lookup failed and the source files hit their `#error device
not specified!` guard. Override `ARM_CPU` to `ARMCM0plus` for the
directory + filename and introduce a separate `CMSIS_DEVICE_CPU_DEFINE`
set to `ARMCM0P` for the cmsis_startup and cmsis_system
compile-definitions; all other cores still drive both paths from the
uppercased default.

Both patches are layered via the existing `patch_repo` mechanism; the
`corstone_utils.cmake` TODO is updated so the deletion plan for 0002 and
0003 is documented together.

### Test Plan
Locally validated end-to-end on the Corstone-300 FVP with the `qadd`
model: `cortex-m0plus` build links a runner that includes
`startup_ARMCM0plus.c` / `system_ARMCM0plus.c` and the patched
`target.cpp`, and the FVP run prints
`TEST: BundleIO index[0] Test_result: PASS` with all error stats zero.
The bundled `libcmsis-nn.a` reports `Tag_CPU_arch: v6S-M` and
`Tag_THUMB_ISA_use: Thumb-1` with zero DSP / MVE / saturating
instructions, confirming the scalar code path was exercised.

Authored with Claude.

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell
---
 backends/arm/scripts/corstone_utils.cmake     | 11 +--
 ...-Guard-HardFault-Handler-for-Armv6-M.patch | 49 ++++++++++++
 ...irectory-case-and-compile-define-mis.patch | 77 +++++++++++++++++++
 3 files changed, 132 insertions(+), 5 deletions(-)
 create mode 100644 examples/arm/ethos-u-setup/core_platform/0003-Guard-HardFault-Handler-for-Armv6-M.patch
 create mode 100644 examples/arm/ethos-u-setup/core_software/0002-Fix-ARMCM0plus-directory-case-and-compile-define-mis.patch

diff --git a/backends/arm/scripts/corstone_utils.cmake b/backends/arm/scripts/corstone_utils.cmake
index 58ce4f9a919..34f04ba1225 100644
--- a/backends/arm/scripts/corstone_utils.cmake
+++ b/backends/arm/scripts/corstone_utils.cmake
@@ -50,11 +50,12 @@ function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
     WORKING_DIRECTORY ${ET_DIR_PATH}
   )
   # Always patch the core_platform repo since this is fast enough. TODO:
-  # examples/arm/ethos-u-setup/core_platform/0002-*.patch is a transient bridge
-  # that guards Armv8-M-only MPU init so the source compiles for non-Armv8-M
-  # Cortex-M cores. Once the same guard lands upstream in ethos-u/core_platform
-  # and ${core_platform_base_rev} is bumped past that commit, delete the 0002
-  # patch.
+  # examples/arm/ethos-u-setup/core_platform/0002-*.patch and 0003-*.patch are
+  # transient bridges that guard Armv8-M-only MPU init and the Armv7-M-and-newer
+  # HardFault handler so the Corstone-300 target source compiles for older
+  # Cortex-M cores. Once the equivalent guards land upstream in
+  # ethos-u/core_platform and ${core_platform_base_rev} is bumped past those
+  # commits, delete the 0002 and 0003 patches.
   set(core_platform_base_rev "26.02")
   execute_process(
     COMMAND
diff --git a/examples/arm/ethos-u-setup/core_platform/0003-Guard-HardFault-Handler-for-Armv6-M.patch b/examples/arm/ethos-u-setup/core_platform/0003-Guard-HardFault-Handler-for-Armv6-M.patch
new file mode 100644
index 00000000000..57a27cb3dee
--- /dev/null
+++ b/examples/arm/ethos-u-setup/core_platform/0003-Guard-HardFault-Handler-for-Armv6-M.patch
@@ -0,0 +1,49 @@
+From 380045853a133f298cee1bcf0c959b93ea94f9a2 Mon Sep 17 00:00:00 2001
+From: RJ Ascani <rja@meta.com>
+Date: Wed, 13 May 2026 15:42:13 -0700
+Subject: [PATCH] Guard HardFault_Handler for Armv6-M / Armv8-M Baseline
+
+The Corstone-300 HardFault_Handler is written for Armv7-M / Armv8-M
+Mainline: it uses an `ite eq` IT-block in inline asm, and dereferences
+the SCB CFSR/BFAR/MMFAR fault-status registers. Neither is available
+on Armv6-M (Cortex-M0/M0+) or Armv8-M Baseline (Cortex-M23), so the
+file fails to compile when the Corstone-300 target source is built
+with `-mcpu=cortex-m0plus` to exercise the scalar CMSIS-NN code paths
+on the Corstone-300 M55 simulator (an ISA superset).
+
+Wrap the Mainline-only implementation in
+`__ARM_ARCH_7M__ / 7EM / 8M_MAIN / 8_1M_MAIN` and fall back to a
+minimal `printf("Hard fault"); exit(1)` stub on Baseline cores.
+---
+ targets/corstone-300/target.cpp | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/targets/corstone-300/target.cpp b/targets/corstone-300/target.cpp
+index bda2248..4aa3eea 100644
+--- a/targets/corstone-300/target.cpp
++++ b/targets/corstone-300/target.cpp
+@@ -246,6 +246,11 @@ struct ExcContext {
+ };
+ 
+ void HardFault_Handler() {
++    // Armv6-M (M0/M0+) and Armv8-M Baseline (M23) lack the IT instruction and
++    // the SCB CFSR/BFAR/MMFAR fault-status registers, so the rich handler
++    // can't compile or run there. Fall back to a minimal stub on those cores.
++#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) || defined(__ARM_ARCH_8M_MAIN__) || \
++    defined(__ARM_ARCH_8_1M_MAIN__)
+     int irq;
+     struct ExcContext *e;
+     uint32_t sp;
+@@ -267,6 +272,9 @@ void HardFault_Handler() {
+            sp);
+     printf(
+         "%11s cfsr=0x%08" PRIx32 " bfar=0x%08" PRIx32 " mmfar=0x%08" PRIx32 "\n", "", SCB->CFSR, SCB->BFAR, SCB->MMFAR);
++#else
++    printf("Hard fault\n");
++#endif
+     exit(1);
+ }
+ }
+-- 
+2.53.0
+
diff --git a/examples/arm/ethos-u-setup/core_software/0002-Fix-ARMCM0plus-directory-case-and-compile-define-mis.patch b/examples/arm/ethos-u-setup/core_software/0002-Fix-ARMCM0plus-directory-case-and-compile-define-mis.patch
new file mode 100644
index 00000000000..96dcdd9f29d
--- /dev/null
+++ b/examples/arm/ethos-u-setup/core_software/0002-Fix-ARMCM0plus-directory-case-and-compile-define-mis.patch
@@ -0,0 +1,77 @@
+From 1ee9cf9c956ea6a266fc79dfa62071131f162510 Mon Sep 17 00:00:00 2001
+From: RJ Ascani <rja@meta.com>
+Date: Wed, 13 May 2026 15:48:07 -0700
+Subject: [PATCH] Fix ARMCM0plus directory case and compile-define mismatch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The Cortex DFP names the Cortex-M0+ device directory and headers
+`ARMCM0plus` (lowercase suffix), while the device source files
+(`startup_ARMCM0plus.c`, `system_ARMCM0plus.c`) gate their
+implementations on the `ARMCM0P` preprocessor macro — three different
+spellings. `cmsis.cmake` previously did
+`string(TOUPPER \"ARMCM\${CPU_NUMBER}\" ARM_CPU)`, producing
+`ARMCM0PLUS`: the include path lookup fails and the source files hit
+their `#error device not specified!` guard.
+
+Override `ARM_CPU` to `ARMCM0plus` and introduce a separate
+`CMSIS_DEVICE_CPU_DEFINE` set to `ARMCM0P` for the cmsis_startup and
+cmsis_system compile-definitions; all other cores still drive both
+paths from the uppercased default.
+---
+ cmsis.cmake | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/cmsis.cmake b/cmsis.cmake
+index 7f2b93f..c49f205 100644
+--- a/cmsis.cmake
++++ b/cmsis.cmake
+@@ -23,6 +23,15 @@ endif()
+ 
+ string(TOUPPER "ARMCM${CPU_NUMBER}" ARM_CPU)
+ 
++# Cortex-M0+ is special: the Cortex DFP names the device directory and headers
++# `ARMCM0plus` (lowercase suffix), while the device sources gate their
++# implementations on the `ARMCM0P` preprocessor macro. Override both so the
++# directory lookup and `#include` resolution succeed; the compile-definition
++# override is applied instead of `CMSIS_DEVICE_CPU_FEATURE` further down.
++if(CPU_NUMBER STREQUAL "0plus")
++    set(ARM_CPU "ARMCM0plus")
++endif()
++
+ # Set CPU specific features
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m33(\\+|$)")
+     set(ARM_FEATURES "_DSP_FP")
+@@ -50,6 +59,13 @@ else()
+     cmake_path(SET CMSIS_DEVICE_CPU_FEATURE "${ARM_CPU}")
+ endif()
+ 
++# Macro the device sources gate on. Matches CMSIS_DEVICE_CPU_FEATURE for most
++# cores; Cortex-M0+ keys off `ARMCM0P`, not `ARMCM0plus`.
++set(CMSIS_DEVICE_CPU_DEFINE "${CMSIS_DEVICE_CPU_FEATURE}")
++if(CPU_NUMBER STREQUAL "0plus")
++    set(CMSIS_DEVICE_CPU_DEFINE "ARMCM0P")
++endif()
++
+ target_include_directories(cmsis_device INTERFACE ${CMSIS_DEVICE_PATH}/${ARM_CPU}/Include)
+ 
+ target_compile_options(cmsis_device INTERFACE
+@@ -66,12 +82,12 @@ target_sources(cmsis_startup INTERFACE
+ set_source_files_properties(${CMSIS_DEVICE_PATH}/${ARM_CPU}/Source/startup_${ARM_CPU}.c
+     PROPERTIES COMPILE_FLAGS -Wno-redundant-decls)
+ 
+-target_compile_definitions(cmsis_startup INTERFACE ${CMSIS_DEVICE_CPU_FEATURE})
++target_compile_definitions(cmsis_startup INTERFACE ${CMSIS_DEVICE_CPU_DEFINE})
+ target_link_libraries(cmsis_startup INTERFACE cmsis_device)
+ 
+ # CMSIS system
+ add_library(cmsis_system INTERFACE)
+ target_sources(cmsis_system INTERFACE
+     ${CMSIS_DEVICE_PATH}/${ARM_CPU}/Source/system_${ARM_CPU}.c)
+-target_compile_definitions(cmsis_system INTERFACE ${CMSIS_DEVICE_CPU_FEATURE})
++target_compile_definitions(cmsis_system INTERFACE ${CMSIS_DEVICE_CPU_DEFINE})
+ target_link_libraries(cmsis_system INTERFACE cmsis_startup)
+-- 
+2.53.0
+

From fb3f6eba471ad2f59003b3cd7cb0f5396f0060cd Mon Sep 17 00:00:00 2001
From: Gregory Comer <gjcomer@meta.com>
Date: Tue, 26 May 2026 11:07:31 -0700
Subject: [PATCH 022/103] Harden against concurrency violations (#19734)
 (#19734)

Differential Revision: D106026285

Pull Request resolved: https://github.com/pytorch/executorch/pull/19734
---
 backends/xnnpack/runtime/XNNExecutor.cpp      | 52 +++++++++++++++++--
 backends/xnnpack/runtime/XNNExecutor.h        | 10 ++++
 backends/xnnpack/runtime/XNNPACKBackend.cpp   | 45 ++++++++++++++--
 .../xnnpack/runtime/XNNWorkspaceManager.cpp   |  2 +
 backends/xnnpack/targets.bzl                  |  2 +
 .../test/runtime/test_workspace_manager.cpp   |  4 ++
 backends/xnnpack/test/targets.bzl             |  3 ++
 7 files changed, 109 insertions(+), 9 deletions(-)

diff --git a/backends/xnnpack/runtime/XNNExecutor.cpp b/backends/xnnpack/runtime/XNNExecutor.cpp
index 103a8812931..1cba33a91e6 100644
--- a/backends/xnnpack/runtime/XNNExecutor.cpp
+++ b/backends/xnnpack/runtime/XNNExecutor.cpp
@@ -23,6 +23,28 @@ using executorch::runtime::is_contiguous_dim_order;
 using executorch::runtime::kTensorDimensionLimit;
 using executorch::runtime::Span;
 
+namespace {
+class InUseGuard {
+ public:
+  explicit InUseGuard(std::atomic<bool>& flag) : flag_(flag) {}
+  ~InUseGuard() {
+    if (!dismissed_) {
+      flag_.store(false, std::memory_order_release);
+    }
+  }
+  void dismiss() {
+    dismissed_ = true;
+  }
+
+  InUseGuard(const InUseGuard&) = delete;
+  InUseGuard& operator=(const InUseGuard&) = delete;
+
+ private:
+  std::atomic<bool>& flag_;
+  bool dismissed_ = false;
+};
+} // namespace
+
 /**
  * Initializes the XNNExecutor with the runtime and given number of
  * inputs/outputs externals_ is resized to the total number of inputs and
@@ -71,6 +93,21 @@ ET_NODISCARD Error XNNExecutor::initialize(
  * delegate->execute()
  */
 ET_NODISCARD Error XNNExecutor::prepare_args(Span<EValue*> args) {
+  ET_CHECK_MSG(
+      !destroyed_.load(std::memory_order_acquire),
+      "XNNExecutor::prepare_args called after destroy");
+
+  bool was_in_use = in_use_.exchange(true, std::memory_order_acquire);
+  if (was_in_use) {
+    ET_LOG(Error, "XNNExecutor::prepare_args called concurrently");
+  }
+  ET_DCHECK_MSG(!was_in_use, "XNNExecutor::prepare_args called concurrently");
+
+  InUseGuard in_use_guard(in_use_);
+  if (was_in_use) {
+    in_use_guard.dismiss();
+  }
+
   ET_CHECK_OR_RETURN_ERROR(
       runtime_ != nullptr,
       Internal,
@@ -142,6 +179,7 @@ ET_NODISCARD Error XNNExecutor::prepare_args(Span<EValue*> args) {
     return err;
   }
 
+  in_use_guard.dismiss();
   return Error::Ok;
 }
 
@@ -152,6 +190,8 @@ ET_NODISCARD Error XNNExecutor::prepare_args(Span<EValue*> args) {
  * After which we then execute the runtime through invoke_runtime.
  */
 ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) {
+  InUseGuard in_use_guard(in_use_);
+
   ET_CHECK_OR_RETURN_ERROR(
       runtime_ != nullptr,
       Internal,
@@ -160,11 +200,13 @@ ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) {
   xnn_status status = xnn_setup_runtime_v2(
       runtime_.get(), externals_.size(), externals_.data());
 
-  ET_CHECK_OR_RETURN_ERROR(
-      status == xnn_status_success,
-      Internal,
-      "Internal Error: Setting up the runtime failed with code: %s",
-      xnn_status_to_string(status));
+  if (status != xnn_status_success) {
+    ET_LOG(
+        Error,
+        "Internal Error: Setting up the runtime failed with code: %s",
+        xnn_status_to_string(status));
+    return Error::Internal;
+  }
 
   auto error = profiler_.start(context.event_tracer());
   if (error != Error::Ok) {
diff --git a/backends/xnnpack/runtime/XNNExecutor.h b/backends/xnnpack/runtime/XNNExecutor.h
index fa7c8360be4..0af8b6056b0 100644
--- a/backends/xnnpack/runtime/XNNExecutor.h
+++ b/backends/xnnpack/runtime/XNNExecutor.h
@@ -16,6 +16,7 @@
 #include <executorch/runtime/core/exec_aten/util/tensor_util.h>
 
 #include <xnnpack.h>
+#include <atomic>
 #include <memory>
 #include <vector>
 
@@ -36,11 +37,20 @@ class XNNExecutor {
   std::vector<xnn_external_value> externals_;
   std::vector<std::string> packed_data_names_;
   std::shared_ptr<XNNWorkspace> workspace_;
+  std::atomic<bool> in_use_{false};
+  std::atomic<bool> destroyed_{false};
 
  public:
   XNNExecutor(std::shared_ptr<XNNWorkspace> workspace)
       : workspace_(workspace) {}
 
+  ~XNNExecutor() {
+    ET_CHECK_MSG(
+        !in_use_.load(std::memory_order_acquire),
+        "XNNExecutor destroyed while in use");
+    destroyed_.store(true, std::memory_order_release);
+  }
+
   inline size_t getNumInputs() {
     return input_ids_.size();
   }
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
index c20fa985f46..a02cf98771b 100644
--- a/backends/xnnpack/runtime/XNNPACKBackend.cpp
+++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp
@@ -16,6 +16,7 @@
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/executor/pte_data_map.h>
 
+#include <cinttypes>
 #include <memory>
 #include <mutex>
 
@@ -129,6 +130,17 @@ class XnnpackBackend final
           Error, "XNNCompiler::compileModel failed: 0x%x", (unsigned int)err);
       return err;
     }
+
+    ET_LOG(
+        Info,
+        "XnnpackBackend::init delegate=%p workspace_id=%" PRIu64
+        " workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s",
+        (void*)executor,
+        workspace->id(),
+        (void*)workspace_ptr,
+        program_id,
+        use_weight_cache ? "true" : "false");
+
     return executor;
   }
 
@@ -138,13 +150,23 @@ class XnnpackBackend final
       Span<EValue*> args) const override {
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
+    auto workspace = executor->get_workspace();
+    ET_LOG(
+        Info,
+        "XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64
+        " num_args=%zu weight_cache=%s",
+        (void*)executor,
+        workspace->id(),
+        (size_t)args.size(),
+        executor->uses_weight_cache() ? "true" : "false");
+
     std::unique_lock<std::mutex> lock_weights_cache(
         weights_cache_mutex_, std::defer_lock);
     if (executor->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
 
-    auto [raii_lock, _] = executor->get_workspace()->acquire();
+    auto [raii_lock, _] = workspace->acquire();
 
     // Prepare Inputs/Outputs and Propagate Input Shapes
     Error err = executor->prepare_args(args);
@@ -161,20 +183,36 @@ class XnnpackBackend final
     // Convert output data types if necessary (e.g., int32 -> int64 for Long)
     err = executor->convert_outputs(args);
 
+    ET_LOG(
+        Info,
+        "XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64
+        " err=0x%x",
+        (void*)executor,
+        workspace->id(),
+        (unsigned int)err);
+
     return err;
   }
 
   void destroy(DelegateHandle* handle) const override {
     if (handle != nullptr) {
       auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
+      auto workspace = executor->get_workspace();
+
+      ET_LOG(
+          Info,
+          "XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64,
+          (void*)executor,
+          workspace->id());
+
+      const std::lock_guard<std::mutex> lock_weights_cache(
+          weights_cache_mutex_);
 
 #ifdef ENABLE_XNNPACK_PROFILING
       executor->print_avg_op_timings();
 #endif
 
       if (executor->uses_weight_cache()) {
-        const std::lock_guard<std::mutex> lock_weights_cache(
-            weights_cache_mutex_);
         weights_cache_->delete_packed_data(executor->get_packed_data_names());
       }
 
@@ -183,7 +221,6 @@ class XnnpackBackend final
       // the same backend instance. Make sure to hold onto the workspace
       // shared_ptr, as the pointer in the executor is freed, which includes
       // the mutex referenced by raii_lock.
-      auto workspace = executor->get_workspace();
       auto [raii_lock, _] = workspace->acquire();
 
       // XNNExecutor is not trivially destructible. Since this was constructed
diff --git a/backends/xnnpack/runtime/XNNWorkspaceManager.cpp b/backends/xnnpack/runtime/XNNWorkspaceManager.cpp
index d3550da5cc7..e115074a108 100644
--- a/backends/xnnpack/runtime/XNNWorkspaceManager.cpp
+++ b/backends/xnnpack/runtime/XNNWorkspaceManager.cpp
@@ -61,7 +61,9 @@ XNNWorkspaceManager::get_or_create_workspace(
       return create_result.error();
     }
 
+#ifndef XNNPACK_WORKSPACE_ALWAYS_LOCK
     create_result.get()->disable_locking();
+#endif
     return create_result.get();
   } else if (mode == WorkspaceSharingMode::PerModel) {
     return get_or_create_model_workspace(program_id);
diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl
index 868e68e5b8c..b3af589df10 100644
--- a/backends/xnnpack/targets.bzl
+++ b/backends/xnnpack/targets.bzl
@@ -14,6 +14,8 @@ def _get_preprocessor_flags():
     if native.read_config("executorch", "xnnpack_weights_cache", "0") != "0":
         preprocessor_flags.append("-DENABLE_XNNPACK_WEIGHTS_CACHE")
 
+    preprocessor_flags.append("-DXNNPACK_WORKSPACE_ALWAYS_LOCK")
+
     # Enable if not disabled through config
     return preprocessor_flags
 
diff --git a/backends/xnnpack/test/runtime/test_workspace_manager.cpp b/backends/xnnpack/test/runtime/test_workspace_manager.cpp
index a7689966635..a239d19b415 100644
--- a/backends/xnnpack/test/runtime/test_workspace_manager.cpp
+++ b/backends/xnnpack/test/runtime/test_workspace_manager.cpp
@@ -116,7 +116,11 @@ TEST_F(XNNWorkspaceManagerTest, DisabledModeAcquireDoesNotLock) {
 
   auto [lock, ptr] = workspace->acquire();
   ASSERT_NE(ptr, nullptr);
+#ifdef XNNPACK_WORKSPACE_ALWAYS_LOCK
+  EXPECT_TRUE(lock.owns_lock());
+#else
   EXPECT_FALSE(lock.owns_lock());
+#endif
 }
 
 TEST_F(XNNWorkspaceManagerTest, PerModelMode) {
diff --git a/backends/xnnpack/test/targets.bzl b/backends/xnnpack/test/targets.bzl
index 812986a12e6..d690e1c9dcd 100644
--- a/backends/xnnpack/test/targets.bzl
+++ b/backends/xnnpack/test/targets.bzl
@@ -96,6 +96,9 @@ def define_common_targets():
     runtime.cxx_test(
         name = "test_workspace_manager",
         srcs = ["runtime/test_workspace_manager.cpp"],
+        preprocessor_flags = [
+            "-DXNNPACK_WORKSPACE_ALWAYS_LOCK",
+        ],
         deps = [
                 third_party_dep("XNNPACK"),
                 "//executorch/backends/xnnpack:xnnpack_backend",

From 50ee05ec1533ac61724ef0d3e4913b77af04faf6 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Tue, 26 May 2026 14:00:32 -0700
Subject: [PATCH 023/103] Convert Experimental, DType, MethodMetadata from Java
 to Kotlin

Differential Revision: D106394605

Pull Request resolved: https://github.com/pytorch/executorch/pull/19775
---
 extension/android/BUCK                        | 10 ++--
 .../executorch/{DType.java => DType.kt}       | 26 +++------
 .../pytorch/executorch/MethodMetadata.java    | 34 -----------
 .../org/pytorch/executorch/MethodMetadata.kt  | 12 ++++
 .../{Experimental.java => Experimental.kt}    |  7 ++-
 .../executorch/annotations/package-info.java  |  2 -
 .../org/pytorch/executorch/package-info.java  | 57 -------------------
 7 files changed, 31 insertions(+), 117 deletions(-)
 rename extension/android/executorch_android/src/main/java/org/pytorch/executorch/{DType.java => DType.kt} (77%)
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt
 rename extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/{Experimental.java => Experimental.kt} (68%)
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/package-info.java
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/package-info.java

diff --git a/extension/android/BUCK b/extension/android/BUCK
index 110b428575d..bae5579b2a8 100644
--- a/extension/android/BUCK
+++ b/extension/android/BUCK
@@ -8,17 +8,19 @@ non_fbcode_target(_kind = fb_android_library,
     warnings_as_errors = False,
     required_for_source_only_abi = True,
     srcs = [
-        "executorch_android/src/main/java/org/pytorch/executorch/DType.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/DType.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/EValue.java",
         "executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java",
         "executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/Module.java",
         "executorch_android/src/main/java/org/pytorch/executorch/Tensor.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt",
     ],
     autoglob = False,
-    language = "JAVA",
+    language = "KOTLIN",
+    pure_kotlin = False,
+    extra_kotlinc_arguments = ["-Xjvm-default=all"],
     deps = [
         "//fbandroid/java/com/facebook/jni:jni",
         "//fbandroid/libraries/soloader/java/com/facebook/soloader/nativeloader:nativeloader",
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/DType.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/DType.kt
similarity index 77%
rename from extension/android/executorch_android/src/main/java/org/pytorch/executorch/DType.java
rename to extension/android/executorch_android/src/main/java/org/pytorch/executorch/DType.kt
index 3aca4871d64..a58baa34b60 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/DType.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/DType.kt
@@ -6,17 +6,17 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-package org.pytorch.executorch;
+package org.pytorch.executorch
 
-import org.pytorch.executorch.annotations.Experimental;
+import org.pytorch.executorch.annotations.Experimental
 
 /**
  * Codes representing tensor data types.
  *
- * <p>Warning: These APIs are experimental and subject to change without notice
+ * Warning: These APIs are experimental and subject to change without notice
  */
 @Experimental
-public enum DType {
+enum class DType(@JvmField val jniCode: Int) {
   // NOTE: "jniCode" must be kept in sync with scalar_type.h.
   // NOTE: Never serialize "jniCode", because it can change between releases.
 
@@ -68,18 +68,10 @@ public enum DType {
   BITS16(22),
   ;
 
-  final int jniCode;
-
-  DType(int jniCode) {
-    this.jniCode = jniCode;
-  }
-
-  public static DType fromJniCode(int jniCode) {
-    for (DType dtype : values()) {
-      if (dtype.jniCode == jniCode) {
-        return dtype;
-      }
-    }
-    throw new IllegalArgumentException("No DType found for jniCode " + jniCode);
+  companion object {
+    @JvmStatic
+    fun fromJniCode(jniCode: Int): DType =
+        entries.find { it.jniCode == jniCode }
+            ?: throw IllegalArgumentException("No DType found for jniCode $jniCode")
   }
 }
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.java
deleted file mode 100644
index a46b27ab39e..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch;
-
-/** Immutable metadata for a method in a Module. */
-public class MethodMetadata {
-  private final String mName;
-  private final String[] mBackends;
-
-  MethodMetadata(String name, String[] backends) {
-    mName = name;
-    mBackends = backends;
-  }
-
-  /**
-   * @return Method name
-   */
-  public String getName() {
-    return mName;
-  }
-
-  /**
-   * @return Backends used for this method
-   */
-  public String[] getBackends() {
-    return mBackends;
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt
new file mode 100644
index 00000000000..2f25f32c92f
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch
+
+/** Immutable metadata for a method in a Module. */
+class MethodMetadata internal constructor(val name: String, val backends: Array<String>)
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt
similarity index 68%
rename from extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.java
rename to extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt
index f5f36fc56da..1a38bb13b99 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt
@@ -6,13 +6,14 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-package org.pytorch.executorch.annotations;
+package org.pytorch.executorch.annotations
 
 /**
  * This annotation indicates that an API is experimental and may change or be removed at any time.
  * It does not provide any guarantees for API stability or backward-compatibility.
  *
- * <p>This status is not permanent, and APIs marked with this annotation will need to be either made
+ * This status is not permanent, and APIs marked with this annotation will need to be either made
  * more robust or removed in the future.
  */
-public @interface Experimental {}
+@Retention(AnnotationRetention.BINARY)
+annotation class Experimental
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/package-info.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/package-info.java
deleted file mode 100644
index 2173a04c69d..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/package-info.java
+++ /dev/null
@@ -1,2 +0,0 @@
-/** Annotations used by ExecuTorch Android Java/JNI package. */
-package org.pytorch.executorch.annotations;
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/package-info.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/package-info.java
deleted file mode 100644
index 7a5ed0bb5a5..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/package-info.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * ExecuTorch Android Java API.
- *
- * <p>This package provides Java bindings for running ExecuTorch models on Android. Use these
- * classes to load a {@code .pte} model file and run inference directly from your Java or Kotlin
- * Android app — no C++ required.
- *
- * <h2>Quick Start</h2>
- *
- * <p><b>Step 1.</b> Add the dependency to your {@code app/build.gradle.kts}:
- *
- * <pre>{@code
- * dependencies {
- *     implementation("org.pytorch:executorch-android:${executorch_version}")
- * }
- * }</pre>
- *
- * <p><b>Step 2.</b> Load your model and run inference:
- *
- * <pre>{@code
- * import org.pytorch.executorch.EValue;
- * import org.pytorch.executorch.Module;
- * import org.pytorch.executorch.Tensor;
- *
- * // Load your exported .pte model file
- * Module module = Module.load("/data/local/tmp/model.pte");
- *
- * // Build an input tensor  e.g. a 1x3x224x224 image
- * float[] inputData = new float[1 * 3 * 224 * 224];
- * Tensor inputTensor = Tensor.fromBlob(inputData, new long[]{1, 3, 224, 224});
- *
- * // Run inference
- * EValue[] output = module.forward(EValue.from(inputTensor));
- *
- * // Read the result
- * float[] scores = output[0].toTensor().getDataAsFloatArray();
- * }</pre>
- *
- * <h2>Key Classes</h2>
- *
- * <ul>
- *   <li>{@link org.pytorch.executorch.Module} — load and run a {@code .pte} model
- *   <li>{@link org.pytorch.executorch.Tensor} — create input tensors and read outputs
- *   <li>{@link org.pytorch.executorch.EValue} — wrap inputs and unwrap outputs
- *   <li>{@link org.pytorch.executorch.DType} — supported data types (FLOAT, INT32, etc.)
- * </ul>
- *
- * <h2>More Resources</h2>
- *
- * <ul>
- *   <li><a href="https://pytorch.org/executorch/main/using-executorch-android.html">Using
- *       ExecuTorch on Android</a> — full setup guide, AAR install, build from source
- *   <li><a href="https://github.com/meta-pytorch/executorch-examples">Android Demo Apps</a> —
- *       working example apps you can build and run immediately
- * </ul>
- */
-package org.pytorch.executorch;

From 5d36c7c953f58eb7807a0ef45c83b13ab8881da3 Mon Sep 17 00:00:00 2001
From: roman-janik-nxp <roman.janik@nxp.com>
Date: Tue, 26 May 2026 23:27:14 +0200
Subject: [PATCH 024/103] =?UTF-8?q?NXP=20backend:=20Improve=20docs=20for?=
 =?UTF-8?q?=20NXP=20eIQ=20Neutron=20Kernel=20Selective=20Kernel=E2=80=A6?=
 =?UTF-8?q?=20(#19772)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

… Registration

### Summary
Docs improvement.

### Test plan
Docs only.


cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 .../backends/nxp/nxp-kernel-selection.md      | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/source/backends/nxp/nxp-kernel-selection.md b/docs/source/backends/nxp/nxp-kernel-selection.md
index 3ff61323694..307f06d1d02 100644
--- a/docs/source/backends/nxp/nxp-kernel-selection.md
+++ b/docs/source/backends/nxp/nxp-kernel-selection.md
@@ -1,25 +1,25 @@
 # NXP eIQ Neutron Kernel Selective Kernel Registration
 
-The NXP ExecuTorch backend supports selective Neutron kernel registration for `Neutron-C` targets, which decreases the
+The NXP ExecuTorch backend supports selective Neutron kernel registration for `Neutron-C` targets, which reduces the
 size of the Neutron Firmware. During the backend's conversion to the Neutron representation by the Neutron Converter,
 microcode for the Neutron accelerator is generated.
 The microcode consists of kernel calls executed by the Neutron Driver. The code for kernel call functions is
-distributed in Neutron Firmware. 
+distributed in the Neutron Firmware. 
 
-The `eiq_neutron_sdk.neutron_converter` optionally generates the `*_kernel_selection.c` file, registering 
-only kernels that are required for a particular model or in the case of ExecuTorch, a delegated subgraph. This 
-`*_kernel_selection.c`, when used during the application linking, takes precedence over the default list of registered 
+The `eiq_neutron_sdk.neutron_converter` optionally generates a `*_kernel_selection.c` file, registering 
+only kernels that are required for a particular model or, in the case of ExecuTorch, a delegated subgraph. This 
+`*_kernel_selection.c`, when used during application linking, takes precedence over the default list of registered 
 kernels in the Neutron Firmware, and allows the linker to include only the necessary Neutron kernels.
-This software is required for deployment on an edge device (e.g. `i.MXRT700`) and is
-distributed via the MCUXpresso SDK. The MCUXpresso SDK enables building of a final application that is then flashed on 
+The Neutron Firmware is required for deployment on an edge device (e.g. `i.MX RT700`) and is
+distributed via the MCUXpresso SDK. The MCUXpresso SDK enables the building of a final application that is then flashed on 
 the edge device. For more details about this process, see
 [eIQ ExecuTorch Library User Guide](https://mcuxpresso.nxp.com/mcuxsdk/latest/html/middleware/eiq/executorch/docs/nxp/ugindex.html).
 
-By default, for Neutron-C targets like `i.MXRT700`, all kernel implementations are present in the Neutron Firmware, which
+By default, for Neutron-C targets like `i.MX RT700`, all kernel implementations are present in the Neutron Firmware, which
 is linked to the final application. This enables an easy build process for any model, but increases the size of the
-final application with unused code. In the case of limited RAM, you can link only kernels that are used in the set of
-models deployed. This way you can reduce the size of the final app by linking only selected kernels, used in one or
-multiple models.
+final application with unused code. In memory-constrained environments, you can link only the kernels required by the
+deployed models. This way you can reduce the size of the final application by linking only selected kernels, used in one
+or more models.
 
 The feature works as follows: The Neutron Converter with the appropriate flag exports a kernel selection file for each 
 converted subgraph, the kernel selection files are then merged and ready to be included in the MCUXpresso SDK to use for
@@ -30,7 +30,7 @@ a selection-only build.
 
 ## Export kernel selection file
 
-To turn on this feature on the side of NXP ExecuTorch backend, use the parameter `--dump_kernel_selection_code` in 
+To enable this feature in the NXP ExecuTorch backend, use the parameter `--dump_kernel_selection_code` in 
 `aot_neutron_compile.py`. An example with the CifarNet model:
 
 ```commandline
@@ -43,7 +43,7 @@ This command will create a `*_kernel_selection.c` file alongside the converted P
 
 ## Kernel Registration for Multiple Models
 
-If you want to use or experiment with multiple models in one application while having reduced kernel set, you can
+If you want to use or experiment with multiple models in one application while having a reduced kernel set, you can
 create one kernel selection file with the script `merge_kernel_selection_code.py`:
 
 ```commandline

From cedfd486dc6bcc7fef3015d1b949c958a247c4ec Mon Sep 17 00:00:00 2001
From: Per Held <per.held@arm.com>
Date: Tue, 26 May 2026 23:43:37 +0200
Subject: [PATCH 025/103] Arm backend: Validate TOSA resize parameters (#19757)

Re-upload with BUCK changes.

Share TOSA RESIZE parameter validation between upsample support checks
and fake RESIZE lowering so invalid nearest and bilinear resize
parameters are rejected before delegation.


Change-Id: I57c267aca96d733879ae90329267e44adce399c6


cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Per Held <per.held@arm.com>
---
 backends/arm/operator_support/TARGETS         |   1 +
 .../arm/operator_support/upsample_support.py  |  82 ++++--
 .../misc/tosa_dialect/test_tosa_resize.py     |  26 +-
 .../arm/test/ops/test_upsample_nearest2d.py   |  11 +
 backends/arm/tosa/BUCK                        |  11 +
 backends/arm/tosa/dialect/BUCK                |   1 +
 backends/arm/tosa/dialect/ops/resize.py       |  62 ++---
 backends/arm/tosa/resize_utils.py             | 259 ++++++++++++++++++
 8 files changed, 389 insertions(+), 64 deletions(-)
 create mode 100644 backends/arm/tosa/resize_utils.py

diff --git a/backends/arm/operator_support/TARGETS b/backends/arm/operator_support/TARGETS
index 8f6721bd911..a2fd054d472 100644
--- a/backends/arm/operator_support/TARGETS
+++ b/backends/arm/operator_support/TARGETS
@@ -6,6 +6,7 @@ runtime.python_library(
     deps = [
         "//executorch/backends/arm:constants",
         "//executorch/backends/arm/_passes:passes",
+        "//executorch/backends/arm/tosa:resize_utils",
         "//executorch/backends/arm/tosa:tosa",
         "//executorch/backends/transforms:remove_getitem_op",
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
diff --git a/backends/arm/operator_support/upsample_support.py b/backends/arm/operator_support/upsample_support.py
index bd03a4d2b4f..42e88f08521 100644
--- a/backends/arm/operator_support/upsample_support.py
+++ b/backends/arm/operator_support/upsample_support.py
@@ -13,9 +13,53 @@
     SupportedTOSAOperatorCheck,
 )
 from executorch.backends.arm.tosa import TosaSpecification
+from executorch.backends.arm.tosa.resize_utils import get_tosa_resize_validation_error
 from executorch.exir.dialects._ops import ops as exir_ops
 
 
+def _is_upsample_node_tosa_supported(
+    support_check: SupportedTOSAOperatorCheck,
+    node: fx.Node,
+    tosa_spec: TosaSpecification,
+    *,
+    align_corners: bool,
+) -> bool:
+    input_node = ensure_type(fx.Node, node.args[0])
+    input_size_yx = get_first_fake_tensor(input_node).shape[2:]
+    output_size_yx = get_first_fake_tensor(node).shape[2:]
+
+    try:
+        scale_y_n, scale_y_d, offset_y, border_y = (
+            RewriteUpsamplePass.get_resize_parameters_1d(
+                input_size_yx[0], output_size_yx[0], align_corners
+            )
+        )
+        scale_x_n, scale_x_d, offset_x, border_x = (
+            RewriteUpsamplePass.get_resize_parameters_1d(
+                input_size_yx[1], output_size_yx[1], align_corners
+            )
+        )
+    except RuntimeError as err:
+        support_check.reporter.report_reject(node, str(err))
+        return False
+
+    # Validate the exact TOSA RESIZE parameters that RewriteUpsamplePass will
+    # emit so support checks and fake-op validation reject the same cases.
+    validation_error = get_tosa_resize_validation_error(
+        input_hw=input_size_yx,
+        output_hw=output_size_yx,
+        scale=[scale_y_n, scale_y_d, scale_x_n, scale_x_d],
+        offset=[offset_y, offset_x],
+        border=[border_y, border_x],
+        tosa_spec=tosa_spec,
+    )
+    if validation_error is not None:
+        support_check.reporter.report_reject(node, validation_error)
+        return False
+
+    return True
+
+
 @register_tosa_support_check
 class UpsampleNearest2dSupported(SupportedTOSAOperatorCheck):
     """Provide the explicit TOSA support gate for nearest upsample."""
@@ -23,9 +67,11 @@ class UpsampleNearest2dSupported(SupportedTOSAOperatorCheck):
     targets = [exir_ops.edge.aten.upsample_nearest2d.vec]
 
     def is_node_tosa_supported(
-        self, _node: fx.Node, _tosa_spec: TosaSpecification
+        self, node: fx.Node, tosa_spec: TosaSpecification
     ) -> bool:  # type: ignore[override, misc]
-        return True
+        return _is_upsample_node_tosa_supported(
+            self, node, tosa_spec, align_corners=False
+        )
 
 
 @register_tosa_support_check
@@ -37,33 +83,9 @@ class UpsampleBilinear2dSupported(SupportedTOSAOperatorCheck):
     targets = [exir_ops.edge.aten.upsample_bilinear2d.vec]
 
     def is_node_tosa_supported(
-        self, node: fx.Node, _tosa_spec: TosaSpecification
+        self, node: fx.Node, tosa_spec: TosaSpecification
     ) -> bool:  # type: ignore[override, misc]
-        input_node = ensure_type(fx.Node, node.args[0])
         align_corners = ensure_type(bool, node.args[2])
-        input_size_yx = get_first_fake_tensor(input_node).shape[2:]
-        output_size_yx = get_first_fake_tensor(node).shape[2:]
-
-        try:
-            scale_y_n, scale_y_d, _, _ = RewriteUpsamplePass.get_resize_parameters_1d(
-                input_size_yx[0], output_size_yx[0], align_corners
-            )
-            scale_x_n, scale_x_d, _, _ = RewriteUpsamplePass.get_resize_parameters_1d(
-                input_size_yx[1], output_size_yx[1], align_corners
-            )
-        except RuntimeError as err:
-            self.reporter.report_reject(node, str(err))
-            return False
-
-        # get_resize_parameters_1d() returns the TOSA RESIZE scale fraction for
-        # each spatial dimension. For align_corners=False, this is the effective
-        # output_size / input_size ratio, so the 1/16 boundary is checked
-        # directly in the same representation that RESIZE lowering will use.
-        if scale_y_d >= 16 * scale_y_n or scale_x_d >= 16 * scale_x_n:
-            self.reporter.report_reject(
-                node,
-                "Bilinear RESIZE downscale must be strictly greater than 1/16",
-            )
-            return False
-
-        return True
+        return _is_upsample_node_tosa_supported(
+            self, node, tosa_spec, align_corners=align_corners
+        )
diff --git a/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py b/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py
index d9d8b89feb6..0a90de5c0c0 100644
--- a/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py
+++ b/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py
@@ -33,13 +33,14 @@ def _expr(sym: torch.SymInt) -> sympy.Expr:
     return sympy.sympify(getattr(sym.node, "expr", sym.node._expr))
 
 
-def test_bilinear_resize_rejects_exact_one_sixteenth_downscale():
+@pytest.mark.parametrize("resize_mode", ("nearest", "bilinear"))
+def test_resize_rejects_exact_one_sixteenth_downscale(resize_mode: str):
     with TosaLoweringContext(
         TosaSpecification.create_from_string("TOSA-1.0+INT")
     ), FakeTensorMode() as mode:
         with pytest.raises(
             TosaValueError,
-            match="Bilinear RESIZE downscale must be strictly greater than 1/16",
+            match="RESIZE downscale must be strictly greater than 1/16",
         ):
             exir_ops.backend.tosa.RESIZE.default(
                 mode.from_tensor(
@@ -48,7 +49,26 @@ def test_bilinear_resize_rejects_exact_one_sixteenth_downscale():
                 [2, 32, 2, 32],
                 [15, 15],
                 [-15, -15],
-                resize_mode="bilinear",
+                resize_mode=resize_mode,
+            )
+
+
+def test_resize_rejects_scale_numerator_over_tosa_limit():
+    with TosaLoweringContext(
+        TosaSpecification.create_from_string("TOSA-1.0+INT")
+    ), FakeTensorMode() as mode:
+        with pytest.raises(
+            TosaValueError,
+            match="RESIZE scale numerator must be <= 2048",
+        ):
+            exir_ops.backend.tosa.RESIZE.default(
+                mode.from_tensor(torch.randint(0, 10, (1, 3, 4, 2), dtype=torch.int8)),
+                # 2049 violates scale_n <= 1 << 11, while 2049/2 still stays
+                # within MAX_SCALE so this test isolates the numerator rule.
+                [2049, 2, 4, 2],
+                [0, 0],
+                [0, 0],
+                resize_mode="nearest",
             )
 
 
diff --git a/backends/arm/test/ops/test_upsample_nearest2d.py b/backends/arm/test/ops/test_upsample_nearest2d.py
index 5781e4ed29d..d8bf4d7dbd5 100644
--- a/backends/arm/test/ops/test_upsample_nearest2d.py
+++ b/backends/arm/test/ops/test_upsample_nearest2d.py
@@ -198,6 +198,17 @@ def test_upsample_nearest2d_vec_tosa_FP_interpolate(test_data: torch.Tensor):
     pipeline.run()
 
 
+def test_upsample_nearest2d_vec_tosa_does_not_delegate_exact_one_sixteenth_downscale():
+    pipeline = OpNotSupportedPipeline[input_t1](
+        Interpolate(size=None, scale_factor=1.0 / 16.0),
+        (torch.randn(1, 3, 256, 448),),
+        {exir_op: 1},
+        n_expected_delegates=0,
+    )
+
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_suite)
 def test_upsample_nearest2d_vec_tosa_INT(test_data: torch.Tensor):
     test_data, size, scale_factor, compare_outputs = test_data()
diff --git a/backends/arm/tosa/BUCK b/backends/arm/tosa/BUCK
index 46ff6648c54..81d1f62437f 100644
--- a/backends/arm/tosa/BUCK
+++ b/backends/arm/tosa/BUCK
@@ -41,6 +41,17 @@ fbcode_target(_kind = runtime.python_library,
     ],
 )
 
+fbcode_target(_kind = runtime.python_library,
+    name = "resize_utils",
+    srcs = [
+        "resize_utils.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        ":specification",
+    ],
+)
+
 fbcode_target(_kind = runtime.python_library,
     name = "tosa",
     srcs = [
diff --git a/backends/arm/tosa/dialect/BUCK b/backends/arm/tosa/dialect/BUCK
index 4e7f5837766..5081f5d6945 100644
--- a/backends/arm/tosa/dialect/BUCK
+++ b/backends/arm/tosa/dialect/BUCK
@@ -22,6 +22,7 @@ fbcode_target(_kind = runtime.python_library,
     deps = [
         ":core",
         "//caffe2:torch",
+        "//executorch/backends/arm/tosa:resize_utils",
         "//executorch/backends/arm/tosa:tosa",
     ],
 )
diff --git a/backends/arm/tosa/dialect/ops/resize.py b/backends/arm/tosa/dialect/ops/resize.py
index c48ff508afc..8a2d4c5e60a 100644
--- a/backends/arm/tosa/dialect/ops/resize.py
+++ b/backends/arm/tosa/dialect/ops/resize.py
@@ -8,6 +8,10 @@
 import torch
 from executorch.backends.arm.tosa.dialect.lib import TosaValueError
 from executorch.backends.arm.tosa.dialect.ops_registration import register_fake_tosa_op
+from executorch.backends.arm.tosa.resize_utils import (
+    calculate_tosa_resize_output_hw,
+    get_tosa_resize_validation_error,
+)
 
 from executorch.backends.arm.tosa.specification import (
     get_context_spec,
@@ -50,23 +54,17 @@ def _get_output_dtype(
     return output_dtype
 
 
-def _validate_resize_parameters(scale, border, resize_mode):
-    def in_int16_range(values):
-        return all(
-            (x >= -(2**15)) and (x <= 2**15 - 1) for x in values if isinstance(x, int)
-        )
-
-    if not in_int16_range(scale):
-        raise TosaValueError("scale is out of the int16 range", op="RESIZE")
-    if not in_int16_range(border):
-        raise TosaValueError("border is out of the int16 range", op="RESIZE")
-    if resize_mode == "bilinear":
-        scale_y_n, scale_y_d, scale_x_n, scale_x_d = scale
-        if scale_y_d >= 16 * scale_y_n or scale_x_d >= 16 * scale_x_n:
-            raise TosaValueError(
-                "Bilinear RESIZE downscale must be strictly greater than 1/16",
-                op="RESIZE",
-            )
+def _validate_resize_parameters(input_hw, output_hw, scale, offset, border, tosa_spec):
+    validation_error = get_tosa_resize_validation_error(
+        input_hw=input_hw,
+        output_hw=output_hw,
+        scale=scale,
+        offset=offset,
+        border=border,
+        tosa_spec=tosa_spec,
+    )
+    if validation_error is not None:
+        raise TosaValueError(validation_error, op="RESIZE")
 
 
 @register_fake_tosa_op(
@@ -88,24 +86,26 @@ def RESIZE(
             f"Input tensor must be 4D, but got {x.dim()}D", op="RESIZE"
         )
     _validate_resize_mode(resize_mode)
-    _validate_resize_parameters(scale, border, resize_mode)
     output_dtype = _get_output_dtype(x.dtype, tosa_spec, resize_mode)
 
     input_shape = x.shape
-    scale_y_n, scale_y_d, scale_x_n, scale_x_d = scale
-    offset_y, offset_x = offset
-    border_y, border_x = border
     H, W = input_shape[1], input_shape[2]
-    # RESIZE first upscales the input by an integer value, to "upscale space".
-    H_upscaled = (H - 1) * scale_y_n
-    # offset and border are provided in this scale, therefore adjust for these while in this space.
-    H_shifted = H_upscaled - offset_y + border_y
-    # Then, complete the RESIZE by downscaling with another integer value, approximating multplication with a fraction.
-    OH = (H_shifted // scale_y_d) + 1
-    # Mirror the same computation horizontally for the output width.
-    W_upscaled = (W - 1) * scale_x_n
-    W_shifted = W_upscaled - offset_x + border_x
-    OW = (W_shifted // scale_x_d) + 1
+    _validate_resize_parameters((H, W), None, scale, offset, border, tosa_spec)
+    output_hw = calculate_tosa_resize_output_hw((H, W), scale, offset, border)
+    _validate_resize_parameters((H, W), output_hw, scale, offset, border, tosa_spec)
+    if output_hw is None:
+        scale_y_n, scale_y_d, scale_x_n, scale_x_d = scale
+        offset_y, offset_x = offset
+        border_y, border_x = border
+        # RESIZE first upscales the input by an integer value to "upscale
+        # space". Offset and border are encoded in that space, then RESIZE
+        # completes by downscaling with another integer value, approximating
+        # multiplication by a fraction.
+        OH = ((H - 1) * scale_y_n - offset_y + border_y) // scale_y_d + 1
+        OW = ((W - 1) * scale_x_n - offset_x + border_x) // scale_x_d + 1
+    else:
+        OH, OW = output_hw
+
     fake_aten_tensor = torch.empty(
         size=(input_shape[0], OH, OW, input_shape[3]), dtype=output_dtype
     )
diff --git a/backends/arm/tosa/resize_utils.py b/backends/arm/tosa/resize_utils.py
new file mode 100644
index 00000000000..6c716bfa59c
--- /dev/null
+++ b/backends/arm/tosa/resize_utils.py
@@ -0,0 +1,259 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Sequence
+
+import torch
+
+from executorch.backends.arm.tosa.specification import TosaSpecification
+
+_MAX_RESIZE_DIMENSION = 16384
+_MAX_RESIZE_SCALE_NUMERATOR = 1 << 11
+_MAX_SCALE = 2048
+_MAX_SCALE_LEVEL_8K = 256
+_INT16_MIN = -(2**15)
+_INT16_MAX = 2**15 - 1
+
+
+def _as_concrete_ints(values: Sequence[int | torch.SymInt]) -> list[int] | None:
+    if all(isinstance(value, int) for value in values):
+        return [int(value) for value in values]
+    return None
+
+
+def _concrete_int_values(values: Sequence[int | torch.SymInt]) -> list[int]:
+    return [int(value) for value in values if isinstance(value, int)]
+
+
+def _first_outside_range(
+    values: Sequence[int], min_value: int, max_value: int
+) -> int | None:
+    return next(
+        (value for value in values if value < min_value or value > max_value), None
+    )
+
+
+def _max_scale(tosa_spec: TosaSpecification) -> int:
+    return _MAX_SCALE_LEVEL_8K if getattr(tosa_spec, "level_8k", False) else _MAX_SCALE
+
+
+def _validate_dimensions(
+    input_hw: Sequence[int | torch.SymInt],
+    output_hw: Sequence[int | torch.SymInt] | None,
+) -> str | None:
+    concrete_dimensions: list[int] = []
+    input_hw_ints = _as_concrete_ints(input_hw)
+    output_hw_ints = _as_concrete_ints(output_hw) if output_hw is not None else None
+    if input_hw_ints is not None:
+        concrete_dimensions.extend(input_hw_ints)
+    if output_hw_ints is not None:
+        concrete_dimensions.extend(output_hw_ints)
+
+    invalid_dimension = next(
+        (
+            dimension
+            for dimension in concrete_dimensions
+            if dimension >= _MAX_RESIZE_DIMENSION
+        ),
+        None,
+    )
+    if invalid_dimension is not None:
+        return (
+            "RESIZE dimensions must be less than "
+            f"{_MAX_RESIZE_DIMENSION}; got {invalid_dimension}"
+        )
+    return None
+
+
+def _validate_scale(
+    scale: Sequence[int | torch.SymInt],
+    tosa_spec: TosaSpecification,
+) -> str | None:
+    invalid_scale = _first_outside_range(
+        _concrete_int_values(scale), _INT16_MIN, _INT16_MAX
+    )
+    if invalid_scale is not None:
+        return (
+            "RESIZE scale must be in int16 range "
+            f"[{_INT16_MIN}, {_INT16_MAX}]; got {invalid_scale}"
+        )
+
+    scale_ints = _as_concrete_ints(scale)
+    if scale_ints is None:
+        return None
+
+    scale_y_n, scale_y_d, scale_x_n, scale_x_d = scale_ints
+    if min(scale_y_n, scale_y_d, scale_x_n, scale_x_d) <= 0:
+        return f"RESIZE scale values must be positive; got {scale_ints}"
+
+    max_scale = _max_scale(tosa_spec)
+    if scale_y_n > max_scale * scale_y_d or scale_x_n > max_scale * scale_x_d:
+        return (
+            f"RESIZE scale ratio must be <= MAX_SCALE ({max_scale}); "
+            f"got y={scale_y_n}/{scale_y_d}, x={scale_x_n}/{scale_x_d}"
+        )
+
+    if (
+        scale_y_n > _MAX_RESIZE_SCALE_NUMERATOR
+        or scale_x_n > _MAX_RESIZE_SCALE_NUMERATOR
+    ):
+        return (
+            "RESIZE scale numerator must be <= "
+            f"{_MAX_RESIZE_SCALE_NUMERATOR}; got y={scale_y_n}, x={scale_x_n}"
+        )
+
+    # The scale values are already in the doubled rational representation that
+    # TOSA RESIZE lowering emits, so the lower-bound downscale rule can be
+    # checked directly against them.
+    if scale_y_d >= 16 * scale_y_n or scale_x_d >= 16 * scale_x_n:
+        return (
+            "RESIZE downscale must be strictly greater than 1/16; "
+            f"got y={scale_y_n}/{scale_y_d}, x={scale_x_n}/{scale_x_d}"
+        )
+    return None
+
+
+def _validate_offset(
+    offset: Sequence[int | torch.SymInt],
+    scale_ints: list[int],
+) -> str | None:
+    offset_ints = _as_concrete_ints(offset)
+    if offset_ints is None:
+        return None
+
+    scale_y_n, _, scale_x_n, _ = scale_ints
+    offset_y, offset_x = offset_ints
+    if offset_y < -scale_y_n or offset_y >= 16 * scale_y_n:
+        return (
+            f"RESIZE offset_y must be in [{-scale_y_n}, {16 * scale_y_n}); "
+            f"got {offset_y}"
+        )
+    if offset_x < -scale_x_n or offset_x >= 16 * scale_x_n:
+        return (
+            f"RESIZE offset_x must be in [{-scale_x_n}, {16 * scale_x_n}); "
+            f"got {offset_x}"
+        )
+    return None
+
+
+def _validate_border(
+    border: Sequence[int | torch.SymInt],
+    scale_ints: list[int],
+) -> str | None:
+    invalid_border = _first_outside_range(
+        _concrete_int_values(border), _INT16_MIN, _INT16_MAX
+    )
+    if invalid_border is not None:
+        return (
+            "RESIZE border must be in int16 range "
+            f"[{_INT16_MIN}, {_INT16_MAX}]; got {invalid_border}"
+        )
+
+    border_ints = _as_concrete_ints(border)
+    if border_ints is None:
+        return None
+
+    scale_y_n, _, scale_x_n, _ = scale_ints
+    border_y, border_x = border_ints
+    if border_y < -16 * scale_y_n or border_y >= scale_y_n:
+        return (
+            f"RESIZE border_y must be in [{-16 * scale_y_n}, {scale_y_n}); "
+            f"got {border_y}"
+        )
+    if border_x < -16 * scale_x_n or border_x >= scale_x_n:
+        return (
+            f"RESIZE border_x must be in [{-16 * scale_x_n}, {scale_x_n}); "
+            f"got {border_x}"
+        )
+    return None
+
+
+def _validate_output_shape(
+    input_hw: Sequence[int | torch.SymInt],
+    output_hw: Sequence[int | torch.SymInt] | None,
+    scale: Sequence[int | torch.SymInt],
+    offset: Sequence[int | torch.SymInt],
+    border: Sequence[int | torch.SymInt],
+) -> str | None:
+    if output_hw is None:
+        return None
+
+    output_hw_ints = _as_concrete_ints(output_hw)
+    expected_output_hw = calculate_tosa_resize_output_hw(
+        input_hw, scale, offset, border
+    )
+    if (
+        output_hw_ints is not None
+        and expected_output_hw is not None
+        and tuple(output_hw_ints) != expected_output_hw
+    ):
+        return (
+            "RESIZE output shape is inconsistent with input and parameters; "
+            f"expected {expected_output_hw}, got {tuple(output_hw_ints)}"
+        )
+    return None
+
+
+def calculate_tosa_resize_output_hw(
+    input_hw: Sequence[int | torch.SymInt],
+    scale: Sequence[int | torch.SymInt],
+    offset: Sequence[int | torch.SymInt],
+    border: Sequence[int | torch.SymInt],
+) -> tuple[int, int] | None:
+    input_hw_ints = _as_concrete_ints(input_hw)
+    scale_ints = _as_concrete_ints(scale)
+    offset_ints = _as_concrete_ints(offset)
+    border_ints = _as_concrete_ints(border)
+    if (
+        input_hw_ints is None
+        or scale_ints is None
+        or offset_ints is None
+        or border_ints is None
+    ):
+        return None
+
+    input_h, input_w = input_hw_ints
+    scale_y_n, scale_y_d, scale_x_n, scale_x_d = scale_ints
+    offset_y, offset_x = offset_ints
+    border_y, border_x = border_ints
+
+    # RESIZE first upscales the input by an integer value to "upscale space".
+    # Offset and border are encoded in that space, then RESIZE completes by
+    # downscaling with another integer value, approximating multiplication by a
+    # fraction.
+    return (
+        ((input_h - 1) * scale_y_n - offset_y + border_y) // scale_y_d + 1,
+        ((input_w - 1) * scale_x_n - offset_x + border_x) // scale_x_d + 1,
+    )
+
+
+def get_tosa_resize_validation_error(
+    *,
+    input_hw: Sequence[int | torch.SymInt],
+    output_hw: Sequence[int | torch.SymInt] | None,
+    scale: Sequence[int | torch.SymInt],
+    offset: Sequence[int | torch.SymInt],
+    border: Sequence[int | torch.SymInt],
+    tosa_spec: TosaSpecification,
+) -> str | None:
+    scale_ints = _as_concrete_ints(scale)
+
+    validation_error = _validate_dimensions(input_hw, output_hw)
+    if validation_error is not None:
+        return validation_error
+    validation_error = _validate_scale(scale, tosa_spec)
+    if validation_error is not None:
+        return validation_error
+    if scale_ints is None:
+        return None
+
+    for validation_error in (
+        _validate_offset(offset, scale_ints),
+        _validate_border(border, scale_ints),
+        _validate_output_shape(input_hw, output_hw, scale, offset, border),
+    ):
+        if validation_error is not None:
+            return validation_error
+    return None

From 29c3a232ca7f1db4140b1ae653f88750ea13e704 Mon Sep 17 00:00:00 2001
From: Sicheng Stephen Jia <ssjia@meta.com>
Date: Tue, 26 May 2026 17:53:22 -0400
Subject: [PATCH 026/103] Fix cortex_m test failures from D106339880

Differential Revision: D106408368

Pull Request resolved: https://github.com/pytorch/executorch/pull/19783
---
 backends/cortex_m/passes/BUCK                        | 1 +
 backends/cortex_m/passes/convert_to_cortex_m_pass.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/backends/cortex_m/passes/BUCK b/backends/cortex_m/passes/BUCK
index 4e49c8cd319..f1b7b9a201d 100644
--- a/backends/cortex_m/passes/BUCK
+++ b/backends/cortex_m/passes/BUCK
@@ -36,6 +36,7 @@ fbcode_target(_kind = runtime.python_library,
         "decompose_hardswish_pass.py",
         "decompose_mean_pass.py",
         "quantized_clamp_activation_pass.py",
+        "scratch_buffer_sizes.py",
     ],
     deps=[
         "//caffe2:torch",
diff --git a/backends/cortex_m/passes/convert_to_cortex_m_pass.py b/backends/cortex_m/passes/convert_to_cortex_m_pass.py
index e61ddaf63bc..5704645caf8 100644
--- a/backends/cortex_m/passes/convert_to_cortex_m_pass.py
+++ b/backends/cortex_m/passes/convert_to_cortex_m_pass.py
@@ -12,7 +12,7 @@
 import torch.fx
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 
-from executorch.backends.cortex_m.passes import CortexMPass
+from executorch.backends.cortex_m.passes.cortex_m_pass import CortexMPass
 from executorch.backends.cortex_m.passes.passes_utils import quantize_multiplier_aot
 from executorch.backends.cortex_m.passes.scratch_buffer_sizes import (
     required_cmsis_nn_buffer_sizes,

From ae4fdb5fda63dc7ef8f5a34e55b2d8233ba8a941 Mon Sep 17 00:00:00 2001
From: Gregory Comer <gjcomer@meta.com>
Date: Tue, 26 May 2026 16:19:58 -0700
Subject: [PATCH 027/103] Set test seed per-test (#19744)

### Summary
In https://github.com/pytorch/executorch/pull/19651, I added a global
seed for pytest runs. This was intended to reduce random tolerance
flakes, but didn't actually do so in practice. This is because the
parallel test runners don't guarantee any ordering, so random state is
unstable between runs.

I've updated it to set the seed per-test. This should hopefully make the
random state invariant of test execution order.
---
 backends/cadence/aot/tests/test_replace_ops_passes.py | 2 ++
 conftest.py                                           | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/backends/cadence/aot/tests/test_replace_ops_passes.py b/backends/cadence/aot/tests/test_replace_ops_passes.py
index 170da6deb09..a73ef02c996 100644
--- a/backends/cadence/aot/tests/test_replace_ops_passes.py
+++ b/backends/cadence/aot/tests/test_replace_ops_passes.py
@@ -1250,6 +1250,7 @@ def test_replace_conv1d_with_linear(self) -> None:
             inputs,
             "ReplaceTrivialConvWithLinear",
             rtol=2e-5,
+            atol=5e-6,
         )
 
         # Assert that conv1d is trivially converted to linear
@@ -1294,6 +1295,7 @@ def test_replace_conv2d_with_linear(self) -> None:
             inputs,
             "ReplaceTrivialConvWithLinear",
             rtol=2e-5,
+            atol=5e-6,
         )
 
         # Assert that conv2d is trivially converted to linear
diff --git a/conftest.py b/conftest.py
index 19d777a74e0..be0e6e4ea3d 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,3 +1,4 @@
+import hashlib
 import sys
 
 import torch
@@ -13,5 +14,8 @@
         "backends/apple/**",
     ]
 
-# Seed the run
-torch.manual_seed(42)
+
+def pytest_runtest_setup(item):
+    # Set a stable seed for each test based on a hash of the test name.
+    seed = int(hashlib.sha256(item.nodeid.encode()).hexdigest(), 16) % (2**32)
+    torch.manual_seed(seed)

From b4d62edb4b1f941e84d9a3d675e2a082bd09c2a6 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Tue, 26 May 2026 16:24:48 -0700
Subject: [PATCH 028/103] Collapse Experimental.kt annotation onto a single
 line to satisfy linter

Differential Revision: D106430647

Pull Request resolved: https://github.com/pytorch/executorch/pull/19790
---
 .../java/org/pytorch/executorch/annotations/Experimental.kt    | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt
index 1a38bb13b99..42a5980d6ba 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt
@@ -15,5 +15,4 @@ package org.pytorch.executorch.annotations
  * This status is not permanent, and APIs marked with this annotation will need to be either made
  * more robust or removed in the future.
  */
-@Retention(AnnotationRetention.BINARY)
-annotation class Experimental
+@Retention(AnnotationRetention.BINARY) annotation class Experimental

From 034b044382d95894eab62f1a258fc2fec6f3a34a Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Tue, 26 May 2026 17:15:16 -0700
Subject: [PATCH 029/103] Handle out_dtype in
 ReplacePT2DequantWithCadenceDequantPass (#19743)

Differential Revision: D105630451

Pull Request resolved: https://github.com/pytorch/executorch/pull/19743
---
 backends/cadence/aot/replace_ops.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py
index 4b60feb2121..50112a4eb66 100644
--- a/backends/cadence/aot/replace_ops.py
+++ b/backends/cadence/aot/replace_ops.py
@@ -162,14 +162,31 @@ def targets(self) -> list[EdgeOpOverload]:
 
     def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
         ns = exir_ops.edge if isinstance(node.target, EdgeOpOverload) else torch.ops
+        out_dtype = node.kwargs.get("out_dtype")
+        kwargs = {k: v for k, v in node.kwargs.items() if k != "out_dtype"}
         with node.graph.inserting_before(node):
             new_node = node.graph.call_function(
                 ns.cadence.dequantize_per_tensor.default,
                 args=node.args,
-                kwargs=node.kwargs,
+                kwargs=kwargs,
             )
-            new_node.meta = node.meta
-        node.replace_all_uses_with(new_node)
+            new_node.meta = node.meta.copy()
+            if (
+                out_dtype is not None
+                and out_dtype != torch.float32
+                and "val" in new_node.meta
+            ):
+                new_node.meta["val"] = new_node.meta["val"].to(torch.float32)
+        if out_dtype is not None and out_dtype != torch.float32:
+            with node.graph.inserting_after(new_node):
+                cast_node = node.graph.call_function(
+                    ns.aten.to.dtype,
+                    args=(new_node, out_dtype),
+                )
+                cast_node.meta = node.meta.copy()
+            node.replace_all_uses_with(cast_node)
+        else:
+            node.replace_all_uses_with(new_node)
         return True
 
 
From 79fe3a30148d4cebbff9a2f89254469787e74256 Mon Sep 17 00:00:00 2001
From: Daisuke Majima <rockyshikoku@gmail.com>
Date: Wed, 27 May 2026 09:25:09 +0900
Subject: [PATCH 030/103] Add coreml_compute_plan.py: report which CoreML ops
 dispatch to ANE / GPU / CPU (#19252)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Summary

CoreML decides at compile/load time which device each MIL operation will
execute on, and coremltools 9.0+ exposes that through `MLComputePlan`.
The recurring question on the issue tracker is *"why isn't my model
running fully on the ANE?"* — for example:

- #4091 — `llama model is not fully lowered to ANE`
- #11541 — `CoreML model is crashing on iPhone GPU, but not on iPhone
CPU or macOS GPU`
- #8439 — `ANE compile OOMs on certain input shapes`
- #8445 — `CPU Overhead After ANE Execution`

Today the only way for an ExecuTorch user to answer it is to break out
Swift / Xcode.  This PR adds a Python wrapper around `MLComputePlan` so
the answer is one shell command:

```
$ python coreml_compute_plan.py --model_path my_model.mlpackage \
      --compute_units cpu_and_ne --show_non_ane

=== my_model.mlpackage ===
  ANE:   412 / 480 ( 85.8%)
  CPU:    68 / 480 ( 14.2%)

  Non-ANE op types:
       32  ios17.cast
       18  ios17.gather
       12  ios17.reshape
        6  ios17.constexpr_blockwise_shift_scale
```

Inputs supported:

| Input | Behavior |
|---|---|
| `.pte` | Extract every Core ML partition into a tempdir, then analyze
each. |
| `.mlpackage` | Compile to `.mlmodelc` in a tempdir, then analyze. |
| `.mlmodelc` | Analyze directly. |

The PTE path reuses the same JSON/named-data extraction logic that
`extract_coreml_models.py` uses, and is inlined into the script so it
can
be run against a plain CoreML model without depending on the executorch
package.

### Test plan

Added `test_coreml_compute_plan.py` covering:

- `_device_name(...)` for `None` and a stub
`MLNeuralEngineComputeDevice`.
- `_COMPUTE_UNIT_CHOICES` mapping (`cpu_and_ne` / `all`).
- `analyze_one(...)` end-to-end on a tiny `relu(x @ x.T) + x.sum()`
  mlpackage built with `coremltools.convert(...)`: returns rows for
  every dispatched op, with a `main` function and the expected MIL op
  types (`matmul`, `relu`, `add`, `reduce_sum`).

```
$ python -m pytest examples/apple/coreml/scripts/test_coreml_compute_plan.py -v
============================== 7 passed in 3.68s ===============================
```

I also ran the script against a few hand-built `.mlpackage` and
`.mlmodelc` files on macOS 26 with coremltools 9.0 and verified the
output matches what `MLComputePlan` returns directly.

Authored with Claude.

cc @kimishpatel @YifanShenSZ @cymbalrush @metascroy
---
 examples/apple/coreml/scripts/BUCK            |  13 +
 .../coreml/scripts/coreml_compute_plan.py     | 236 ++++++++++++++++++
 .../coreml/scripts/extract_coreml_models.py   |  15 +-
 .../scripts/test_coreml_compute_plan.py       | 161 ++++++++++++
 4 files changed, 422 insertions(+), 3 deletions(-)
 create mode 100644 examples/apple/coreml/scripts/coreml_compute_plan.py
 create mode 100644 examples/apple/coreml/scripts/test_coreml_compute_plan.py

diff --git a/examples/apple/coreml/scripts/BUCK b/examples/apple/coreml/scripts/BUCK
index 164feb8d306..42a97ea893f 100644
--- a/examples/apple/coreml/scripts/BUCK
+++ b/examples/apple/coreml/scripts/BUCK
@@ -16,6 +16,19 @@ fbcode_target(_kind = python_binary,
     ],
 )
 
+fbcode_target(_kind = python_binary,
+    name = "coreml_compute_plan",
+    srcs = [
+        "coreml_compute_plan.py",
+    ],
+    main_function = "executorch.examples.apple.coreml.scripts.coreml_compute_plan.main",
+    deps = [
+        "//executorch/backends/apple/coreml:executorchcoreml",
+        "//executorch/exir:schema",
+        "//executorch/exir/_serialize:lib",
+    ],
+)
+
 fbcode_target(_kind = python_binary,
     name = "export",
     srcs = [
diff --git a/examples/apple/coreml/scripts/coreml_compute_plan.py b/examples/apple/coreml/scripts/coreml_compute_plan.py
new file mode 100644
index 00000000000..c0ca08db831
--- /dev/null
+++ b/examples/apple/coreml/scripts/coreml_compute_plan.py
@@ -0,0 +1,236 @@
+# Copyright © 2026 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+"""Report which CoreML operations would dispatch to ANE / GPU / CPU.
+
+The CoreML runtime decides at compile/load time which compute device each
+MIL operation will run on; that decision is exposed by ``MLComputePlan``
+in coremltools 9.0+.  This script wraps that API so users can answer
+"why isn't my model running on the ANE?" without writing Swift.
+
+Usage::
+
+    # Analyze a CoreML model directly (mlpackage or compiled mlmodelc).
+    python coreml_compute_plan.py --model_path path/to/model.mlpackage
+
+    # Analyze every Core ML partition embedded in an ExecuTorch .pte.
+    python coreml_compute_plan.py --model_path path/to/program.pte
+
+    # Show ops that fell off the ANE, grouped by op type.
+    python coreml_compute_plan.py --model_path model.mlpackage --show_non_ane
+
+    # Pick which devices the runtime is allowed to consider.
+    python coreml_compute_plan.py --model_path model.mlpackage \\
+        --compute_units cpu_and_ne
+"""
+
+import argparse
+import os
+import sys
+import tempfile
+from collections import Counter
+from typing import Iterable, List, Tuple
+
+import coremltools as ct
+from coremltools.models.compute_device import (
+    MLCPUComputeDevice,
+    MLGPUComputeDevice,
+    MLNeuralEngineComputeDevice,
+)
+from coremltools.models.compute_plan import MLComputePlan
+
+from executorch.examples.apple.coreml.scripts.extract_coreml_models import (
+    extract_coreml_models,
+)
+
+
+_DEVICE_NAMES: List[Tuple[type, str]] = [
+    (MLNeuralEngineComputeDevice, "ANE"),
+    (MLGPUComputeDevice, "GPU"),
+    (MLCPUComputeDevice, "CPU"),
+]
+
+_COMPUTE_UNIT_CHOICES = {
+    "all": ct.ComputeUnit.ALL,
+    "cpu_and_ne": ct.ComputeUnit.CPU_AND_NE,
+    "cpu_and_gpu": ct.ComputeUnit.CPU_AND_GPU,
+    "cpu_only": ct.ComputeUnit.CPU_ONLY,
+}
+
+
+def _device_name(device) -> str:
+    if device is None:
+        return "unknown"
+    for cls, name in _DEVICE_NAMES:
+        if isinstance(device, cls):
+            return name
+    return type(device).__name__
+
+
+def _iter_operations(block) -> Iterable:
+    for op in block.operations:
+        yield op
+        for nested in getattr(op, "blocks", None) or []:
+            yield from _iter_operations(nested)
+
+
+def _ensure_compiled(model_path: str, tmpdir: str) -> str:
+    """Return a `.mlmodelc` path; compile from `.mlpackage` if needed."""
+    if model_path.endswith(".mlmodelc"):
+        return model_path
+    if model_path.endswith(".mlpackage"):
+        dest = os.path.join(
+            tmpdir, os.path.basename(model_path).replace(".mlpackage", ".mlmodelc")
+        )
+        return str(ct.models.utils.compile_model(model_path, destination_path=dest))
+    raise ValueError(f"Expected a .mlpackage or .mlmodelc path, got: {model_path}")
+
+
+def analyze_one(
+    model_path: str, compute_units: ct.ComputeUnit
+) -> List[Tuple[str, str, str]]:
+    """Return [(function, operator_name, device)] for every op that has a plan.
+
+    coremltools 9.0's ``MLComputePlan.load_from_path`` only exposes usage for
+    the default function of a multifunction package, so a multifunction
+    .mlpackage is analyzed function-by-function by projecting each function
+    as the ``main`` of a temp single-function copy.
+    """
+    function_names = _mlpackage_function_names(model_path)
+    if len(function_names) <= 1:
+        return _analyze_compiled(model_path, compute_units)
+    rows: List[Tuple[str, str, str]] = []
+    with tempfile.TemporaryDirectory() as tmpdir:
+        for fname in function_names:
+            projected = _project_to_single(model_path, fname, tmpdir)
+            for _, op_name, device in _analyze_compiled(projected, compute_units):
+                rows.append((fname, op_name, device))
+    return rows
+
+
+def _analyze_compiled(
+    model_path: str, compute_units: ct.ComputeUnit
+) -> List[Tuple[str, str, str]]:
+    with tempfile.TemporaryDirectory() as tmpdir:
+        compiled = _ensure_compiled(model_path, tmpdir)
+        plan = MLComputePlan.load_from_path(compiled, compute_units=compute_units)
+        program = plan.model_structure.program
+        if program is None:
+            raise RuntimeError(
+                f"{model_path} is not an MLProgram model; this tool only supports "
+                "the MLProgram backend (the CoreML backend executorch produces today)."
+            )
+
+        rows: List[Tuple[str, str, str]] = []
+        for fname, fn in program.functions.items():
+            for op in _iter_operations(fn.block):
+                usage = plan.get_compute_device_usage_for_mlprogram_operation(op)
+                if usage is None:
+                    # Constants and similar non-dispatched ops don't have a plan.
+                    continue
+                rows.append(
+                    (
+                        fname,
+                        op.operator_name,
+                        _device_name(usage.preferred_compute_device),
+                    )
+                )
+        return rows
+
+
+def _mlpackage_function_names(model_path: str) -> List[str]:
+    """Names of the MLProgram functions inside an .mlpackage, or [] otherwise."""
+    if not model_path.endswith(".mlpackage"):
+        return []
+    spec = ct.models.MLModel(model_path, skip_model_load=True).get_spec()
+    if spec.WhichOneof("Type") != "mlProgram":
+        return []
+    return list(spec.mlProgram.functions.keys())
+
+
+def _project_to_single(src_mlpackage: str, function_name: str, tmpdir: str) -> str:
+    """Re-save ``src_mlpackage`` with only ``function_name`` exposed as ``main``."""
+    from coremltools.models.utils import MultiFunctionDescriptor, save_multifunction
+
+    dest = os.path.join(tmpdir, f"{function_name}.mlpackage")
+    desc = MultiFunctionDescriptor()
+    desc.add_function(
+        src_mlpackage,
+        src_function_name=function_name,
+        target_function_name="main",
+    )
+    desc.default_function_name = "main"
+    save_multifunction(desc, dest)
+    return dest
+
+
+def _print_report(
+    label: str, rows: List[Tuple[str, str, str]], show_non_ane: bool
+) -> None:
+    print(f"\n=== {label} ===")
+    if not rows:
+        print("  (no dispatched operations found)")
+        return
+    by_device = Counter(device for _, _, device in rows)
+    total = sum(by_device.values())
+    for device in ("ANE", "GPU", "CPU", "unknown"):
+        count = by_device.get(device, 0)
+        if count == 0:
+            continue
+        pct = 100.0 * count / total
+        print(f"  {device}: {count:5d} / {total} ({pct:5.1f}%)")
+
+    if show_non_ane:
+        non_ane = [(fn, op_name) for fn, op_name, dev in rows if dev != "ANE"]
+        if non_ane:
+            print("\n  Non-ANE op types:")
+            for op_name, count in Counter(op for _, op in non_ane).most_common():
+                print(f"    {count:5d}  {op_name}")
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    parser.add_argument(
+        "--model_path",
+        required=True,
+        help="Path to a .pte, .mlpackage, or .mlmodelc.",
+    )
+    parser.add_argument(
+        "--compute_units",
+        default="cpu_and_ne",
+        choices=sorted(_COMPUTE_UNIT_CHOICES),
+        help="Which devices the runtime may use when planning dispatch.",
+    )
+    parser.add_argument(
+        "--show_non_ane",
+        action="store_true",
+        help="List op types that did not get assigned to the ANE.",
+    )
+    args = parser.parse_args()
+
+    compute_units = _COMPUTE_UNIT_CHOICES[args.compute_units]
+    model_path = args.model_path
+
+    if model_path.endswith(".pte"):
+        with open(model_path, "rb") as f:
+            pte_data = f.read()
+        with tempfile.TemporaryDirectory() as out_dir:
+            extracted = extract_coreml_models(pte_data, out_dir=out_dir)
+            if not extracted:
+                print(
+                    f"{model_path} does not contain any CoreML delegate partitions.",
+                    file=sys.stderr,
+                )
+                return 1
+            for path in extracted:
+                rows = analyze_one(str(path), compute_units)
+                _print_report(path.name, rows, args.show_non_ane)
+    else:
+        rows = analyze_one(model_path, compute_units)
+        _print_report(os.path.basename(model_path.rstrip("/")), rows, args.show_non_ane)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/examples/apple/coreml/scripts/extract_coreml_models.py b/examples/apple/coreml/scripts/extract_coreml_models.py
index 685b6b594f3..8956550eb4d 100644
--- a/examples/apple/coreml/scripts/extract_coreml_models.py
+++ b/examples/apple/coreml/scripts/extract_coreml_models.py
@@ -9,7 +9,7 @@
 import shutil
 from pathlib import Path
 
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 from executorch.backends.apple.coreml import executorchcoreml
 from executorch.exir._serialize._program import deserialize_pte_binary
@@ -22,7 +22,12 @@
 COREML_BACKEND_ID = "CoreMLBackend"
 
 
-def extract_coreml_models(pte_data: bytes):
+def extract_coreml_models(
+    pte_data: bytes,
+    out_dir: Optional[Union[str, Path]] = None,
+) -> List[Path]:
+    out_root = Path(out_dir) if out_dir is not None else Path("extracted_coreml_models")
+
     pte_file = deserialize_pte_binary(pte_data)
     program = pte_file.program
 
@@ -44,6 +49,7 @@ def extract_coreml_models(pte_data: bytes):
     ]
 
     # Track extracted models to avoid duplicates (multifunction models share partitions)
+    extracted_paths: List[Path] = []
     extracted_keys: set = set()
     model_index: int = 1
 
@@ -95,7 +101,7 @@ def extract_coreml_models(pte_data: bytes):
         if model_name is None:
             model_name = f"model_{model_index}"
 
-        model_path: Path = Path() / "extracted_coreml_models" / model_name
+        model_path: Path = out_root / model_name
         if model_path.exists():
             shutil.rmtree(model_path.absolute())
         os.makedirs(model_path.absolute())
@@ -104,11 +110,14 @@ def extract_coreml_models(pte_data: bytes):
             coreml_processed_bytes, str(model_path.absolute())
         ):
             print(f"Core ML models are extracted and saved to path = {model_path}")
+            extracted_paths.append(model_path)
         model_index += 1
 
     if len(coreml_delegates) == 0:
         print("The model isn't delegated to Core ML.")
 
+    return extracted_paths
+
 
 def main() -> None:
     """
diff --git a/examples/apple/coreml/scripts/test_coreml_compute_plan.py b/examples/apple/coreml/scripts/test_coreml_compute_plan.py
new file mode 100644
index 00000000000..83f06b7a2a8
--- /dev/null
+++ b/examples/apple/coreml/scripts/test_coreml_compute_plan.py
@@ -0,0 +1,161 @@
+# Copyright © 2026 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+"""Tests for coreml_compute_plan.py."""
+
+import os
+import shutil
+import tempfile
+import unittest
+from collections import Counter
+
+import coremltools as ct
+import torch
+from coremltools.models.utils import MultiFunctionDescriptor, save_multifunction
+
+from executorch.examples.apple.coreml.scripts.coreml_compute_plan import (
+    _COMPUTE_UNIT_CHOICES,
+    _device_name,
+    analyze_one,
+)
+
+
+class _Op:
+    def __init__(self, operator_name: str, blocks=None):
+        self.operator_name = operator_name
+        self.blocks = blocks or []
+
+
+class _Block:
+    __slots__ = ("operations",)
+
+    def __init__(self, ops):
+        self.operations = ops
+
+
+def _build_small_mlpackage(out_dir: str) -> str:
+    class M(torch.nn.Module):
+        def forward(self, x):
+            return torch.nn.functional.relu(x @ x.T) + x.sum()
+
+    model = M().eval()
+    ep = torch.export.export(model, (torch.randn(8, 8),), strict=True)
+    ep = ep.run_decompositions({})
+    mlmodel = ct.convert(
+        ep,
+        source="pytorch",
+        convert_to="mlprogram",
+        minimum_deployment_target=ct.target.iOS17,
+        skip_model_load=True,
+    )
+    out = os.path.join(out_dir, "tiny.mlpackage")
+    mlmodel.save(out)
+    return out
+
+
+class TestDeviceName(unittest.TestCase):
+    def test_none_device(self):
+        self.assertEqual(_device_name(None), "unknown")
+
+    def test_known_device_classes(self):
+        from coremltools.models.compute_device import MLNeuralEngineComputeDevice
+
+        # Don't construct the device classes directly (they wrap proxies that
+        # may be unavailable in some envs); just confirm the type-mapping path
+        # returns sensible names by mocking the isinstance check with a fake.
+        class FakeNE(MLNeuralEngineComputeDevice):
+            def __init__(self):
+                pass
+
+        self.assertEqual(_device_name(FakeNE()), "ANE")
+
+
+class TestComputeUnitChoices(unittest.TestCase):
+    def test_includes_cpu_and_ne(self):
+        self.assertEqual(_COMPUTE_UNIT_CHOICES["cpu_and_ne"], ct.ComputeUnit.CPU_AND_NE)
+
+    def test_includes_all(self):
+        self.assertEqual(_COMPUTE_UNIT_CHOICES["all"], ct.ComputeUnit.ALL)
+
+
+class TestAnalyzeOne(unittest.TestCase):
+    """End-to-end: build a tiny mlpackage and analyze it."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.tmpdir = tempfile.mkdtemp()
+        cls.mlpackage = _build_small_mlpackage(cls.tmpdir)
+
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.tmpdir, ignore_errors=True)
+
+    def test_returns_rows_for_dispatched_ops(self):
+        rows = analyze_one(self.mlpackage, ct.ComputeUnit.CPU_AND_NE)
+        self.assertGreater(len(rows), 0, "expected at least one dispatched op")
+        # Every row is (function_name, operator_name, device_name).
+        for fname, op_name, device in rows:
+            self.assertIsInstance(fname, str)
+            self.assertIsInstance(op_name, str)
+            self.assertIn(device, {"ANE", "GPU", "CPU", "unknown"})
+
+    def test_main_function_present(self):
+        rows = analyze_one(self.mlpackage, ct.ComputeUnit.CPU_ONLY)
+        self.assertIn("main", {fname for fname, _, _ in rows})
+
+    def test_op_types_for_relu_matmul_model(self):
+        # The toy model is `relu(x @ x.T) + x.sum()` so the lowered MIL
+        # should at least contain matmul, relu, add and reduce_sum.
+        rows = analyze_one(self.mlpackage, ct.ComputeUnit.CPU_ONLY)
+        op_types = Counter(op for _, op, _ in rows)
+        # Op names are versioned (e.g. "ios17.matmul"), so match by suffix.
+        suffixes = {name.split(".")[-1] for name in op_types}
+        for expected in ("matmul", "relu", "add", "reduce_sum"):
+            self.assertIn(expected, suffixes, f"missing op {expected}: {suffixes}")
+
+
+class TestAnalyzeOneMultifunction(unittest.TestCase):
+    """Verify analyze_one walks every function of a multifunction .mlpackage.
+
+    coremltools 9.0's MLComputePlan.load_from_path only exposes usage for
+    the default function, so analyze_one re-projects each function through
+    MultiFunctionDescriptor to surface plans for the rest.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        cls.tmpdir = tempfile.mkdtemp()
+        single = _build_small_mlpackage(cls.tmpdir)
+        desc = MultiFunctionDescriptor()
+        desc.add_function(
+            single, src_function_name="main", target_function_name="prefill"
+        )
+        desc.add_function(
+            single, src_function_name="main", target_function_name="decode"
+        )
+        desc.default_function_name = "prefill"
+        cls.multi = os.path.join(cls.tmpdir, "multi.mlpackage")
+        save_multifunction(desc, cls.multi)
+
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.tmpdir, ignore_errors=True)
+
+    def test_reports_every_function(self):
+        rows = analyze_one(self.multi, ct.ComputeUnit.CPU_ONLY)
+        fnames = {fname for fname, _, _ in rows}
+        self.assertEqual(fnames, {"prefill", "decode"})
+
+    def test_each_function_lowers_the_same_ops(self):
+        rows = analyze_one(self.multi, ct.ComputeUnit.CPU_ONLY)
+        per_fn: dict = {}
+        for fname, op_name, _ in rows:
+            per_fn.setdefault(fname, set()).add(op_name.split(".")[-1])
+        for fname in ("prefill", "decode"):
+            self.assertIn("matmul", per_fn.get(fname, set()), f"{fname} missing matmul")
+            self.assertIn("relu", per_fn.get(fname, set()), f"{fname} missing relu")
+
+
+if __name__ == "__main__":
+    unittest.main()

From fb420f302ee73d2e1abebb18e423c6dff20309ab Mon Sep 17 00:00:00 2001
From: Gregory Comer <gjcomer@meta.com>
Date: Tue, 26 May 2026 18:50:49 -0700
Subject: [PATCH 031/103] Fix bug with mixed weight cache + workspace sharing

Differential Revision: D106412035

Pull Request resolved: https://github.com/pytorch/executorch/pull/19777
---
 backends/xnnpack/runtime/XNNExecutor.cpp    |  2 +-
 backends/xnnpack/runtime/XNNExecutor.h      |  2 +-
 backends/xnnpack/runtime/XNNPACKBackend.cpp | 36 ++-------------------
 backends/xnnpack/runtime/XNNWorkspace.h     |  9 ++++++
 4 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/backends/xnnpack/runtime/XNNExecutor.cpp b/backends/xnnpack/runtime/XNNExecutor.cpp
index 1cba33a91e6..5a150f92b6b 100644
--- a/backends/xnnpack/runtime/XNNExecutor.cpp
+++ b/backends/xnnpack/runtime/XNNExecutor.cpp
@@ -93,7 +93,7 @@ ET_NODISCARD Error XNNExecutor::initialize(
  * delegate->execute()
  */
 ET_NODISCARD Error XNNExecutor::prepare_args(Span<EValue*> args) {
-  ET_CHECK_MSG(
+  ET_DCHECK_MSG(
       !destroyed_.load(std::memory_order_acquire),
       "XNNExecutor::prepare_args called after destroy");
 
diff --git a/backends/xnnpack/runtime/XNNExecutor.h b/backends/xnnpack/runtime/XNNExecutor.h
index 0af8b6056b0..2d709678c1c 100644
--- a/backends/xnnpack/runtime/XNNExecutor.h
+++ b/backends/xnnpack/runtime/XNNExecutor.h
@@ -45,7 +45,7 @@ class XNNExecutor {
       : workspace_(workspace) {}
 
   ~XNNExecutor() {
-    ET_CHECK_MSG(
+    ET_DCHECK_MSG(
         !in_use_.load(std::memory_order_acquire),
         "XNNExecutor destroyed while in use");
     destroyed_.store(true, std::memory_order_release);
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
index a02cf98771b..9eaadda86f8 100644
--- a/backends/xnnpack/runtime/XNNPACKBackend.cpp
+++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp
@@ -16,7 +16,6 @@
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/executor/pte_data_map.h>
 
-#include <cinttypes>
 #include <memory>
 #include <mutex>
 
@@ -101,6 +100,7 @@ class XnnpackBackend final
       lock_weights_cache.lock();
       weights_cache_->initialize_for_runtime(
           context.get_runtime_allocator(), named_data_map);
+      workspace->set_uses_weight_cache();
     }
 
     auto [workspace_lock, workspace_ptr] = workspace->acquire();
@@ -131,16 +131,6 @@ class XnnpackBackend final
       return err;
     }
 
-    ET_LOG(
-        Info,
-        "XnnpackBackend::init delegate=%p workspace_id=%" PRIu64
-        " workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s",
-        (void*)executor,
-        workspace->id(),
-        (void*)workspace_ptr,
-        program_id,
-        use_weight_cache ? "true" : "false");
-
     return executor;
   }
 
@@ -151,18 +141,10 @@ class XnnpackBackend final
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
     auto workspace = executor->get_workspace();
-    ET_LOG(
-        Info,
-        "XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64
-        " num_args=%zu weight_cache=%s",
-        (void*)executor,
-        workspace->id(),
-        (size_t)args.size(),
-        executor->uses_weight_cache() ? "true" : "false");
 
     std::unique_lock<std::mutex> lock_weights_cache(
         weights_cache_mutex_, std::defer_lock);
-    if (executor->uses_weight_cache()) {
+    if (executor->uses_weight_cache() || workspace->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
 
@@ -183,14 +165,6 @@ class XnnpackBackend final
     // Convert output data types if necessary (e.g., int32 -> int64 for Long)
     err = executor->convert_outputs(args);
 
-    ET_LOG(
-        Info,
-        "XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64
-        " err=0x%x",
-        (void*)executor,
-        workspace->id(),
-        (unsigned int)err);
-
     return err;
   }
 
@@ -199,12 +173,6 @@ class XnnpackBackend final
       auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
       auto workspace = executor->get_workspace();
 
-      ET_LOG(
-          Info,
-          "XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64,
-          (void*)executor,
-          workspace->id());
-
       const std::lock_guard<std::mutex> lock_weights_cache(
           weights_cache_mutex_);
 
diff --git a/backends/xnnpack/runtime/XNNWorkspace.h b/backends/xnnpack/runtime/XNNWorkspace.h
index b7ef442c460..e1b452a0a8b 100644
--- a/backends/xnnpack/runtime/XNNWorkspace.h
+++ b/backends/xnnpack/runtime/XNNWorkspace.h
@@ -59,6 +59,14 @@ class XNNWorkspace {
     lock_required_ = false;
   }
 
+  void set_uses_weight_cache() {
+    uses_weight_cache_.store(true, std::memory_order_release);
+  }
+
+  bool uses_weight_cache() const {
+    return uses_weight_cache_.load(std::memory_order_acquire);
+  }
+
   static runtime::Result<std::shared_ptr<XNNWorkspace>> create() {
     // Because this class can't be moved, we need to construct it in-place.
     xnn_workspace_t workspace = nullptr;
@@ -80,6 +88,7 @@ class XNNWorkspace {
   std::mutex mutex_;
   uint64_t id_;
   bool lock_required_ = true;
+  std::atomic<bool> uses_weight_cache_{false};
   WorkspacePtr workspace_;
 };
 

From 77df9b79ae212c6a538ff16f3538954a5bac10ca Mon Sep 17 00:00:00 2001
From: Andrew Grebenisan <33402477+DrJessop@users.noreply.github.com>
Date: Tue, 26 May 2026 20:08:12 -0700
Subject: [PATCH 032/103] New exported program pass manager and exported
 program passes (#16986)

Differential Revision: D91725222

Pull Request resolved: https://github.com/pytorch/executorch/pull/16986
---
 backends/arm/test/tester/test_pipeline.py     |   2 +-
 .../_passes/recompose_pad_maxpool2d.py        |   7 +-
 backends/qualcomm/_passes/utils.py            |  33 ++-
 exir/BUCK                                     |  12 +
 exir/_program_utils.py                        | 104 ++++++++
 exir/pass_base.py                             |  58 ++++-
 exir/pass_manager.py                          | 201 +++++++++++++--
 exir/program/BUCK                             |   1 +
 exir/program/_program.py                      | 163 ++++--------
 exir/tests/test_pass_infra.py                 | 243 +++++++++++++++++-
 10 files changed, 671 insertions(+), 153 deletions(-)
 create mode 100644 exir/_program_utils.py

diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
index 7e7f576e35c..86a5f857e58 100644
--- a/backends/arm/test/tester/test_pipeline.py
+++ b/backends/arm/test/tester/test_pipeline.py
@@ -48,7 +48,7 @@
 from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec
 from executorch.backends.test.harness.stages import StageType
 from executorch.exir.pass_base import ExportPass
-from torch._export.pass_base import PassType
+from executorch.exir.pass_manager import PassType
 from torch.export.graph_signature import InputKind, OutputKind
 from torchao.quantization.pt2e.quantizer import QuantizationSpec
 
diff --git a/backends/qualcomm/_passes/recompose_pad_maxpool2d.py b/backends/qualcomm/_passes/recompose_pad_maxpool2d.py
index 81b4836f251..6a8374cb66a 100644
--- a/backends/qualcomm/_passes/recompose_pad_maxpool2d.py
+++ b/backends/qualcomm/_passes/recompose_pad_maxpool2d.py
@@ -13,12 +13,8 @@
 from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.passes import dead_code_elimination_pass
 
-from torch._subclasses.fake_tensor import FakeTensorMode
-
-
-def add_fake_tensor_to_node(padding_node, input_shape, padding_args, dtype):
-    fake_mode = FakeTensorMode()
 
+def add_fake_tensor_to_node(padding_node, input_shape, padding_args, dtype, fake_mode):
     with fake_mode:
         batch, channels, height, width = input_shape
         pad_left, pad_right, pad_top, pad_bottom = padding_args
@@ -114,6 +110,7 @@ def call(self, graph_module: torch.fx.GraphModule):  # noqa C901
                         input_node.meta["val"].shape,
                         padding,
                         input_node.meta["val"].dtype,
+                        input_node.meta["val"].fake_mode,
                     )
                     if quant_attrs:
                         padding_node.meta["quant_attrs"] = node.meta["quant_attrs"]
diff --git a/backends/qualcomm/_passes/utils.py b/backends/qualcomm/_passes/utils.py
index 542fa1115a6..91a7cfdc69a 100755
--- a/backends/qualcomm/_passes/utils.py
+++ b/backends/qualcomm/_passes/utils.py
@@ -137,7 +137,23 @@ def copy_nn_module_stack(src, target):
         target.meta["nn_module_stack"] = value
 
 
-def merge_decomposed_graph(
+def _unify_fake_mode(node: torch.fx.Node, fake_mode) -> None:
+    val = node.meta.get("val")
+    if val is None:
+        return
+    if isinstance(val, FakeTensor) and val.fake_mode is not fake_mode:
+        node.meta["val"] = fake_mode.from_tensor(val)
+    elif isinstance(val, (list, tuple)):
+        unified = []
+        for v in val:
+            if isinstance(v, FakeTensor) and v.fake_mode is not fake_mode:
+                unified.append(fake_mode.from_tensor(v))
+            else:
+                unified.append(v)
+        node.meta["val"] = type(val)(unified)
+
+
+def merge_decomposed_graph(  # noqa: C901
     remap: Dict[str, torch.fx.Node],
     target_node: torch.fx.Node,
     target_graph: torch.fx.GraphModule,
@@ -148,6 +164,16 @@ def merge_decomposed_graph(
         [torch.fx.Node, torch.fx.Node, Dict[str, torch.fx.Node]], None
     ] = None,
 ) -> None:
+    target_fake_mode = None
+    target_val = target_node.meta.get("val")
+    if isinstance(target_val, FakeTensor):
+        target_fake_mode = target_val.fake_mode
+    elif isinstance(target_val, (list, tuple)):
+        for v in target_val:
+            if isinstance(v, FakeTensor):
+                target_fake_mode = v.fake_mode
+                break
+
     def default_output_process(node):
         for user in node.users.copy():
             # remap
@@ -170,10 +196,13 @@ def default_output_process(node):
                 # replace node map from string to graph node
                 remap[decomposed_node] = remap.pop(decomposed_node.name)
             else:
-                remap[decomposed_node] = target_graph.node_copy(
+                copied = target_graph.node_copy(
                     decomposed_node,
                     arg_transform=lambda x, remap=remap: remap[x],
                 )
+                if target_fake_mode is not None:
+                    _unify_fake_mode(copied, target_fake_mode)
+                remap[decomposed_node] = copied
 
 
 def is_float_tensor(node: torch.fx.Node) -> bool:
diff --git a/exir/BUCK b/exir/BUCK
index f00b3f1c787..d70900c02ae 100644
--- a/exir/BUCK
+++ b/exir/BUCK
@@ -259,6 +259,16 @@ fbcode_target(_kind = runtime.python_library,
     ],
 )
 
+fbcode_target(_kind = runtime.python_library,
+    name = "_program_utils",
+    srcs = [
+        "_program_utils.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
 fbcode_target(_kind = runtime.python_library,
     name = "pass_manager",
     srcs = [
@@ -266,7 +276,9 @@ fbcode_target(_kind = runtime.python_library,
     ],
     deps = [
         "fbsource//third-party/pypi/typing-extensions:typing-extensions",
+        ":_program_utils",
         ":error",
+        ":pass_base",
         "//caffe2:torch",
     ],
 )
diff --git a/exir/_program_utils.py b/exir/_program_utils.py
new file mode 100644
index 00000000000..d0d2039d93a
--- /dev/null
+++ b/exir/_program_utils.py
@@ -0,0 +1,104 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+from torch.export.exported_program import (
+    ConstantArgument,
+    ExportGraphSignature,
+    InputSpec,
+    OutputSpec,
+)
+
+
+def _get_updated_range_constraints(gm):
+    def get_shape_env(gm):
+        vals = [
+            node.meta["val"]
+            for node in gm.graph.nodes
+            if node.meta.get("val", None) is not None
+        ]
+        from torch._guards import detect_fake_mode  # type: ignore[21]
+
+        fake_mode = detect_fake_mode(vals)
+        if fake_mode is not None:
+            return fake_mode.shape_env
+        for v in vals:
+            if isinstance(v, torch.SymInt):
+                return v.node.shape_env
+
+    shape_env = get_shape_env(gm)
+    if shape_env is None:
+        return {}
+    range_constraints = {
+        shape_env.replacements.get(k, k): v for k, v in shape_env.var_to_range.items()
+    }
+    # Only when we have an unbacked symint, and it's used as constructor inputs,
+    # runtime_var_to_range will make a difference compated to var_to_range.
+    # e.g. [2, oo) -> [0, oo)
+    for k, v in shape_env.var_to_range.items():
+        if k not in shape_env.replacements:
+            range_constraints[k] = v
+    return range_constraints
+
+
+def _get_updated_graph_signature(
+    old_signature: ExportGraphSignature,
+    new_gm: torch.fx.GraphModule,
+) -> ExportGraphSignature:
+    """
+    Update the graph signature's user_input/user_outputs.
+    """
+    new_input_specs = []
+    i = 0
+    for node in new_gm.graph.nodes:
+        if node.op != "placeholder":
+            continue
+
+        assert i < len(
+            old_signature.input_specs
+        ), "Number of inputs changed after transformation"
+        old_input_spec = old_signature.input_specs[i]
+        arg = (
+            old_input_spec.arg
+            if isinstance(old_input_spec.arg, ConstantArgument)
+            # pyre-fixme[20]: Argument `class_fqn` expected.
+            else type(old_input_spec.arg)(node.name)
+        )
+        new_input_specs.append(
+            InputSpec(
+                old_input_spec.kind,
+                arg,
+                old_input_spec.target,
+                persistent=old_input_spec.persistent,
+            )
+        )
+        i += 1
+
+    output_node = new_gm.graph.output_node()
+    assert output_node.op == "output"
+
+    new_output_specs = []
+    for i, node in enumerate(output_node.args[0]):
+        assert i < len(
+            old_signature.output_specs
+        ), "Number of outputs changed after transformation"
+        old_output_spec = old_signature.output_specs[i]
+        arg = (
+            old_output_spec.arg
+            if isinstance(old_output_spec.arg, ConstantArgument)
+            # pyre-fixme[20]: Argument `class_fqn` expected.
+            else type(old_output_spec.arg)(node.name)
+        )
+        new_output_specs.append(
+            OutputSpec(old_output_spec.kind, arg, old_output_spec.target)
+        )
+
+    new_signature = ExportGraphSignature(
+        input_specs=new_input_specs, output_specs=new_output_specs
+    )
+    return new_signature
diff --git a/exir/pass_base.py b/exir/pass_base.py
index 8ab0c675240..f93dd75d156 100644
--- a/exir/pass_base.py
+++ b/exir/pass_base.py
@@ -6,10 +6,11 @@
 # LICENSE file in the root directory of this source tree.
 
 # pyre-strict
-
 import operator
 import traceback
+from abc import ABC, abstractmethod
 from contextlib import nullcontext
+from dataclasses import dataclass
 from typing import (
     Any,
     Callable,
@@ -27,9 +28,7 @@
 
 import torch
 from executorch.exir import memory
-
 from executorch.exir.delegate import executorch_call_delegate, is_lowered_module
-
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.error import ExportError, ExportErrorType
 from torch import fx
@@ -37,6 +36,7 @@
 from torch._subclasses import FakeTensorMode, UnsupportedFakeTensorException
 from torch._subclasses.fake_tensor import FakeTensor
 from torch._subclasses.functional_tensor import FunctionalTensor, FunctionalTensorMode
+from torch.export import ExportedProgram
 from torch.fx import traceback as fx_traceback
 from torch.fx.experimental.proxy_tensor import PythonKeyTracer
 from torch.fx.graph import CodeGen
@@ -182,6 +182,58 @@ class ExportPassBaseError(RuntimeError):
     pass
 
 
+@dataclass(frozen=True)
+class ExportedProgramPassResult:
+    exported_program: ExportedProgram
+    modified: bool
+
+
+class ExportedProgramPassBase(ABC):
+    """
+    Base interface for implementing passes that operate on ExportedProgram.
+    """
+
+    def __call__(self, exported_program: ExportedProgram) -> ExportedProgramPassResult:
+        """
+        Runs the precondition check, the pass itself, and the postcondition check.
+        """
+
+        self.requires(exported_program)
+        res = self.call(exported_program)
+        self.ensures(exported_program)
+        return res
+
+    @abstractmethod
+    def call(self, exported_program: ExportedProgram) -> ExportedProgramPassResult:
+        """
+        The pass that is run through the given exported program. To implement a
+        pass, it is required to implement this function.
+
+        Args:
+            exported_program: The exported program we will run a pass on
+        """
+
+    def requires(self, exported_program: ExportedProgram) -> None:  # noqa: B027
+        """
+        This function will be called before the pass is run and will check that
+        the given exported program contains the preconditions needed to run the
+        pass. It is not required to implement this function.
+
+        Args:
+            exported_program: The exported program we will run checks on
+        """
+
+    def ensures(self, exported_program: ExportedProgram) -> None:  # noqa: B027
+        """
+        This function will be called after the pass is run and will check that
+        the given exported program contains the postconditions needed to run the
+        pass. It is not required to implement this function.
+
+        Args:
+            exported_program: The exported program we will run checks on
+        """
+
+
 class _ExportPassBase(PassBase):
     """
     Interpreter-based pass class to help users maintain the IR spec while writing
diff --git a/exir/pass_manager.py b/exir/pass_manager.py
index b812ccea7b8..351e98651dd 100644
--- a/exir/pass_manager.py
+++ b/exir/pass_manager.py
@@ -5,28 +5,46 @@
 # LICENSE file in the root directory of this source tree.
 
 # pyre-strict
-
-from typing import Callable, List, Optional, Union
+import copy
+import inspect
+import logging
+from typing import Callable, List, Optional, Type, TypeAlias, Union
 
 import torch
 import torch.fx.passes.infra.pass_manager as fx
 import torch.utils._pytree as pytree
+from executorch.exir._program_utils import (
+    _get_updated_graph_signature,
+    _get_updated_range_constraints,
+)
 from executorch.exir.error import ExportError, ExportErrorType
+from executorch.exir.pass_base import ExportedProgramPassBase, ExportedProgramPassResult
+from torch._export.verifier import Verifier
+from torch.export import ExportedProgram
 from torch.fx.passes.infra.pass_base import PassResult
-from typing_extensions import TypeAlias
+from torch.fx.passes.infra.pass_manager import pass_result_wrapper
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+
+PassType: TypeAlias = Union[
+    ExportedProgramPassBase, Callable[[torch.fx.GraphModule], Optional[PassResult]]
+]
+
 
-PassType: TypeAlias = Callable[[torch.fx.GraphModule], Optional[PassResult]]
+def _get_pass_name(fn: PassType) -> str:
+    """Returns a human-readable name for a pass."""
+    return fn.__name__ if inspect.isfunction(fn) else type(fn).__name__
 
 
 class PassManager(fx.PassManager):
     """
-    Class to run multiple passes on a given graph module. The PassManager is
-    callable so to run it, we can just call the PassManager instance.
+    Runs multiple passes on a GraphModule.
 
-    Private Attributes:
-        * **passes**: A list of callable passes
-        * **params**: An instance of PassManagerParams containing the result of the
-            flags set in the constructor.
+    This is the legacy PassManager that extends torch.fx.passes.infra.pass_manager.PassManager.
+    Use this when you need to run passes on a GraphModule directly.
+
+    For running passes on ExportedProgram, use ExportedProgramPassManager instead.
     """
 
     def __init__(
@@ -34,14 +52,11 @@ def __init__(
         passes: Optional[Union[List[PassType], List[List[PassType]]]] = None,
         run_checks_after_each_pass: bool = False,
         suppress_check_failures: bool = False,
+        steps: int = 1,
     ) -> None:
-        r"""
-        Args:
-            passes: A list of passes
-            enable_debug_pass: set to true to enable the debug passes
-            run_checks_after_each_pass: whether to run checks and linting after each pass
-        """
-
+        logger.warning(
+            "PassManager is deprecated. Please use ExportedProgramPassManager instead."
+        )
         # Flatten the passes to a list of callables
         passes = passes if passes else []
         flattened_passes = [
@@ -52,6 +67,7 @@ def __init__(
             flattened_passes,
             run_checks_after_each_pass=run_checks_after_each_pass,
             suppress_check_failures=suppress_check_failures,
+            steps=steps,
         )
 
     def check(self, module: torch.nn.Module) -> None:
@@ -65,10 +81,9 @@ def check(self, module: torch.nn.Module) -> None:
               node's spec field is a tuple)
             - Ensure that the graph module has type torch.fx.GraphModule
         """
-        assert isinstance(module, fx.GraphModule)
+        assert isinstance(module, torch.fx.GraphModule)
         module.recompile()
         module.graph.lint()
-        # TODO(qihan): use verifier.check_is_exir
 
         for node in module.graph.nodes:
             if node.op == "call_method":
@@ -76,3 +91,151 @@ def check(self, module: torch.nn.Module) -> None:
                     ExportErrorType.NOT_SUPPORTED,
                     f"call_method `{node}` is not supported except for backend delegate.",
                 )
+
+
+class ExportedProgramPassManager(fx.PassManager):
+    """
+    Runs multiple passes on an ExportedProgram.
+
+    This PassManager is specifically designed for ExportedProgram and supports
+    both GraphModule-only passes and ExportedProgram-aware passes.
+
+    For running passes on GraphModule directly, use PassManager instead.
+    """
+
+    def __init__(
+        self,
+        passes: Optional[Union[List[PassType], List[List[PassType]]]] = None,
+        constraints: Optional[List[Callable[[Callable, Callable], bool]]] = None,
+        run_checks_after_each_pass: bool = False,
+        suppress_check_failures: bool = False,
+        steps: int = 1,
+    ) -> None:
+        wrapped_passes = (
+            [
+                (
+                    fn
+                    if isinstance(fn, ExportedProgramPassBase)
+                    else pass_result_wrapper(fn)
+                )
+                for fn in pytree.tree_flatten(passes)[0]
+            ]
+            if passes
+            else []
+        )
+
+        super().__init__(
+            wrapped_passes,
+            constraints=constraints,
+            run_checks_after_each_pass=run_checks_after_each_pass,
+            suppress_check_failures=suppress_check_failures,
+            steps=steps,
+        )
+
+    def check(self, exported_program: ExportedProgram) -> None:
+        """Validates graph module invariants."""
+        graph_module = exported_program.graph_module
+        graph_module.recompile()
+        graph_module.graph.lint()
+
+        for node in graph_module.graph.nodes:
+            if node.op == "call_method":
+                raise ExportError(
+                    ExportErrorType.NOT_SUPPORTED,
+                    f"call_method `{node}` is not supported except for backend delegate.",
+                )
+
+        exported_program.validate()
+
+    # pyre-ignore[14]: Intentionally overriding with different signature for ExportedProgram
+    def __call__(  # noqa: C901
+        self,
+        exported_program: ExportedProgram,
+        override_verifiers: Optional[list[Type[Verifier]]] = None,
+    ) -> ExportedProgramPassResult:
+        """
+        Runs passes on an ExportedProgram.
+
+        Handles both GraphModule-only passes and ExportedProgram-aware passes. Will create a shallow copy of the exported program before running passes.
+
+        Args:
+            exported_program: The exported program to transform.
+
+        Returns:
+            ExportedProgramPassResult containing the transformed program.
+        """
+        if not self._validated:
+            self.solve_constraints()
+
+        exported_program = copy.copy(exported_program)
+
+        if override_verifiers:
+            exported_program._verifiers = override_verifiers
+
+        self.check(exported_program)
+
+        overall_modified = False
+
+        for _ in range(self.steps):
+            step_modified = False
+
+            for i, fn in enumerate(self.passes):
+                pass_modified = False
+                try:
+                    if not isinstance(fn, ExportedProgramPassBase):
+                        res = fn(exported_program.graph_module)
+                        if res is None:
+                            raise TypeError(
+                                f"The result of pass {_get_pass_name(fn)} should be type PassResult. "
+                                "Please wrap it with pass_result_wrapper()"
+                            )
+
+                        if res.modified:
+                            # Not running _update_exported_program_graph_module here because it is
+                            # possible that the verifier will fail upon new ExportedProgram construction,
+                            # and we should only run verification after each pass if
+                            # run_checks_after_each_pass is True.
+                            res.graph_module.recompile()
+                            exported_program._graph_module = res.graph_module
+                            exported_program._graph_signature = (
+                                _get_updated_graph_signature(
+                                    exported_program.graph_signature,
+                                    res.graph_module,
+                                )
+                            )
+                            exported_program._range_constraints = (
+                                _get_updated_range_constraints(res.graph_module)
+                            )
+                            pass_modified = True
+
+                    else:
+                        assert isinstance(fn, ExportedProgramPassBase)
+                        ep_res = fn(exported_program)
+                        exported_program = ep_res.exported_program
+
+                        if ep_res.modified:
+                            pass_modified = True
+                            exported_program.graph_module.recompile()
+
+                    if self.run_checks_after_each_pass:
+                        self.check(exported_program)
+
+                    if pass_modified:
+                        step_modified = True
+                        logger.debug(
+                            "Graph after pass '%s': %s",
+                            _get_pass_name(fn),
+                            exported_program.graph_module.graph,
+                        )
+
+                except Exception as e:
+                    prev_names = [_get_pass_name(p) for p in self.passes[:i]]
+                    msg = f"An error occurred when running the '{_get_pass_name(fn)}' pass after the following passes: {prev_names}"
+                    raise Exception(msg) from e  # noqa: TRY002
+
+            overall_modified = overall_modified or step_modified
+            if not step_modified:
+                break
+
+        self.check(exported_program)
+        return ExportedProgramPassResult(exported_program, overall_modified)
diff --git a/exir/program/BUCK b/exir/program/BUCK
index 7d9642efdb7..11f62edd99e 100644
--- a/exir/program/BUCK
+++ b/exir/program/BUCK
@@ -22,6 +22,7 @@ fbcode_target(_kind = runtime.python_library,
     ],
     deps = [
         "//caffe2:torch",
+        "//executorch/exir:_program_utils",
         "//executorch/exir:error",
         "//executorch/exir:graph_module",
         "//executorch/exir:pass_base",
diff --git a/exir/program/_program.py b/exir/program/_program.py
index b3d94c8ffd7..485d72bbe45 100644
--- a/exir/program/_program.py
+++ b/exir/program/_program.py
@@ -5,8 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# pyre-unsafe
-
+# pyre-strict
 import copy
 import io
 import logging
@@ -38,7 +37,8 @@
 from executorch.exir.operator.convert import _pybind_schema_to_native_schema
 from executorch.exir.operator.util import _QUANT_PRIMITIVES
 from executorch.exir.pass_base import PassBase
-from executorch.exir.pass_manager import PassType
+from executorch.exir.pass_manager import ExportedProgramPassManager, PassType
+
 from executorch.exir.passes import (
     base_post_op_replace_passes,
     base_pre_op_replace_passes,
@@ -88,17 +88,11 @@
 from torch.export._remove_auto_functionalized_pass import (
     unsafe_remove_auto_functionalized_pass,
 )
-from torch.export.exported_program import (
-    ConstantArgument,
-    ExportGraphSignature,
-    InputKind,
-    InputSpec,
-    OutputSpec,
-    TensorArgument,
-)
+from torch.export.exported_program import InputKind, InputSpec, TensorArgument
 from torch.fx import _pytree as fx_pytree
 from torch.fx._compatibility import compatibility
-from torch.fx.passes.infra.pass_manager import PassManager
+from torch.fx.passes.infra.pass_manager import PassManager as GraphModulePassManager
+
 from torch.utils import _pytree as pytree
 
 Val = Any
@@ -131,93 +125,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
 transform_op_to_aten_op = {}
 
 
-def _get_updated_range_constraints(gm):
-    def get_shape_env(gm):
-        vals = [
-            node.meta["val"]
-            for node in gm.graph.nodes
-            if node.meta.get("val", None) is not None
-        ]
-        from torch._guards import detect_fake_mode  # type: ignore[21]
-
-        fake_mode = detect_fake_mode(vals)
-        if fake_mode is not None:
-            return fake_mode.shape_env
-        for v in vals:
-            if isinstance(v, torch.SymInt):
-                return v.node.shape_env
-
-    shape_env = get_shape_env(gm)
-    if shape_env is None:
-        return {}
-    range_constraints = {
-        shape_env.replacements.get(k, k): v for k, v in shape_env.var_to_range.items()
-    }
-    # Only when we have an unbacked symint, and it's used as constructor inputs,
-    # runtime_var_to_range will make a difference compated to var_to_range.
-    # e.g. [2, oo) -> [0, oo)
-    for k, v in shape_env.var_to_range.items():
-        if k not in shape_env.replacements:
-            range_constraints[k] = v
-    return range_constraints
-
-
-def _get_updated_graph_signature(
-    old_signature: ExportGraphSignature,
-    new_gm: torch.fx.GraphModule,
-) -> ExportGraphSignature:
-    """
-    Update the graph signature's user_input/user_outputs.
-    """
-    new_input_specs = []
-    i = 0
-    for node in new_gm.graph.nodes:
-        if node.op != "placeholder":
-            continue
-
-        assert i < len(
-            old_signature.input_specs
-        ), "Number of inputs changed after transformation"
-        old_input_spec = old_signature.input_specs[i]
-        arg = (
-            old_input_spec.arg
-            if isinstance(old_input_spec.arg, ConstantArgument)
-            # pyre-fixme[20]: Argument `class_fqn` expected.
-            else type(old_input_spec.arg)(node.name)
-        )
-        new_input_specs.append(
-            InputSpec(
-                old_input_spec.kind,
-                arg,
-                old_input_spec.target,
-                persistent=old_input_spec.persistent,
-            )
-        )
-        i += 1
-
-    output_node = new_gm.graph.output_node()
-    assert output_node.op == "output"
-
-    new_output_specs = []
-    for i, node in enumerate(output_node.args[0]):
-        assert i < len(
-            old_signature.output_specs
-        ), "Number of outputs changed after transformation"
-        old_output_spec = old_signature.output_specs[i]
-        arg = (
-            old_output_spec.arg
-            if isinstance(old_output_spec.arg, ConstantArgument)
-            # pyre-fixme[20]: Argument `class_fqn` expected.
-            else type(old_output_spec.arg)(node.name)
-        )
-        new_output_specs.append(
-            OutputSpec(old_output_spec.kind, arg, old_output_spec.target)
-        )
-
-    new_signature = ExportGraphSignature(
-        input_specs=new_input_specs, output_specs=new_output_specs
-    )
-    return new_signature
+from executorch.exir._program_utils import (  # noqa: E402
+    _get_updated_graph_signature,
+    _get_updated_range_constraints,
+)
 
 
 def _transform(
@@ -243,13 +154,13 @@ def _transform(
     ), f"Expected all passes to be of PassType, not list or Verifier. Use override_verifiers kwarg instead. Got: {list(passes)}"
 
     return _transform_with_pass_manager(
-        self, PassManager(list(passes)), override_verifiers
+        self, ExportedProgramPassManager(list(passes)), override_verifiers
     )
 
 
 def _transform_with_pass_manager(
-    self,
-    pass_manager: PassManager,
+    self: ExportedProgram,
+    pass_manager: Union[ExportedProgramPassManager, GraphModulePassManager],
     override_verifiers: None | list[Type[Verifier]] = None,
 ) -> "ExportedProgram":
     """
@@ -258,22 +169,26 @@ def _transform_with_pass_manager(
     Args:
         self: The ExportedProgram instance to transform
         pass_manager: An instance of PassManager to apply transformations.
+            - ExportedProgramPassManager: operates on the full ExportedProgram
+            - GraphModulePassManager: operates on the GraphModule only
         override_verifiers: Optional list of verifier classes to use instead of the default verifiers.
             This is needed if the transforms yields illegal graph that the default verifier cannot handle.
 
     Returns:
         ExportedProgram: A new ExportedProgram with the transformations applied, or self if no changes were made
     """
-    res = pass_manager(self.graph_module)
-    transformed_gm = res.graph_module if res is not None else self.graph_module
-    assert transformed_gm is not None
-
-    if transformed_gm is self.graph_module and not res.modified:
-        return self
-
-    return _update_exported_program_graph_module(
-        self, transformed_gm, override_verifiers
-    )
+    if isinstance(pass_manager, ExportedProgramPassManager):
+        res = pass_manager(self, override_verifiers)
+        if not res.modified:
+            return self
+        return res.exported_program
+    else:
+        res = pass_manager(self.graph_module)
+        if not res.modified:
+            return self
+        return _update_exported_program_graph_module(
+            self, res.graph_module, override_verifiers
+        )
 
 
 def _update_exported_program_graph_module(
@@ -1324,7 +1239,12 @@ def collect_named_data_store_outputs(
 def to_edge_transform_and_lower(  # noqa: C901
     programs: Union[ExportedProgram, Dict[str, ExportedProgram]],
     transform_passes: Optional[
-        Union[Sequence[PassType], Dict[str, Sequence[PassType]], PassManager]
+        Union[
+            Sequence[PassType],
+            Dict[str, Sequence[PassType]],
+            GraphModulePassManager,
+            ExportedProgramPassManager,
+        ]
     ] = None,
     partitioner: Optional[
         Union[List[Partitioner], Dict[str, List[Partitioner]]]
@@ -1359,7 +1279,7 @@ def to_edge_transform_and_lower(  # noqa: C901
             2) a dictionary -
                 only method names specified in the dictionary will be transformed
                 with their corresponding passes
-            3) an instance of a PassManager -
+            3) an instance of a PassManager (either a GraphModulePassManager or an ExportedProgramPassManager) -
                 all methods in the given EdgeProgramManager will be
                 transformed with the given PassManager instance.
 
@@ -1604,7 +1524,12 @@ def exported_program(self, method_name: str = "forward") -> ExportedProgram:
     @et_logger("transform")
     def transform(
         self,
-        passes: Union[Sequence[PassType], Dict[str, Sequence[PassType]], PassManager],
+        passes: Union[
+            Sequence[PassType],
+            Dict[str, Sequence[PassType]],
+            ExportedProgramPassManager,
+            GraphModulePassManager,
+        ],
         compile_config: Optional[EdgeCompileConfig] = None,
     ) -> "EdgeProgramManager":
         """
@@ -1618,7 +1543,7 @@ def transform(
                 2) a dictionary mapping method names to lists of passes -
                     only method names specified in the dictionary will be
                     transformed with their corresponding passes.
-                3) a PassManager instance -
+                3) a PassManager (either ExportedProgramPassManager or GraphModulePassManager) instance -
                     all methods in the given EdgeProgramManager will be
                     transformed with the given PassManager instance.
             compile_config: Compile config to use for veriy the correctness of model
@@ -1637,13 +1562,15 @@ def transform(
         # Cast passes parameter upfront.
         passes_seq: Optional[Sequence[PassType]] = None
         passes_dict: Optional[Dict[str, Sequence[PassType]]] = None
-        pass_manager: Optional[PassManager] = None
+        pass_manager: Optional[
+            Union[ExportedProgramPassManager, GraphModulePassManager]
+        ] = None
 
         if isinstance(passes, Sequence):
             passes_seq = passes
         if isinstance(passes, dict):
             passes_dict = passes
-        if isinstance(passes, PassManager):
+        if isinstance(passes, (ExportedProgramPassManager, GraphModulePassManager)):
             pass_manager = passes
 
         for name, program in self._edge_programs.items():
diff --git a/exir/tests/test_pass_infra.py b/exir/tests/test_pass_infra.py
index ded3c0e849d..7df6b76b93a 100644
--- a/exir/tests/test_pass_infra.py
+++ b/exir/tests/test_pass_infra.py
@@ -9,14 +9,22 @@
 
 import unittest
 
+import executorch.exir as exir
 import torch
-from executorch.exir import to_edge
-from executorch.exir.pass_base import ExportPassBaseError, ProxyValue
-from executorch.exir.pass_manager import PassManager
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import (
+    ExportedProgramPassBase,
+    ExportedProgramPassResult,
+    ExportPassBaseError,
+    ProxyValue,
+)
+from executorch.exir.pass_manager import ExportedProgramPassManager, PassManager
 from executorch.exir.passes import ScalarToTensorPass
 from executorch.exir.passes.pass_registry import PassRegistry
-from torch.export import Dim, export
-from torch.fx.passes.infra.pass_base import PassBase
+from executorch.exir.program import to_edge
+from torch.export import Dim, export, ExportedProgram
+from torch.export.graph_signature import InputKind, InputSpec, TensorArgument
+from torch.fx.passes.infra.pass_base import PassBase, PassResult
 
 
 class TestPassInfra(unittest.TestCase):
@@ -216,3 +224,228 @@ def test_rejects_implicit_symbolic_scalar_coercions(self) -> None:
 
         with self.assertRaisesRegex(ExportPassBaseError, "converted to float"):
             float(ProxyValue(sym_float, torch.fx.Graph().placeholder("x")))
+
+
+class TestExportedProgramPassManager(unittest.TestCase):
+    def test_runs_graph_module_passes_on_exported_program(self) -> None:
+        """
+        Tests that ExportedProgramPassManager runs GraphModule passes
+        on an ExportedProgram and the graph is correctly modified.
+        """
+
+        def replace_add_with_mul(gm: torch.fx.GraphModule) -> PassResult:
+            modified = False
+            for node in gm.graph.find_nodes(
+                op="call_function", target=exir_ops.edge.aten.add.Tensor
+            ):
+                node.target = exir_ops.edge.aten.mul.Tensor
+                modified = True
+            return PassResult(gm, modified)
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            y = torch.add(x, x)
+            z = torch.add(y, x)
+            return z
+
+        exported_program = (
+            exir.capture(f, (torch.randn(10),), exir.CaptureConfig())
+            .to_edge()
+            .exported_program
+        )
+
+        pm = ExportedProgramPassManager(passes=[replace_add_with_mul])
+        result = pm(exported_program)
+
+        # Verify return type
+        self.assertIsInstance(result, ExportedProgramPassResult)
+        self.assertTrue(result.modified)
+
+        # Check that all add ops were replaced with mul
+        self.assertEqual(
+            len(
+                result.exported_program.graph.find_nodes(
+                    op="call_function", target=exir_ops.edge.aten.add.Tensor
+                )
+            ),
+            0,
+        )
+
+    def test_updates_constants_on_exported_program(self) -> None:
+        """
+        Tests that ExportedProgramPassManager can update constants
+        in the ExportedProgram using an ExportedProgram-aware pass.
+        """
+
+        class DoubleConstantsPass(ExportedProgramPassBase):
+            """Pass that doubles all constant tensor values in the ExportedProgram."""
+
+            def call(self, ep: ExportedProgram) -> ExportedProgramPassResult:
+                modified = False
+                for key, const in ep.constants.items():
+                    if isinstance(const, torch.Tensor):
+                        ep.constants[key] = const * 2
+                        modified = True
+                return ExportedProgramPassResult(ep, modified)
+
+        class ModuleWithConstant(torch.nn.Module):
+            def __init__(self) -> None:
+                super().__init__()
+                self.weight = torch.ones(3)
+
+            def forward(self, x: torch.Tensor) -> torch.Tensor:
+                return x + self.weight
+
+        module = ModuleWithConstant()
+        exported_program = to_edge(
+            torch.export.export(module, (torch.randn(3),))
+        ).exported_program()
+
+        # Verify there are constants in the ExportedProgram
+        self.assertGreater(
+            len(exported_program.constants), 0, "Expected constants in ExportedProgram"
+        )
+
+        # Store original constant values
+        original_values = {
+            key: const.clone()
+            for key, const in exported_program.constants.items()
+            if isinstance(const, torch.Tensor)
+        }
+
+        pm = ExportedProgramPassManager(passes=[DoubleConstantsPass()])
+        result = pm(exported_program)
+
+        self.assertIsInstance(result, ExportedProgramPassResult)
+        self.assertTrue(result.modified)
+
+        # Verify constants were doubled
+        for key, original_const in original_values.items():
+            new_const = result.exported_program.constants[key]
+            self.assertTrue(
+                torch.allclose(new_const, original_const * 2),
+                f"Constant {key} was not doubled correctly",
+            )
+
+    def test_adds_constant_to_exported_program(self) -> None:
+        """
+        Tests that ExportedProgramPassManager can add a new constant
+        to the ExportedProgram, including updating the graph and input specs.
+        """
+
+        class AddConstantPass(ExportedProgramPassBase):
+            """Pass that adds a new constant tensor to the ExportedProgram."""
+
+            def call(self, ep: ExportedProgram) -> ExportedProgramPassResult:
+                graph = ep.graph_module.graph
+                sig = ep.graph_signature
+
+                # Find the first user input to insert before it
+                placeholders = graph.find_nodes(op="placeholder")
+                assert len(placeholders) == 1
+                user_input_node = placeholders[0]
+
+                # Create a new constant tensor
+                new_constant_name = "_test_added_constant"
+                new_constant_tensor = torch.tensor([1.0, 2.0, 3.0])
+
+                # Add placeholder node for the new constant
+                with graph.inserting_before(user_input_node):
+                    new_placeholder = graph.placeholder(new_constant_name)
+                    # Set up meta for the new placeholder
+                    new_placeholder.meta["val"] = new_constant_tensor
+
+                # Add the constant to the constants dict
+                ep.constants[new_constant_name] = new_constant_tensor
+
+                # Update input specs to include the new constant
+                new_input_spec = InputSpec(
+                    kind=InputKind.CONSTANT_TENSOR,
+                    arg=TensorArgument(name=new_placeholder.name),
+                    target=new_constant_name,
+                    persistent=False,
+                )
+                sig.input_specs = (new_input_spec, sig.input_specs[0])
+
+                return ExportedProgramPassResult(ep, modified=True)
+
+        class IdentityModule(torch.nn.Module):
+            def __init__(self) -> None:
+                super().__init__()
+
+            def forward(self, x: torch.Tensor) -> torch.Tensor:
+                return x
+
+        exported_program = to_edge(
+            torch.export.export(IdentityModule(), (torch.randn(3),))
+        ).exported_program()
+        assert len(exported_program.constants) == 0
+        assert len(exported_program.graph_signature.input_specs) == 1
+
+        pm = ExportedProgramPassManager(passes=[AddConstantPass()])
+        result = pm(exported_program)
+
+        self.assertIsInstance(result, ExportedProgramPassResult)
+        self.assertTrue(result.modified)
+
+        # Verify the new constant was added to constants dict
+        self.assertEqual(len(result.exported_program.constants), 1)
+        self.assertIn("_test_added_constant", result.exported_program.constants)
+        self.assertTrue(
+            torch.allclose(
+                result.exported_program.constants["_test_added_constant"],
+                torch.tensor([1.0, 2.0, 3.0]),
+            )
+        )
+
+        # Verify input_specs was updated
+        self.assertEqual(
+            len(result.exported_program.graph_signature.input_specs),
+            2,
+        )
+
+        # Verify the new placeholder exists in the graph
+        placeholder_names = [
+            node.target
+            for node in result.exported_program.graph_module.graph.find_nodes(
+                op="placeholder"
+            )
+        ]
+        self.assertTrue(len(placeholder_names) == 2)
+
+        # Verify the new input spec has the correct kind
+        new_spec = None
+        for spec in result.exported_program.graph_signature.input_specs:
+            if spec.target == "_test_added_constant":
+                new_spec = spec
+                break
+        self.assertIsNotNone(new_spec)
+        self.assertEqual(new_spec.kind, InputKind.CONSTANT_TENSOR)
+
+    def test_invalid_pass_creates_call_method(self) -> None:
+        """
+        Tests that ExportedProgramPassManager detects invalid passes
+        that introduce call_method nodes.
+        """
+
+        def introduce_call_method(gm: torch.fx.GraphModule) -> PassResult:
+            node = list(gm.graph.nodes)[-2]
+            with gm.graph.inserting_after(node):
+                gm.graph.call_method("torch.ops.relu", (torch.randn(2),))
+            return PassResult(gm, True)
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            y = torch.add(x, x)
+            return y
+
+        exported_program = (
+            exir.capture(f, (torch.randn(10),), exir.CaptureConfig())
+            .to_edge()
+            .exported_program
+        )
+
+        pm = ExportedProgramPassManager(
+            passes=[introduce_call_method], run_checks_after_each_pass=True
+        )
+
+        with self.assertRaisesRegex(Exception, "call_method"):
+            pm(exported_program)

From 2c9c9dda6eaf3ad764b2dc260a503efc01526eef Mon Sep 17 00:00:00 2001
From: Usamah <usamah.zaheer@arm.com>
Date: Wed, 27 May 2026 10:43:09 +0100
Subject: [PATCH 033/103] Arm backend: Enable Swin2SR TOSA ref tests (#19771)

Summary:
- Enable Swin2SR FP and INT TOSA pipelines to run through the reference
model.
- Keep quantized VGF runtime execution Linux-only until Darwin VKML
validation is available.
- Record current Swin2SR partition boundaries and track delegation gaps
in MLETORCH-2163.

Test Plan:
- lintrunner on test_swin2sr_arm.py
- backends/arm/scripts/pre-push

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Usamah Zaheer <usamah.zaheer@arm.com>
---
 backends/arm/test/models/test_swin2sr_arm.py | 41 +++++++++++++-------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/backends/arm/test/models/test_swin2sr_arm.py b/backends/arm/test/models/test_swin2sr_arm.py
index 6bf9b2a18d5..e4fc6f07950 100644
--- a/backends/arm/test/models/test_swin2sr_arm.py
+++ b/backends/arm/test/models/test_swin2sr_arm.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import sys
 from typing import Tuple
 
 import torch
@@ -17,7 +18,7 @@
 
 input_t = Tuple[torch.Tensor]
 
-exir_ops = [
+ops_expected_absent_after_lowering = [
     "executorch_exir_dialects_edge__ops_aten_add_Tensor",
     "executorch_exir_dialects_edge__ops_aten_convolution_default",
     "executorch_exir_dialects_edge__ops_aten_layer_norm_default",
@@ -27,6 +28,21 @@
     "executorch_exir_dialects_edge__ops_aten_softmax_int",
 ]
 
+# TODO/MLETORCH-2163: Investigate Swin2SR delegation gaps around index/view
+# in FP and Q/DQ, clamp, and expand_copy in INT.
+swin2sr_fp_lowered_outer_graph_ops = {
+    "torch.ops.higher_order.executorch_call_delegate": 2,
+    "executorch_exir_dialects_edge__ops_aten_index_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_view_copy_default": 2,
+}
+swin2sr_int_lowered_outer_graph_ops = {
+    "torch.ops.higher_order.executorch_call_delegate": 3,
+    "executorch_exir_dialects_edge__ops_aten_clamp_default": 4,
+    "executorch_exir_dialects_edge__ops_aten_expand_copy_default": 4,
+    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 5,
+    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 6,
+}
+
 
 class TinySwin2SR(torch.nn.Module):
     def __init__(self):
@@ -62,12 +78,10 @@ def test_swin2sr_tosa_FP():
         model,
         model_inputs,
         aten_op=[],
-        exir_op=exir_ops,
+        exir_op=ops_expected_absent_after_lowering,
         use_to_edge_transform_and_lower=True,
     )
-    pipeline.pop_stage("check_count.exir")
-    # TODO: MLETORCH-2134 re-enable once Swin2SR runs on the TOSA ref model.
-    pipeline.pop_stage("run_method_and_compare_outputs")
+    pipeline.change_args("check_count.exir", swin2sr_fp_lowered_outer_graph_ops)
     pipeline.run()
 
 
@@ -77,12 +91,10 @@ def test_swin2sr_tosa_INT():
         model,
         model_inputs,
         aten_op=[],
-        exir_op=exir_ops,
+        exir_op=ops_expected_absent_after_lowering,
         use_to_edge_transform_and_lower=True,
     )
-    pipeline.pop_stage("check_count.exir")
-    # TODO: MLETORCH-2134 re-enable once Swin2SR runs on the TOSA ref model.
-    pipeline.pop_stage("run_method_and_compare_outputs")
+    pipeline.change_args("check_count.exir", swin2sr_int_lowered_outer_graph_ops)
     pipeline.run()
 
 
@@ -93,13 +105,12 @@ def test_swin2sr_vgf_quant():
         model,
         model_inputs,
         aten_op=[],
-        exir_op=exir_ops,
+        exir_op=ops_expected_absent_after_lowering,
         use_to_edge_transform_and_lower=True,
         quantize=True,
+        run_on_vulkan_runtime=sys.platform == "linux",
     )
-    pipeline.pop_stage("check_count.exir")
-    # TODO: MLETORCH-2134 re-enable once Swin2SR runs on the TOSA ref model.
-    pipeline.pop_stage("run_method_and_compare_outputs")
+    pipeline.change_args("check_count.exir", swin2sr_int_lowered_outer_graph_ops)
     pipeline.run()
 
 
@@ -110,9 +121,9 @@ def test_swin2sr_vgf_no_quant():
         model,
         model_inputs,
         aten_op=[],
-        exir_op=exir_ops,
+        exir_op=ops_expected_absent_after_lowering,
         use_to_edge_transform_and_lower=True,
         quantize=False,
     )
-    pipeline.pop_stage("check_count.exir")
+    pipeline.change_args("check_count.exir", swin2sr_fp_lowered_outer_graph_ops)
     pipeline.run()

From dd00d42d7d0a751ddbf99d72efee802c427c654b Mon Sep 17 00:00:00 2001
From: SaoirseARM <44364573+SaoirseARM@users.noreply.github.com>
Date: Wed, 27 May 2026 10:56:01 +0100
Subject: [PATCH 034/103] Arm backend: Fix nested control-flow partition checks
 (#19697)

- Updates so that the outer cond graph is picked up.
- Updates to nested quantization.
- Removes need for increased threshold.

Signed-off-by: Saoirse Stewart <saoirse.stewart@arm.com>
---
 backends/arm/_passes/arm_pass_utils.py        |  49 +-------
 .../arm/_passes/control_flow_const_inline.py  |   8 +-
 backends/arm/_passes/insert_rescales_pass.py  |   8 +-
 .../arm/_passes/scalars_to_attribute_pass.py  |   8 +-
 .../operator_support/control_flow_support.py  |  26 +++--
 backends/arm/operators/op_cond_if.py          |  19 +++-
 backends/arm/operators/op_while.py            |  19 +++-
 backends/arm/quantizer/arm_quantizer.py       | 105 ++++++++++++------
 backends/arm/test/ops/test_cond.py            |   2 -
 backends/arm/tosa/backend.py                  |  61 +++++++++-
 backends/arm/tosa/mapping.py                  |   1 +
 backends/arm/tosa/partitioner.py              |   8 +-
 12 files changed, 193 insertions(+), 121 deletions(-)

diff --git a/backends/arm/_passes/arm_pass_utils.py b/backends/arm/_passes/arm_pass_utils.py
index 000f92135eb..f66b17b9da2 100644
--- a/backends/arm/_passes/arm_pass_utils.py
+++ b/backends/arm/_passes/arm_pass_utils.py
@@ -9,7 +9,7 @@
 import operator
 import traceback
 from inspect import isclass
-from typing import cast, List, Optional, Sequence, Tuple
+from typing import cast, Optional, Sequence
 
 import torch
 import torch.fx
@@ -19,10 +19,6 @@
 from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
-from executorch.exir.graph_module import (
-    _get_control_flow_submodules,
-    get_control_flow_submodules,
-)
 from executorch.exir.pass_base import NodeMetadata
 
 from torch._export.utils import (
@@ -36,7 +32,6 @@
 from torch._ops import OpOverload
 from torch._subclasses.fake_tensor import FakeTensor
 from torch.export.graph_signature import InputKind
-from torch.fx import GraphModule, Node
 
 
 def is_submodule_node(node: torch.fx.Node):
@@ -364,48 +359,6 @@ def set_node_arg(node: torch.fx.Node, i: int | str, value):
         raise RuntimeError("Invalid type")
 
 
-def is_nested_control_flow_graph(graph_module: GraphModule) -> bool:
-    """Returns True if graph_module is a nested control-flow graph."""
-
-    # Find all top-level control-flow submodules
-    top_cf = get_control_flow_submodules(graph_module)
-    # For each submodule, see if it itself has control-flow inside
-    for _, submod, _ in top_cf:
-        if get_control_flow_submodules(submod):
-            return True
-    return False
-
-
-def get_cond_while_submodules_nested(
-    graph_module: GraphModule,
-    apply_quantization: bool = False,
-) -> List[Tuple[str, GraphModule, Node]]:
-    """Recursively find cond/while_loop submodules in an GraphModule.
-
-    In nested control flow graphs, FX records the submodule functions
-    (true/false or cond/body) in reverse order compared to top-level graphs. We
-    must swap the indices when nested so that cond (first) and body/true_fn
-    (second) are consistently identified across all nesting levels.
-
-    """
-
-    # Determine arg indices based on nesting and whether only cond branch is needed
-    nested = is_nested_control_flow_graph(graph_module)
-    # cond: [true_fn, false_fn] or swapped if nested
-    cond_indices = [2, 1] if nested else [1, 2]
-    # while_loop: [cond_fn, body_fn] or swapped if nested
-    while_indices = [1, 0] if nested else [0, 1]
-    if apply_quantization:
-        # only keep the cond_fn for while_loop (first index) when quantizing.
-        while_indices = [while_indices[0]]
-    mapping = {
-        torch.ops.higher_order.cond: cond_indices,
-        torch.ops.higher_order.while_loop: while_indices,
-    }
-    # collect cond/while submodules (using mapping indices)
-    return _get_control_flow_submodules(graph_module, mapping)
-
-
 def to_2tuple(value):
     """Normalizes scalars, and 1-element sequences to a tuple of length 2."""
     if isinstance(value, int):
diff --git a/backends/arm/_passes/control_flow_const_inline.py b/backends/arm/_passes/control_flow_const_inline.py
index cc76e5d9957..177ad30754e 100644
--- a/backends/arm/_passes/control_flow_const_inline.py
+++ b/backends/arm/_passes/control_flow_const_inline.py
@@ -7,12 +7,10 @@
 
 import torch
 from executorch.backends.arm._passes.arm_pass import ArmPass
-from executorch.backends.arm._passes.arm_pass_utils import (
-    get_cond_while_submodules_nested,
-    is_submodule_node,
-)
+from executorch.backends.arm._passes.arm_pass_utils import is_submodule_node
 from executorch.backends.transforms.utils import is_get_attr_node
 from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.graph_module import get_cond_while_submodules
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import GraphModule
 
@@ -37,7 +35,7 @@ class ControlFlowConstInlinePass(ArmPass):
 
     def _convert_getattr(self, graph_module):
         modified = False
-        for _, submodule, _ in get_cond_while_submodules_nested(graph_module):
+        for _, submodule, _ in get_cond_while_submodules(graph_module):
             for submodule_node in submodule.graph.nodes:
                 if submodule_node.target in self._targeted_ops:
                     self._convert_getattr(submodule)
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
index 06c27005440..45374c12c3b 100644
--- a/backends/arm/_passes/insert_rescales_pass.py
+++ b/backends/arm/_passes/insert_rescales_pass.py
@@ -509,7 +509,13 @@ def _rescale_submodule_inputs(
             input_node = input_nodes[qargs_index]
             if len(input_node.users) == 0:
                 continue
-            if len(out_qparams_map := input_node.meta.get("output_qparams", {})) != 1:
+            out_qparams_map = input_node.meta.get("output_qparams", {})
+            if len(out_qparams_map) == 0:
+                # Nested control-flow submodules may also expose frozen captured
+                # values as placeholders. Those are not control-flow boundary
+                # inputs, so there is no qparam pair to bridge with a RESCALE.
+                continue
+            if len(out_qparams_map) != 1:
                 raise ValueError(
                     f"Expected submodule input {input_node} to have exactly one output qparam, got {out_qparams_map}"
                 )
diff --git a/backends/arm/_passes/scalars_to_attribute_pass.py b/backends/arm/_passes/scalars_to_attribute_pass.py
index 0473caf91e7..63a38b8cb2f 100644
--- a/backends/arm/_passes/scalars_to_attribute_pass.py
+++ b/backends/arm/_passes/scalars_to_attribute_pass.py
@@ -8,11 +8,9 @@
 
 import torch
 from executorch.backends.arm._passes import ArmPass
-from executorch.backends.arm._passes.arm_pass_utils import (
-    get_cond_while_submodules_nested,
-    get_first_fake_tensor,
-)
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
+from executorch.exir.graph_module import get_cond_while_submodules
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import GraphModule, Node
 from torchao.quantization.pt2e.utils import get_new_attr_name_with_prefix
@@ -98,7 +96,7 @@ def handle_control_nodes(self, graph_module: GraphModule) -> None:
         """Apply scalar argument conversion on subgraphs of control-flow
         nodes.
         """
-        for _, submodule, _ in get_cond_while_submodules_nested(graph_module):
+        for _, submodule, _ in get_cond_while_submodules(graph_module):
             for submodule_node in submodule.graph.nodes:
                 self._convert_scalar_args(submodule, submodule_node)
 
diff --git a/backends/arm/operator_support/control_flow_support.py b/backends/arm/operator_support/control_flow_support.py
index b34ebeaece0..f5251357cd3 100644
--- a/backends/arm/operator_support/control_flow_support.py
+++ b/backends/arm/operator_support/control_flow_support.py
@@ -19,6 +19,13 @@
 from torch.fx.passes.operator_support import OperatorSupportBase
 
 
+def _owning_graph_module(node: fx.Node) -> fx.GraphModule:
+    graph_module = getattr(node.graph, "owning_module", None)
+    if not isinstance(graph_module, fx.GraphModule):
+        raise RuntimeError(f"Could not resolve owning GraphModule for node {node}")
+    return graph_module
+
+
 def _fully_partitioned(submodule: fx.GraphModule) -> bool:
     """Check that all nested control-flow ops within this submodule are also
     fully partitioned.
@@ -27,8 +34,8 @@ def _fully_partitioned(submodule: fx.GraphModule) -> bool:
 
     for submodule_node in submodule.graph.nodes:
         if submodule_node.target in ControlFlowOpSupported._targeted_ops:
-            if _submodules_fully_partitioned(submodule_node, submodule):
-                return True
+            if not _submodules_fully_partitioned(submodule_node, submodule):
+                return False
 
         if submodule_node.op != "call_function":
             continue
@@ -56,13 +63,18 @@ def _fully_partitioned(submodule: fx.GraphModule) -> bool:
     return True
 
 
-def _submodules_fully_partitioned(node: fx.Node, graph_module: fx.GraphModule) -> bool:
+def _submodules_fully_partitioned(
+    node: fx.Node, graph_module: fx.GraphModule | None = None
+) -> bool:
     """Returns whether the submodule arguments to a cond node were fully
     partitioned.
 
     Updates "val" meta of the submodules if they are.
 
     """
+    if graph_module is None:
+        graph_module = _owning_graph_module(node)
+
     match node.target:
         case torch.ops.higher_order.cond:
             submodule_args = node.args[1:3]
@@ -129,9 +141,7 @@ def is_node_supported(
                         node, f"Submodule had unsupported user {user}"
                     )
                     return False
-                if not _submodules_fully_partitioned(
-                    user, self.exported_program.graph_module
-                ):
+                if not _submodules_fully_partitioned(user):
                     self.reporter.report_reject(
                         node, "One submodule was not fully partitioned"
                     )
@@ -174,9 +184,7 @@ def is_node_supported(
                 )
                 return False
 
-            if not _submodules_fully_partitioned(
-                node, self.exported_program.graph_module
-            ):
+            if not _submodules_fully_partitioned(node):
                 self.reporter.report_reject(
                     node, "Submodule was not fully partitioned."
                 )
diff --git a/backends/arm/operators/op_cond_if.py b/backends/arm/operators/op_cond_if.py
index 05d38e2a1f0..513100c2b15 100644
--- a/backends/arm/operators/op_cond_if.py
+++ b/backends/arm/operators/op_cond_if.py
@@ -17,7 +17,11 @@
     validate_num_inputs,
     validate_valid_dtype,
 )
-from executorch.backends.arm.tosa.mapping import TosaArg  # type: ignore
+from executorch.backends.arm.tosa.mapping import (  # type: ignore
+    TOSA_CONTROL_FLOW_REGION_NAME_META,
+    TOSA_TENSOR_NAME_META,
+    TosaArg,
+)
 from torch.fx import Node
 
 
@@ -38,7 +42,12 @@ def define_node(
         validate_cf_extension(self.target, self.tosa_spec)
 
         attr = ts.TosaSerializerAttribute()
-        if_graph, else_graph = (cast(Node, arg).target for arg in node.args[1:3])
+        if_graph, else_graph = (
+            cast(Node, arg).meta.get(
+                TOSA_CONTROL_FLOW_REGION_NAME_META, str(cast(Node, arg).target)
+            )
+            for arg in node.args[1:3]
+        )
         attr.CondIfAttribute(if_graph, else_graph)
 
         self._serialize_operator(
@@ -47,7 +56,11 @@ def define_node(
             ts.Op.COND_IF,
             [
                 inputs[0].name,
-                *(subgraph_input.name for subgraph_input in inputs[-1].special),
+                *(
+                    subgraph_input.name
+                    + subgraph_input.meta.get(TOSA_TENSOR_NAME_META, "")
+                    for subgraph_input in inputs[-1].special
+                ),
             ],
             output.multiple_output_names,
             attr,
diff --git a/backends/arm/operators/op_while.py b/backends/arm/operators/op_while.py
index 2b6314d3454..58501dd3ba0 100644
--- a/backends/arm/operators/op_while.py
+++ b/backends/arm/operators/op_while.py
@@ -15,8 +15,14 @@
     validate_cf_extension,
     validate_num_inputs,
 )
-from executorch.backends.arm.tosa.mapping import map_dtype, TosaArg
+from executorch.backends.arm.tosa.mapping import (
+    map_dtype,
+    TOSA_CONTROL_FLOW_REGION_NAME_META,
+    TOSA_TENSOR_NAME_META,
+    TosaArg,
+)
 from executorch.backends.arm.tosa.utils import normalize_symint
+
 from torch.fx import Node
 
 
@@ -46,7 +52,12 @@ def define_node(
             )
 
         attr = ts.TosaSerializerAttribute()
-        cond_graph, body_graph = (str(cast(Node, arg).target) for arg in node.args[:2])
+        cond_graph, body_graph = (
+            cast(Node, arg).meta.get(
+                TOSA_CONTROL_FLOW_REGION_NAME_META, str(cast(Node, arg).target)
+            )
+            for arg in node.args[:2]
+        )
         attr.WhileLoopAttribute(cond_graph, body_graph)
 
         input_names: list[str] = []
@@ -55,7 +66,9 @@ def define_node(
                 raise ValueError(
                     f"{self.target}: Unsupported carried input type {type(loop_input)}."
                 )
-            input_names.append(loop_input.name)
+            input_names.append(
+                loop_input.name + loop_input.meta.get(TOSA_TENSOR_NAME_META, "")
+            )
 
         num_inputs = len(input_names)
         num_outputs = len(output.multiple_output_names)
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index f1dfb5f1323..3508410509c 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -40,6 +40,10 @@
 from executorch.backends.cortex_m.quantizer.pattern_matcher import PatternMatcher
 
 from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
+from executorch.exir.graph_module import (
+    _get_control_flow_submodules,
+    get_cond_while_submodules,
+)
 
 from torch._ops import OpOverload
 
@@ -52,10 +56,6 @@
 from executorch.backends.arm.common.arm_compile_spec import (
     ArmCompileSpec,
 )  # isort: skip
-from executorch.backends.arm._passes.arm_pass_utils import (
-    get_cond_while_submodules_nested,
-    is_submodule_node,
-)
 
 from executorch.backends.arm.quantizer.arm_quantizer_utils import (
     _get_int32_bias_qspec,
@@ -107,6 +107,29 @@
 logger = logging.getLogger(__name__)
 
 
+def get_cond_while_submodules_ao(
+    graph_module: GraphModule,
+    apply_quantization: bool = False,
+) -> list[tuple[str, GraphModule, Node]]:
+    """Return cond/while submodules for the current graph module.
+
+    Quantization handles ``while_loop`` body functions natively in torchao, so
+    only the ``while_loop`` cond function is processed explicitly there.
+
+    """
+
+    if not apply_quantization:
+        return get_cond_while_submodules(graph_module)
+
+    return _get_control_flow_submodules(
+        graph_module,
+        {
+            torch.ops.higher_order.cond: [1, 2],
+            torch.ops.higher_order.while_loop: [0],
+        },
+    )
+
+
 @functools.lru_cache
 def get_symmetric_quantization_config(
     is_per_channel: bool = True,
@@ -810,42 +833,56 @@ def _quantize_with_submodules(
         prepare_fn = prepare_qat_pt2e if is_qat else prepare_pt2e
 
         prepared = prepare_fn(model, self)
-        # Prepare conditional submodules (e.g., if/while bodies)
-        # prepare only cond branches and while_loop cond_fn
-        for name, submodule, _ in get_cond_while_submodules_nested(
-            prepared, apply_quantization=True
-        ):
-            prepared.set_submodule(name, prepare_fn(submodule, self), strict=True)
-            for submodule_node in submodule.graph.nodes:
-                if is_submodule_node(submodule_node):
-                    for nested_name, nested_sub, _ in get_cond_while_submodules_nested(
-                        submodule, apply_quantization=True
-                    ):
-                        prepared.set_submodule(
-                            nested_name, prepare_fn(nested_sub, self), strict=True
-                        )
+
+        def _prepare_control_flow_submodules(
+            source_graph_module: GraphModule, prefix: str = ""
+        ) -> None:
+            for name, submodule, _ in get_cond_while_submodules_ao(
+                source_graph_module, apply_quantization=True
+            ):
+                qualified_name = f"{prefix}.{name}" if prefix else name
+                prepared.set_submodule(
+                    qualified_name, prepare_fn(submodule, self), strict=True
+                )
+                _prepare_control_flow_submodules(submodule, qualified_name)
+
+        _prepare_control_flow_submodules(prepared)
 
         for inp in calibration_samples:
             prepared(*inp)
 
-        # Prepare conditional submodules (e.g., if/while bodies)
-        # convert only cond branches and while_loop cond_fn
-        for _, submodule, _ in get_cond_while_submodules_nested(
-            prepared, apply_quantization=True
+        def _convert_control_flow_submodule(
+            graph_module: GraphModule,
+        ) -> GraphModule:
+            converted_submodules: list[tuple[str, GraphModule]] = []
+            for name, submodule, _ in get_cond_while_submodules_ao(
+                graph_module, apply_quantization=True
+            ):
+                converted_submodules.append(
+                    (name, _convert_control_flow_submodule(submodule))
+                )
+            converted_graph_module = convert_pt2e(
+                graph_module, fold_quantize=fold_quantize
+            )
+            for name, converted_submodule in converted_submodules:
+                converted_graph_module.set_submodule(
+                    name, converted_submodule, strict=True
+                )
+            return converted_graph_module
+
+        converted_top_level_submodules: list[tuple[str, GraphModule]] = []
+        for name, submodule, _ in list(
+            get_cond_while_submodules_ao(prepared, apply_quantization=True)
         ):
-            converted = convert_pt2e(submodule, fold_quantize=fold_quantize)
-            for submodule_node in submodule.graph.nodes:
-                if is_submodule_node(submodule_node):
-                    for nested_name, nested_sub, _ in get_cond_while_submodules_nested(
-                        submodule, apply_quantization=True
-                    ):
-                        converted.set_submodule(
-                            nested_name,
-                            convert_pt2e(nested_sub, fold_quantize=fold_quantize),
-                            strict=True,
-                        )
+            converted_top_level_submodules.append(
+                (name, _convert_control_flow_submodule(submodule))
+            )
+
+        converted = convert_pt2e(prepared, fold_quantize=fold_quantize)
+        for name, converted_submodule in converted_top_level_submodules:
+            converted.set_submodule(name, converted_submodule, strict=True)
 
-        return convert_pt2e(prepared, fold_quantize=fold_quantize)
+        return converted
 
 
 class _TOSAQuantizerV1(Quantizer):
diff --git a/backends/arm/test/ops/test_cond.py b/backends/arm/test/ops/test_cond.py
index 8c6d9ef329c..6f489f0ab01 100644
--- a/backends/arm/test/ops/test_cond.py
+++ b/backends/arm/test/ops/test_cond.py
@@ -250,8 +250,6 @@ def test_cond_tosa_INT(case: Callable[[], tuple[torch.nn.Module, tuple]]):
         example_inputs,
         aten_op,
         tosa_extensions=["cf"],
-        frobenius_threshold=0.8,
-        cosine_threshold=0.8,  # MLETORCH-1808
     )
     _set_branch_calibration_samples(pipeline, module, example_inputs)
     # Make sure no cond ops are left after partitioning.
diff --git a/backends/arm/tosa/backend.py b/backends/arm/tosa/backend.py
index 6b864e284b1..b0cae15022d 100644
--- a/backends/arm/tosa/backend.py
+++ b/backends/arm/tosa/backend.py
@@ -23,9 +23,6 @@
 
 import tosa_serializer as ts
 
-from executorch.backends.arm._passes.arm_pass_utils import (
-    get_cond_while_submodules_nested,
-)
 from executorch.backends.arm.common.arm_compile_spec import ArmCompileSpec
 from executorch.backends.arm.common.debug import debug_fail, debug_tosa_dump
 from executorch.backends.arm.debug.schema import DebugHook
@@ -35,9 +32,13 @@
     process_placeholder,
 )
 from executorch.backends.arm.tosa.compile_spec import TosaCompileSpec
-from executorch.backends.arm.tosa.mapping import TOSA_TENSOR_NAME_META
+from executorch.backends.arm.tosa.mapping import (
+    TOSA_CONTROL_FLOW_REGION_NAME_META,
+    TOSA_TENSOR_NAME_META,
+)
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
+from executorch.exir.graph_module import get_cond_while_submodules
 from torch.export.exported_program import ExportedProgram
 from torch.fx import Graph, GraphModule, Node
 
@@ -45,6 +46,15 @@
 logger = logging.getLogger(__name__)
 
 
+def _qualify_control_flow_region_name(
+    parent_region_name: str | None, child_region_name: str
+) -> str:
+    """Return a globally unique TOSA region name for nested control flow."""
+    if parent_region_name is None:
+        return child_region_name
+    return f"{parent_region_name}__{child_region_name}"
+
+
 def _annotate_external_ids(ep_graph: Graph) -> Dict[str, int]:
     """Assign deterministic output IDs to leaf outputs.
 
@@ -325,6 +335,43 @@ def _preprocess_module(  # noqa: C901
             RuntimeError: If an FX node with an unsupported op kind is found.
 
         """
+
+        def _annotate_control_flow_region_names(
+            graph_module: GraphModule, parent_region_name: str | None
+        ) -> None:
+            for node in graph_module.graph.nodes:
+                if node.op != "call_function":
+                    continue
+
+                match node.target:
+                    case torch.ops.higher_order.cond:
+                        arg_indices = [1, 2]
+                    case torch.ops.higher_order.while_loop:
+                        arg_indices = [0, 1]
+                    case _:
+                        continue
+
+                for arg_index in arg_indices:
+                    submodule_node = node.args[arg_index]
+                    if not isinstance(submodule_node, Node):
+                        raise RuntimeError(
+                            f"Expected control flow submodule arg {arg_index} to be a Node."
+                        )
+                    if submodule_node.op != "get_attr":
+                        raise RuntimeError(
+                            f"Expected control flow submodule arg {arg_index} to be a get_attr node."
+                        )
+                    if not isinstance(submodule_node.target, str):
+                        raise RuntimeError(
+                            "Expected control flow submodule target to be a string."
+                        )
+
+                    submodule_node.meta[TOSA_CONTROL_FLOW_REGION_NAME_META] = (
+                        _qualify_control_flow_region_name(
+                            parent_region_name, submodule_node.target
+                        )
+                    )
+
         tosa_spec = compile_spec.tosa_spec
         node_to_id_map = _annotate_external_ids(graph_module.graph)
         artifact_path = compile_spec._get_intermediate_path()
@@ -348,6 +395,8 @@ def _preprocess_module(  # noqa: C901
         else:
             logger.debug("No re-sorting outputs (workaround) during TOSA lowering.")
 
+        _annotate_control_flow_region_names(graph_module, submodule_name)
+
         if submodule_name is not None:
             tosa_graph.startRegion(submodule_name)
             tosa_graph.currRegion.addBasicBlock(submodule_name)
@@ -396,7 +445,7 @@ def _preprocess_module(  # noqa: C901
                 raise
 
         # Recursively preprocess controlflow submodules.
-        for name, submodule, control_flow_node in get_cond_while_submodules_nested(
+        for name, submodule, control_flow_node in get_cond_while_submodules(
             graph_module
         ):
             TOSABackend._regularize_submodule(submodule, control_flow_node)
@@ -406,7 +455,7 @@ def _preprocess_module(  # noqa: C901
                 compile_spec,
                 tosa_graph,
                 debug_hook,
-                submodule_name=name,
+                submodule_name=_qualify_control_flow_region_name(submodule_name, name),
                 containing_graph_module=graph_module,
             )
 
diff --git a/backends/arm/tosa/mapping.py b/backends/arm/tosa/mapping.py
index b37c41a070b..0e91120c3b8 100644
--- a/backends/arm/tosa/mapping.py
+++ b/backends/arm/tosa/mapping.py
@@ -17,6 +17,7 @@
 import tosa_serializer as ts
 from executorch.backends.arm.tosa.specification import TosaSpecification
 
+TOSA_CONTROL_FLOW_REGION_NAME_META = "tosa_control_flow_region_name"
 TOSA_TENSOR_NAME_META = "tosa_tensor_name"
 
 UNSUPPORTED_DTYPES = (
diff --git a/backends/arm/tosa/partitioner.py b/backends/arm/tosa/partitioner.py
index bd900f4cc81..d93e212c314 100644
--- a/backends/arm/tosa/partitioner.py
+++ b/backends/arm/tosa/partitioner.py
@@ -21,10 +21,7 @@
 from typing import Callable, cast, List, Optional, Sequence, Tuple
 
 import torch
-from executorch.backends.arm._passes.arm_pass_utils import (
-    get_cond_while_submodules_nested,
-    get_first_fake_tensor,
-)
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
     calculate_multiples,
 )
@@ -43,6 +40,7 @@
 )
 from executorch.exir.backend.utils import tag_constant_data, WhyNoPartitionReporter
 from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.graph_module import get_cond_while_submodules
 from torch.export.exported_program import ExportedProgram
 from torch.fx import GraphModule
 from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner, Partition
@@ -400,7 +398,7 @@ def _tag_module(  # noqa
         tags: set[str] = set()
         if tag_iterator is None:
             tag_iterator = count(0)
-        for _, submodule, _ in get_cond_while_submodules_nested(module):
+        for _, submodule, _ in get_cond_while_submodules(module):
             submodule_tags = self._tag_module(
                 submodule, containing_program, reporter, tag_iterator
             )

From d83aa08ad3ea82902addd9736a6bbf311fa7fd26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?=
 <33344797+martinlsm@users.noreply.github.com>
Date: Wed, 27 May 2026 13:07:30 +0200
Subject: [PATCH 035/103] Arm backend: Reuse identical CONST_SHAPE nodes
 (#19770)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cache CONST_SHAPE nodes created by InsertConstShapesPass and reuse them
when a later view/repeat needs the same shape. This removes duplicate
shape constants.

This improvement is model dependent. Models with few repeated literal
shapes will not see any meaningful change, but some models can benefit
from it notably.

The table below shows the results of a local test lowering DeiT Tiny to
TOSA-FP. The lowering time reduced in this run, likely because passes
following InsertConstShapesPass had fewer nodes to iterate over.

| Metric         | Baseline | Optimized | Delta            |
| -------------- | -------- | --------- | ---------------- |
| Total ops      | 2106     | 1736      | -370 (-17.6%)    |
| CONST_SHAPE    | 466      | 96        | -370 (-79.4%)    |
| TOSA size      | 23.82 MB | 23.75 MB  | -71.6 KB (-0.3%) |
| Execution time | 118.7 s  | 78.4 s    | -40.3 s (-34.0%) |

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
---
 backends/arm/_passes/insert_const_shapes.py | 22 ++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/backends/arm/_passes/insert_const_shapes.py b/backends/arm/_passes/insert_const_shapes.py
index b03394379d9..059731857b4 100644
--- a/backends/arm/_passes/insert_const_shapes.py
+++ b/backends/arm/_passes/insert_const_shapes.py
@@ -26,6 +26,10 @@ class InsertConstShapesPass(ArmPass):
         exir_ops.edge.aten.repeat.default,
     }
 
+    def __init__(self) -> None:
+        super().__init__()
+        self._const_shape_cache: dict[tuple[int, ...], Any] = {}
+
     @staticmethod
     def _is_shape_arg(arg: Any) -> bool:
         """Return True when `arg` looks like a literal shape list/tuple."""
@@ -46,13 +50,17 @@ def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False)
                     # Insert a const node for the shape argument
                     if op == exir_ops.edge.aten.view_copy.default:
                         arg = meta.data["val"].shape
-                    const_node = super().call_shape_operator(
-                        exir_ops.backend.tosa.CONST_SHAPE.default,
-                        (arg,),
-                        {},
-                        meta,
-                        True,
-                    )
+                    shape = tuple(arg)
+                    const_node = self._const_shape_cache.get(shape)
+                    if const_node is None:
+                        const_node = super().call_shape_operator(
+                            exir_ops.backend.tosa.CONST_SHAPE.default,
+                            (arg,),
+                            {},
+                            meta,
+                            True,
+                        )
+                        self._const_shape_cache[shape] = const_node
                     new_args.append(const_node)
                     updated = True
                 else:

From 85dfa447a06990757de19b640a76e72d695ceb6a Mon Sep 17 00:00:00 2001
From: Martin Pavella <martin.pavella@nxp.com>
Date: Wed, 27 May 2026 14:58:48 +0200
Subject: [PATCH 036/103] NXP backend: Add `mean.dim` support with new Neutron
 flow. (#19740)

### Summary
Add `mean.dim` support with new Neutron flow.

### Test plan
Unit tests provided.

cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 backends/nxp/backend/edge_helper.py           |   2 +-
 .../max_pool2d_with_indices_converter.py      |   4 +-
 .../ops_converters/mean_dim_converter.py      | 113 ++++++---
 .../node_converter/test_mean_dim_converter.py | 217 +++++++++++++++++-
 backends/nxp/tests/ops_aliases.py             |   1 +
 5 files changed, 297 insertions(+), 40 deletions(-)

diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py
index 957b673bb6a..1ea86f589ac 100644
--- a/backends/nxp/backend/edge_helper.py
+++ b/backends/nxp/backend/edge_helper.py
@@ -318,7 +318,7 @@ def is_no_op_on_neutron(node: Node, parameters_mapping: dict[str, Parameter]) ->
                         input_data = torch.rand(val.shape, dtype=val.dtype) * 10 - 5
                         args_with_random_data.append(input_data)
 
-                case list():
+                case list() if any(isinstance(a, Node) for a in arg):
                     # Lists of input nodes are not supported to keep the code simple. It is not crucial to support this
                     #  case as the affected operators are either not supported on Neutron, or are extremely unlikely to
                     #  be no-ops (e.g. GRU). One exception is `aten.cat`, which is explicitly supported above.
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py
index 975aaf57625..b7e761c45e6 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py
@@ -152,9 +152,7 @@ def _get_node_args(
         :return: Tuple of (kernel_size, stride, padding, dilation, ceil_mode).
         """
         kernel_size = node.args[1]
-        stride = node.args[
-            2
-        ]  # The default value is equal to the kernel_size, so it is never empty here.
+        stride = try_get_arg(node, 2) or kernel_size
         padding = try_get_arg(node, 3) or (0, 0)
         dilation = try_get_arg(node, 4) or (1, 1)
         ceil_mode = try_get_arg(node, 5) or False
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mean_dim_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mean_dim_converter.py
index c4b828df39f..4ba56a6b755 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mean_dim_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mean_dim_converter.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
+
 from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 
 from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
@@ -11,6 +12,7 @@
 )
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
+    is_not_qdq_node,
     NodeConverter,
 )
 from executorch.backends.nxp.backend.ir.converter.node_converters.shared.reduce_utils import (
@@ -21,10 +23,40 @@
 )
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
+from torch.fx.passes.infra.partitioner import Partition
 from torch.nn import Parameter
 
 
 class MeanDimConverter(NodeConverter):
+
+    @classmethod
+    def supports_partitioning_result(
+        cls,
+        node: Node,
+        partition_list: list[Partition],
+        custom_delegation_options: CustomDelegationOptions,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+    ) -> bool:
+        if custom_delegation_options.use_new_flow_neutron_c:
+            dim, keepdim = MeanDimConverter._get_attrs(node)
+            input_shape = node.args[0].meta["val"].shape
+
+            is_alone_in_partition = cls.is_node_alone_in_partition(
+                node, partition_list, filter_fn=is_not_qdq_node
+            )
+
+            if (
+                is_alone_in_partition
+                and keepdim
+                and all(input_shape[d] == 1 for d in dim)
+            ):
+                # The operator is a no-op, so the Neutron Converter will skip it. If it's the only node in the
+                #  partition, the graph would end up empty.
+                return False
+
+        return True
+
     @staticmethod
     def _is_supported_on_target(
         node: Node,
@@ -32,34 +64,49 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        keepdim = node.args[2] if len(node.args) >= 3 else False
-        rank = len(node.args[0].meta["val"].shape)
-        dim = [MeanDimConverter._to_pos_dim(d, rank) for d in node.args[1]]
+        if custom_delegation_options.use_new_flow_neutron_c:
+            # Requirements specified by the new Neutron flow documentation.
+
+            if not NodeConverter.uses_quantization_type_for_io(
+                node,
+                supported_types=[torch.int8, torch.uint8],
+                input_indices=[0],
+                output_indices=[0],
+            ):
+                return False
 
-        if rank != 4 or not keepdim:
-            # neutron-converter/src/OperatorC/GlobalAvgPoolPlugin.cpp#74-77
-            return False
+            return True
 
-        # The `mean.dim` gets converted to AveragePool by the NeutronConverter, so the channels must be a
-        #  multiple of `num_macs`.
-        # neutron-converter/src/OperatorC/GlobalAvgPoolPlugin.cpp#59-85
-        num_macs = neutron_target_spec.get_num_macs()
-        channels_dim = 1 if node.meta[NXP_NODE_FORMAT].is_channels_first() else -1
-        if (node.meta["val"].shape[channels_dim] % num_macs) != 0:
-            return False
+        else:
+            # Requirements of the old Neutron flow.
+            rank = len(node.args[0].meta["val"].shape)
+            dim, keepdim = MeanDimConverter._get_attrs(node)
+            dim = [MeanDimConverter._to_pos_dim(d, rank) for d in dim]
 
-        # Neutron only supports reduction over the spatial dimensions H, W.
-        if node.meta[NXP_NODE_FORMAT].is_channels_first():
-            # The input is NCHW. H and W are at indices 2 and 3.
-            if dim not in [[2, 3], [3, 2]]:
+            if rank != 4 or not keepdim:
+                # neutron-converter/src/OperatorC/GlobalAvgPoolPlugin.cpp#74-77
                 return False
-        else:
-            # The input is formatless. It can be considered as NHWC, as this is the way Neutron will look at
-            #  the dimensions. So H and W are the middle dimensions.
-            if dim not in [[1, 2], [2, 1]]:
+
+            # The `mean.dim` gets converted to AveragePool by the NeutronConverter, so the channels must be a
+            #  multiple of `num_macs`.
+            # neutron-converter/src/OperatorC/GlobalAvgPoolPlugin.cpp#59-85
+            num_macs = neutron_target_spec.get_num_macs()
+            channels_dim = 1 if node.meta[NXP_NODE_FORMAT].is_channels_first() else -1
+            if (node.meta["val"].shape[channels_dim] % num_macs) != 0:
                 return False
 
-        return True
+            # Neutron only supports reduction over the spatial dimensions H, W.
+            if node.meta[NXP_NODE_FORMAT].is_channels_first():
+                # The input is NCHW. H and W are at indices 2 and 3.
+                if dim not in [[2, 3], [3, 2]]:
+                    return False
+            else:
+                # The input is formatless. It can be considered as NHWC, as this is the way Neutron will look at
+                #  the dimensions. So H and W are the middle dimensions.
+                if dim not in [[1, 2], [2, 1]]:
+                    return False
+
+            return True
 
     @staticmethod
     def _is_supported_in_IR(
@@ -91,15 +138,29 @@ def _normalize_and_to_channel_last_dim(dim: list[int], rank: int) -> list[int]:
         perm = create_channels_last_to_channels_first_permutation(rank, True)
         dim = [perm[d] for d in dim]
 
+        # noinspection PyTypeChecker
         return dim
 
-    # Mean Dim Node format: (Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None)
+    @staticmethod
+    def _get_attrs(node: Node) -> tuple[list[int], bool]:
+        dim = node.args[1]
+        keepdim = node.args[2] if len(node.args) >= 3 else False
+        return dim, keepdim
+
     def convert(self, node: Node):
-        """Convert 'mean.dim' operator to TFLite 'Mean'."""
+        """Convert the 'mean.dim' operator to NeutronIR 'Mean'.
+        The ExecuTorch schema is:
+            mean.dim(
+                Tensor self,
+                int[1]? dim,
+                bool keepdim=False,
+                *,
+                ScalarType? dtype=None
+            ) -> Tensor
+        """
         self.assert_convertible(node)
 
-        dim = node.args[1]
-        keepdim = node.args[2] if len(node.args) >= 3 else False
+        dim, keepdim = self._get_attrs(node)
 
         t_op = self._create_tflite_op_with_io_tensors(node)
         t_op.builtin_options = mean_options.Mean(keepdim)
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py
index 7c0a5e8ffcf..a265ca557c9 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py
@@ -1,15 +1,18 @@
-# Copyright 2025 NXP
+# Copyright 2025-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+
+# noinspection PyUnusedImports
 import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
@@ -17,10 +20,21 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.model_output_comparator import (
+    AllCloseOutputComparator,
+)
 from executorch.backends.nxp.tests.models import MeanDimConvModule, MeanDimLinearModule
-from executorch.backends.nxp.tests.use_qat import *  # noqa F403
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    AddTensor,
+    ExecutorchDelegateCall,
+    GetItem,
+    MaxPool2DWithIndices,
+    MeanDim,
+)
 from torch.export import ExportedProgram
+from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
 @pytest.fixture(autouse=True)
@@ -39,6 +53,12 @@ def forward(self, x):
         return torch.mean(x, dim=self.dim, keepdim=self.keepdim)
 
 
+class MeanDimAddModule(MeanDimModule):
+    def forward(self, x):
+        x = super().forward(x)
+        return x + x
+
+
 @pytest.mark.parametrize(
     "input_shape, dim",
     [
@@ -60,7 +80,7 @@ def test_mean_dim_conv_quant_conversion(
         model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False
     ).exported_program()
     # Make sure the `mean.dim` was delegated.
-    assert not graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim])
+    assert not graph_contains_any_of_ops(ep.graph, [MeanDim])
     assert any("lowered_module" in n.name for n in ep.graph.nodes)
 
     # Capture generated model
@@ -109,7 +129,7 @@ def test_mean_dim_linear_unsupported_quant_conversion(
     nodes = list(edge_program.graph.nodes)
 
     # Last 2 dimensions are not used or keepdim is False, cannot be converted to MeanDim, node is not delegated
-    assert nodes[6].target.__name__ == "aten.mean.dim"
+    assert nodes[6].target == MeanDim
 
     # Capture generated model
     tflite_flatbuffers_model, io_formats = converter_spy.spy_return
@@ -157,7 +177,7 @@ def test_mean_dim_conv_unsupported_quant_conversion(
     nodes = list(edge_program.graph.nodes)
 
     # Last 2 dimensions are not used or keepdim is False, cannot be converted to MeanDim, node is not delegated
-    assert nodes[6].target.__name__ == "aten.mean.dim"
+    assert nodes[6].target == MeanDim
 
     # Capture generated model
     tflite_flatbuffers_model, io_formats = converter_spy.spy_return
@@ -197,7 +217,7 @@ def test_mean_dim__formatless__supported(
     ).exported_program()
 
     # Make sure the `mean.dim` was delegated.
-    assert not graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim])
+    assert not graph_contains_any_of_ops(ep.graph, [MeanDim])
     assert any("lowered_module" in n.name for n in ep.graph.nodes)
 
     # Capture generated model
@@ -230,7 +250,7 @@ def test_mean_dim__formatless__unsupported(input_shape, dim, use_qat, keepdim=Tr
     ).exported_program()
 
     # Make sure the `mean.dim` was NOT delegated.
-    assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim])
+    assert graph_contains_any_of_ops(ep.graph, [MeanDim])
     assert not any("lowered_module" in n.name for n in ep.graph.nodes)
 
 
@@ -252,7 +272,7 @@ def test_mean_dim__formatless__unsupported_channels(
     ).exported_program()
 
     # Make sure the `mean.dim` was NOT delegated.
-    assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim])
+    assert graph_contains_any_of_ops(ep.graph, [MeanDim])
     assert not any("lowered_module" in n.name for n in ep.graph.nodes)
 
 
@@ -277,4 +297,181 @@ def test_mean_dim__channels_first__unsupported_channels(
     ).exported_program()
 
     # Make sure the `mean.dim` was NOT delegated.
-    assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim])
+    assert graph_contains_any_of_ops(ep.graph, [MeanDim])
+
+
+class MaxPoolMeanDimModule(torch.nn.Module):
+    def __init__(self, dim, keepdim):
+        super().__init__()
+        self.dim, self.keepdim = dim, keepdim
+
+    def forward(self, x):
+        x = torch.max_pool2d(
+            x, kernel_size=1
+        )  # NoOp, but it enforces the channels first format.
+        return torch.mean(x, dim=self.dim, keepdim=self.keepdim)
+
+
+class TestMeanDimNewNeutronFlow:
+
+    # noinspection PyMethodMayBeStatic
+    def assert_delegated(
+        self,
+        model,
+        input_shape,
+        mocker,
+        use_qat=False,
+        atol=None,
+        expected_delegated_ops=None,
+    ):
+        if expected_delegated_ops is None:
+            expected_delegated_ops = {MeanDim: 1}
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops=expected_delegated_ops,
+            expected_non_delegated_ops={},
+        )
+
+        # Cover also negative values to thoroughly test the operator.
+        dataset_creator = RandomDatasetCreator(low=-2, high=2)
+
+        kwargs = {"atol": atol} if atol is not None else {}
+        output_comparator = AllCloseOutputComparator(**kwargs)
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset_creator,
+            output_comparator,
+            use_qat=use_qat,
+            use_new_flow_neutron_c=True,  # Use the new flow.
+        )
+
+    # noinspection PyMethodMayBeStatic
+    def assert_not_delegated(self, model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `mean` was NOT delegated.
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [MeanDim])
+
+    @pytest.fixture(params=[True, False], ids=lambda keep_dim: f"keep_dim = {keep_dim}")
+    def keep_dim(self, request):
+        return request.param
+
+    def test__basic_nsys_inference__qat(self, mocker, use_qat, keep_dim):
+        input_shape = (23,)
+        model = MeanDimModule(0, keep_dim)
+        self.assert_delegated(model, input_shape, mocker, use_qat=use_qat)
+
+    @pytest.mark.parametrize(
+        "input_shape, dim",
+        [
+            pytest.param((5,), 0, id="1D, dim = 0."),
+            pytest.param((4, 2), 0, id="2D, dim = 0."),
+            pytest.param((4, 2), -1, id="2D, dim = -1."),
+            pytest.param((3, 1, 4), 2, id="3D, dim = 2."),
+            pytest.param((1, 3, 3, 7), 3, id="4D, dim = 3."),
+            pytest.param((3, 1, 4, 1, 5), -1, id="5D, dim = -1."),
+            pytest.param((3, 1, 4, 1, 5), 0, id="5D, dim = 0."),
+        ],
+    )
+    def test__single_dims(self, mocker, input_shape, dim, keep_dim):
+        model = MeanDimModule(dim, keep_dim)
+        # Relatively large error, but it is actually equal to the output scale, so it is a single bit error.
+        # TODO Replace with quantized dataset testing and `atol = 1`.
+        atol = 0.014
+        self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    @pytest.mark.parametrize(
+        "input_shape, dim",
+        [
+            pytest.param((4, 2), (-2,), id="2D, dim = (-2,)."),
+            pytest.param((2, 3, 4), (0, 2), id="3D, dim = (0, 2,)."),
+            pytest.param((1, 3, 3, 7), (2, -3), id="4D, dim = (2, -3)."),
+            pytest.param((3, 1, 4, 1, 5), (3, -5, -4), id="5D, dim = (3, -5 ,-4)."),
+        ],
+    )
+    def test__tuple_dims(self, mocker, input_shape, dim, keep_dim):
+        model = MeanDimModule(dim, keep_dim)
+        # Relatively large error, but it is actually equal to the output scale, so it is a single bit error.
+        # TODO Replace with quantized dataset testing and `atol = 1`.
+        atol = 0.015
+        self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    def test__compute_error(self, mocker, keep_dim):
+        input_shape, dim = (1, 3, 3, 7), -2
+        model = MeanDimModule(dim, keep_dim)
+
+        # Neutron produces an incorrect result in this case (maximum absolute error ~= 0.0607 (more than 2 * scale)).
+        # This test detects the failure to alert us once the bug is fixed. It should be fixed in Neutron 3.1.2.
+        with pytest.raises(AssertionError):
+            self.assert_delegated(model, input_shape, mocker, atol=0.06)
+
+    @pytest.mark.parametrize(
+        "input_shape, dim",
+        [
+            pytest.param((3, 1, 4), 1, id="3D, dim = 1."),
+            pytest.param((3, 1, 4, 1, 5), -2, id="5D, dim = -2."),
+        ],
+    )
+    def test__noop__only_node__not_delegated(self, input_shape, dim):
+        keep_dim = True  # Reduction over a dimension of size `1` with `keep_dim=True` is a no-op.
+        model = MeanDimModule(dim, keep_dim)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.parametrize(
+        "input_shape, dim",
+        [
+            pytest.param((3, 1, 4), 1, id="3D, dim = 1."),
+            pytest.param((3, 1, 4, 1, 5), -2, id="5D, dim = -2."),
+        ],
+    )
+    def test__noop__not_only_node__delegated(self, mocker, input_shape, dim):
+        keep_dim = True  # Reduction over a dimension of size `1` with `keep_dim=True` is a no-op.
+        model = MeanDimAddModule(dim, keep_dim)
+        self.assert_delegated(
+            model,
+            input_shape,
+            mocker,
+            expected_delegated_ops={MeanDim: 1, AddTensor: 1},
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape, dim",
+        [
+            pytest.param((3, 1, 4), 1, id="3D, dim = 1."),
+            pytest.param((3, 1, 4, 1, 5), -2, id="5D, dim = -2."),
+        ],
+    )
+    def test__no_reduction__keepdim_false__delegated(self, mocker, input_shape, dim):
+        # These cases reduce over a dimension of size 1.
+        # When `keep_dim=True` the node is a noop, and it's not delegated (see `test__noop__only_node__not_delegated`),
+        # but with `keep_dim=False` it changes the shape so it's not a noop and is therefore delegated successfully.
+        keep_dim = False
+        model = MeanDimModule(dim, keep_dim)
+        self.assert_delegated(model, input_shape, mocker)
+
+    @pytest.mark.parametrize(
+        "input_shape, dim",
+        [((1, 7, 3, 3), 1)],
+        ids=lambda val: f"shape={val}" if isinstance(val, tuple) else f"dim={val}",
+    )
+    def test__channels_first(self, mocker, input_shape, dim, keep_dim):
+        # Just 1 test case to verify correct handling of the `dim`.
+        # Most cases fall into the single bit error case, and since this test uses 2 operators, the error accumulates
+        #  and the final error is larger. We cannot with 100% certainty say that the error is only caused by the single
+        #  bit errors and not related to the format. That's why only this 1 case with no errors is used.
+        model = MaxPoolMeanDimModule(dim, keep_dim)
+        self.assert_delegated(
+            model,
+            input_shape,
+            mocker,
+            expected_delegated_ops={MaxPool2DWithIndices: 1, GetItem: 1, MeanDim: 1},
+        )
diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py
index 7f855dd63af..06eb9c84bd0 100644
--- a/backends/nxp/tests/ops_aliases.py
+++ b/backends/nxp/tests/ops_aliases.py
@@ -26,6 +26,7 @@
 HardTanh_ = exir_ops.edge.aten.hardtanh_.default
 LeakyRelu = exir_ops.edge.aten.leaky_relu.default
 MaxPool2DWithIndices = exir_ops.edge.aten.max_pool2d_with_indices.default
+MeanDim = exir_ops.edge.aten.mean.dim
 MulTensor = exir_ops.edge.aten.mul.Tensor
 QuantizePerChannel = exir_ops.edge.quantized_decomposed.quantize_per_channel.default
 QuantizePerTensor = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default

From 4741f3ae35aaaa16a8ac750726ccf24f4850aa96 Mon Sep 17 00:00:00 2001
From: Sebastian Larsson <38941629+Sebastian-Larsson@users.noreply.github.com>
Date: Wed, 27 May 2026 15:18:59 +0200
Subject: [PATCH 037/103] Arm backend: Relocate not-equal decomposition after
 rank matching (#19769)

Move DecomposeNotEqualPass to the post scalar-removal node
transformation block. This removes its special placement between
ReplaceScalarWithTensorByProfilePass and MatchArgRanksPass.

Also match ranks for ne.Tensor before decomposition so scalar not-equal
does not produce mismatched TOSA EQUAL operands.

Signed-off-by: Sebastian Larsson <sebastian.larsson@arm.com>
---
 backends/arm/_passes/arm_pass_manager.py     | 4 +---
 backends/arm/_passes/match_arg_ranks_pass.py | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index 5a135696463..8a02f7393de 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -481,9 +481,6 @@ def _tosa_pipeline(
                 ConvertFullLikeToFullPass(),
                 MatchArgDtypePass(),
                 UnsqueezeScalarPlaceholdersPass(exported_program),
-                # TODO: Move DecomposeNotEqualPass to before or after this block of
-                # passes. Ticket: MLETORCH-1540
-                DecomposeNotEqualPass(),
                 MatchArgRanksPass(exported_program),
             ]
         )
@@ -491,6 +488,7 @@ def _tosa_pipeline(
         # Node transformation passes (post scalar-removal)
         self.add_passes(
             [
+                DecomposeNotEqualPass(),
                 NormalizeIndexPutNoneIndicesPass(),
                 NormalizeIndexPutBoolIndexTensorPass(),
                 RewriteIndexPutPass(),
diff --git a/backends/arm/_passes/match_arg_ranks_pass.py b/backends/arm/_passes/match_arg_ranks_pass.py
index 905286e39b0..199eafe0cfb 100644
--- a/backends/arm/_passes/match_arg_ranks_pass.py
+++ b/backends/arm/_passes/match_arg_ranks_pass.py
@@ -57,6 +57,7 @@ def __init__(self, exported_program: ExportedProgram, *args, **kwargs) -> None:
         exir_ops.edge.aten.ge.Tensor,
         exir_ops.edge.aten.lt.Tensor,
         exir_ops.edge.aten.le.Tensor,
+        exir_ops.edge.aten.ne.Tensor,
         exir_ops.edge.aten.pow.Tensor_Tensor,
         exir_ops.edge.aten.remainder.Tensor,
         exir_ops.edge.aten.where.self,

From 628246784dd2efb71ebdbae4157d87da442c39f4 Mon Sep 17 00:00:00 2001
From: Sicheng Stephen Jia <ssjia@meta.com>
Date: Wed, 27 May 2026 13:50:37 -0400
Subject: [PATCH 038/103] [executorch][qualcomm] Add op_fallback.py to
 model_sharding_py BUCK target

Differential Revision: D106429294

Pull Request resolved: https://github.com/pytorch/executorch/pull/19809
---
 extension/llm/custom_ops/targets.bzl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/extension/llm/custom_ops/targets.bzl b/extension/llm/custom_ops/targets.bzl
index 6746d7ab877..1d1feeda0c1 100644
--- a/extension/llm/custom_ops/targets.bzl
+++ b/extension/llm/custom_ops/targets.bzl
@@ -141,6 +141,7 @@ def define_common_targets():
         name = "model_sharding_py",
         srcs = [
             "model_sharding.py",
+            "op_fallback.py",
         ],
         visibility = ["PUBLIC"],
         deps = [

From 2f229597f743105a432b91e086ad219d0f29a728 Mon Sep 17 00:00:00 2001
From: Siddartha Pothapragada <sidart@meta.com>
Date: Wed, 27 May 2026 11:05:20 -0700
Subject: [PATCH 039/103] Remove debug exit(0) blocking test_llama_stories_110m
 (#19814)

Summary:
Remove debug `print` and `exit(0)` statements accidentally left in
`TestExampleLLMScript.test_llama_stories_110m` that cause the test to
exit before executing any assertions.

These lines were introduced in commit 508cbf07be38 (PR #19146) and
prevent the `test-static-llama-qnn-linux (stories_110m)` CI job from
running actual model validation, blocking viable/strict progression.

Differential Revision: D106533426
---
 backends/qualcomm/tests/test_qnn_delegate.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index ee6678fa499..08f5c1f67de 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -7733,8 +7733,6 @@ def test_llama_stories_110m(self):
         if self.use_fp16:
             cmds.append("--use_fp16")
         self.add_default_cmds(cmds)
-        print(" ".join(cmds))
-        exit(0)
         golden_start_with = "Once upon a time,"
         p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
         with Listener((self.ip, self.port)) as listener:

From 52892b2ecda1446e21c585d297c4a653376df080 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Wed, 27 May 2026 12:25:07 -0700
Subject: [PATCH 040/103] Convert ExecuTorchRuntime,
 ExecutorchRuntimeException, EValue from Java to Kotlin (#19788)

Differential Revision: D106413930

Pull Request resolved: https://github.com/pytorch/executorch/pull/19788
---
 extension/android/BUCK                        |   6 +-
 .../executorch/ModuleInstrumentationTest.kt   |   2 +-
 .../java/org/pytorch/executorch/EValue.java   | 253 ------------------
 .../java/org/pytorch/executorch/EValue.kt     | 209 +++++++++++++++
 .../pytorch/executorch/ExecuTorchRuntime.java |  68 -----
 .../pytorch/executorch/ExecuTorchRuntime.kt   |  62 +++++
 .../ExecutorchRuntimeException.java           | 198 --------------
 .../executorch/ExecutorchRuntimeException.kt  | 133 +++++++++
 8 files changed, 408 insertions(+), 523 deletions(-)
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.kt
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.kt
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.kt

diff --git a/extension/android/BUCK b/extension/android/BUCK
index bae5579b2a8..1f1b611ff01 100644
--- a/extension/android/BUCK
+++ b/extension/android/BUCK
@@ -9,9 +9,9 @@ non_fbcode_target(_kind = fb_android_library,
     required_for_source_only_abi = True,
     srcs = [
         "executorch_android/src/main/java/org/pytorch/executorch/DType.kt",
-        "executorch_android/src/main/java/org/pytorch/executorch/EValue.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/EValue.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/Module.java",
         "executorch_android/src/main/java/org/pytorch/executorch/Tensor.java",
diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt
index b2f10537c2f..1888466ffa6 100644
--- a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt
+++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt
@@ -94,7 +94,7 @@ class ModuleInstrumentationTest {
           }
       Assert.assertEquals(
           ExecutorchRuntimeException.INVALID_ARGUMENT,
-          exception.getErrorCode(),
+          exception.errorCode,
       )
     } finally {
       module.destroy()
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.java
deleted file mode 100644
index e85efb291e7..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch;
-
-import com.facebook.jni.annotations.DoNotStrip;
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Locale;
-import org.pytorch.executorch.annotations.Experimental;
-
-/**
- * Java representation of an ExecuTorch value, which is implemented as tagged union that can be one
- * of the supported types: https://pytorch.org/docs/stable/jit.html#types .
- *
- * <p>Calling {@code toX} methods for inappropriate types will throw {@link IllegalStateException}.
- *
- * <p>{@code EValue} objects are constructed with {@code EValue.from(value)}, {@code
- * EValue.tupleFrom(value1, value2, ...)}, {@code EValue.listFrom(value1, value2, ...)}, or one of
- * the {@code dict} methods, depending on the key type.
- *
- * <p>Data is retrieved from {@code EValue} objects with the {@code toX()} methods. Note that {@code
- * str}-type EValues must be extracted with {@link #toStr()}, rather than {@link #toString()}.
- *
- * <p>{@code EValue} objects may retain references to objects passed into their constructors, and
- * may return references to their internal state from {@code toX()}.
- *
- * <p>Warning: These APIs are experimental and subject to change without notice
- */
-@Experimental
-@DoNotStrip
-public class EValue {
-  private static final int TYPE_CODE_NONE = 0;
-
-  private static final int TYPE_CODE_TENSOR = 1;
-  private static final int TYPE_CODE_STRING = 2;
-  private static final int TYPE_CODE_DOUBLE = 3;
-  private static final int TYPE_CODE_INT = 4;
-  private static final int TYPE_CODE_BOOL = 5;
-
-  private String[] TYPE_NAMES = {
-    "None", "Tensor", "String", "Double", "Int", "Bool",
-  };
-
-  @DoNotStrip private final int mTypeCode;
-  @DoNotStrip private Object mData;
-
-  @DoNotStrip
-  private EValue(int typeCode) {
-    this.mTypeCode = typeCode;
-  }
-
-  @DoNotStrip
-  public boolean isNone() {
-    return TYPE_CODE_NONE == this.mTypeCode;
-  }
-
-  @DoNotStrip
-  public boolean isTensor() {
-    return TYPE_CODE_TENSOR == this.mTypeCode;
-  }
-
-  @DoNotStrip
-  public boolean isBool() {
-    return TYPE_CODE_BOOL == this.mTypeCode;
-  }
-
-  @DoNotStrip
-  public boolean isInt() {
-    return TYPE_CODE_INT == this.mTypeCode;
-  }
-
-  @DoNotStrip
-  public boolean isDouble() {
-    return TYPE_CODE_DOUBLE == this.mTypeCode;
-  }
-
-  @DoNotStrip
-  public boolean isString() {
-    return TYPE_CODE_STRING == this.mTypeCode;
-  }
-
-  /** Creates a new {@code EValue} of type {@code Optional} that contains no value. */
-  @DoNotStrip
-  public static EValue optionalNone() {
-    return new EValue(TYPE_CODE_NONE);
-  }
-
-  /** Creates a new {@code EValue} of type {@code Tensor}. */
-  @DoNotStrip
-  public static EValue from(Tensor tensor) {
-    final EValue iv = new EValue(TYPE_CODE_TENSOR);
-    iv.mData = tensor;
-    return iv;
-  }
-
-  /** Creates a new {@code EValue} of type {@code bool}. */
-  @DoNotStrip
-  public static EValue from(boolean value) {
-    final EValue iv = new EValue(TYPE_CODE_BOOL);
-    iv.mData = value;
-    return iv;
-  }
-
-  /** Creates a new {@code EValue} of type {@code int}. */
-  @DoNotStrip
-  public static EValue from(long value) {
-    final EValue iv = new EValue(TYPE_CODE_INT);
-    iv.mData = value;
-    return iv;
-  }
-
-  /** Creates a new {@code EValue} of type {@code double}. */
-  @DoNotStrip
-  public static EValue from(double value) {
-    final EValue iv = new EValue(TYPE_CODE_DOUBLE);
-    iv.mData = value;
-    return iv;
-  }
-
-  /** Creates a new {@code EValue} of type {@code str}. */
-  @DoNotStrip
-  public static EValue from(String value) {
-    final EValue iv = new EValue(TYPE_CODE_STRING);
-    iv.mData = value;
-    return iv;
-  }
-
-  @DoNotStrip
-  public Tensor toTensor() {
-    preconditionType(TYPE_CODE_TENSOR, mTypeCode);
-    return (Tensor) mData;
-  }
-
-  @DoNotStrip
-  public boolean toBool() {
-    preconditionType(TYPE_CODE_BOOL, mTypeCode);
-    return (boolean) mData;
-  }
-
-  @DoNotStrip
-  public long toInt() {
-    preconditionType(TYPE_CODE_INT, mTypeCode);
-    return (long) mData;
-  }
-
-  @DoNotStrip
-  public double toDouble() {
-    preconditionType(TYPE_CODE_DOUBLE, mTypeCode);
-    return (double) mData;
-  }
-
-  @DoNotStrip
-  public String toStr() {
-    preconditionType(TYPE_CODE_STRING, mTypeCode);
-    return (String) mData;
-  }
-
-  private void preconditionType(int typeCodeExpected, int typeCode) {
-    if (typeCode != typeCodeExpected) {
-      throw new IllegalStateException(
-          String.format(
-              Locale.US,
-              "Expected EValue type %s, actual type %s",
-              getTypeName(typeCodeExpected),
-              getTypeName(typeCode)));
-    }
-  }
-
-  private String getTypeName(int typeCode) {
-    return typeCode >= 0 && typeCode < TYPE_NAMES.length ? TYPE_NAMES[typeCode] : "Unknown";
-  }
-
-  /**
-   * Serializes an {@code EValue} into a byte array. Note: This method is experimental and subject
-   * to change without notice.
-   *
-   * @return The serialized byte array.
-   */
-  public byte[] toByteArray() {
-    if (isNone()) {
-      return ByteBuffer.allocate(1).put((byte) TYPE_CODE_NONE).array();
-    } else if (isTensor()) {
-      Tensor t = toTensor();
-      byte[] tByteArray = t.toByteArray();
-      return ByteBuffer.allocate(1 + tByteArray.length)
-          .put((byte) TYPE_CODE_TENSOR)
-          .put(tByteArray)
-          .array();
-    } else if (isBool()) {
-      return ByteBuffer.allocate(2)
-          .put((byte) TYPE_CODE_BOOL)
-          .put((byte) (toBool() ? 1 : 0))
-          .array();
-    } else if (isInt()) {
-      return ByteBuffer.allocate(9).put((byte) TYPE_CODE_INT).putLong(toInt()).array();
-    } else if (isDouble()) {
-      return ByteBuffer.allocate(9).put((byte) TYPE_CODE_DOUBLE).putDouble(toDouble()).array();
-    } else if (isString()) {
-      byte[] strBytes = toStr().getBytes(StandardCharsets.UTF_8);
-      return ByteBuffer.allocate(1 + 4 + strBytes.length)
-          .put((byte) TYPE_CODE_STRING)
-          .putInt(strBytes.length)
-          .put(strBytes)
-          .array();
-    } else {
-      throw new IllegalArgumentException("Unknown EValue type code: " + mTypeCode);
-    }
-  }
-
-  /**
-   * Deserializes an {@code EValue} from a byte[]. Note: This method is experimental and subject to
-   * change without notice.
-   *
-   * @param bytes The byte array to deserialize from.
-   * @return The deserialized {@code EValue}.
-   */
-  public static EValue fromByteArray(byte[] bytes) {
-    ByteBuffer buffer = ByteBuffer.wrap(bytes);
-    if (buffer == null) {
-      throw new IllegalArgumentException("buffer cannot be null");
-    }
-    if (!buffer.hasRemaining()) {
-      throw new IllegalArgumentException("invalid buffer");
-    }
-    int typeCode = buffer.get();
-    switch (typeCode) {
-      case TYPE_CODE_NONE:
-        return new EValue(TYPE_CODE_NONE);
-      case TYPE_CODE_TENSOR:
-        byte[] bufferArray = buffer.array();
-        return from(Tensor.fromByteArray(Arrays.copyOfRange(bufferArray, 1, bufferArray.length)));
-      case TYPE_CODE_STRING:
-        int strLen = buffer.getInt();
-        byte[] strBytes = new byte[strLen];
-        buffer.get(strBytes);
-        return from(new String(strBytes, StandardCharsets.UTF_8));
-      case TYPE_CODE_DOUBLE:
-        return from(buffer.getDouble());
-      case TYPE_CODE_INT:
-        return from(buffer.getLong());
-      case TYPE_CODE_BOOL:
-        return from(buffer.get() != 0);
-    }
-    throw new IllegalArgumentException("invalid type code: " + typeCode);
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.kt
new file mode 100644
index 00000000000..08c02d5c84a
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/EValue.kt
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch
+
+import com.facebook.jni.annotations.DoNotStrip
+import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets
+import java.util.Arrays
+import java.util.Locale
+import org.pytorch.executorch.annotations.Experimental
+
+/**
+ * Java representation of an ExecuTorch value, which is implemented as tagged union that can be one
+ * of the supported types: https://pytorch.org/docs/stable/jit.html#types .
+ *
+ * Calling `toX` methods for inappropriate types will throw [IllegalStateException].
+ *
+ * `EValue` objects are constructed with `EValue.from(value)`, depending on the value type.
+ *
+ * Data is retrieved from `EValue` objects with the `toX()` methods. Note that `str`-type EValues
+ * must be extracted with [toStr], rather than [toString].
+ *
+ * `EValue` objects may retain references to objects passed into their constructors, and may return
+ * references to their internal state from `toX()`.
+ *
+ * Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+@DoNotStrip
+class EValue
+@DoNotStrip
+private constructor(
+    // JNI reads this field by name via GetFieldID("mTypeCode")
+    @JvmField @DoNotStrip val mTypeCode: Int
+) {
+
+  // JNI accesses this field by name via GetFieldID("mData"), requires @JvmField for direct field
+  // access
+  @JvmField @DoNotStrip var mData: Any? = null
+
+  private val typeNames = arrayOf("None", "Tensor", "String", "Double", "Int", "Bool")
+
+  val isNone: Boolean
+    @DoNotStrip get() = TYPE_CODE_NONE == mTypeCode
+
+  val isTensor: Boolean
+    @DoNotStrip get() = TYPE_CODE_TENSOR == mTypeCode
+
+  val isBool: Boolean
+    @DoNotStrip get() = TYPE_CODE_BOOL == mTypeCode
+
+  val isInt: Boolean
+    @DoNotStrip get() = TYPE_CODE_INT == mTypeCode
+
+  val isDouble: Boolean
+    @DoNotStrip get() = TYPE_CODE_DOUBLE == mTypeCode
+
+  val isString: Boolean
+    @DoNotStrip get() = TYPE_CODE_STRING == mTypeCode
+
+  @DoNotStrip
+  fun toTensor(): Tensor {
+    preconditionType(TYPE_CODE_TENSOR, mTypeCode)
+    return mData as? Tensor ?: throw IllegalStateException("EValue data is null or not a Tensor")
+  }
+
+  @DoNotStrip
+  fun toBool(): Boolean {
+    preconditionType(TYPE_CODE_BOOL, mTypeCode)
+    return mData as? Boolean ?: throw IllegalStateException("EValue data is null or not a Boolean")
+  }
+
+  @DoNotStrip
+  fun toInt(): Long {
+    preconditionType(TYPE_CODE_INT, mTypeCode)
+    return mData as? Long ?: throw IllegalStateException("EValue data is null or not a Long")
+  }
+
+  @DoNotStrip
+  fun toDouble(): Double {
+    preconditionType(TYPE_CODE_DOUBLE, mTypeCode)
+    return mData as? Double ?: throw IllegalStateException("EValue data is null or not a Double")
+  }
+
+  @DoNotStrip
+  fun toStr(): String {
+    preconditionType(TYPE_CODE_STRING, mTypeCode)
+    return mData as? String ?: throw IllegalStateException("EValue data is null or not a String")
+  }
+
+  private fun preconditionType(typeCodeExpected: Int, typeCode: Int) {
+    if (typeCode != typeCodeExpected) {
+      throw IllegalStateException(
+          String.format(
+              Locale.US,
+              "Expected EValue type %s, actual type %s",
+              getTypeName(typeCodeExpected),
+              getTypeName(typeCode),
+          )
+      )
+    }
+  }
+
+  private fun getTypeName(typeCode: Int): String =
+      if (typeCode in typeNames.indices) typeNames[typeCode] else "Unknown"
+
+  /**
+   * Serializes an `EValue` into a byte array. Note: This method is experimental and subject to
+   * change without notice.
+   */
+  fun toByteArray(): ByteArray =
+      when {
+        isNone -> ByteBuffer.allocate(1).put(TYPE_CODE_NONE.toByte()).array()
+        isTensor -> {
+          val tByteArray = toTensor().toByteArray()
+          ByteBuffer.allocate(1 + tByteArray.size)
+              .put(TYPE_CODE_TENSOR.toByte())
+              .put(tByteArray)
+              .array()
+        }
+        isBool ->
+            ByteBuffer.allocate(2)
+                .put(TYPE_CODE_BOOL.toByte())
+                .put(if (toBool()) 1.toByte() else 0.toByte())
+                .array()
+        isInt -> ByteBuffer.allocate(9).put(TYPE_CODE_INT.toByte()).putLong(toInt()).array()
+        isDouble ->
+            ByteBuffer.allocate(9).put(TYPE_CODE_DOUBLE.toByte()).putDouble(toDouble()).array()
+        isString -> {
+          val strBytes = toStr().toByteArray(StandardCharsets.UTF_8)
+          ByteBuffer.allocate(1 + 4 + strBytes.size)
+              .put(TYPE_CODE_STRING.toByte())
+              .putInt(strBytes.size)
+              .put(strBytes)
+              .array()
+        }
+        else -> throw IllegalArgumentException("Unknown EValue type code: $mTypeCode")
+      }
+
+  companion object {
+    private const val TYPE_CODE_NONE = 0
+    private const val TYPE_CODE_TENSOR = 1
+    private const val TYPE_CODE_STRING = 2
+    private const val TYPE_CODE_DOUBLE = 3
+    private const val TYPE_CODE_INT = 4
+    private const val TYPE_CODE_BOOL = 5
+
+    /** Creates a new `EValue` of type `Optional` that contains no value. */
+    @DoNotStrip @JvmStatic fun optionalNone(): EValue = EValue(TYPE_CODE_NONE)
+
+    /** Creates a new `EValue` of type `Tensor`. */
+    @DoNotStrip
+    @JvmStatic
+    fun from(tensor: Tensor): EValue = EValue(TYPE_CODE_TENSOR).also { it.mData = tensor }
+
+    /** Creates a new `EValue` of type `bool`. */
+    @DoNotStrip
+    @JvmStatic
+    fun from(value: Boolean): EValue = EValue(TYPE_CODE_BOOL).also { it.mData = value }
+
+    /** Creates a new `EValue` of type `int`. */
+    @DoNotStrip
+    @JvmStatic
+    fun from(value: Long): EValue = EValue(TYPE_CODE_INT).also { it.mData = value }
+
+    /** Creates a new `EValue` of type `double`. */
+    @DoNotStrip
+    @JvmStatic
+    fun from(value: Double): EValue = EValue(TYPE_CODE_DOUBLE).also { it.mData = value }
+
+    /** Creates a new `EValue` of type `str`. */
+    @DoNotStrip
+    @JvmStatic
+    fun from(value: String): EValue = EValue(TYPE_CODE_STRING).also { it.mData = value }
+
+    /**
+     * Deserializes an `EValue` from a byte[]. Note: This method is experimental and subject to
+     * change without notice.
+     */
+    @JvmStatic
+    fun fromByteArray(bytes: ByteArray): EValue {
+      val buffer = ByteBuffer.wrap(bytes)
+      require(buffer.hasRemaining()) { "invalid buffer" }
+      return when (val typeCode = buffer.get().toInt()) {
+        TYPE_CODE_NONE -> EValue(TYPE_CODE_NONE)
+        TYPE_CODE_TENSOR -> {
+          val bufferArray = buffer.array()
+          from(Tensor.fromByteArray(Arrays.copyOfRange(bufferArray, 1, bufferArray.size)))
+        }
+        TYPE_CODE_STRING -> {
+          val strLen = buffer.getInt()
+          val strBytes = ByteArray(strLen)
+          buffer.get(strBytes)
+          from(String(strBytes, StandardCharsets.UTF_8))
+        }
+        TYPE_CODE_DOUBLE -> from(buffer.getDouble())
+        TYPE_CODE_INT -> from(buffer.getLong())
+        TYPE_CODE_BOOL -> from(buffer.get().toInt() != 0)
+        else -> throw IllegalArgumentException("invalid type code: $typeCode")
+      }
+    }
+  }
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java
deleted file mode 100644
index 6372da9a397..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch;
-
-import com.facebook.jni.annotations.DoNotStrip;
-import com.facebook.soloader.nativeloader.NativeLoader;
-import com.facebook.soloader.nativeloader.SystemDelegate;
-import java.io.File;
-
-/** Class for entire ExecuTorch Runtime related functions. */
-public class ExecuTorchRuntime {
-
-  static {
-    if (!NativeLoader.isInitialized()) {
-      NativeLoader.init(new SystemDelegate());
-    }
-    // Loads libexecutorch.so from jniLibs
-    NativeLoader.loadLibrary("executorch");
-  }
-
-  private static final ExecuTorchRuntime sInstance = new ExecuTorchRuntime();
-
-  private ExecuTorchRuntime() {}
-
-  /** Get the runtime instance. */
-  public static ExecuTorchRuntime getRuntime() {
-    return sInstance;
-  }
-
-  /**
-   * Validates that the given path points to a readable file.
-   *
-   * @throws IllegalArgumentException if the path is null, does not exist, is not a file, or is not
-   *     readable.
-   */
-  public static void validateFilePath(String path, String description) {
-    if (path == null) {
-      throw new IllegalArgumentException("Cannot load " + description + ": path is null");
-    }
-    File file = new File(path);
-    if (!file.exists()) {
-      throw new IllegalArgumentException(
-          "Cannot load " + description + ": path does not exist: " + path);
-    }
-    if (!file.isFile()) {
-      throw new IllegalArgumentException(
-          "Cannot load " + description + ": path is not a file: " + path);
-    }
-    if (!file.canRead()) {
-      throw new IllegalArgumentException(
-          "Cannot load " + description + ": path is not readable: " + path);
-    }
-  }
-
-  /** Get all registered ops. */
-  @DoNotStrip
-  public static native String[] getRegisteredOps();
-
-  /** Get all registered backends. */
-  @DoNotStrip
-  public static native String[] getRegisteredBackends();
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.kt
new file mode 100644
index 00000000000..52d846c5647
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.kt
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch
+
+import com.facebook.jni.annotations.DoNotStrip
+import com.facebook.soloader.nativeloader.NativeLoader
+import com.facebook.soloader.nativeloader.SystemDelegate
+import java.io.File
+
+/** Class for entire ExecuTorch Runtime related functions. */
+class ExecuTorchRuntime private constructor() {
+
+  companion object {
+    init {
+      if (!NativeLoader.isInitialized()) {
+        NativeLoader.init(SystemDelegate())
+      }
+      // Loads libexecutorch.so from jniLibs
+      NativeLoader.loadLibrary("executorch")
+    }
+
+    private val sInstance = ExecuTorchRuntime()
+
+    /** Get the runtime instance. */
+    @JvmStatic fun getRuntime(): ExecuTorchRuntime = sInstance
+
+    /**
+     * Validates that the given path points to a readable file.
+     *
+     * @throws IllegalArgumentException if the path is null, does not exist, is not a file, or is
+     *   not readable.
+     */
+    @JvmStatic
+    fun validateFilePath(path: String?, description: String) {
+      if (path == null) {
+        throw IllegalArgumentException("Cannot load $description: path is null")
+      }
+      val file = File(path)
+      if (!file.exists()) {
+        throw IllegalArgumentException("Cannot load $description: path does not exist: $path")
+      }
+      if (!file.isFile) {
+        throw IllegalArgumentException("Cannot load $description: path is not a file: $path")
+      }
+      if (!file.canRead()) {
+        throw IllegalArgumentException("Cannot load $description: path is not readable: $path")
+      }
+    }
+
+    /** Get all registered ops. */
+    @DoNotStrip @JvmStatic external fun getRegisteredOps(): Array<String>
+
+    /** Get all registered backends. */
+    @DoNotStrip @JvmStatic external fun getRegisteredBackends(): Array<String>
+  }
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java
deleted file mode 100644
index 6f9d654be66..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch;
-
-import com.facebook.jni.annotations.DoNotStrip;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Base exception for all ExecuTorch runtime errors. Each instance carries an integer error code
- * corresponding to the native {@code runtime/core/error.h} values, accessible via {@link
- * #getErrorCode()}.
- */
-public class ExecutorchRuntimeException extends RuntimeException {
-  // Error code constants - keep in sync with runtime/core/error.h
-
-  // System errors
-
-  /** Operation completed successfully. */
-  public static final int OK = 0x00;
-
-  /** An unexpected internal error occurred in the runtime. */
-  public static final int INTERNAL = 0x01;
-
-  /** The runtime or method is in an invalid state for the requested operation. */
-  public static final int INVALID_STATE = 0x02;
-
-  /** The method has finished execution and has no more work to do. */
-  public static final int END_OF_METHOD = 0x03;
-
-  /** A required resource has already been loaded. */
-  public static final int ALREADY_LOADED = 0x04;
-
-  // Logical errors
-
-  /** The requested operation is not supported by this build or backend. */
-  public static final int NOT_SUPPORTED = 0x10;
-
-  /** The requested operation has not been implemented. */
-  public static final int NOT_IMPLEMENTED = 0x11;
-
-  /** One or more arguments passed to the operation are invalid. */
-  public static final int INVALID_ARGUMENT = 0x12;
-
-  /** A value or tensor has an unexpected type. */
-  public static final int INVALID_TYPE = 0x13;
-
-  /** A required operator kernel is not registered. */
-  public static final int OPERATOR_MISSING = 0x14;
-
-  /** The maximum number of registered kernels has been exceeded. */
-  public static final int REGISTRATION_EXCEEDING_MAX_KERNELS = 0x15;
-
-  /** A kernel with the same name is already registered. */
-  public static final int REGISTRATION_ALREADY_REGISTERED = 0x16;
-
-  // Resource errors
-
-  /** A required resource (file, tensor, program) was not found. */
-  public static final int NOT_FOUND = 0x20;
-
-  /** A memory allocation failed. */
-  public static final int MEMORY_ALLOCATION_FAILED = 0x21;
-
-  /** Access to a resource was denied or failed. */
-  public static final int ACCESS_FAILED = 0x22;
-
-  /** The loaded program is malformed or incompatible. */
-  public static final int INVALID_PROGRAM = 0x23;
-
-  /** External data referenced by the program is invalid or missing. */
-  public static final int INVALID_EXTERNAL_DATA = 0x24;
-
-  /** The system has run out of a required resource. */
-  public static final int OUT_OF_RESOURCES = 0x25;
-
-  // Delegate errors
-
-  /** A delegate reported an incompatible model or configuration. */
-  public static final int DELEGATE_INVALID_COMPATIBILITY = 0x30;
-
-  /** A delegate failed to allocate required memory. */
-  public static final int DELEGATE_MEMORY_ALLOCATION_FAILED = 0x31;
-
-  /** A delegate received an invalid or stale handle. */
-  public static final int DELEGATE_INVALID_HANDLE = 0x32;
-
-  private static final Map<Integer, String> ERROR_CODE_MESSAGES;
-
-  static {
-    Map<Integer, String> map = new HashMap<>();
-
-    // System errors
-    map.put(OK, "Operation successful");
-    map.put(INTERNAL, "Internal error");
-    map.put(INVALID_STATE, "Invalid state");
-    map.put(END_OF_METHOD, "End of method reached");
-    map.put(ALREADY_LOADED, "Already loaded");
-    // Logical errors
-    map.put(NOT_SUPPORTED, "Operation not supported");
-    map.put(NOT_IMPLEMENTED, "Operation not implemented");
-    map.put(INVALID_ARGUMENT, "Invalid argument");
-    map.put(INVALID_TYPE, "Invalid type");
-    map.put(OPERATOR_MISSING, "Operator missing");
-    map.put(REGISTRATION_EXCEEDING_MAX_KERNELS, "Exceeded max kernels");
-    map.put(REGISTRATION_ALREADY_REGISTERED, "Kernel already registered");
-    // Resource errors
-    map.put(NOT_FOUND, "Resource not found");
-    map.put(MEMORY_ALLOCATION_FAILED, "Memory allocation failed");
-    map.put(ACCESS_FAILED, "Access failed");
-    map.put(INVALID_PROGRAM, "Invalid program");
-    map.put(INVALID_EXTERNAL_DATA, "Invalid external data");
-    map.put(OUT_OF_RESOURCES, "Out of resources");
-    // Delegate errors
-    map.put(DELEGATE_INVALID_COMPATIBILITY, "Delegate invalid compatibility");
-    map.put(DELEGATE_MEMORY_ALLOCATION_FAILED, "Delegate memory allocation failed");
-    map.put(DELEGATE_INVALID_HANDLE, "Delegate invalid handle");
-    ERROR_CODE_MESSAGES = Collections.unmodifiableMap(map);
-  }
-
-  static class ErrorHelper {
-    static String formatMessage(int errorCode, String details) {
-      String baseMessage = ERROR_CODE_MESSAGES.get(errorCode);
-      if (baseMessage == null) {
-        baseMessage = "Unknown error code 0x" + Integer.toHexString(errorCode);
-      }
-
-      String safeDetails = details != null ? details : "No details provided";
-      return String.format(
-          "[ExecuTorch Error 0x%s] %s: %s",
-          Integer.toHexString(errorCode), baseMessage, safeDetails);
-    }
-
-    static String getDetailedErrorLogs() {
-      StringBuilder sb = new StringBuilder();
-      try {
-        String[] logEntries = Module.readLogBufferStatic(); // JNI call
-        if (logEntries != null && logEntries.length > 0) {
-          sb.append("\nDetailed logs:\n");
-          for (String entry : logEntries) {
-            sb.append(entry).append("\n");
-          }
-        }
-      } catch (Exception e) {
-        sb.append("Failed to retrieve detailed logs: ").append(e.getMessage());
-      }
-      return sb.toString();
-    }
-  }
-
-  private final int errorCode;
-
-  @DoNotStrip
-  public ExecutorchRuntimeException(int errorCode, String details) {
-    super(ErrorHelper.formatMessage(errorCode, details));
-    this.errorCode = errorCode;
-  }
-
-  public ExecutorchRuntimeException(int errorCode, String details, Throwable cause) {
-    super(ErrorHelper.formatMessage(errorCode, details), cause);
-    this.errorCode = errorCode;
-  }
-
-  /** Returns the numeric error code from {@code runtime/core/error.h}. */
-  public int getErrorCode() {
-    return errorCode;
-  }
-
-  /** Returns detailed log output captured from the native runtime, if available. */
-  public String getDetailedError() {
-    return ErrorHelper.getDetailedErrorLogs();
-  }
-
-  @DoNotStrip
-  public static class ExecutorchInvalidArgumentException extends ExecutorchRuntimeException {
-    @DoNotStrip
-    public ExecutorchInvalidArgumentException(String details) {
-      super(INVALID_ARGUMENT, details);
-    }
-  }
-
-  @DoNotStrip
-  public static RuntimeException makeExecutorchException(int errorCode, String details) {
-    switch (errorCode) {
-      case INVALID_ARGUMENT:
-        return new ExecutorchInvalidArgumentException(details);
-      default:
-        return new ExecutorchRuntimeException(errorCode, details);
-    }
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.kt
new file mode 100644
index 00000000000..5ec3dd255d8
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.kt
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch
+
+import com.facebook.jni.annotations.DoNotStrip
+
+/**
+ * Base exception for all ExecuTorch runtime errors. Each instance carries an integer error code
+ * corresponding to the native `runtime/core/error.h` values, accessible via [getErrorCode].
+ */
+open class ExecutorchRuntimeException
+@DoNotStrip
+constructor(
+    val errorCode: Int,
+    details: String?,
+) : RuntimeException(ErrorHelper.formatMessage(errorCode, details)) {
+
+  constructor(
+      errorCode: Int,
+      details: String?,
+      cause: Throwable?,
+  ) : this(errorCode, details) {
+    if (cause != null) initCause(cause)
+  }
+
+  /** Returns detailed log output captured from the native runtime, if available. */
+  fun getDetailedError(): String = ErrorHelper.getDetailedErrorLogs()
+
+  @DoNotStrip
+  class ExecutorchInvalidArgumentException @DoNotStrip constructor(details: String?) :
+      ExecutorchRuntimeException(INVALID_ARGUMENT, details)
+
+  private object ErrorHelper {
+    private val ERROR_CODE_MESSAGES: Map<Int, String> =
+        mapOf(
+            // System errors
+            OK to "Operation successful",
+            INTERNAL to "Internal error",
+            INVALID_STATE to "Invalid state",
+            END_OF_METHOD to "End of method reached",
+            ALREADY_LOADED to "Already loaded",
+            // Logical errors
+            NOT_SUPPORTED to "Operation not supported",
+            NOT_IMPLEMENTED to "Operation not implemented",
+            INVALID_ARGUMENT to "Invalid argument",
+            INVALID_TYPE to "Invalid type",
+            OPERATOR_MISSING to "Operator missing",
+            REGISTRATION_EXCEEDING_MAX_KERNELS to "Exceeded max kernels",
+            REGISTRATION_ALREADY_REGISTERED to "Kernel already registered",
+            // Resource errors
+            NOT_FOUND to "Resource not found",
+            MEMORY_ALLOCATION_FAILED to "Memory allocation failed",
+            ACCESS_FAILED to "Access failed",
+            INVALID_PROGRAM to "Invalid program",
+            INVALID_EXTERNAL_DATA to "Invalid external data",
+            OUT_OF_RESOURCES to "Out of resources",
+            // Delegate errors
+            DELEGATE_INVALID_COMPATIBILITY to "Delegate invalid compatibility",
+            DELEGATE_MEMORY_ALLOCATION_FAILED to "Delegate memory allocation failed",
+            DELEGATE_INVALID_HANDLE to "Delegate invalid handle",
+        )
+
+    fun formatMessage(errorCode: Int, details: String?): String {
+      val baseMessage =
+          ERROR_CODE_MESSAGES[errorCode] ?: "Unknown error code 0x${Integer.toHexString(errorCode)}"
+      val safeDetails = details ?: "No details provided"
+      return "[ExecuTorch Error 0x${Integer.toHexString(errorCode)}] $baseMessage: $safeDetails"
+    }
+
+    fun getDetailedErrorLogs(): String {
+      val sb = StringBuilder()
+      try {
+        val logEntries = Module.readLogBufferStatic() // JNI call
+        if (logEntries != null && logEntries.isNotEmpty()) {
+          sb.append("\nDetailed logs:\n")
+          for (entry in logEntries) {
+            sb.append(entry).append("\n")
+          }
+        }
+      } catch (e: Exception) {
+        sb.append("Failed to retrieve detailed logs: ").append(e.message)
+      }
+      return sb.toString()
+    }
+  }
+
+  companion object {
+    // Error code constants - keep in sync with runtime/core/error.h
+
+    // System errors
+    const val OK = 0x00
+    const val INTERNAL = 0x01
+    const val INVALID_STATE = 0x02
+    const val END_OF_METHOD = 0x03
+    const val ALREADY_LOADED = 0x04
+
+    // Logical errors
+    const val NOT_SUPPORTED = 0x10
+    const val NOT_IMPLEMENTED = 0x11
+    const val INVALID_ARGUMENT = 0x12
+    const val INVALID_TYPE = 0x13
+    const val OPERATOR_MISSING = 0x14
+    const val REGISTRATION_EXCEEDING_MAX_KERNELS = 0x15
+    const val REGISTRATION_ALREADY_REGISTERED = 0x16
+
+    // Resource errors
+    const val NOT_FOUND = 0x20
+    const val MEMORY_ALLOCATION_FAILED = 0x21
+    const val ACCESS_FAILED = 0x22
+    const val INVALID_PROGRAM = 0x23
+    const val INVALID_EXTERNAL_DATA = 0x24
+    const val OUT_OF_RESOURCES = 0x25
+
+    // Delegate errors
+    const val DELEGATE_INVALID_COMPATIBILITY = 0x30
+    const val DELEGATE_MEMORY_ALLOCATION_FAILED = 0x31
+    const val DELEGATE_INVALID_HANDLE = 0x32
+
+    @DoNotStrip
+    @JvmStatic
+    fun makeExecutorchException(errorCode: Int, details: String?): RuntimeException =
+        when (errorCode) {
+          INVALID_ARGUMENT -> ExecutorchInvalidArgumentException(details)
+          else -> ExecutorchRuntimeException(errorCode, details)
+        }
+  }
+}

From 8be91e0b3c80b6e1338c36711124d065d667900e Mon Sep 17 00:00:00 2001
From: Digant Desai <digantdesai@meta.com>
Date: Wed, 27 May 2026 12:27:41 -0700
Subject: [PATCH 041/103] WebGPU: add memory aliasing for intermediate tensor
 buffers (#19305)

USE ETVK's mem_obj_id for the WebGPU runtime to implement memory
aliasing
---
 backends/webgpu/runtime/WebGPUGraph.cpp     | 315 ++++++++++++++++----
 backends/webgpu/runtime/WebGPUGraph.h       |  46 +++
 backends/webgpu/test/ops/add/test_add.py    |  15 +
 backends/webgpu/test/test_build_webgpu.sh   |   7 +-
 backends/webgpu/test/test_webgpu_native.cpp |  65 ++++
 5 files changed, 384 insertions(+), 64 deletions(-)

diff --git a/backends/webgpu/runtime/WebGPUGraph.cpp b/backends/webgpu/runtime/WebGPUGraph.cpp
index f0e4c7959c0..91404fb164f 100644
--- a/backends/webgpu/runtime/WebGPUGraph.cpp
+++ b/backends/webgpu/runtime/WebGPUGraph.cpp
@@ -50,9 +50,15 @@ size_t vk_datatype_size(vkgraph::VkDataType dtype) {
 WebGPUGraph::WebGPUGraph() = default;
 
 WebGPUGraph::~WebGPUGraph() {
-  for (auto& t : tensors_) {
-    if (t.buffer) {
-      wgpuBufferRelease(t.buffer);
+  for (size_t i = 0; i < tensors_.size(); i++) {
+    if (tensors_[i].buffer &&
+        (i >= tensor_mem_obj_ids_.size() || tensor_mem_obj_ids_[i] < 0)) {
+      wgpuBufferRelease(tensors_[i].buffer);
+    }
+  }
+  for (auto& buf : shared_buffers_) {
+    if (buf) {
+      wgpuBufferRelease(buf);
     }
   }
   for (auto& buf : output_staging_buffers_) {
@@ -68,6 +74,21 @@ WebGPUGraph::~WebGPUGraph() {
       wgpuBindGroupRelease(d.bind_group);
     }
   }
+  for (auto& [_, shader] : shader_cache_) {
+    if (shader) {
+      wgpuShaderModuleRelease(shader);
+    }
+  }
+  for (auto& [_, pipeline] : pipeline_cache_) {
+    if (pipeline) {
+      wgpuComputePipelineRelease(pipeline);
+    }
+  }
+  for (auto& [_, bgl] : bgl_cache_) {
+    if (bgl) {
+      wgpuBindGroupLayoutRelease(bgl);
+    }
+  }
 }
 
 void WebGPUGraph::build(
@@ -94,6 +115,7 @@ void WebGPUGraph::build(
   const int num_vals = values ? values->size() : 0;
   value_types_.resize(num_vals, ValueType::Null);
   tensors_.resize(num_vals);
+  tensor_mem_obj_ids_.resize(num_vals, -1);
   ints_.resize(num_vals, 0);
   doubles_.resize(num_vals, 0.0);
   bools_.resize(num_vals, false);
@@ -121,27 +143,40 @@ void WebGPUGraph::build(
         }
         tensor.nbytes = numel * vk_datatype_size(vk_tensor->datatype());
 
-        // Create GPU buffer
-        WGPUBufferDescriptor buf_desc = {};
-        buf_desc.size = tensor.nbytes > 0 ? tensor.nbytes : 4;
-        buf_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
-            WGPUBufferUsage_CopySrc;
-        buf_desc.mappedAtCreation = false;
-        tensor.buffer = wgpuDeviceCreateBuffer(device_, &buf_desc);
-
-        // Upload constant data if this tensor has a constant_id
         int constant_id = vk_tensor->constant_id();
-        if (constant_id >= 0 && constant_data) {
-          const auto* constants = graph->constants();
-          if (constants && constant_id < static_cast<int>(constants->size())) {
-            const auto* vk_bytes = constants->Get(constant_id);
-            // Only upload from embedded bytes (not named data map)
-            if (vk_bytes->offset() != UINT64_MAX) {
-              const uint8_t* src = constant_data + vk_bytes->offset();
-              wgpuQueueWriteBuffer(
-                  queue_, tensor.buffer, 0, src, tensor.nbytes);
+        int mem_obj_id = vk_tensor->mem_obj_id();
+
+        // Constants always get dedicated buffers regardless of mem_obj_id
+        if (constant_id >= 0 || mem_obj_id < 0) {
+          tensor_mem_obj_ids_[i] = -1;
+          WGPUBufferDescriptor buf_desc = {};
+          buf_desc.size = std::max(tensor.nbytes, size_t(4));
+          buf_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
+              WGPUBufferUsage_CopySrc;
+          buf_desc.mappedAtCreation = false;
+          tensor.buffer = wgpuDeviceCreateBuffer(device_, &buf_desc);
+
+          if (constant_id >= 0 && constant_data && tensor.nbytes > 0) {
+            const auto* constants = graph->constants();
+            if (constants &&
+                constant_id < static_cast<int>(constants->size())) {
+              const auto* vk_bytes = constants->Get(constant_id);
+              if (vk_bytes->offset() != UINT64_MAX) {
+                const uint8_t* src = constant_data + vk_bytes->offset();
+                wgpuQueueWriteBuffer(
+                    queue_, tensor.buffer, 0, src, tensor.nbytes);
+              }
             }
           }
+        } else {
+          // Shared buffer: track required size, defer allocation to pass 2
+          tensor_mem_obj_ids_[i] = mem_obj_id;
+          size_t id = static_cast<size_t>(mem_obj_id);
+          if (id >= shared_buffer_sizes_.size()) {
+            shared_buffer_sizes_.resize(id + 1, 0);
+          }
+          shared_buffer_sizes_[id] =
+              std::max(shared_buffer_sizes_[id], tensor.nbytes);
         }
         break;
       }
@@ -166,6 +201,23 @@ void WebGPUGraph::build(
     }
   }
 
+  // Allocate shared buffers and assign to tensors
+  shared_buffers_.resize(shared_buffer_sizes_.size(), nullptr);
+  for (size_t id = 0; id < shared_buffer_sizes_.size(); id++) {
+    WGPUBufferDescriptor buf_desc = {};
+    buf_desc.size = std::max(shared_buffer_sizes_[id], size_t(4));
+    buf_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
+        WGPUBufferUsage_CopySrc;
+    buf_desc.mappedAtCreation = false;
+    shared_buffers_[id] = wgpuDeviceCreateBuffer(device_, &buf_desc);
+  }
+  for (int i = 0; i < num_vals; i++) {
+    int mid = tensor_mem_obj_ids_[i];
+    if (mid >= 0) {
+      tensors_[i].buffer = shared_buffers_[mid];
+    }
+  }
+
   // Phase 2: Record input and output IDs
   const auto* fb_input_ids = graph->input_ids();
   if (fb_input_ids) {
@@ -181,7 +233,7 @@ void WebGPUGraph::build(
 
       // Create staging buffer for output readback
       WGPUBufferDescriptor staging_desc = {};
-      staging_desc.size = tensors_[oid].nbytes > 0 ? tensors_[oid].nbytes : 4;
+      staging_desc.size = std::max(tensors_[oid].nbytes, size_t(4));
       staging_desc.usage = WGPUBufferUsage_MapRead | WGPUBufferUsage_CopyDst;
       staging_desc.mappedAtCreation = false;
       output_staging_buffers_.push_back(
@@ -189,6 +241,14 @@ void WebGPUGraph::build(
     }
   }
 
+  for (size_t i = 0; i < output_ids_.size(); i++) {
+    int oid = output_ids_[i];
+    output_copies_.push_back(
+        {tensors_[oid].buffer,
+         output_staging_buffers_[i],
+         tensors_[oid].nbytes});
+  }
+
   // Phase 3: Build operator dispatch chain
   const auto* chain = graph->chain();
   if (chain) {
@@ -213,9 +273,70 @@ void WebGPUGraph::build(
   }
 }
 
+WGPUShaderModule WebGPUGraph::get_or_create_shader(
+    const std::string& key,
+    const char* wgsl_source) {
+  auto it = shader_cache_.find(key);
+  if (it != shader_cache_.end()) {
+    return it->second;
+  }
+
+  WGPUShaderSourceWGSL wgsl_desc = {};
+  wgsl_desc.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_desc.code = {wgsl_source, WGPU_STRLEN};
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_desc.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(device_, &shader_desc);
+
+  shader_cache_[key] = shader;
+  return shader;
+}
+
+WGPUComputePipeline WebGPUGraph::get_or_create_pipeline(
+    const std::string& key,
+    WGPUShaderModule shader,
+    WGPUPipelineLayout layout) {
+  auto it = pipeline_cache_.find(key);
+  if (it != pipeline_cache_.end()) {
+    return it->second;
+  }
+
+  WGPUComputePipelineDescriptor pipeline_desc = {};
+  pipeline_desc.layout = layout;
+  pipeline_desc.compute.module = shader;
+  pipeline_desc.compute.entryPoint = {"main", WGPU_STRLEN};
+  WGPUComputePipeline pipeline =
+      wgpuDeviceCreateComputePipeline(device_, &pipeline_desc);
+
+  pipeline_cache_[key] = pipeline;
+  return pipeline;
+}
+
+WGPUBindGroupLayout WebGPUGraph::get_or_create_bgl(
+    const std::string& key,
+    const WGPUBindGroupLayoutEntry* entries,
+    uint32_t count) {
+  auto it = bgl_cache_.find(key);
+  if (it != bgl_cache_.end()) {
+    return it->second;
+  }
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = count;
+  bgl_desc.entries = entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc);
+
+  bgl_cache_[key] = bgl;
+  return bgl;
+}
+
 void WebGPUGraph::copy_inputs(
     const std::vector<std::pair<const void*, size_t>>& inputs) {
   for (size_t i = 0; i < inputs.size() && i < input_ids_.size(); i++) {
+    if (inputs[i].second == 0) {
+      continue;
+    }
     int tid = input_ids_[i];
     const auto& tensor = tensors_[tid];
     wgpuQueueWriteBuffer(
@@ -224,43 +345,89 @@ void WebGPUGraph::copy_inputs(
 }
 
 void WebGPUGraph::execute() {
-  WGPUCommandEncoderDescriptor enc_desc = {};
-  WGPUCommandEncoder encoder =
-      wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
-
-  WGPUComputePassDescriptor pass_desc = {};
-  WGPUComputePassEncoder pass =
-      wgpuCommandEncoderBeginComputePass(encoder, &pass_desc);
-
-  for (const auto& dispatch : dispatches_) {
-    wgpuComputePassEncoderSetPipeline(pass, dispatch.pipeline);
-    wgpuComputePassEncoderSetBindGroup(
-        pass, 0, dispatch.bind_group, 0, nullptr);
-    wgpuComputePassEncoderDispatchWorkgroups(
-        pass, dispatch.workgroup_count_x, 1, 1);
-  }
+  const size_t n = dispatches_.size();
+  const size_t chunk = execute_config_.chunk_size;
+
+  if (chunk == 0 || n <= chunk) {
+    WGPUCommandEncoderDescriptor enc_desc = {};
+    WGPUCommandEncoder encoder =
+        wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
+
+    WGPUComputePassDescriptor pass_desc = {};
+    WGPUComputePassEncoder pass =
+        wgpuCommandEncoderBeginComputePass(encoder, &pass_desc);
+
+    for (const auto& dispatch : dispatches_) {
+      wgpuComputePassEncoderSetPipeline(pass, dispatch.pipeline);
+      wgpuComputePassEncoderSetBindGroup(
+          pass, 0, dispatch.bind_group, 0, nullptr);
+      wgpuComputePassEncoderDispatchWorkgroups(
+          pass, dispatch.workgroup_count_x, 1, 1);
+    }
 
-  wgpuComputePassEncoderEnd(pass);
-  wgpuComputePassEncoderRelease(pass);
+    wgpuComputePassEncoderEnd(pass);
+    wgpuComputePassEncoderRelease(pass);
 
-  // Copy outputs to staging buffers
-  for (size_t i = 0; i < output_ids_.size(); i++) {
-    int oid = output_ids_[i];
-    wgpuCommandEncoderCopyBufferToBuffer(
-        encoder,
-        tensors_[oid].buffer,
-        0,
-        output_staging_buffers_[i],
-        0,
-        tensors_[oid].nbytes);
+    for (const auto& copy : output_copies_) {
+      wgpuCommandEncoderCopyBufferToBuffer(
+          encoder, copy.src_buffer, 0, copy.staging_buffer, 0, copy.nbytes);
+    }
+
+    WGPUCommandBufferDescriptor cmd_desc = {};
+    WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
+    wgpuQueueSubmit(queue_, 1, &cmd);
+
+    wgpuCommandBufferRelease(cmd);
+    wgpuCommandEncoderRelease(encoder);
+    return;
   }
 
-  WGPUCommandBufferDescriptor cmd_desc = {};
-  WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
-  wgpuQueueSubmit(queue_, 1, &cmd);
+  const size_t first_chunk = execute_config_.initial_chunk_size > 0
+      ? execute_config_.initial_chunk_size
+      : chunk;
+
+  size_t start = 0;
+  size_t current_chunk = first_chunk;
 
-  wgpuCommandBufferRelease(cmd);
-  wgpuCommandEncoderRelease(encoder);
+  while (start < n) {
+    size_t end = std::min(start + current_chunk, n);
+
+    WGPUCommandEncoderDescriptor enc_desc = {};
+    WGPUCommandEncoder encoder =
+        wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
+
+    WGPUComputePassDescriptor pass_desc = {};
+    WGPUComputePassEncoder pass =
+        wgpuCommandEncoderBeginComputePass(encoder, &pass_desc);
+
+    for (size_t i = start; i < end; i++) {
+      wgpuComputePassEncoderSetPipeline(pass, dispatches_[i].pipeline);
+      wgpuComputePassEncoderSetBindGroup(
+          pass, 0, dispatches_[i].bind_group, 0, nullptr);
+      wgpuComputePassEncoderDispatchWorkgroups(
+          pass, dispatches_[i].workgroup_count_x, 1, 1);
+    }
+
+    wgpuComputePassEncoderEnd(pass);
+    wgpuComputePassEncoderRelease(pass);
+
+    if (end == n) {
+      for (const auto& copy : output_copies_) {
+        wgpuCommandEncoderCopyBufferToBuffer(
+            encoder, copy.src_buffer, 0, copy.staging_buffer, 0, copy.nbytes);
+      }
+    }
+
+    WGPUCommandBufferDescriptor cmd_desc = {};
+    WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
+    wgpuQueueSubmit(queue_, 1, &cmd);
+
+    wgpuCommandBufferRelease(cmd);
+    wgpuCommandEncoderRelease(encoder);
+
+    start = end;
+    current_chunk = chunk;
+  }
 }
 
 namespace {
@@ -283,24 +450,35 @@ void buffer_map_callback(
 } // namespace
 
 void WebGPUGraph::copy_outputs(std::vector<std::pair<void*, size_t>>& outputs) {
-  for (size_t i = 0; i < outputs.size() && i < output_staging_buffers_.size();
-       i++) {
-    MapCallbackData cb_data;
+  const size_t count = std::min(outputs.size(), output_staging_buffers_.size());
+
+  std::vector<MapCallbackData> cb_data(count);
+
+  for (size_t i = 0; i < count; i++) {
+    if (outputs[i].second == 0) {
+      cb_data[i].done = true;
+      cb_data[i].status = WGPUMapAsyncStatus_Success;
+      continue;
+    }
     WGPUBufferMapCallbackInfo cb_info = {};
     cb_info.mode = WGPUCallbackMode_AllowSpontaneous;
     cb_info.callback = buffer_map_callback;
-    cb_info.userdata1 = &cb_data;
+    cb_info.userdata1 = &cb_data[i];
     wgpuBufferMapAsync(
         output_staging_buffers_[i],
         WGPUMapMode_Read,
         0,
         outputs[i].second,
         cb_info);
+  }
 
-    // Poll until the map callback fires.
-    wgpuDevicePoll(device_, true, nullptr);
+  wgpuDevicePoll(device_, true, nullptr);
 
-    if (cb_data.status == WGPUMapAsyncStatus_Success) {
+  for (size_t i = 0; i < count; i++) {
+    if (outputs[i].second == 0) {
+      continue;
+    }
+    if (cb_data[i].status == WGPUMapAsyncStatus_Success) {
       const void* mapped = wgpuBufferGetConstMappedRange(
           output_staging_buffers_[i], 0, outputs[i].second);
       std::memcpy(outputs[i].first, mapped, outputs[i].second);
@@ -315,15 +493,28 @@ WebGPUMemoryStats WebGPUGraph::memory_stats() const {
   WebGPUMemoryStats stats;
   for (size_t i = 0; i < value_types_.size(); i++) {
     if (value_types_[i] == ValueType::Tensor && tensors_[i].nbytes > 0) {
-      stats.tensor_buffer_bytes += tensors_[i].nbytes;
       stats.num_tensors++;
+      // Shared tensors are tracked via shared_buffer_sizes_
+      bool is_shared =
+          i < tensor_mem_obj_ids_.size() && tensor_mem_obj_ids_[i] >= 0;
+      if (!is_shared) {
+        stats.unshared_tensor_buffer_bytes += tensors_[i].nbytes;
+      }
     }
   }
+  for (size_t s : shared_buffer_sizes_) {
+    stats.shared_buffer_bytes += s;
+  }
+  stats.num_shared_objects = static_cast<int>(shared_buffers_.size());
+  stats.tensor_buffer_bytes =
+      stats.shared_buffer_bytes + stats.unshared_tensor_buffer_bytes;
   for (size_t i = 0; i < output_ids_.size(); i++) {
     stats.staging_buffer_bytes += tensors_[output_ids_[i]].nbytes;
   }
   stats.uniform_buffer_bytes = uniform_buffer_bytes_;
   stats.num_dispatches = static_cast<int>(dispatches_.size());
+  stats.num_cached_pipelines = static_cast<int>(pipeline_cache_.size());
+  stats.num_cached_shaders = static_cast<int>(shader_cache_.size());
   return stats;
 }
 
diff --git a/backends/webgpu/runtime/WebGPUGraph.h b/backends/webgpu/runtime/WebGPUGraph.h
index 2d6996e9219..3aa96917a4e 100644
--- a/backends/webgpu/runtime/WebGPUGraph.h
+++ b/backends/webgpu/runtime/WebGPUGraph.h
@@ -12,6 +12,7 @@
 
 #include <cstdint>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 namespace executorch {
@@ -30,12 +31,28 @@ struct WebGPUDispatch {
   uint32_t workgroup_count_x = 1;
 };
 
+struct OutputCopy {
+  WGPUBuffer src_buffer = nullptr;
+  WGPUBuffer staging_buffer = nullptr;
+  size_t nbytes = 0;
+};
+
+struct ExecuteConfig {
+  size_t chunk_size = 0;
+  size_t initial_chunk_size = 0;
+};
+
 struct WebGPUMemoryStats {
   size_t tensor_buffer_bytes = 0;
+  size_t shared_buffer_bytes = 0;
+  int num_shared_objects = 0;
+  size_t unshared_tensor_buffer_bytes = 0;
   size_t staging_buffer_bytes = 0;
   size_t uniform_buffer_bytes = 0;
   int num_tensors = 0;
   int num_dispatches = 0;
+  int num_cached_pipelines = 0;
+  int num_cached_shaders = 0;
 
   size_t total_bytes() const {
     return tensor_buffer_bytes + staging_buffer_bytes + uniform_buffer_bytes;
@@ -99,6 +116,20 @@ class WebGPUGraph {
     uniform_buffer_bytes_ += bytes;
   }
 
+  WGPUShaderModule get_or_create_shader(
+      const std::string& key,
+      const char* wgsl_source);
+
+  WGPUComputePipeline get_or_create_pipeline(
+      const std::string& key,
+      WGPUShaderModule shader,
+      WGPUPipelineLayout layout);
+
+  WGPUBindGroupLayout get_or_create_bgl(
+      const std::string& key,
+      const WGPUBindGroupLayoutEntry* entries,
+      uint32_t count);
+
   void set_instance(WGPUInstance instance) {
     instance_ = instance;
   }
@@ -134,11 +165,26 @@ class WebGPUGraph {
   std::vector<int> input_ids_;
   std::vector<int> output_ids_;
 
+  // Memory aliasing: tensors with the same mem_obj_id share a WGPUBuffer.
+  std::vector<int> tensor_mem_obj_ids_;
+  std::vector<WGPUBuffer> shared_buffers_;
+  std::vector<size_t> shared_buffer_sizes_;
+
   // Staging buffers for reading back outputs (MapRead | CopyDst).
   std::vector<WGPUBuffer> output_staging_buffers_;
 
+  // Pre-computed output copy descriptors for execute().
+  std::vector<OutputCopy> output_copies_;
+
   std::vector<WebGPUDispatch> dispatches_;
 
+  ExecuteConfig execute_config_;
+
+  // Caches for reusing GPU objects across dispatches.
+  std::unordered_map<std::string, WGPUShaderModule> shader_cache_;
+  std::unordered_map<std::string, WGPUComputePipeline> pipeline_cache_;
+  std::unordered_map<std::string, WGPUBindGroupLayout> bgl_cache_;
+
   size_t uniform_buffer_bytes_ = 0;
 };
 
diff --git a/backends/webgpu/test/ops/add/test_add.py b/backends/webgpu/test/ops/add/test_add.py
index f4b33ced76d..e8da644a1f9 100644
--- a/backends/webgpu/test/ops/add/test_add.py
+++ b/backends/webgpu/test/ops/add/test_add.py
@@ -31,6 +31,8 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         z = x + y
         z = z + x
         z = z + y
+        z = z + x
+        z = z + y
         return z
 
 
@@ -97,5 +99,18 @@ def export_add_model(output_path: str) -> None:
     print(f"Exported {output_path}")
 
 
+def export_chained_add_model(output_path: str) -> None:
+    """Export a chained add model (z=x+y; z=z+x; z=z+y; z=z+x; z=z+y) to .pte for memory aliasing testing."""
+    model = AddChainedModule()
+    example_inputs = (torch.randn(1024, 1024), torch.randn(1024, 1024))
+    ep = torch.export.export(model, example_inputs)
+    et_program = to_edge_transform_and_lower(
+        ep, partitioner=[VulkanPartitioner()]
+    ).to_executorch()
+    with open(output_path, "wb") as f:
+        f.write(et_program.buffer)
+    print(f"Exported {output_path}")
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/backends/webgpu/test/test_build_webgpu.sh b/backends/webgpu/test/test_build_webgpu.sh
index 684926cb181..a42b2304ee7 100755
--- a/backends/webgpu/test/test_build_webgpu.sh
+++ b/backends/webgpu/test/test_build_webgpu.sh
@@ -22,12 +22,14 @@ $PYTHON_EXECUTABLE -m pytest "${SCRIPT_DIR}/ops/add/test_add.py" -v
 
 # ── Step 2: Export .pte model ─────────────────────────────────────────────────
 
-echo "=== Step 2: Export test model ==="
+echo "=== Step 2: Export test models ==="
 PTE_MODEL="/tmp/webgpu_add_test.pte"
+PTE_CHAINED_MODEL="/tmp/webgpu_chained_add_test.pte"
 cd "${EXECUTORCH_ROOT}"
 $PYTHON_EXECUTABLE -c "
-from executorch.backends.webgpu.test.ops.add.test_add import export_add_model
+from executorch.backends.webgpu.test.ops.add.test_add import export_add_model, export_chained_add_model
 export_add_model('${PTE_MODEL}')
+export_chained_add_model('${PTE_CHAINED_MODEL}')
 "
 
 # ── Step 3: Native build + test (wgpu-native) ────────────────────────────────
@@ -60,6 +62,7 @@ cmake --build "${NATIVE_BUILD_DIR}" --target webgpu_native_test -j${NPROC}
 
 echo "=== Step 4: Run native test ==="
 WEBGPU_TEST_MODEL="${PTE_MODEL}" \
+WEBGPU_TEST_CHAINED_MODEL="${PTE_CHAINED_MODEL}" \
     "${NATIVE_BUILD_DIR}/backends/webgpu/webgpu_native_test"
 
 echo "=== Done ==="
diff --git a/backends/webgpu/test/test_webgpu_native.cpp b/backends/webgpu/test/test_webgpu_native.cpp
index c60695e11c9..d3005debf37 100644
--- a/backends/webgpu/test/test_webgpu_native.cpp
+++ b/backends/webgpu/test/test_webgpu_native.cpp
@@ -75,6 +75,62 @@ static bool test_single_add(const std::string& model_path) {
   return true;
 }
 
+static bool test_chained_add(const std::string& model_path) {
+  printf("\n--- Test: chained add (1024x1024, 5 ops) ---\n");
+
+  Module module(model_path);
+  auto err = module.load_forward();
+  if (err != Error::Ok) {
+    printf("FAIL: could not load forward method (error %d)\n", (int)err);
+    return false;
+  }
+  printf("Model loaded: %s\n", model_path.c_str());
+
+  constexpr int dim = 1024;
+  constexpr int size = dim * dim;
+
+  std::vector<float> x_data(size);
+  std::vector<float> y_data(size);
+  for (int i = 0; i < size; i++) {
+    x_data[i] = static_cast<float>(i % 100) * 0.01f;
+    y_data[i] = static_cast<float>(i % 50) * 0.02f;
+  }
+
+  auto x = make_tensor_ptr({dim, dim}, std::vector<float>(x_data));
+  auto y = make_tensor_ptr({dim, dim}, std::vector<float>(y_data));
+
+  auto result = module.forward({EValue(x), EValue(y)});
+  if (!result.ok()) {
+    printf("FAIL: forward failed (error %d)\n", (int)result.error());
+    return false;
+  }
+
+  const auto& outputs = result.get();
+  if (outputs.empty() || !outputs[0].isTensor()) {
+    printf("FAIL: no tensor output\n");
+    return false;
+  }
+
+  // z=x+y; z=z+x=2x+y; z=z+y=2x+2y; z=z+x=3x+2y; z=z+y=3x+3y
+  const auto& out_tensor = outputs[0].toTensor();
+  const float* out_data = out_tensor.const_data_ptr<float>();
+
+  float max_error = 0.0f;
+  for (int i = 0; i < size; i++) {
+    float expected = 3.0f * x_data[i] + 3.0f * y_data[i];
+    float error = std::abs(out_data[i] - expected);
+    max_error = std::max(max_error, error);
+  }
+
+  printf("Max error: %e (checked %d elements)\n", max_error, size);
+  if (max_error > 1e-3f) {
+    printf("FAIL: max error exceeds tolerance 1e-3\n");
+    return false;
+  }
+  printf("PASS: chained add test\n");
+  return true;
+}
+
 int main(int argc, char** argv) {
   std::string model_path = "webgpu_add_test.pte";
   if (argc > 1) {
@@ -84,6 +140,11 @@ int main(int argc, char** argv) {
     model_path = env;
   }
 
+  std::string chained_model_path;
+  if (const char* env = std::getenv("WEBGPU_TEST_CHAINED_MODEL")) {
+    chained_model_path = env;
+  }
+
   WebGPUContext ctx;
   try {
     ctx = create_webgpu_context();
@@ -97,6 +158,10 @@ int main(int argc, char** argv) {
 
   bool ok = test_single_add(model_path);
 
+  if (!chained_model_path.empty()) {
+    ok = test_chained_add(chained_model_path) && ok;
+  }
+
   set_default_webgpu_context(nullptr);
   destroy_webgpu_context(ctx);
 

From 1e8dc3095a39a709f862034b7b76caedc3de1d2b Mon Sep 17 00:00:00 2001
From: Chizkiyahu Raful <37312901+chizkiyahu@users.noreply.github.com>
Date: Wed, 27 May 2026 23:17:56 +0300
Subject: [PATCH 042/103] Serialize/flatbuffer to program (#18129)

exir: add flatbuffer-to-program reader

This continues the work from
https://github.com/pytorch/executorch/pull/17333.


cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

---------

Signed-off-by: Chizkiyahu Raful <chizkiyahu.raful@arm.com>
---
 exir/_serialize/_flatbuffer.py                |  67 ---------
 exir/_serialize/_flatbuffer_program.py        | 141 +++++++++++++++++-
 exir/_serialize/_program.py                   |  24 +--
 exir/_serialize/test/test_flatbuffer.py       |  65 +-------
 .../test/test_flatbuffer_program.py           |  51 +------
 exir/_serialize/test/test_program.py          |  88 ++++++++++-
 6 files changed, 228 insertions(+), 208 deletions(-)

diff --git a/exir/_serialize/_flatbuffer.py b/exir/_serialize/_flatbuffer.py
index 219e4517aea..43e203d1ff9 100644
--- a/exir/_serialize/_flatbuffer.py
+++ b/exir/_serialize/_flatbuffer.py
@@ -12,7 +12,6 @@
 import importlib.resources
 import os
 import re
-import shutil
 import stat
 import subprocess
 import tempfile
@@ -384,72 +383,6 @@ def _flatc_decompile(
     )
 
 
-def _program_json_to_flatbuffer(
-    program_json: str,
-    *,
-    constant_tensor_alignment: Optional[int] = None,
-    delegate_alignment: Optional[int] = None,
-) -> _FlatbufferResult:
-    """Converts Program-compatible JSON into binary flatbuffer data.
-
-    Args:
-        program_json: The JSON to convert. Must be compatible with the root
-            table type of //executorch/schema/program.fbs.
-        constant_tensor_alignment: If provided, the alignment to use for tensor
-            data embedded in the output flatbuffer data. If not provided, uses
-            the alignment in the schema.
-        delegate_alignment: If provided, the alignment to use for delegate
-            data embedded in the output flatbuffer data. If not provided, uses
-            the alignment in the schema.
-
-    Returns: The flatbuffer data and associated metadata.
-    """
-    with tempfile.TemporaryDirectory() as temp_dir:
-        schema_info = _prepare_schema(
-            out_dir=temp_dir,
-            constant_tensor_alignment=constant_tensor_alignment,
-            delegate_alignment=delegate_alignment,
-        )
-        file_stem = "data"
-        json_path = os.path.join(temp_dir, file_stem + ".json")
-        output_path = os.path.join(temp_dir, file_stem + ".pte")
-
-        with open(json_path, "wb") as json_file:
-            json_file.write(program_json.encode("ascii"))
-
-        try:
-            _flatc_compile(temp_dir, schema_info.root_path, json_path)
-        except Exception as err:
-            # It's helpful to save the breaking files for debugging. Optionally
-            # move them out of the auto-deleting temporary directory. Don't do
-            # this by default because some input files can be many GB in size,
-            # and these copies won't be auto-deleted.
-            should_save = os.getenv(_SAVE_FLATC_ENV, "").strip() not in {"", "0"}
-            extra_message = ""
-            if should_save:
-                try:
-                    saved_dir = tempfile.mkdtemp(prefix="exir-saved-flatc-")
-                    for f in os.listdir(temp_dir):
-                        shutil.move(src=os.path.join(temp_dir, f), dst=saved_dir)
-                    extra_message += f" Moved input files to '{saved_dir}'."
-                except Exception as err2:
-                    extra_message += (
-                        f" (Failed to save input files for debugging: {err2})"
-                    )
-            else:
-                extra_message += (
-                    f" Set {_SAVE_FLATC_ENV}=1 to save input files on failure."
-                )
-
-            raise RuntimeError(
-                f"Failed to compile {json_path} to {output_path}." + extra_message
-            ) from err
-        with open(output_path, "rb") as output_file:
-            return _FlatbufferResult(
-                data=output_file.read(), max_alignment=schema_info.max_alignment
-            )
-
-
 def _replace_infinity_in_json_file(content: bytes) -> bytes:
     """Replace -inf and inf with "inf" and "-inf" in the JSON file. program.fbs
     is used to convert from flatbuffer to JSON. +-inf float values are not
diff --git a/exir/_serialize/_flatbuffer_program.py b/exir/_serialize/_flatbuffer_program.py
index 4c1c315347a..cd742c8361d 100644
--- a/exir/_serialize/_flatbuffer_program.py
+++ b/exir/_serialize/_flatbuffer_program.py
@@ -8,12 +8,14 @@
 import enum
 import functools
 import importlib
+import pkgutil
 import tempfile
 
 from contextvars import ContextVar
 from dataclasses import fields, is_dataclass
 from functools import lru_cache
-from typing import Any, Dict, Optional
+from types import ModuleType
+from typing import Any, Dict, get_args, get_origin, get_type_hints, Optional, Union
 
 import flatbuffers  # pyre-ignore[21]
 from executorch.exir._serialize._flatbuffer import (
@@ -22,6 +24,7 @@
     _prepare_schema,
     _SchemaInfo,
 )
+from executorch.exir._serialize.generated import executorch_flatbuffer as _generated_fb
 from executorch.exir._serialize.generated.executorch_flatbuffer import (
     BackendDelegateInlineData as _BackendDelegateInlineData,
     Buffer as _Buffer,
@@ -33,6 +36,7 @@
 
 _T_CLASS_CACHE: Dict[type, type] = {}
 _FIELD_NAME_CACHE: Dict[type, tuple[tuple[str, str], ...]] = {}
+_TYPE_HINTS_CACHE: Dict[type, Dict[str, Any]] = {}
 _BUFFER_ALIGNMENT: ContextVar[int] = ContextVar("_BUFFER_ALIGNMENT", default=1)
 _DELEGATE_ALIGNMENT: ContextVar[int] = ContextVar("_DELEGATE_ALIGNMENT", default=1)
 
@@ -64,6 +68,15 @@ def _dataclass_field_map(dataclass_type: type) -> tuple[tuple[str, str], ...]:
     return mapping
 
 
+def _dataclass_type_hints(dataclass_type: type) -> Dict[str, Any]:
+    cached = _TYPE_HINTS_CACHE.get(dataclass_type)
+    if cached is not None:
+        return cached
+    type_hints = get_type_hints(dataclass_type)
+    _TYPE_HINTS_CACHE[dataclass_type] = type_hints
+    return type_hints
+
+
 def _create_aligned_byte_vector(builder: Any, data: bytes, alignment: int) -> int:
     if not _is_valid_alignment(alignment):
         raise ValueError(f"Bad alignment {alignment}")
@@ -194,6 +207,126 @@ def convert_program(val: Program) -> ProgramT:
     return _convert_dataclass(val)
 
 
+# The generated FlatBuffer Python modules import child tables/unions as modules
+# (for example, Program.ExecutionPlan becomes the ExecutionPlan module), but the
+# unpacking helpers later expect those globals to be the corresponding classes.
+# Rebind module globals like ExecutionPlan -> ExecutionPlan.ExecutionPlan so the
+# generated InitFromObj()/InitFromPackedBuf() code can instantiate nested types.
+def _patch_generated_module_aliases(module: ModuleType) -> None:
+    for name, maybe_module in vars(module).items():
+        if not isinstance(maybe_module, ModuleType):
+            continue
+        maybe_class = getattr(maybe_module, name, None)
+        if isinstance(maybe_class, type):
+            setattr(module, name, maybe_class)
+
+
+@lru_cache(maxsize=1)
+def _patch_generated_flatbuffer_aliases() -> None:
+    package_name = _generated_fb.__name__
+    for module_info in pkgutil.iter_modules(_generated_fb.__path__):
+        module = importlib.import_module(f"{package_name}.{module_info.name}")
+        _patch_generated_module_aliases(module)
+
+
+def _flatbuffer_dataclass_names(val: Any) -> tuple[str, Optional[str]]:
+    val_type_name = type(val).__name__
+    if val_type_name.endswith("T"):
+        return val_type_name, val_type_name[:-1]
+    return val_type_name, None
+
+
+def _matches_dataclass_union_type(
+    union_type: Any, val_type_name: str, val_dataclass_name: Optional[str]
+) -> bool:
+    if not is_dataclass(union_type):
+        return False
+    union_name = union_type.__name__
+    return union_name == val_type_name or (
+        val_dataclass_name is not None and union_name == val_dataclass_name
+    )
+
+
+def _matches_non_dataclass_union_type(union_type: Any, val: Any) -> bool:
+    if union_type is Any:
+        return True
+    if union_type is str and isinstance(val, (bytes, bytearray, memoryview)):
+        return True
+    union_origin = get_origin(union_type)
+    if union_origin is list and hasattr(val, "__iter__"):
+        return True
+    return isinstance(union_type, type) and isinstance(val, union_type)
+
+
+def _union_choice_from_value(union_types: tuple[Any, ...], val: Any) -> Any:
+    if val is None:
+        for union_type in union_types:
+            if union_type is type(None):
+                return union_type
+        return None
+
+    val_type_name, val_dataclass_name = _flatbuffer_dataclass_names(val)
+
+    for union_type in union_types:
+        if union_type is type(None):
+            continue
+        if _matches_dataclass_union_type(union_type, val_type_name, val_dataclass_name):
+            return union_type
+        if _matches_non_dataclass_union_type(union_type, val):
+            return union_type
+    return None
+
+
+def _convert_from_flatbuffer_value(val: Any, expected_type: Any) -> Any:
+    if val is None:
+        return None
+
+    origin = get_origin(expected_type)
+    if origin is list:
+        item_type = get_args(expected_type)[0]
+        return [_convert_from_flatbuffer_value(item, item_type) for item in val]
+
+    if origin is Union:
+        union_type = _union_choice_from_value(get_args(expected_type), val)
+        if union_type is None:
+            raise TypeError(
+                f"Could not match value type {type(val)} to {expected_type}"
+            )
+        if union_type is type(None):
+            return None
+        return _convert_from_flatbuffer_value(val, union_type)
+
+    if expected_type is bytes:
+        return _coerce_bytes(val)
+    if expected_type is str and isinstance(val, (bytes, bytearray, memoryview)):
+        return _coerce_bytes(val).decode("utf-8")
+    if is_dataclass(expected_type):
+        return _convert_from_flatbuffer_dataclass(val, expected_type)
+    if isinstance(expected_type, type) and issubclass(expected_type, enum.Enum):
+        if isinstance(val, expected_type):
+            return val
+        return expected_type(val)
+    if isinstance(expected_type, type):
+        return expected_type(val)
+    return val
+
+
+def _convert_from_flatbuffer_dataclass(val: Any, dataclass_type: type) -> Any:
+    result = {}
+    type_hints = _dataclass_type_hints(dataclass_type)
+    for src_name, dst_name in _dataclass_field_map(dataclass_type):
+        result[src_name] = _convert_from_flatbuffer_value(
+            getattr(val, dst_name), type_hints[src_name]
+        )
+    return dataclass_type(**result)
+
+
+def _flatbuffer_to_program(program_data: bytes) -> Program:
+    _patch_generated_flatbuffer_aliases()
+    program_t = ProgramT.InitFromPackedBuf(program_data)
+    return _convert_from_flatbuffer_dataclass(program_t, Program)
+
+
 @lru_cache(maxsize=1)
 def _get_schema_info(
     constant_tensor_alignment: Optional[int], delegate_alignment: Optional[int]
@@ -213,11 +346,7 @@ def _program_to_flatbuffer(
     constant_tensor_alignment: Optional[int] = None,
     delegate_alignment: Optional[int] = None,
 ) -> _FlatbufferResult:
-    """Converts a Program dataclass into binary flatbuffer data.
-
-    Unlike _program_json_to_flatbuffer(), this does not use JSON or invoke
-    flatc to build the binary.
-    """
+    """Converts a Program dataclass into binary flatbuffer data."""
     schema_info = _get_schema_info(constant_tensor_alignment, delegate_alignment)
     _set_pack_alignments(schema_info.tensor_alignment, schema_info.delegate_alignment)
     _install_fast_packers()
diff --git a/exir/_serialize/_program.py b/exir/_serialize/_program.py
index 4ab2a3572b4..230b50bf558 100644
--- a/exir/_serialize/_program.py
+++ b/exir/_serialize/_program.py
@@ -16,12 +16,12 @@
 from typing import ClassVar, Dict, List, Literal, Optional, Sequence, Tuple
 
 from executorch.exir._serialize._cord import Cord
-from executorch.exir._serialize._dataclass import _DataclassEncoder, _json_to_dataclass
-from executorch.exir._serialize._flatbuffer import (
-    _FlatbufferResult,
-    _program_flatbuffer_to_json,
+from executorch.exir._serialize._dataclass import _DataclassEncoder
+from executorch.exir._serialize._flatbuffer import _FlatbufferResult
+from executorch.exir._serialize._flatbuffer_program import (
+    _flatbuffer_to_program,
+    _program_to_flatbuffer,
 )
-from executorch.exir._serialize._flatbuffer_program import _program_to_flatbuffer
 from executorch.exir._serialize._named_data_store import (
     NamedDataStore,
     NamedDataStoreOutput,
@@ -86,12 +86,6 @@ def _program_to_json(program: Program) -> str:
     return json.dumps(program, cls=_DataclassEncoder)
 
 
-def _json_to_program(program_json: bytes) -> Program:
-    """Returns a Program deserialized from the given JSON string."""
-    # construct program class recursively from dict
-    return _json_to_dataclass(json.loads(program_json), cls=Program)
-
-
 def _insert_flatbuffer_header(
     flatbuffer_data: bytes, magic_regex: str, header_data: bytes
 ) -> bytes:
@@ -757,9 +751,7 @@ def deserialize_pte_binary(program_data: bytes) -> PTEFile:
         segment_base_offset = eh.segment_base_offset
 
     # Parse the flatbuffer data.
-    program: Program = _json_to_program(
-        _program_flatbuffer_to_json(program_data[:program_size])
-    )
+    program: Program = _flatbuffer_to_program(program_data[:program_size])
 
     if segment_base_offset != 0:
         # Move segment data back into the Program.
@@ -799,9 +791,7 @@ def _extract_delegate_payload(
         program_size = len(pte_data)
 
     # Parse the program flatbuffer
-    program: Program = _json_to_program(
-        _program_flatbuffer_to_json(pte_data[:program_size])
-    )
+    program: Program = _flatbuffer_to_program(pte_data[:program_size])
 
     # Search for the matching delegate
     match_count = 0
diff --git a/exir/_serialize/test/test_flatbuffer.py b/exir/_serialize/test/test_flatbuffer.py
index 801ddca112d..e623da55cd2 100644
--- a/exir/_serialize/test/test_flatbuffer.py
+++ b/exir/_serialize/test/test_flatbuffer.py
@@ -7,19 +7,13 @@
 # LICENSE file in the root directory of this source tree.
 
 import os
-import re
-import shutil
 import tempfile
 import unittest
 from typing import Dict, Optional, Sequence
 from unittest.mock import patch
 
 from executorch.exir._serialize import _flatbuffer
-from executorch.exir._serialize._flatbuffer import (
-    _program_json_to_flatbuffer,
-    _ResourceFiles,
-    _SchemaInfo,
-)
+from executorch.exir._serialize._flatbuffer import _ResourceFiles, _SchemaInfo
 
 
 def read_file(dir: str, filename: str) -> bytes:
@@ -277,60 +271,3 @@ def test_bad_delegate_alignment_fails(self) -> None:
                             out_dir,
                             delegate_alignment=bad_alignment,
                         )
-
-
-class TestProgramJsonToFlatbuffer(unittest.TestCase):
-    @patch.dict(os.environ, {_flatbuffer._SAVE_FLATC_ENV: "1"})
-    def test_save_json_on_failure(self) -> None:
-        err_msg: Optional[str] = None
-        try:
-            _program_json_to_flatbuffer("} some bad json {")
-            self.fail("Should have raised an exception")
-        except RuntimeError as err:
-            err_msg = err.args[0]
-
-        self.assertIsNotNone(err_msg)
-        match = re.search(r"Moved input files to '(.*?)'", err_msg)
-        self.assertTrue(match, msg=f"Unexpected error message: {err_msg}")
-        path = match.group(1)
-
-        files = frozenset(os.listdir(path))
-        # Delete the files otherwise they'll accumulate every time the
-        # test is run.
-        shutil.rmtree(path)
-        # Check for a couple of the files that should be there.
-        self.assertIn("data.json", files)
-        self.assertIn("program.fbs", files)
-
-    @patch.dict(os.environ, {_flatbuffer._SAVE_FLATC_ENV: "1"})
-    def test_unable_to_save_json_on_failure(self) -> None:
-        err_msg: Optional[str] = None
-        try:
-            with patch.object(
-                _flatbuffer.shutil,
-                "move",
-                side_effect=Exception("shutil.move mock failure"),
-            ):
-                _program_json_to_flatbuffer("} some bad json {")
-            self.fail("Should have raised an exception")
-        except RuntimeError as err:
-            err_msg = err.args[0]
-
-        self.assertIsNotNone(err_msg)
-        self.assertIn("Failed to save input files", err_msg)
-
-    @patch.dict(os.environ, {_flatbuffer._SAVE_FLATC_ENV: ""})
-    def test_no_save_json_on_failure(self) -> None:
-        err_msg: Optional[str] = None
-        try:
-            _program_json_to_flatbuffer("} some bad json {")
-            self.fail("Should have raised an exception")
-        except RuntimeError as err:
-            err_msg = err.args[0]
-
-        self.assertIsNotNone(err_msg)
-        self.assertIn(
-            f"Set {_flatbuffer._SAVE_FLATC_ENV}=1 to save input files", err_msg
-        )
-        self.assertNotIn("Moved input files", err_msg)
-        self.assertNotIn("Failed to save input files", err_msg)
diff --git a/exir/_serialize/test/test_flatbuffer_program.py b/exir/_serialize/test/test_flatbuffer_program.py
index 05e05d4e610..4910f9b431f 100644
--- a/exir/_serialize/test/test_flatbuffer_program.py
+++ b/exir/_serialize/test/test_flatbuffer_program.py
@@ -4,15 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import json
 import unittest
 
-from executorch.exir._serialize._flatbuffer import (
-    _program_flatbuffer_to_json,
-    _program_json_to_flatbuffer,
+from executorch.exir._serialize._flatbuffer_program import (
+    _flatbuffer_to_program,
+    _program_to_flatbuffer,
 )
-from executorch.exir._serialize._flatbuffer_program import _program_to_flatbuffer
-from executorch.exir._serialize._program import _json_to_program, _program_to_json
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.schema import (
     AllocationDetails,
@@ -157,50 +154,12 @@ def _make_program(self) -> Program:
             named_data=[],
         )
 
-    def _flatbuffer_to_dict(self, flatbuffer_data: bytes) -> dict:
-        return json.loads(_program_flatbuffer_to_json(flatbuffer_data))
-
-    def test_roundtrip_via_json(self) -> None:
+    def test_roundtrip_via_direct_python(self) -> None:
         program = self._make_program()
         result = _program_to_flatbuffer(
             program, constant_tensor_alignment=32, delegate_alignment=64
         )
-        self.assertGreater(len(result.data), 8)
-        self.assertEqual(result.data[4:6], b"ET")
-        self.assertGreaterEqual(result.max_alignment, 64)
-
-        program2 = _json_to_program(_program_flatbuffer_to_json(result.data))
-        self.assertEqual(program2, program)
-
-    def test_flatbuffer_paths_match(self) -> None:
-        program = self._make_program()
-        cases = [
-            (None, None),
-            (32, 64),
-        ]
-        for constant_tensor_alignment, delegate_alignment in cases:
-            with self.subTest(
-                constant_tensor_alignment=constant_tensor_alignment,
-                delegate_alignment=delegate_alignment,
-            ):
-                result = _program_to_flatbuffer(
-                    program,
-                    constant_tensor_alignment=constant_tensor_alignment,
-                    delegate_alignment=delegate_alignment,
-                )
-                result2 = _program_json_to_flatbuffer(
-                    _program_to_json(program),
-                    constant_tensor_alignment=constant_tensor_alignment,
-                    delegate_alignment=delegate_alignment,
-                )
-                direct_dict = self._flatbuffer_to_dict(result.data)
-                json_path_dict = self._flatbuffer_to_dict(result2.data)
-                self.assertEqual(
-                    direct_dict,
-                    json_path_dict,
-                    "Flatbuffer JSON differs between direct and JSON paths",
-                )
-                self.assertEqual(result.max_alignment, result2.max_alignment)
+        self.assertEqual(_flatbuffer_to_program(result.data), program)
 
     def test_bad_alignment_fails(self) -> None:
         program = Program(
diff --git a/exir/_serialize/test/test_program.py b/exir/_serialize/test/test_program.py
index 579934e9d38..0d0d833c952 100644
--- a/exir/_serialize/test/test_program.py
+++ b/exir/_serialize/test/test_program.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env fbpython
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -16,12 +17,11 @@
 
 from typing import Dict, List, Sequence
 
-from executorch.exir._serialize._flatbuffer import _program_flatbuffer_to_json
+from executorch.exir._serialize._flatbuffer_program import _flatbuffer_to_program
 from executorch.exir._serialize._named_data_store import NamedDataStoreOutput
 from executorch.exir._serialize._program import (
     _ExtendedHeader,
     _get_extended_header,
-    _json_to_program,
     _program_to_json,
     deserialize_pte_binary,
     PTEFile,
@@ -30,6 +30,8 @@
 from executorch.exir._serialize.data_serializer import DataEntry
 from executorch.exir._serialize.padding import aligned_size
 
+from executorch.exir.backend.compile_spec_schema import CompileSpec
+
 from executorch.exir.schema import (
     BackendDelegate,
     BackendDelegateDataReference,
@@ -39,7 +41,15 @@
     DataLocation,
     DataSegment,
     DeviceType,
+    Double,
+    EValue,
     ExecutionPlan,
+    Frame,
+    FrameList,
+    FreeCall,
+    Instruction,
+    JumpFalseCall,
+    MoveCall,
     NonConstBufferDevice,
     Program,
     SubsegmentOffsets,
@@ -197,7 +207,7 @@ def constant_segment_with_tensor_alignment(
         self.assertGreater(eh.segment_data_size, 0)
 
         # Peek inside the actual flatbuffer data to see the segments.
-        program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data))
+        program_with_segments = _flatbuffer_to_program(pte_data)
 
         # The constant tensor data should appear as the only segment.
         self.assertEqual(len(program_with_segments.segments), 1)
@@ -467,6 +477,68 @@ def test_round_trip_no_header_no_segments(self) -> None:
         self.assertEqual(deserialized.mutable_data, None)
         self.assertEqual(deserialized.named_data, None)
 
+    def test_deserialize_pte_binary_with_rich_flatbuffer_types(self) -> None:
+        program = get_test_program()
+        plan = program.execution_plan[0]
+        plan.values.append(EValue(Double(float("inf"))))
+        plan.delegates.append(
+            BackendDelegate(
+                id="delegate0",
+                processed=BackendDelegateDataReference(
+                    location=DataLocation.INLINE,
+                    index=0,
+                ),
+                compile_specs=[CompileSpec(key="k", value=b"v")],
+            )
+        )
+        plan.chains[0].instructions.extend(
+            [
+                Instruction(MoveCall(move_from=0, move_to=1)),
+                Instruction(
+                    JumpFalseCall(cond_value_index=1, destination_instruction=0)
+                ),
+                Instruction(FreeCall(value_index=0)),
+            ]
+        )
+        plan.chains[0].stacktrace = [
+            FrameList(
+                items=[
+                    Frame(
+                        filename="file.py",
+                        lineno=idx + 1,
+                        name="fn",
+                        context="ctx",
+                    )
+                ]
+            )
+            for idx, _ in enumerate(plan.chains[0].instructions)
+        ]
+        program.constant_buffer.append(Buffer(storage=b"abcd"))
+        program.backend_delegate_data.append(
+            BackendDelegateInlineData(data=b"delegate-data")
+        )
+
+        deserialized = deserialize_pte_binary(
+            bytes(serialize_pte_binary(PTEFile(program=program)))
+        )
+
+        self.assert_programs_equal(program, deserialized.program)
+        self.assertEqual(deserialized.mutable_data, None)
+        self.assertEqual(deserialized.named_data, None)
+        self.assertIsInstance(plan.values[-1].val, Double)
+        self.assertIsInstance(
+            deserialized.program.execution_plan[0].values[-1].val,
+            Double,
+        )
+        self.assertEqual(
+            deserialized.program.execution_plan[0].values[-1].val.double_val,
+            "inf",
+        )
+        self.assertEqual(
+            deserialized.program.execution_plan[0].delegates[0].compile_specs[0].value,
+            b"v",
+        )
+
     def test_round_trip_large_buffer_sizes(self) -> None:
         """Tests that when the non_const_buffer_sizes contains integers
         overflowing a signed/unsigned 32 bit integer, we can still serialize the
@@ -531,7 +603,7 @@ def test_round_trip_no_segments_and_no_header(self) -> None:
         self.assertIsNone(eh)
 
         # Peek inside the flatbuffer data to confirm that there are no segments.
-        program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data))
+        program_with_segments = _flatbuffer_to_program(pte_data)
         self.assertEqual(program_with_segments.segments, [])
 
         # Convert back.
@@ -597,7 +669,7 @@ def test_round_trip_with_segments(self) -> None:
         # this also implicity tests the case where we try parsing the entire
         # file with segment data following it, demonstrating that the extra data
         # doesn't upset the flatbuffer parsing path.
-        program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data))
+        program_with_segments = _flatbuffer_to_program(pte_data)
 
         # The delegate blobs we added to the program should appear as segments.
         # The one empty blob should have been ignored, hence the `- 1`.
@@ -694,7 +766,7 @@ def test_no_constants(self) -> None:
         self.assertEqual(program.segments, [])
 
         # Peek inside the actual flatbuffer data to see the segments.
-        flatbuffer_program = _json_to_program(_program_flatbuffer_to_json(pte_data))
+        flatbuffer_program = _flatbuffer_to_program(pte_data)
 
         # Constant buffer should be empty.
         self.assertEqual(len(flatbuffer_program.constant_buffer), 0)
@@ -814,7 +886,7 @@ def test_constant_delegate_and_named_data_segments(self) -> None:
         self.assertGreater(eh.segment_data_size, 0)
 
         # Peek inside the actual flatbuffer data to see the segments.
-        program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data))
+        program_with_segments = _flatbuffer_to_program(pte_data)
 
         # Segment table should contain a constant segment, the delegate blobs
         # and a named data segment.
@@ -1017,7 +1089,7 @@ def test_named_data_segments(self) -> None:
         self.assertGreater(eh.segment_data_size, 0)
 
         # Peek inside the actual flatbuffer data to see the named data segments.
-        program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data))
+        program_with_segments = _flatbuffer_to_program(pte_data)
         # pyre-ignore Incompatible parameter type [6]
         self.assertEqual(len(program_with_segments.named_data), len(pte_named_data))
 

From daa7ad2d28e60a51a59b1d082c9eaf2ddaf877cb Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Wed, 27 May 2026 13:29:16 -0700
Subject: [PATCH 043/103] Update golden artifact path for android_test_setup.sh
 (#19819)

---
 extension/android/executorch_android/android_test_setup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/android/executorch_android/android_test_setup.sh b/extension/android/executorch_android/android_test_setup.sh
index 350c60b2e25..9ed1ae63da2 100644
--- a/extension/android/executorch_android/android_test_setup.sh
+++ b/extension/android/executorch_android/android_test_setup.sh
@@ -29,7 +29,7 @@ prepare_tinyllama() {
 }
 
 prepare_golden() {
-  local url="https://gha-artifacts.s3.amazonaws.com/pytorch/executorch/test-backend-artifacts/golden-artifacts-xnnpack/golden_artifacts_26022500.zip"
+  local url="https://gha-artifacts.s3.amazonaws.com/pytorch/executorch/test-backend-artifacts/golden-artifacts-xnnpack/golden_artifacts_26052718.zip"
   curl -sL -o /tmp/golden.zip "$url"
   unzip -o /tmp/golden.zip -d /tmp/golden/
   for model in mobilenet_v2 vit_b_16; do

From b1446cc87162b6803a0b3d1ec0e1f93af5065224 Mon Sep 17 00:00:00 2001
From: Per Held <per.held@arm.com>
Date: Thu, 21 May 2026 16:12:42 +0200
Subject: [PATCH 044/103] Arm backend: Simplify fake RESIZE validation

Avoid revalidating RESIZE output shape against dimensions computed by
the same formula. Validate parameters once, compute the fake output
shape, and directly validate the computed output dimensions.

Signed-off-by: Per Held <per.held@arm.com>
Change-Id: I97bb91f9fc440c980782955692056196038d5de0
---
 .../misc/tosa_dialect/test_tosa_resize.py     | 24 +++++++++++++++++++
 backends/arm/tosa/dialect/ops/resize.py       |  5 +++-
 backends/arm/tosa/resize_utils.py             | 19 +++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py b/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py
index 0a90de5c0c0..eddb69a8caf 100644
--- a/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py
+++ b/backends/arm/test/misc/tosa_dialect/test_tosa_resize.py
@@ -72,6 +72,30 @@ def test_resize_rejects_scale_numerator_over_tosa_limit():
             )
 
 
+@pytest.mark.parametrize(
+    "offset,border",
+    (
+        ([1, 0], [-1, 0]),
+        ([0, 1], [0, -1]),
+    ),
+)
+def test_resize_rejects_non_positive_output_dimensions(offset, border):
+    with TosaLoweringContext(
+        TosaSpecification.create_from_string("TOSA-1.0+INT")
+    ), FakeTensorMode() as mode:
+        with pytest.raises(
+            TosaValueError,
+            match="RESIZE output dimensions must be positive",
+        ):
+            exir_ops.backend.tosa.RESIZE.default(
+                mode.from_tensor(torch.randint(0, 10, (1, 1, 1, 1), dtype=torch.int8)),
+                [1, 1, 1, 1],
+                offset,
+                border,
+                resize_mode="nearest",
+            )
+
+
 def test_resize_accepts_symbolic_scale_and_border_values():
     shape_env = ShapeEnv()
     scale_y_n = _make_symint(shape_env, "scale_y_n", hint=2, min=1, max=8)
diff --git a/backends/arm/tosa/dialect/ops/resize.py b/backends/arm/tosa/dialect/ops/resize.py
index 8a2d4c5e60a..0d06253ccd8 100644
--- a/backends/arm/tosa/dialect/ops/resize.py
+++ b/backends/arm/tosa/dialect/ops/resize.py
@@ -10,6 +10,7 @@
 from executorch.backends.arm.tosa.dialect.ops_registration import register_fake_tosa_op
 from executorch.backends.arm.tosa.resize_utils import (
     calculate_tosa_resize_output_hw,
+    get_tosa_resize_output_hw_validation_error,
     get_tosa_resize_validation_error,
 )
 
@@ -92,7 +93,9 @@ def RESIZE(
     H, W = input_shape[1], input_shape[2]
     _validate_resize_parameters((H, W), None, scale, offset, border, tosa_spec)
     output_hw = calculate_tosa_resize_output_hw((H, W), scale, offset, border)
-    _validate_resize_parameters((H, W), output_hw, scale, offset, border, tosa_spec)
+    validation_error = get_tosa_resize_output_hw_validation_error(output_hw)
+    if validation_error is not None:
+        raise TosaValueError(validation_error, op="RESIZE")
     if output_hw is None:
         scale_y_n, scale_y_d, scale_x_n, scale_x_d = scale
         offset_y, offset_x = offset
diff --git a/backends/arm/tosa/resize_utils.py b/backends/arm/tosa/resize_utils.py
index 6c716bfa59c..23be6ff42fc 100644
--- a/backends/arm/tosa/resize_utils.py
+++ b/backends/arm/tosa/resize_utils.py
@@ -67,6 +67,25 @@ def _validate_dimensions(
     return None
 
 
+def get_tosa_resize_output_hw_validation_error(
+    output_hw: Sequence[int | torch.SymInt] | None,
+) -> str | None:
+    if output_hw is None:
+        return None
+
+    output_hw_ints = _as_concrete_ints(output_hw)
+    if output_hw_ints is None:
+        return None
+
+    invalid_dimension = next(
+        (dimension for dimension in output_hw_ints if dimension <= 0), None
+    )
+    if invalid_dimension is not None:
+        return f"RESIZE output dimensions must be positive; got {invalid_dimension}"
+
+    return _validate_dimensions((), output_hw)
+
+
 def _validate_scale(
     scale: Sequence[int | torch.SymInt],
     tosa_spec: TosaSpecification,

From 9d1853129d7988570dd62585e65f27efebad8b68 Mon Sep 17 00:00:00 2001
From: Christoffer Johansson Lundqvist
 <119742508+Christoffer-JL@users.noreply.github.com>
Date: Wed, 27 May 2026 23:23:54 +0200
Subject: [PATCH 045/103] Arm backend: Fix bmm quantization bug (#19798)

bmm nodes are now forwarded to ArmPass in stead of ExportPass.

This fixes an issue where _call_quantized_bmm_without_fake_kernel()
does not get called, leading to dtype mismatch error


cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Christoffer J.L <christoffer.johanssonlundqvist@arm.com>
---
 backends/arm/_passes/replace_scalar_with_tensor_pass.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/arm/_passes/replace_scalar_with_tensor_pass.py b/backends/arm/_passes/replace_scalar_with_tensor_pass.py
index edd5fc97213..53f0e517a7f 100644
--- a/backends/arm/_passes/replace_scalar_with_tensor_pass.py
+++ b/backends/arm/_passes/replace_scalar_with_tensor_pass.py
@@ -126,4 +126,4 @@ def call_operator(self, op, args, kwargs, meta):
             return super().call_operator(op, args, kwargs, meta)
         else:
             # Do not handle; forward unchanged.
-            return ExportPass.call_operator(self, op, args, kwargs, meta)
+            return ArmPass.call_operator(self, op, args, kwargs, meta)

From 5393742be88b6e8cf863c5e98cf31543c3d512ac Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm1479.ncg0.facebook.com>
Date: Wed, 27 May 2026 09:25:39 -0700
Subject: [PATCH 046/103] [executorch][runtime] Fix -Werror failures under
 Apple toolchain

Two `-Werror` failures surfaced when building `xplat/executorch/runtime` under the iOS toolchain (`-Werror -Wshadow -Wswitch-default`):

1. `EXECUTORCH_SCOPE_PROF` in `runtime/platform/profiler.h` hardcodes the local variable name `profiler`. When the macro is invoked at function scope and again inside a nested block in the same function (for example `Program::load` invokes it at the top of the function and then again inside `check_header` / `verify_internal_consistency` blocks), `-Wshadow` fires and the build fails. Fixed by token-pasting `__LINE__` so each invocation gets a unique identifier. No caller changes required.

2. `to_string(Error)` in `runtime/core/error.h` is a switch statement covering every enum value with a trailing `return "Error::Unknown"` fallback after the switch. Apple's toolchain promotes `-Wswitch-default` to an error and rejects switches that lack an explicit `default:` arm. Folded the trailing fallback into a `default:` arm inside the switch.

Both issues only surfaced under the Apple toolchain; fbcode toolchain does not promote these warnings to errors, so devserver / Linux builds continued to pass.

Differential Revision: [D106523959](https://our.internmc.facebook.com/intern/diff/D106523959/)


ghstack-source-id: 386608989
Pull-Request: https://github.com/pytorch/executorch/pull/19811
---
 runtime/core/error.h        | 3 ++-
 runtime/platform/profiler.h | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/runtime/core/error.h b/runtime/core/error.h
index 80c2ef645d4..b923604ca89 100644
--- a/runtime/core/error.h
+++ b/runtime/core/error.h
@@ -151,8 +151,9 @@ constexpr const char* to_string(const Error error) {
       return "Error::RegistrationExceedingMaxKernels";
     case Error::RegistrationAlreadyRegistered:
       return "Error::RegistrationAlreadyRegistered";
+    default:
+      return "Error::Unknown";
   }
-  return "Error::Unknown";
 }
 
 } // namespace runtime
diff --git a/runtime/platform/profiler.h b/runtime/platform/profiler.h
index d6362781394..cb011bd0ef9 100644
--- a/runtime/platform/profiler.h
+++ b/runtime/platform/profiler.h
@@ -227,8 +227,12 @@ using ::executorch::runtime::track_allocator;
 #define EXECUTORCH_END_PROF(token_id) \
   ::executorch::runtime::end_profiling(token_id);
 
-#define EXECUTORCH_SCOPE_PROF(name) \
-  ::executorch::runtime::ExecutorchProfiler profiler(name);
+#define EXECUTORCH_SCOPE_PROF_CONCAT_IMPL(a, b) a##b
+#define EXECUTORCH_SCOPE_PROF_CONCAT(a, b) \
+  EXECUTORCH_SCOPE_PROF_CONCAT_IMPL(a, b)
+#define EXECUTORCH_SCOPE_PROF(name)                                       \
+  ::executorch::runtime::ExecutorchProfiler EXECUTORCH_SCOPE_PROF_CONCAT( \
+      et_profiler_, __LINE__)(name);
 
 #define EXECUTORCH_PROFILE_INSTRUCTION_SCOPE(chain_idx, instruction_idx) \
   ::executorch::runtime::ExecutorchProfilerInstructionScope              \

From 5c0aa4f8cf6b3a338ce8499015dd533be205ab0b Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm1479.ncg0.facebook.com>
Date: Wed, 27 May 2026 09:25:40 -0700
Subject: [PATCH 047/103] [executorch][coreml] Fix CoreML SDK proto header
 includes

Pull Request resolved: https://github.com/pytorch/executorch/pull/19789

CoreML SDK builds include generated CoreMLTools proto headers through short `format/*.pb.h` imports. iOS Buck compilation could not resolve those generated headers because they were not exposed under a flat include namespace. This makes the generated proto headers available at the include paths used by the SDK sources.
ghstack-source-id: 386608986
@exported-using-ghexport

Differential Revision: [D106430265](https://our.internmc.facebook.com/intern/diff/D106430265/)
---
 backends/apple/coreml/BUCK | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backends/apple/coreml/BUCK b/backends/apple/coreml/BUCK
index 792adcf4d70..688ca64b990 100644
--- a/backends/apple/coreml/BUCK
+++ b/backends/apple/coreml/BUCK
@@ -171,6 +171,7 @@ runtime.cxx_library(
         "format/{}.pb.h".format(name): "fbsource//third-party/pypi/coremltools:exported-cpp-protoc[{}.pb.h]".format(name)
         for name in _PROTOS
     },
+    header_namespace = "",
     compiler_flags = [
         "-Wno-global-constructors",
     ],

From 0ed8dcf8733592a428877cd3b31b3532d266f361 Mon Sep 17 00:00:00 2001
From: Sicheng Stephen Jia <ssjia@meta.com>
Date: Wed, 27 May 2026 18:12:56 -0400
Subject: [PATCH 048/103] Fix etsize workflow build failures under
 -fno-exceptions

Differential Revision: D106539321

Pull Request resolved: https://github.com/pytorch/executorch/pull/19815
---
 kernels/portable/targets.bzl | 22 +++++++++++++---------
 test/targets.bzl             |  4 +++-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/kernels/portable/targets.bzl b/kernels/portable/targets.bzl
index 2c6e0b5c35f..b80ce347768 100644
--- a/kernels/portable/targets.bzl
+++ b/kernels/portable/targets.bzl
@@ -66,15 +66,19 @@ def define_common_targets():
         "visibility": ["PUBLIC"],
     }
 
-    executorch_generated_lib(
-        name = "generated_lib",
-        deps = [
-            ":executorch_aten_ops",
-            ":executorch_custom_ops",
-        ],
-        kernel_deps = ["//executorch/kernels/portable:operators"],
-        **generated_lib_common_args
-    )
+    for support_exceptions in [True, False]:
+        exception_suffix = "_no_exceptions" if not support_exceptions else ""
+
+        executorch_generated_lib(
+            name = "generated_lib" + exception_suffix,
+            deps = [
+                ":executorch_aten_ops",
+                ":executorch_custom_ops",
+            ],
+            kernel_deps = ["//executorch/kernels/portable:operators"],
+            support_exceptions = support_exceptions,
+            **generated_lib_common_args
+        )
 
     if True in get_aten_mode_options():
         executorch_generated_lib(
diff --git a/test/targets.bzl b/test/targets.bzl
index 023a1d48960..0047d5563fc 100644
--- a/test/targets.bzl
+++ b/test/targets.bzl
@@ -36,7 +36,9 @@ def define_common_targets():
         name = "size_test_all_ops",
         srcs = SIZE_TEST_SOURCES,
         deps = SIZE_TEST_DEPS + [
-            "//executorch/kernels/portable:generated_lib",
+            # size_test_all_ops is built with -fno-exceptions in the size CI;
+            # use the _no_exceptions variant whose codegen omits try/catch.
+            "//executorch/kernels/portable:generated_lib_no_exceptions",
             "//executorch/runtime/executor/test:test_backend_compiler_lib",
         ],
         define_static_target = True,

From d366f43906057614f4d88003cf5c3a8ea1b3dd3c Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Wed, 27 May 2026 15:22:39 -0700
Subject: [PATCH 049/103] Convert SGD and TrainingModule from Java to Kotlin
 (#19822)

Differential Revision: D106549057

Pull Request resolved: https://github.com/pytorch/executorch/pull/19822
---
 extension/android/BUCK                        |   6 +-
 .../org/pytorch/executorch/training/SGD.java  | 103 -------------
 .../org/pytorch/executorch/training/SGD.kt    | 100 ++++++++++++
 .../executorch/training/TrainingModule.java   | 140 -----------------
 .../executorch/training/TrainingModule.kt     | 144 ++++++++++++++++++
 5 files changed, 247 insertions(+), 246 deletions(-)
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.kt
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt

diff --git a/extension/android/BUCK b/extension/android/BUCK
index 1f1b611ff01..170c826f40f 100644
--- a/extension/android/BUCK
+++ b/extension/android/BUCK
@@ -33,11 +33,11 @@ non_fbcode_target(_kind = fb_android_library,
     name = "executorch_training",
     warnings_as_errors = False,
     srcs = [
-        "executorch_android/src/main/java/org/pytorch/executorch/training/SGD.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/training/SGD.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt",
     ],
     autoglob = False,
-    language = "JAVA",
+    language = "KOTLIN",
     deps = [
         ":executorch",
         "//fbandroid/java/com/facebook/jni:jni",
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.java
deleted file mode 100644
index 58c7704b83e..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch.training;
-
-import com.facebook.jni.HybridData;
-import com.facebook.jni.annotations.DoNotStrip;
-import com.facebook.soloader.nativeloader.NativeLoader;
-import com.facebook.soloader.nativeloader.SystemDelegate;
-import java.util.Map;
-import org.pytorch.executorch.Tensor;
-import org.pytorch.executorch.annotations.Experimental;
-
-/**
- * Java wrapper for ExecuTorch SGD Optimizer.
- *
- * <p>Warning: These APIs are experimental and subject to change without notice
- */
-@Experimental
-public class SGD {
-
-  static {
-    if (!NativeLoader.isInitialized()) {
-      NativeLoader.init(new SystemDelegate());
-    }
-    // Loads libexecutorch.so from jniLibs
-    NativeLoader.loadLibrary("executorch");
-  }
-
-  private final HybridData mHybridData;
-
-  @DoNotStrip
-  private static native HybridData initHybrid(
-      Map<String, Tensor> namedParameters,
-      double learningRate,
-      double momentum,
-      double dampening,
-      double weightDecay,
-      boolean nesterov);
-
-  private SGD(
-      Map<String, Tensor> namedParameters,
-      double learningRate,
-      double momentum,
-      double dampening,
-      double weightDecay,
-      boolean nesterov) {
-    mHybridData =
-        initHybrid(namedParameters, learningRate, momentum, dampening, weightDecay, nesterov);
-  }
-
-  /**
-   * Creates a new SGD optimizer with the specified parameters and options.
-   *
-   * @param namedParameters Map of parameter names to tensors to be optimized
-   * @param learningRate The learning rate for the optimizer
-   * @param momentum The momentum value
-   * @param dampening The dampening value
-   * @param weightDecay The weight decay value
-   * @param nesterov Whether to use Nesterov momentum
-   * @return new {@link SGD} object
-   */
-  public static SGD create(
-      Map<String, Tensor> namedParameters,
-      double learningRate,
-      double momentum,
-      double dampening,
-      double weightDecay,
-      boolean nesterov) {
-    return new SGD(namedParameters, learningRate, momentum, dampening, weightDecay, nesterov);
-  }
-
-  /**
-   * Creates a new SGD optimizer with default options.
-   *
-   * @param namedParameters Map of parameter names to tensors to be optimized
-   * @param learningRate The learning rate for the optimizer
-   * @return new {@link SGD} object
-   */
-  public static SGD create(Map<String, Tensor> namedParameters, double learningRate) {
-    return create(namedParameters, learningRate, 0.0, 0.0, 0.0, false);
-  }
-
-  /**
-   * Performs a single optimization step using the provided gradients.
-   *
-   * @param namedGradients Map of parameter names to gradient tensors
-   */
-  public void step(Map<String, Tensor> namedGradients) {
-    if (!mHybridData.isValid()) {
-      throw new IllegalStateException("SGD optimizer has been destroyed");
-    }
-    stepNative(namedGradients);
-  }
-
-  @DoNotStrip
-  private native void stepNative(Map<String, Tensor> namedGradients);
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.kt
new file mode 100644
index 00000000000..e4aa5373498
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/SGD.kt
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.training
+
+import com.facebook.jni.HybridData
+import com.facebook.jni.annotations.DoNotStrip
+import com.facebook.soloader.nativeloader.NativeLoader
+import com.facebook.soloader.nativeloader.SystemDelegate
+import org.pytorch.executorch.Tensor
+import org.pytorch.executorch.annotations.Experimental
+
+/**
+ * Kotlin wrapper for ExecuTorch SGD Optimizer.
+ *
+ * Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+class SGD
+private constructor(
+    namedParameters: Map<String, Tensor>,
+    learningRate: Double,
+    momentum: Double,
+    dampening: Double,
+    weightDecay: Double,
+    nesterov: Boolean,
+) {
+
+  private val mHybridData: HybridData =
+      initHybrid(namedParameters, learningRate, momentum, dampening, weightDecay, nesterov)
+
+  /**
+   * Performs a single optimization step using the provided gradients.
+   *
+   * @param namedGradients Map of parameter names to gradient tensors
+   */
+  fun step(namedGradients: Map<String, Tensor>) {
+    check(mHybridData.isValid) { "SGD optimizer has been destroyed" }
+    stepNative(namedGradients)
+  }
+
+  @DoNotStrip private external fun stepNative(namedGradients: Map<String, Tensor>)
+
+  companion object {
+    init {
+      if (!NativeLoader.isInitialized()) {
+        NativeLoader.init(SystemDelegate())
+      }
+      NativeLoader.loadLibrary("executorch")
+    }
+
+    @DoNotStrip
+    @JvmStatic
+    private external fun initHybrid(
+        namedParameters: Map<String, Tensor>,
+        learningRate: Double,
+        momentum: Double,
+        dampening: Double,
+        weightDecay: Double,
+        nesterov: Boolean,
+    ): HybridData
+
+    /**
+     * Creates a new SGD optimizer with the specified parameters and options.
+     *
+     * @param namedParameters Map of parameter names to tensors to be optimized
+     * @param learningRate The learning rate for the optimizer
+     * @param momentum The momentum value
+     * @param dampening The dampening value
+     * @param weightDecay The weight decay value
+     * @param nesterov Whether to use Nesterov momentum
+     * @return new [SGD] object
+     */
+    @JvmStatic
+    fun create(
+        namedParameters: Map<String, Tensor>,
+        learningRate: Double,
+        momentum: Double,
+        dampening: Double,
+        weightDecay: Double,
+        nesterov: Boolean,
+    ): SGD = SGD(namedParameters, learningRate, momentum, dampening, weightDecay, nesterov)
+
+    /**
+     * Creates a new SGD optimizer with default options.
+     *
+     * @param namedParameters Map of parameter names to tensors to be optimized
+     * @param learningRate The learning rate for the optimizer
+     * @return new [SGD] object
+     */
+    @JvmStatic
+    fun create(namedParameters: Map<String, Tensor>, learningRate: Double): SGD =
+        create(namedParameters, learningRate, 0.0, 0.0, 0.0, false)
+  }
+}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.java
deleted file mode 100644
index dd2d5a37de2..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch.training;
-
-import com.facebook.jni.HybridData;
-import com.facebook.jni.annotations.DoNotStrip;
-import com.facebook.soloader.nativeloader.NativeLoader;
-import com.facebook.soloader.nativeloader.SystemDelegate;
-import java.io.Closeable;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-import org.pytorch.executorch.EValue;
-import org.pytorch.executorch.ExecuTorchRuntime;
-import org.pytorch.executorch.Tensor;
-import org.pytorch.executorch.annotations.Experimental;
-
-/**
- * Java wrapper for ExecuTorch TrainingModule.
- *
- * <p>Warning: These APIs are experimental and subject to change without notice
- */
-@Experimental
-public class TrainingModule implements Closeable {
-
-  static {
-    if (!NativeLoader.isInitialized()) {
-      NativeLoader.init(new SystemDelegate());
-    }
-    // Loads libexecutorch.so from jniLibs
-    NativeLoader.loadLibrary("executorch");
-  }
-
-  private final HybridData mHybridData;
-  private final ReentrantLock mLock = new ReentrantLock();
-  private volatile boolean mDestroyed = false;
-
-  @DoNotStrip
-  private static native HybridData initHybrid(String moduleAbsolutePath, String dataAbsolutePath);
-
-  private TrainingModule(String moduleAbsolutePath, String dataAbsolutePath) {
-    mHybridData = initHybrid(moduleAbsolutePath, dataAbsolutePath);
-  }
-
-  private void checkNotDestroyed() {
-    if (mDestroyed) throw new IllegalStateException("TrainingModule has been destroyed");
-  }
-
-  /**
-   * Loads a serialized ExecuTorch Training Module from the specified path on the disk.
-   *
-   * @param modelPath path to file that contains the serialized ExecuTorch module.
-   * @param dataPath path to file that contains the ExecuTorch module external weights.
-   * @return new {@link TrainingModule} object which owns the model module.
-   */
-  public static TrainingModule load(final String modelPath, final String dataPath) {
-    ExecuTorchRuntime.validateFilePath(modelPath, "model path");
-    ExecuTorchRuntime.validateFilePath(dataPath, "data path");
-    return new TrainingModule(modelPath, dataPath);
-  }
-
-  /**
-   * Loads a serialized ExecuTorch training module from the specified path on the disk.
-   *
-   * @param modelPath path to file that contains the serialized ExecuTorch module. This PTE does not
-   *     rely on external weights.
-   * @return new {@link TrainingModule} object which owns the model module.
-   */
-  public static TrainingModule load(final String modelPath) {
-    ExecuTorchRuntime.validateFilePath(modelPath, "model path");
-    return new TrainingModule(modelPath, "");
-  }
-
-  /**
-   * Runs the specified joint-graph method of this module with the specified arguments.
-   *
-   * @param methodName name of the ExecuTorch method to run.
-   * @param inputs arguments that will be passed to ExecuTorch method.
-   * @return return value(s) from the method.
-   */
-  public EValue[] executeForwardBackward(String methodName, EValue... inputs) {
-    mLock.lock();
-    try {
-      checkNotDestroyed();
-      return executeForwardBackwardNative(methodName, inputs);
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native EValue[] executeForwardBackwardNative(String methodName, EValue... inputs);
-
-  public Map<String, Tensor> namedParameters(String methodName) {
-    mLock.lock();
-    try {
-      checkNotDestroyed();
-      return namedParametersNative(methodName);
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native Map<String, Tensor> namedParametersNative(String methodName);
-
-  public Map<String, Tensor> namedGradients(String methodName) {
-    mLock.lock();
-    try {
-      checkNotDestroyed();
-      return namedGradientsNative(methodName);
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native Map<String, Tensor> namedGradientsNative(String methodName);
-
-  @Override
-  public void close() {
-    if (mLock.tryLock()) {
-      try {
-        if (!mDestroyed) {
-          mDestroyed = true;
-          mHybridData.resetNative();
-        }
-      } finally {
-        mLock.unlock();
-      }
-    } else {
-      throw new IllegalStateException("Cannot close module while method is executing");
-    }
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt
new file mode 100644
index 00000000000..4caa4635fdd
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch.training
+
+import com.facebook.jni.HybridData
+import com.facebook.jni.annotations.DoNotStrip
+import com.facebook.soloader.nativeloader.NativeLoader
+import com.facebook.soloader.nativeloader.SystemDelegate
+import java.io.Closeable
+import java.util.concurrent.locks.ReentrantLock
+import org.pytorch.executorch.EValue
+import org.pytorch.executorch.ExecuTorchRuntime
+import org.pytorch.executorch.Tensor
+import org.pytorch.executorch.annotations.Experimental
+
+/**
+ * Kotlin wrapper for ExecuTorch TrainingModule.
+ *
+ * Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+class TrainingModule
+private constructor(moduleAbsolutePath: String, dataAbsolutePath: String) : Closeable {
+
+  private val mHybridData: HybridData = initHybrid(moduleAbsolutePath, dataAbsolutePath)
+  private val mLock = ReentrantLock()
+
+  @Volatile private var mDestroyed = false
+
+  private fun checkNotDestroyed() {
+    check(!mDestroyed) { "TrainingModule has been destroyed" }
+  }
+
+  /**
+   * Runs the specified joint-graph method of this module with the specified arguments.
+   *
+   * @param methodName name of the ExecuTorch method to run.
+   * @param inputs arguments that will be passed to ExecuTorch method.
+   * @return return value(s) from the method.
+   */
+  fun executeForwardBackward(methodName: String, vararg inputs: EValue): Array<EValue> {
+    mLock.lock()
+    try {
+      checkNotDestroyed()
+      return executeForwardBackwardNative(methodName, *inputs)
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip
+  private external fun executeForwardBackwardNative(
+      methodName: String,
+      vararg inputs: EValue,
+  ): Array<EValue>
+
+  fun namedParameters(methodName: String): Map<String, Tensor> {
+    mLock.lock()
+    try {
+      checkNotDestroyed()
+      return namedParametersNative(methodName)
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun namedParametersNative(methodName: String): Map<String, Tensor>
+
+  fun namedGradients(methodName: String): Map<String, Tensor> {
+    mLock.lock()
+    try {
+      checkNotDestroyed()
+      return namedGradientsNative(methodName)
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun namedGradientsNative(methodName: String): Map<String, Tensor>
+
+  override fun close() {
+    if (mLock.tryLock()) {
+      try {
+        if (!mDestroyed) {
+          mDestroyed = true
+          mHybridData.resetNative()
+        }
+      } finally {
+        mLock.unlock()
+      }
+    } else {
+      throw IllegalStateException("Cannot close module while method is executing")
+    }
+  }
+
+  companion object {
+    init {
+      if (!NativeLoader.isInitialized()) {
+        NativeLoader.init(SystemDelegate())
+      }
+      NativeLoader.loadLibrary("executorch")
+    }
+
+    @DoNotStrip
+    @JvmStatic
+    private external fun initHybrid(
+        moduleAbsolutePath: String,
+        dataAbsolutePath: String,
+    ): HybridData
+
+    /**
+     * Loads a serialized ExecuTorch Training Module from the specified path on the disk.
+     *
+     * @param modelPath path to file that contains the serialized ExecuTorch module.
+     * @param dataPath path to file that contains the ExecuTorch module external weights.
+     * @return new [TrainingModule] object which owns the model module.
+     */
+    @JvmStatic
+    fun load(modelPath: String, dataPath: String): TrainingModule {
+      ExecuTorchRuntime.validateFilePath(modelPath, "model path")
+      ExecuTorchRuntime.validateFilePath(dataPath, "data path")
+      return TrainingModule(modelPath, dataPath)
+    }
+
+    /**
+     * Loads a serialized ExecuTorch training module from the specified path on the disk.
+     *
+     * @param modelPath path to file that contains the serialized ExecuTorch module. This PTE does
+     *   not rely on external weights.
+     * @return new [TrainingModule] object which owns the model module.
+     */
+    @JvmStatic
+    fun load(modelPath: String): TrainingModule {
+      ExecuTorchRuntime.validateFilePath(modelPath, "model path")
+      return TrainingModule(modelPath, "")
+    }
+  }
+}

From 53fa4dd54b437b3e2e9f46926280df1d55509b33 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Wed, 27 May 2026 16:47:49 -0700
Subject: [PATCH 050/103] Fix `TrainingModule` class declaration formatting

Differential Revision: D106574405

Pull Request resolved: https://github.com/pytorch/executorch/pull/19830
---
 .../java/org/pytorch/executorch/training/TrainingModule.kt    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt
index 4caa4635fdd..5556b0c16c4 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/training/TrainingModule.kt
@@ -25,8 +25,8 @@ import org.pytorch.executorch.annotations.Experimental
  * Warning: These APIs are experimental and subject to change without notice
  */
 @Experimental
-class TrainingModule
-private constructor(moduleAbsolutePath: String, dataAbsolutePath: String) : Closeable {
+class TrainingModule private constructor(moduleAbsolutePath: String, dataAbsolutePath: String) :
+    Closeable {
 
   private val mHybridData: HybridData = initHybrid(moduleAbsolutePath, dataAbsolutePath)
   private val mLock = ReentrantLock()

From d8d706abf3a6397f61885ef74ae5c06bdd0cca7a Mon Sep 17 00:00:00 2001
From: YIWENX14 <164585414+YIWENX14@users.noreply.github.com>
Date: Wed, 27 May 2026 18:35:38 -0700
Subject: [PATCH 051/103] Preserve model dtype when swapping weightless RMSNorm
 to RMSNormCoreML (#19786)

Differential Revision: D106400668

Pull Request resolved: https://github.com/pytorch/executorch/pull/19786
---
 examples/models/llama/norm.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/models/llama/norm.py b/examples/models/llama/norm.py
index ec92b353eb4..0b6ed7f5b01 100644
--- a/examples/models/llama/norm.py
+++ b/examples/models/llama/norm.py
@@ -154,6 +154,14 @@ def replace_rms_norm_for_coreml_(model: torch.nn.Module) -> torch.nn.Module:
         # Preserve trained scale (no-op for ScalelessRMSNorm).
         if getattr(mod, "weight", None) is not None:
             new.weight = mod.weight
+        else:
+            # Source was weightless (e.g. ScalelessRMSNorm). The freshly-allocated
+            # `nn.Parameter(torch.ones(dim))` inside RMSNormCoreML defaults to fp32,
+            # which causes an fp32 leak in fp16 export. Match the model's existing
+            # parameter dtype/device.
+            ref = next((p for p in model.parameters() if p.is_floating_point()), None)
+            if ref is not None:
+                new.to(dtype=ref.dtype, device=ref.device)
         # Locate parent module via the dotted name and rebind the attribute.
         if "." in name:
             parent_name, attr = name.rsplit(".", 1)

From 7fd21f2b5877e0e14c73283827472b37a8f5148e Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Wed, 27 May 2026 21:03:13 -0700
Subject: [PATCH 052/103] Convert Module from Java to Kotlin (#19821)

Differential Revision: D106415170

Pull Request resolved: https://github.com/pytorch/executorch/pull/19821
---
 extension/android/BUCK                        |   2 +-
 .../java/org/pytorch/executorch/Module.java   | 315 ------------------
 .../java/org/pytorch/executorch/Module.kt     | 267 +++++++++++++++
 3 files changed, 268 insertions(+), 316 deletions(-)
 delete mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java
 create mode 100644 extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.kt

diff --git a/extension/android/BUCK b/extension/android/BUCK
index 170c826f40f..92cb7c8c040 100644
--- a/extension/android/BUCK
+++ b/extension/android/BUCK
@@ -13,7 +13,7 @@ non_fbcode_target(_kind = fb_android_library,
         "executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.kt",
-        "executorch_android/src/main/java/org/pytorch/executorch/Module.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/Module.kt",
         "executorch_android/src/main/java/org/pytorch/executorch/Tensor.java",
         "executorch_android/src/main/java/org/pytorch/executorch/annotations/Experimental.kt",
     ],
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java
deleted file mode 100644
index 94a3ed8d160..00000000000
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.java
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-package org.pytorch.executorch;
-
-import com.facebook.jni.HybridData;
-import com.facebook.jni.annotations.DoNotStrip;
-import com.facebook.soloader.nativeloader.NativeLoader;
-import com.facebook.soloader.nativeloader.SystemDelegate;
-import java.io.Closeable;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantLock;
-import org.pytorch.executorch.annotations.Experimental;
-
-/**
- * Java wrapper for ExecuTorch Module.
- *
- * <p>Warning: These APIs are experimental and subject to change without notice
- */
-@Experimental
-public class Module implements Closeable {
-
-  static {
-    if (!NativeLoader.isInitialized()) {
-      NativeLoader.init(new SystemDelegate());
-    }
-    // Loads libexecutorch.so from jniLibs
-    NativeLoader.loadLibrary("executorch");
-  }
-
-  /** Load mode for the module. Load the whole file as a buffer. */
-  public static final int LOAD_MODE_FILE = 0;
-
-  /** Load mode for the module. Use mmap to load pages into memory. */
-  public static final int LOAD_MODE_MMAP = 1;
-
-  /** Load mode for the module. Use memory locking and handle errors. */
-  public static final int LOAD_MODE_MMAP_USE_MLOCK = 2;
-
-  /** Load mode for the module. Use memory locking and ignore errors. */
-  public static final int LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS = 3;
-
-  private final HybridData mHybridData;
-
-  private final Map<String, MethodMetadata> mMethodMetadata;
-
-  @DoNotStrip
-  private static native HybridData initHybrid(
-      String moduleAbsolutePath, int loadMode, int numThreads);
-
-  private Module(String moduleAbsolutePath, int loadMode, int numThreads) {
-    ExecuTorchRuntime runtime = ExecuTorchRuntime.getRuntime();
-
-    mHybridData = initHybrid(moduleAbsolutePath, loadMode, numThreads);
-
-    mMethodMetadata = populateMethodMeta();
-  }
-
-  private Map<String, MethodMetadata> populateMethodMeta() {
-    String[] methods = getMethods();
-    Map<String, MethodMetadata> metadata = new HashMap<String, MethodMetadata>();
-    for (String name : methods) {
-      metadata.put(name, new MethodMetadata(name, getUsedBackends(name)));
-    }
-    return metadata;
-  }
-
-  /** Lock protecting the non-thread safe methods in mHybridData. */
-  private Lock mLock = new ReentrantLock();
-
-  /**
-   * Loads a serialized ExecuTorch module from the specified path on the disk.
-   *
-   * @param modelPath path to file that contains the serialized ExecuTorch module.
-   * @param loadMode load mode for the module. See constants in {@link Module}.
-   * @return new {@link org.pytorch.executorch.Module} object which owns the model module.
-   */
-  public static Module load(final String modelPath, int loadMode) {
-    return load(modelPath, loadMode, 0);
-  }
-
-  /**
-   * Loads a serialized ExecuTorch module from the specified path on the disk.
-   *
-   * @param modelPath path to file that contains the serialized ExecuTorch module.
-   * @param loadMode load mode for the module. See constants in {@link Module}.
-   * @param numThreads the number of threads to use for inference. A value of 0 defaults to a
-   *     hardware-specific default.
-   * @return new {@link org.pytorch.executorch.Module} object which owns the model module.
-   */
-  public static Module load(final String modelPath, int loadMode, int numThreads) {
-    ExecuTorchRuntime.validateFilePath(modelPath, "model path");
-    return new Module(modelPath, loadMode, numThreads);
-  }
-
-  /**
-   * Loads a serialized ExecuTorch module from the specified path on the disk to run on CPU.
-   *
-   * @param modelPath path to file that contains the serialized ExecuTorch module.
-   * @return new {@link org.pytorch.executorch.Module} object which owns the model module.
-   */
-  public static Module load(final String modelPath) {
-    return load(modelPath, LOAD_MODE_FILE);
-  }
-
-  /**
-   * Runs the 'forward' method of this module with the specified arguments.
-   *
-   * @param inputs arguments for the ExecuTorch module's 'forward' method. Note: if method 'forward'
-   *     requires inputs but no inputs are given, the function will not error out, but run 'forward'
-   *     with sample inputs.
-   * @return return value from the 'forward' method.
-   */
-  public EValue[] forward(EValue... inputs) {
-    return execute("forward", inputs);
-  }
-
-  /**
-   * Runs the specified method of this module with the specified arguments.
-   *
-   * @param methodName name of the ExecuTorch method to run.
-   * @param inputs arguments that will be passed to ExecuTorch method.
-   * @return return value from the method.
-   */
-  public EValue[] execute(String methodName, EValue... inputs) {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      return executeNative(methodName, inputs);
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native EValue[] executeNative(String methodName, EValue... inputs);
-
-  /**
-   * Load a method on this module. This might help with the first time inference performance,
-   * because otherwise the method is loaded lazily when it's execute. Note: this function is
-   * synchronous, and will block until the method is loaded. Therefore, it is recommended to call
-   * this on a background thread. However, users need to make sure that they don't execute before
-   * this function returns.
-   */
-  public void loadMethod(String methodName) {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      int errorCode = loadMethodNative(methodName);
-      if (errorCode != 0) {
-        throw new ExecutorchRuntimeException(errorCode, "Failed to load method: " + methodName);
-      }
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native int loadMethodNative(String methodName);
-
-  /**
-   * Returns the names of the backends in a certain method.
-   *
-   * @param methodName method name to query
-   * @return an array of backend name
-   */
-  @DoNotStrip
-  private native String[] getUsedBackends(String methodName);
-
-  /**
-   * Returns the names of methods.
-   *
-   * @return name of methods in this Module
-   */
-  public String[] getMethods() {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      return getMethodsNative();
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native String[] getMethodsNative();
-
-  /**
-   * Get the corresponding @MethodMetadata for a method
-   *
-   * @param name method name
-   * @return @MethodMetadata for this method
-   */
-  public MethodMetadata getMethodMetadata(String name) {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      MethodMetadata methodMetadata = mMethodMetadata.get(name);
-      if (methodMetadata == null) {
-        throw new IllegalArgumentException("method " + name + " does not exist for this module");
-      }
-      return methodMetadata;
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private static native String[] readLogBufferStaticNative();
-
-  public static String[] readLogBufferStatic() {
-    return readLogBufferStaticNative();
-  }
-
-  /** Retrieve the in-memory log buffer, containing the most recent ExecuTorch log entries. */
-  public String[] readLogBuffer() {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      return readLogBufferNative();
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native String[] readLogBufferNative();
-
-  /**
-   * Dump the ExecuTorch ETRecord file to /data/local/tmp/result.etdump.
-   *
-   * <p>Currently for internal (minibench) use only.
-   *
-   * @return true if the etdump was successfully written, false otherwise.
-   */
-  @Experimental
-  public boolean etdump() {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      return etdumpNative();
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native boolean etdumpNative();
-
-  /**
-   * Dump the ExecuTorch ETDump file to {@code outputPath}.
-   *
-   * @param outputPath absolute path to write the etdump file to.
-   * @return true if the etdump was successfully written, false otherwise.
-   */
-  @Experimental
-  public boolean etdump(String outputPath) {
-    mLock.lock();
-    try {
-      if (!mHybridData.isValid()) {
-        throw new IllegalStateException("Module has been destroyed");
-      }
-      return etdumpToNative(outputPath);
-    } finally {
-      mLock.unlock();
-    }
-  }
-
-  @DoNotStrip
-  private native boolean etdumpToNative(String outputPath);
-
-  /**
-   * Explicitly destroys the native Module object. Calling this method is not required, as the
-   * native object will be destroyed when this object is garbage-collected. However, the timing of
-   * garbage collection is not guaranteed, so proactively calling {@code destroy} can free memory
-   * more quickly. See {@link com.facebook.jni.HybridData#resetNative}.
-   */
-  public void destroy() {
-    if (mLock.tryLock()) {
-      try {
-        if (mHybridData.isValid()) {
-          mHybridData.resetNative();
-        }
-      } finally {
-        mLock.unlock();
-      }
-    } else {
-      throw new IllegalStateException("Cannot destroy module while method is executing");
-    }
-  }
-
-  @Override
-  public void close() {
-    destroy();
-  }
-}
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.kt b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.kt
new file mode 100644
index 00000000000..15f8dbbc992
--- /dev/null
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/Module.kt
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package org.pytorch.executorch
+
+import com.facebook.jni.HybridData
+import com.facebook.jni.annotations.DoNotStrip
+import com.facebook.soloader.nativeloader.NativeLoader
+import com.facebook.soloader.nativeloader.SystemDelegate
+import java.io.Closeable
+import java.util.concurrent.locks.ReentrantLock
+import org.pytorch.executorch.annotations.Experimental
+
+/**
+ * Java wrapper for ExecuTorch Module.
+ *
+ * Warning: These APIs are experimental and subject to change without notice
+ */
+@Experimental
+open class Module private constructor(moduleAbsolutePath: String, loadMode: Int, numThreads: Int) :
+    Closeable {
+
+  private val mHybridData: HybridData
+  private val mMethodMetadata: Map<String, MethodMetadata>
+
+  /** Lock protecting the non-thread safe methods in mHybridData. */
+  private val mLock = ReentrantLock()
+
+  init {
+    ExecuTorchRuntime.getRuntime()
+    mHybridData = initHybrid(moduleAbsolutePath, loadMode, numThreads)
+    mMethodMetadata = populateMethodMeta()
+  }
+
+  private fun populateMethodMeta(): Map<String, MethodMetadata> {
+    val methods = getMethodsNative()
+    val metadata = HashMap<String, MethodMetadata>()
+    for (name in methods) {
+      metadata[name] = MethodMetadata(name, getUsedBackends(name))
+    }
+    return metadata
+  }
+
+  /**
+   * Runs the 'forward' method of this module with the specified arguments.
+   *
+   * @param inputs arguments for the ExecuTorch module's 'forward' method. Note: if method 'forward'
+   *   requires inputs but no inputs are given, the function will not error out, but run 'forward'
+   *   with sample inputs.
+   * @return return value from the 'forward' method.
+   */
+  open fun forward(vararg inputs: EValue): Array<EValue> = execute("forward", *inputs)
+
+  /**
+   * Runs the specified method of this module with the specified arguments.
+   *
+   * @param methodName name of the ExecuTorch method to run.
+   * @param inputs arguments that will be passed to ExecuTorch method.
+   * @return return value from the method.
+   */
+  open fun execute(methodName: String, vararg inputs: EValue): Array<EValue> {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      return executeNative(methodName, *inputs)
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip
+  private external fun executeNative(methodName: String, vararg inputs: EValue): Array<EValue>
+
+  /**
+   * Load a method on this module. This might help with the first time inference performance,
+   * because otherwise the method is loaded lazily when it's execute. Note: this function is
+   * synchronous, and will block until the method is loaded. Therefore, it is recommended to call
+   * this on a background thread. However, users need to make sure that they don't execute before
+   * this function returns.
+   */
+  open fun loadMethod(methodName: String) {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      val errorCode = loadMethodNative(methodName)
+      if (errorCode != 0) {
+        throw ExecutorchRuntimeException(errorCode, "Failed to load method: $methodName")
+      }
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun loadMethodNative(methodName: String): Int
+
+  /**
+   * Returns the names of the backends in a certain method.
+   *
+   * @param methodName method name to query
+   * @return an array of backend name
+   */
+  @DoNotStrip private external fun getUsedBackends(methodName: String): Array<String>
+
+  /**
+   * Returns the names of methods.
+   *
+   * @return name of methods in this Module
+   */
+  open fun getMethods(): Array<String> {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      return getMethodsNative()
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun getMethodsNative(): Array<String>
+
+  /**
+   * Get the corresponding [MethodMetadata] for a method
+   *
+   * @param name method name
+   * @return [MethodMetadata] for this method
+   */
+  open fun getMethodMetadata(name: String): MethodMetadata {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      return mMethodMetadata[name]
+          ?: throw IllegalArgumentException("method $name does not exist for this module")
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  /** Retrieve the in-memory log buffer, containing the most recent ExecuTorch log entries. */
+  open fun readLogBuffer(): Array<String>? {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      return readLogBufferNative()
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun readLogBufferNative(): Array<String>?
+
+  /**
+   * Dump the ExecuTorch ETRecord file to /data/local/tmp/result.etdump.
+   *
+   * Currently for internal (minibench) use only.
+   *
+   * @return true if the etdump was successfully written, false otherwise.
+   */
+  @Experimental
+  open fun etdump(): Boolean {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      return etdumpNative()
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun etdumpNative(): Boolean
+
+  /**
+   * Dump the ExecuTorch ETDump file to [outputPath].
+   *
+   * @param outputPath absolute path to write the etdump file to.
+   * @return true if the etdump was successfully written, false otherwise.
+   */
+  @Experimental
+  open fun etdump(outputPath: String): Boolean {
+    mLock.lock()
+    try {
+      check(mHybridData.isValid) { "Module has been destroyed" }
+      return etdumpToNative(outputPath)
+    } finally {
+      mLock.unlock()
+    }
+  }
+
+  @DoNotStrip private external fun etdumpToNative(outputPath: String): Boolean
+
+  /**
+   * Explicitly destroys the native Module object. Calling this method is not required, as the
+   * native object will be destroyed when this object is garbage-collected. However, the timing of
+   * garbage collection is not guaranteed, so proactively calling `destroy` can free memory more
+   * quickly. See [com.facebook.jni.HybridData.resetNative].
+   */
+  open fun destroy() {
+    if (mLock.tryLock()) {
+      try {
+        if (mHybridData.isValid) {
+          mHybridData.resetNative()
+        }
+      } finally {
+        mLock.unlock()
+      }
+    } else {
+      throw IllegalStateException("Cannot destroy module while method is executing")
+    }
+  }
+
+  override fun close() {
+    destroy()
+  }
+
+  companion object {
+    init {
+      if (!NativeLoader.isInitialized()) {
+        NativeLoader.init(SystemDelegate())
+      }
+      NativeLoader.loadLibrary("executorch")
+    }
+
+    /** Load mode for the module. Load the whole file as a buffer. */
+    const val LOAD_MODE_FILE = 0
+
+    /** Load mode for the module. Use mmap to load pages into memory. */
+    const val LOAD_MODE_MMAP = 1
+
+    /** Load mode for the module. Use memory locking and handle errors. */
+    const val LOAD_MODE_MMAP_USE_MLOCK = 2
+
+    /** Load mode for the module. Use memory locking and ignore errors. */
+    const val LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS = 3
+
+    /**
+     * Loads a serialized ExecuTorch module from the specified path on the disk.
+     *
+     * @param modelPath path to file that contains the serialized ExecuTorch module.
+     * @param loadMode load mode for the module. See constants in [Module].
+     * @param numThreads the number of threads to use for inference. A value of 0 defaults to a
+     *   hardware-specific default.
+     * @return new [Module] object which owns the model module.
+     */
+    @JvmStatic
+    @JvmOverloads
+    fun load(modelPath: String?, loadMode: Int = LOAD_MODE_FILE, numThreads: Int = 0): Module {
+      ExecuTorchRuntime.validateFilePath(modelPath, "model path")
+      return Module(modelPath!!, loadMode, numThreads)
+    }
+
+    @DoNotStrip
+    @JvmStatic
+    private external fun initHybrid(
+        moduleAbsolutePath: String,
+        loadMode: Int,
+        numThreads: Int,
+    ): HybridData
+
+    @DoNotStrip @JvmStatic fun readLogBufferStatic(): Array<String>? = readLogBufferStaticNative()
+
+    @DoNotStrip @JvmStatic private external fun readLogBufferStaticNative(): Array<String>?
+  }
+}

From 7c0f60a8c3e7f4c1fcc46667e669ac9eb0dffa5f Mon Sep 17 00:00:00 2001
From: Martin Pavella <martin.pavella@nxp.com>
Date: Thu, 28 May 2026 08:10:55 +0200
Subject: [PATCH 053/103] NXP backend: Add `tanh` support with new Neutron
 flow. (#19753)

### Summary
Add `tanh` support with new Neutron flow.

### Test plan
Unit tests provided.


cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 .../ops_converters/tanh_converter.py          | 32 ++++++-
 .../node_converter/test_tanh_converter.py     | 95 +++++++++++++++++--
 backends/nxp/tests/models.py                  |  9 +-
 backends/nxp/tests/ops_aliases.py             |  2 +
 4 files changed, 129 insertions(+), 9 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/tanh_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/tanh_converter.py
index 427865f8ee7..54192628e24 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/tanh_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/tanh_converter.py
@@ -1,8 +1,10 @@
-# Copyright 2025 NXP
+# Copyright 2025-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import torch
+
 from executorch.backends.nxp.backend.custom_delegation_options import (
     CustomDelegationOptions,
 )
@@ -10,6 +12,8 @@
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
     BuiltinOperator,
 )
+
+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
 from torch.nn import Parameter
 
@@ -24,7 +28,33 @@ def _is_supported_in_IR(
     ) -> bool:
         return True
 
+    @staticmethod
+    def _is_supported_on_target(
+        node: Node,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        if custom_delegation_options.use_new_flow_neutron_c:
+            # Requirements specified by the new Neutron flow documentation.
+
+            if not NodeConverter.uses_quantization_type_for_io(
+                node,
+                supported_types=[torch.int8, torch.uint8],
+                input_indices=[0],
+                output_indices=[0],
+            ):
+                return False
+
+        return True
+
     def convert(self, node: Node):
+        """Convert the `aten.tanh` operator to NeutronIR `Tanh`.
+        The ExecuTorch schema is:
+            tanh(
+                Tensor self
+            ) -> Tensor
+        """
         self.assert_convertible(node)
 
         t_op = self._create_tflite_op_with_io_tensors(node)
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py
index 10892d28e38..ba2f5bf07d1 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py
@@ -1,4 +1,4 @@
-# Copyright 2025 NXP
+# Copyright 2025-2026 NXP
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -8,9 +8,13 @@
 
 import kgb
 import numpy as np
+
+# noinspection PyUnusedImports
+import pytest
 import torch
 
 from executorch.backends.nxp.nxp_backend import EdgeProgramToIRConverter
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
@@ -18,10 +22,13 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
 from executorch.backends.nxp.tests.models import Conv2dWithActivation
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import Convolution, Tanh, Tanh_
 from parameterized import parameterized
 from torch.export import ExportedProgram
+from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
 class TestTanhConverter(unittest.TestCase):
@@ -73,10 +80,7 @@ def test_conv_tanh(
             lowered_module_graph = (
                 quantized_program.graph_module.lowered_module_0.original_module.graph
             )
-            tanh_ops = [
-                exir_ops.edge.aten.tanh.default,
-                exir_ops.edge.aten.tanh_.default,
-            ]
+            tanh_ops = [Tanh, Tanh_]
             assert graph_contains_any_of_ops(graph=lowered_module_graph, ops=tanh_ops)
 
             input_data = (np.random.random(input_shape) * 50).astype(np.int8)
@@ -88,3 +92,82 @@ def test_conv_tanh(
                 input_data=input_data,
                 atol=2.0,
             )
+
+
+class TanhModule(torch.nn.Module):
+    def __init__(self, inplace: bool = False):
+        super().__init__()
+        self.inplace = inplace
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.inplace:
+            return torch.tanh_(x)
+        else:
+            return torch.tanh(x)
+
+
+class TestTanhNewNeutronFlow:
+
+    # noinspection PyMethodMayBeStatic
+    def assert_delegated(
+        self,
+        model,
+        input_shape,
+        mocker,
+        use_qat=False,
+        expected_delegated_ops=None,
+    ):
+        if expected_delegated_ops is None:
+            expected_delegated_ops = {Tanh: 1}
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops=expected_delegated_ops,
+            expected_non_delegated_ops={},
+        )
+
+        # Cover also negative values to thoroughly test the operator.
+        dataset_creator = RandomDatasetCreator(low=-2, high=2)
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset_creator,
+            use_qat=use_qat,
+            use_new_flow_neutron_c=True,  # Use the new flow.
+        )
+
+    @pytest.fixture(params=[True, False], ids=lambda inplace: f"inplace = {inplace}")
+    def inplace(self, request):
+        return request.param
+
+    def test__qat__inplace(self, mocker, use_qat, inplace):
+        shape = (23,)
+        model = TanhModule(inplace)
+        self.assert_delegated(model, shape, mocker, use_qat=use_qat)
+
+    @pytest.mark.parametrize(
+        "shape",
+        [
+            (16,),
+            (3, 5),
+            (2, 3, 4),
+            (2, 3, 4, 5),
+            (2, 3, 2, 3, 2),
+        ],
+        ids=lambda shape: f"{len(shape)}D",
+    )
+    def test__shapes(self, mocker, shape):
+        model = TanhModule()
+        self.assert_delegated(model, shape, mocker)
+
+    def test__with_convolution(self, mocker):
+        input_shape = (1, 3, 12, 16)
+        channels = input_shape[1]
+        model = Conv2dWithActivation(
+            activation=torch.tanh, in_channels=channels, out_channels=channels
+        )
+        self.assert_delegated(
+            model, input_shape, mocker, expected_delegated_ops={Tanh: 1, Convolution: 1}
+        )
diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py
index 1292c4cf17d..0383734b4dd 100644
--- a/backends/nxp/tests/models.py
+++ b/backends/nxp/tests/models.py
@@ -456,11 +456,16 @@ def forward(self, x):
 
 
 class Conv2dWithActivation(torch.nn.Module):
-    def __init__(self, activation: torch.nn.Module | Callable, in_channels: int = 3):
+    def __init__(
+        self,
+        activation: torch.nn.Module | Callable,
+        in_channels: int = 3,
+        out_channels: int = 64,
+    ):
         super().__init__()
 
         self.conv = torch.nn.Conv2d(
-            in_channels=in_channels, out_channels=64, kernel_size=(3, 3)
+            in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3)
         )
         self.activation = activation
 
diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py
index 06eb9c84bd0..78a2ac10f55 100644
--- a/backends/nxp/tests/ops_aliases.py
+++ b/backends/nxp/tests/ops_aliases.py
@@ -39,6 +39,8 @@
 SqueezeDim = exir_ops.edge.aten.squeeze.dim
 SqueezeDims = exir_ops.edge.aten.squeeze.dims
 SubTensor = exir_ops.edge.aten.sub.Tensor
+Tanh = exir_ops.edge.aten.tanh.default
+Tanh_ = exir_ops.edge.aten.tanh_.default
 Unsqueeze = exir_ops.edge.aten.unsqueeze.default
 UpsampleBilinear2D = exir_ops.edge.aten.upsample_bilinear2d.vec
 UpsampleNearest2D = exir_ops.edge.aten.upsample_nearest2d.vec

From f59ac9d1e9ccea7a7e4ecb974c5d72051034f9b0 Mon Sep 17 00:00:00 2001
From: Martin Pavella <martin.pavella@nxp.com>
Date: Thu, 28 May 2026 08:18:00 +0200
Subject: [PATCH 054/103] NXP backend: Enable `aten.div.Tensor` with new
 Neutron flow. (#19802)

### Summary
Enable `aten.div.Tensor` with new Neutron flow.

### Test plan
Unit tests provided.


cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 .../generic_tests/test_convert_div_to_mul.py  | 62 ++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py b/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py
index ee89d5d5619..9201f32349f 100644
--- a/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py
+++ b/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pytest
 import torch
+
 from executorch.backends.nxp.aten_passes.neutron_aten_pass_manager import (
     ConvertDivToMulPass,
     NeutronAtenPassManager,
@@ -13,6 +14,7 @@
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import (
     neutron_target_spec,
     to_quantized_edge_program,
@@ -21,11 +23,13 @@
     convert_run_compare,
     graph_contains_any_of_ops,
 )
-
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
 from executorch.backends.nxp.tests.models import (
     NonstaticDivLinearModel,
     StaticDivLinearModel,
 )
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import MulTensor
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.export import ExportedProgram
 
@@ -248,3 +252,59 @@ def test_convert_div_to_mul_full_pipeline(mocker, input_shape, is_scalar):
         input_data=example_input,
         tfl_model=neutron_ir_model,
     )
+
+
+class StaticDivModel(torch.nn.Module):
+    def __init__(self, divisor):
+        super().__init__()
+        self.divisor = divisor
+
+    def forward(self, x):
+        return x / self.divisor
+
+
+class TestConvertDivToMulNewNeutronFlow:
+
+    @pytest.mark.parametrize(
+        "input_shape",
+        [
+            (23,),
+            (3, 7),
+            (2, 3, 4),
+            (1, 2, 3, 4),
+            (1, 2, 3, 2, 1),
+        ],
+        ids=lambda shape: f"{len(shape)}D",
+    )
+    @pytest.mark.parametrize(
+        "is_scalar",
+        [False, True],
+        ids=lambda is_scalar: "scalar" if is_scalar else "tensor",
+    )
+    def test__static__full_pipeline(
+        self, mocker, input_shape: tuple[int, ...], is_scalar: bool
+    ):
+        if is_scalar:
+            divisor = np.random.uniform(0.01, 15)
+            model = StaticDivModel(divisor)
+        else:
+            divisor = torch.rand(input_shape) + 0.01
+            model = StaticDivModel(divisor)
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            # By the time `DetailedGraphVerifier` checks for operators, the `div` has already been replaced by `mul`.
+            expected_delegated_ops={MulTensor: 1},
+            expected_non_delegated_ops={},
+        )
+
+        # Cover also negative values to thoroughly test the operator.
+        dataset_creator = RandomDatasetCreator(low=-2, high=2)
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset_creator,
+            use_new_flow_neutron_c=True,  # Use the new flow.
+        )

From b48a457a783f490dcc012167ff3b9d6f93c22ed5 Mon Sep 17 00:00:00 2001
From: Sebastian Larsson <38941629+Sebastian-Larsson@users.noreply.github.com>
Date: Thu, 28 May 2026 08:33:47 +0200
Subject: [PATCH 055/103] Arm backend: Remove Ethos-U core driver submodule
 (#19664)

Use the Ethos-U scratch checkout as the source for core driver headers.
Keep baremetal builds on the same driver copy as the Corstone platform
flow, and remove the stale Arm third-party README entry.

Signed-off-by: Sebastian Larsson <sebastian.larsson@arm.com>
---
 .gitmodules                                  |  3 ---
 backends/arm/CMakeLists.txt                  | 24 ++++++++++++++++----
 backends/arm/README.md                       |  2 --
 backends/arm/scripts/corstone_utils.cmake    | 10 +++++---
 backends/arm/third-party/ethos-u-core-driver |  1 -
 5 files changed, 26 insertions(+), 14 deletions(-)
 delete mode 160000 backends/arm/third-party/ethos-u-core-driver

diff --git a/.gitmodules b/.gitmodules
index 917e755da27..0f4d09aa998 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,3 @@
-[submodule "backends/arm/third-party/ethos-u-core-driver"]
-	path = backends/arm/third-party/ethos-u-core-driver
-	url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
 [submodule "backends/vulkan/third-party/Vulkan-Headers"]
 	path = backends/vulkan/third-party/Vulkan-Headers
 	url = https://github.com/KhronosGroup/Vulkan-Headers
diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt
index d8a6c1afce7..726fcfcd0d3 100644
--- a/backends/arm/CMakeLists.txt
+++ b/backends/arm/CMakeLists.txt
@@ -39,6 +39,11 @@ set(ETHOSU_LINUX_DRIVER_SOURCE_DIR
       PATH
       "Optional local path to an existing ethos-u-linux-driver stack checkout"
 )
+set(ETHOS_SDK_PATH
+    "${EXECUTORCH_ROOT}/examples/arm/arm-scratch/ethos-u"
+    CACHE PATH "Path to Ethos-U bare metal driver/env"
+)
+option(FETCH_ETHOS_U_CONTENT "Fetch ethos_u dependencies" ON)
 
 if(EXECUTORCH_BUILD_ARM_BAREMETAL AND EXECUTORCH_BUILD_ARM_ETHOSU_LINUX)
   message(
@@ -52,8 +57,6 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL OR EXECUTORCH_BUILD_ARM_ETHOSU_LINUX)
 
   add_compile_options("-Wall" "-Werror")
 
-  set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
-
   set(_arm_backend_sources
       backends/arm/runtime/EthosUBackend.cpp
       backends/arm/runtime/EthosUBackend_IoMemcpy.cpp
@@ -72,11 +75,22 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL OR EXECUTORCH_BUILD_ARM_ETHOSU_LINUX)
       executorch_delegate_ethos_u
       PRIVATE ${EXECUTORCH_ROOT}/backends/arm/runtime/EthosUBackend_Cortex_M.cpp
     )
-    set(_ethosu_core_driver_include
-        "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include"
+    include(${EXECUTORCH_ROOT}/backends/arm/scripts/corstone_utils.cmake)
+    if(FETCH_ETHOS_U_CONTENT)
+      fetch_ethos_u_content(${ETHOS_SDK_PATH} ${EXECUTORCH_ROOT})
+    endif()
+    set(DRIVER_ETHOSU_INCLUDE_DIR
+        "${ETHOS_SDK_PATH}/core_software/core_driver/include"
     )
+    if(NOT EXISTS "${DRIVER_ETHOSU_INCLUDE_DIR}/ethosu_driver.h")
+      message(
+        FATAL_ERROR
+          "Ethos-U core driver headers were not found in ${DRIVER_ETHOSU_INCLUDE_DIR}."
+          " Run examples/arm/setup.sh or enable FETCH_ETHOS_U_CONTENT."
+      )
+    endif()
     target_include_directories(
-      executorch_delegate_ethos_u PRIVATE ${_ethosu_core_driver_include}
+      executorch_delegate_ethos_u PRIVATE ${DRIVER_ETHOSU_INCLUDE_DIR}
     )
     target_link_libraries(executorch_delegate_ethos_u PUBLIC ethosu_core_driver)
   elseif(EXECUTORCH_BUILD_ARM_ETHOSU_LINUX)
diff --git a/backends/arm/README.md b/backends/arm/README.md
index f822077e170..237f2433cb5 100644
--- a/backends/arm/README.md
+++ b/backends/arm/README.md
@@ -61,8 +61,6 @@ backends/arm/
 │   ├── models/                    # Model level unit tests
 │   └── tester/                    # Testing harnesses and utilities
 │
-├── third-party/                   # External dependencies
-│
 ├── tosa/                          # Shared TOSA backend implementation and dialect
 │
 └── vgf/                           # Implementations of VgfPartitioner and VgfBackend
diff --git a/backends/arm/scripts/corstone_utils.cmake b/backends/arm/scripts/corstone_utils.cmake
index 34f04ba1225..0ed1e4aea0f 100644
--- a/backends/arm/scripts/corstone_utils.cmake
+++ b/backends/arm/scripts/corstone_utils.cmake
@@ -8,6 +8,7 @@ function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
 
   file(MAKE_DIRECTORY ${ETHOS_SDK_PATH}/../ethos_u)
   include(FetchContent)
+  find_package(Python3 REQUIRED COMPONENTS Interpreter)
   set(ethos_u_base_tag "26.02")
   FetchContent_Declare(
     ethos_u
@@ -33,10 +34,13 @@ function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
       "source backends/arm/scripts/utils.sh && patch_repo ${ETHOS_SDK_PATH} ${ethos_u_base_rev} ${patch_dir}"
     WORKING_DIRECTORY ${ET_DIR_PATH}
   )
-  # Get ethos_u externals only if core_platform folder does not already exist.
-  if(NOT EXISTS "${ETHOS_SDK_PATH}/core_platform")
+
+  # Get ethos_u externals only if core driver headers do not already exist.
+  if(NOT EXISTS
+     "${ETHOS_SDK_PATH}/core_software/core_driver/include/ethosu_driver.h"
+  )
     execute_process(
-      COMMAND ${PYTHON_EXECUTABLE} fetch_externals.py -c
+      COMMAND ${Python3_EXECUTABLE} fetch_externals.py -c
               ${ethos_u_base_tag}.json fetch
       WORKING_DIRECTORY ${ETHOS_SDK_PATH}
     )
diff --git a/backends/arm/third-party/ethos-u-core-driver b/backends/arm/third-party/ethos-u-core-driver
deleted file mode 160000
index 03567073fe2..00000000000
--- a/backends/arm/third-party/ethos-u-core-driver
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 03567073fe2b9802c0bd73f9534da6f8a03924d1

From 9981ba7e224265197639cabb3687d479424aeda6 Mon Sep 17 00:00:00 2001
From: Yufeng Shi <yufeng.shi@arm.com>
Date: Thu, 28 May 2026 10:23:51 +0100
Subject: [PATCH 056/103] Arm backend: Add FP8 support for primitive lowering
 ops (#19805)

Change-Id: I3bec5e29ea3d2daf81a46dca50e7ae0c9c11e787


cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Yufeng Shi <yufeng.shi@arm.com>
---
 .../arm/operator_support/gather_support.py    | 31 ++++++++++--
 .../operator_support/slice_copy_support.py    | 26 +++++++++-
 backends/arm/operators/op_cat.py              |  4 ++
 backends/arm/operators/op_permute.py          |  4 ++
 backends/arm/operators/op_repeat.py           |  4 ++
 backends/arm/operators/op_tosa_gather.py      | 10 ++++
 backends/arm/operators/op_tosa_pad.py         |  5 +-
 backends/arm/operators/op_tosa_scatter.py     | 18 ++++++-
 backends/arm/operators/op_tosa_slice.py       |  4 ++
 backends/arm/operators/op_view.py             |  4 ++
 .../test/misc/test_tosa_dialect_scatter.py    | 38 +++++++++++++++
 backends/arm/test/ops/test_cat.py             | 31 ++++++++++++
 backends/arm/test/ops/test_constant_pad_nd.py | 29 ++++++++++++
 backends/arm/test/ops/test_gather.py          | 47 +++++++++++++++++++
 backends/arm/test/ops/test_repeat.py          | 25 ++++++++++
 backends/arm/test/ops/test_slice.py           | 26 ++++++++++
 backends/arm/test/ops/test_view.py            | 42 +++++++++++++++++
 backends/arm/tosa/dialect/ops/gather.py       | 12 +++++
 backends/arm/tosa/dialect/ops/pad.py          |  4 ++
 backends/arm/tosa/dialect/ops/slice.py        |  4 ++
 20 files changed, 360 insertions(+), 8 deletions(-)
 create mode 100644 backends/arm/test/misc/test_tosa_dialect_scatter.py

diff --git a/backends/arm/operator_support/gather_support.py b/backends/arm/operator_support/gather_support.py
index 651727cd8b6..6d923c0441c 100644
--- a/backends/arm/operator_support/gather_support.py
+++ b/backends/arm/operator_support/gather_support.py
@@ -49,7 +49,7 @@ class GatherSupported(SupportedTOSAOperatorCheck):
 
     targets = [exir_ops.edge.aten.gather.default]
 
-    def is_node_tosa_supported(
+    def is_node_tosa_supported(  # noqa: C901
         self, node: fx.Node, tosa_spec: TosaSpecification
     ) -> bool:  # type: ignore[override, misc]
         if len(node.args) != 3:
@@ -115,8 +115,14 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires INT profile.",
                 )
                 return False
-        # fp16/fp32/bf16: either FP profile, or INT profile (via quantization)
-        elif values_dtype in (torch.float16, torch.float32, torch.bfloat16):
+        # fp16/fp32/bf16/fp8: either FP profile, or INT profile (via quantization)
+        elif values_dtype in (
+            torch.float16,
+            torch.float32,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e5m2,
+        ):
             if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
                 "bf16"
             ):
@@ -125,6 +131,22 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires bf16 extension.",
                 )
                 return False
+            if values_dtype == torch.float8_e4m3fn and not tosa_spec.support_extension(
+                "fp8e4m3"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e4m3 extension.",
+                )
+                return False
+            if values_dtype == torch.float8_e5m2 and not tosa_spec.support_extension(
+                "fp8e5m2"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e5m2 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
@@ -136,7 +158,8 @@ def is_node_tosa_supported(
             self.reporter.report_reject(
                 node,
                 f"{node.target}: unsupported values dtype {values_dtype}; "
-                "expected bool/int8/int16/int32/float16/bfloat16/float32.",
+                "expected bool/int8/int16/int32/float16/bfloat16/float32/"
+                "float8_e4m3fn/float8_e5m2.",
             )
             return False
 
diff --git a/backends/arm/operator_support/slice_copy_support.py b/backends/arm/operator_support/slice_copy_support.py
index bcc3ddfbbbb..c9ef4a85bdf 100644
--- a/backends/arm/operator_support/slice_copy_support.py
+++ b/backends/arm/operator_support/slice_copy_support.py
@@ -53,7 +53,13 @@ def is_node_tosa_supported(
         values_dtype = node.args[0].meta["val"].dtype  # type: ignore[union-attr]
 
         SUPPORTED_INT_DTYPES = (torch.int8, torch.int16, torch.int32)
-        SUPPORTED_FLOAT_DTYPES = (torch.float16, torch.float32, torch.bfloat16)
+        SUPPORTED_FLOAT_DTYPES = (
+            torch.float16,
+            torch.float32,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e5m2,
+        )
         SUPPORTED_DTYPES = (torch.bool,) + SUPPORTED_INT_DTYPES + SUPPORTED_FLOAT_DTYPES
 
         # bool is supported in both INT and FP profiles
@@ -68,7 +74,7 @@ def is_node_tosa_supported(
                 )
                 return False
 
-        # fp16/fp32/bf16: either FP profile, or INT profile (via quantization)
+        # fp16/fp32/bf16/fp8: either FP profile, or INT profile (via quantization)
         elif values_dtype in SUPPORTED_FLOAT_DTYPES:
             if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
                 "bf16"
@@ -78,6 +84,22 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires bf16 extension.",
                 )
                 return False
+            if values_dtype == torch.float8_e4m3fn and not tosa_spec.support_extension(
+                "fp8e4m3"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e4m3 extension.",
+                )
+                return False
+            if values_dtype == torch.float8_e5m2 and not tosa_spec.support_extension(
+                "fp8e5m2"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e5m2 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
diff --git a/backends/arm/operators/op_cat.py b/backends/arm/operators/op_cat.py
index 544beefadf9..97ea651cb12 100644
--- a/backends/arm/operators/op_cat.py
+++ b/backends/arm/operators/op_cat.py
@@ -44,6 +44,10 @@ def define_node(
             supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
             supported_dtypes.append(ts.DType.BF16)
+        if self.tosa_spec.support_extension("fp8e4m3"):
+            supported_dtypes.append(ts.DType.FP8E4M3)
+        if self.tosa_spec.support_extension("fp8e5m2"):
+            supported_dtypes.append(ts.DType.FP8E5M2)
         validate_num_inputs(self.target, inputs, [1, 2])
         input_tosa_args = [TosaArg(arg, self.tosa_spec) for arg in inputs[0].special]
         validate_same_dtype(self.target, [*input_tosa_args, output], ts)
diff --git a/backends/arm/operators/op_permute.py b/backends/arm/operators/op_permute.py
index e200478d7b3..2418131af3e 100644
--- a/backends/arm/operators/op_permute.py
+++ b/backends/arm/operators/op_permute.py
@@ -43,6 +43,10 @@ def define_node(
             supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
             supported_dtypes.append(ts.DType.BF16)
+        if self.tosa_spec.support_extension("fp8e4m3"):
+            supported_dtypes.append(ts.DType.FP8E4M3)
+        if self.tosa_spec.support_extension("fp8e5m2"):
+            supported_dtypes.append(ts.DType.FP8E5M2)
 
         validate_num_inputs(self.target, inputs, 2)
         validate_same_dtype(self.target, [inputs[0], output], ts)
diff --git a/backends/arm/operators/op_repeat.py b/backends/arm/operators/op_repeat.py
index 9b95c902847..f990dbef64b 100644
--- a/backends/arm/operators/op_repeat.py
+++ b/backends/arm/operators/op_repeat.py
@@ -42,6 +42,10 @@ def define_node(
             supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
             supported_dtypes.append(ts.DType.BF16)
+        if self.tosa_spec.support_extension("fp8e4m3"):
+            supported_dtypes.append(ts.DType.FP8E4M3)
+        if self.tosa_spec.support_extension("fp8e5m2"):
+            supported_dtypes.append(ts.DType.FP8E5M2)
 
         validate_num_inputs(self.target, inputs, 2)
         validate_same_dtype(self.target, [inputs[0], output], ts)
diff --git a/backends/arm/operators/op_tosa_gather.py b/backends/arm/operators/op_tosa_gather.py
index c242d351c06..913e2cc02b3 100644
--- a/backends/arm/operators/op_tosa_gather.py
+++ b/backends/arm/operators/op_tosa_gather.py
@@ -63,6 +63,16 @@ def define_node(
                 ts.DType.FP16,
                 ts.DType.FP32,
                 ts.DType.BF16,
+                *(
+                    [ts.DType.FP8E4M3]
+                    if self.tosa_spec.support_extension("fp8e4m3")
+                    else []
+                ),
+                *(
+                    [ts.DType.FP8E5M2]
+                    if self.tosa_spec.support_extension("fp8e5m2")
+                    else []
+                ),
             ],
             self.tosa_spec,
         )
diff --git a/backends/arm/operators/op_tosa_pad.py b/backends/arm/operators/op_tosa_pad.py
index 6f1cd488469..6e93adde55b 100644
--- a/backends/arm/operators/op_tosa_pad.py
+++ b/backends/arm/operators/op_tosa_pad.py
@@ -41,6 +41,10 @@ def define_node(
             supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
             supported_dtypes.append(ts.DType.BF16)
+        if self.tosa_spec.support_extension("fp8e4m3"):
+            supported_dtypes.append(ts.DType.FP8E4M3)
+        if self.tosa_spec.support_extension("fp8e5m2"):
+            supported_dtypes.append(ts.DType.FP8E5M2)
 
         validate_num_inputs(self.target, inputs, 2)
         validate_same_dtype(self.target, [inputs[0], output], ts)
@@ -50,7 +54,6 @@ def define_node(
             supported_dtypes,
             self.tosa_spec,
         )
-
         pad_const = tosa_graph.addConst(
             [1],
             output.dtype,
diff --git a/backends/arm/operators/op_tosa_scatter.py b/backends/arm/operators/op_tosa_scatter.py
index b87a2598993..63c44f91fac 100644
--- a/backends/arm/operators/op_tosa_scatter.py
+++ b/backends/arm/operators/op_tosa_scatter.py
@@ -36,7 +36,13 @@ def define_node(
         validate_same_dtype(self.target, [inputs[0], inputs[2], output], ts)
         validate_valid_dtype(
             self.target,
-            [inputs[0], inputs[1], inputs[2], output],
+            [inputs[1]],
+            [ts.DType.INT32],
+            self.tosa_spec,
+        )
+        validate_valid_dtype(
+            self.target,
+            [inputs[0], inputs[2], output],
             [
                 ts.DType.INT8,
                 ts.DType.INT16,
@@ -44,6 +50,16 @@ def define_node(
                 ts.DType.FP32,
                 ts.DType.FP16,
                 ts.DType.BF16,
+                *(
+                    [ts.DType.FP8E4M3]
+                    if self.tosa_spec.support_extension("fp8e4m3")
+                    else []
+                ),
+                *(
+                    [ts.DType.FP8E5M2]
+                    if self.tosa_spec.support_extension("fp8e5m2")
+                    else []
+                ),
             ],
             self.tosa_spec,
         )
diff --git a/backends/arm/operators/op_tosa_slice.py b/backends/arm/operators/op_tosa_slice.py
index 11ce95df466..818657642a8 100644
--- a/backends/arm/operators/op_tosa_slice.py
+++ b/backends/arm/operators/op_tosa_slice.py
@@ -42,6 +42,10 @@ def define_node(
             supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
             supported_dtypes.append(ts.DType.BF16)
+        if self.tosa_spec.support_extension("fp8e4m3"):
+            supported_dtypes.append(ts.DType.FP8E4M3)
+        if self.tosa_spec.support_extension("fp8e5m2"):
+            supported_dtypes.append(ts.DType.FP8E5M2)
 
         validate_num_inputs(self.target, inputs, 3)
         validate_same_dtype(self.target, [inputs[0], output], ts)
diff --git a/backends/arm/operators/op_view.py b/backends/arm/operators/op_view.py
index 94ed23e2446..ba98f746476 100644
--- a/backends/arm/operators/op_view.py
+++ b/backends/arm/operators/op_view.py
@@ -42,6 +42,10 @@ def define_node(
             supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
             supported_dtypes.append(ts.DType.BF16)
+        if self.tosa_spec.support_extension("fp8e4m3"):
+            supported_dtypes.append(ts.DType.FP8E4M3)
+        if self.tosa_spec.support_extension("fp8e5m2"):
+            supported_dtypes.append(ts.DType.FP8E5M2)
 
         validate_num_inputs(self.target, inputs, 2)
         validate_same_dtype(self.target, [inputs[0], output], ts)
diff --git a/backends/arm/test/misc/test_tosa_dialect_scatter.py b/backends/arm/test/misc/test_tosa_dialect_scatter.py
new file mode 100644
index 00000000000..dc75df60df9
--- /dev/null
+++ b/backends/arm/test/misc/test_tosa_dialect_scatter.py
@@ -0,0 +1,38 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import executorch.backends.arm.tosa.dialect  # noqa: F401
+import pytest
+import torch
+from executorch.backends.arm.tosa.specification import (
+    TosaLoweringContext,
+    TosaSpecification,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch._subclasses.fake_tensor import FakeTensorMode
+
+
+@pytest.mark.parametrize(
+    "dtype, extension",
+    [
+        (torch.float8_e4m3fn, "fp8e4m3"),
+        (torch.float8_e5m2, "fp8e5m2"),
+    ],
+)
+def test_scatter_tosa_FP_fp8(dtype: torch.dtype, extension: str):
+    with TosaLoweringContext(
+        TosaSpecification.create_from_string(f"TOSA-1.0+FP+{extension}")
+    ), FakeTensorMode() as mode:
+        values_in = mode.from_tensor(
+            torch.rand((1, 5, 3), dtype=torch.float32).to(dtype)
+        )
+        indices = mode.from_tensor(torch.tensor([[1, 3]], dtype=torch.int32))
+        input_tensor = mode.from_tensor(
+            torch.rand((1, 2, 3), dtype=torch.float32).to(dtype)
+        )
+        output = exir_ops.backend.tosa.SCATTER.default(values_in, indices, input_tensor)
+
+    assert output.dtype == dtype
+    assert tuple(output.shape) == (1, 5, 3)
diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py
index 1e145ef5485..29738ddbe32 100644
--- a/backends/arm/test/ops/test_cat.py
+++ b/backends/arm/test/ops/test_cat.py
@@ -98,6 +98,24 @@ class Cat(torch.nn.Module):
             0,
         ),
     }
+    test_parameters_fp8 = {
+        "cat_rand_two_tensors_fp8e4m3": lambda: (
+            (
+                torch.randn(1, 2, 4, 4, dtype=torch.float32).to(torch.float8_e4m3fn),
+                torch.randn(1, 2, 4, 1, dtype=torch.float32).to(torch.float8_e4m3fn),
+            ),
+            3,
+            "fp8e4m3",
+        ),
+        "cat_rand_dim0_fp8e5m2": lambda: (
+            (
+                torch.randn(1, 2, 4, 4, dtype=torch.float32).to(torch.float8_e5m2),
+                torch.randn(1, 2, 4, 4, dtype=torch.float32).to(torch.float8_e5m2),
+            ),
+            0,
+            "fp8e5m2",
+        ),
+    }
 
     def __init__(self):
         super().__init__()
@@ -135,6 +153,19 @@ def test_cat_tosa_FP_4d():
         pipeline.run()
 
 
+@common.parametrize("test_data", Cat.test_parameters_fp8)
+def test_cat_tosa_FP_fp8(test_data: Tuple):
+    tensors, dim, tosa_extension = test_data()
+    pipeline = TosaPipelineFP[input_t1](
+        Cat(),
+        (tensors, dim),
+        aten_op,
+        exir_op,
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", Cat.test_parameters)
 def test_cat_tosa_INT(test_data: Tuple):
     pipeline = TosaPipelineINT[input_t1](
diff --git a/backends/arm/test/ops/test_constant_pad_nd.py b/backends/arm/test/ops/test_constant_pad_nd.py
index 3742f710494..96d829851ed 100644
--- a/backends/arm/test/ops/test_constant_pad_nd.py
+++ b/backends/arm/test/ops/test_constant_pad_nd.py
@@ -128,6 +128,22 @@
         "constant",
     ),
 }
+test_data_suite_fp8 = {
+    "4dim_last1dim_fp8e4m3": lambda: (
+        torch.rand(1, 1, 8, 8, dtype=torch.float32).to(torch.float8_e4m3fn),
+        (1, 1, 0, 0, 0, 0, 0, 0),
+        1.0,
+        "constant",
+        "fp8e4m3",
+    ),
+    "3dim_last1dim_fp8e5m2": lambda: (
+        torch.rand(1, 1, 8, dtype=torch.float32).to(torch.float8_e5m2),
+        (1, 0, 1, 0, 0, 0),
+        -0.5,
+        "constant",
+        "fp8e5m2",
+    ),
+}
 
 
 class ConstantPadND(torch.nn.Module):
@@ -289,6 +305,19 @@ def test_constant_pad_nd_tosa_FP(test_data: Tuple):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_suite_fp8)
+def test_constant_pad_nd_tosa_FP_fp8(test_data: Tuple):
+    test_data, padding, value, mode, tosa_extension = test_data()
+    pipeline = TosaPipelineFP[input_t1](
+        ConstantPadND(padding, value, mode),
+        (test_data,),
+        aten_op,
+        exir_op,
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_suite)
 def test_constant_pad_nd_tosa_INT(test_data: Tuple):
     test_data, padding, value, mode = test_data()
diff --git a/backends/arm/test/ops/test_gather.py b/backends/arm/test/ops/test_gather.py
index 1439210373d..66cb9508c73 100644
--- a/backends/arm/test/ops/test_gather.py
+++ b/backends/arm/test/ops/test_gather.py
@@ -87,6 +87,36 @@ def forward(self, input_: torch.Tensor, dim_, index_: torch.Tensor):
         ),  # Shape: [N=2, W=2, C=2]
     ),
 }
+test_data_fp_fp8: dict[str, tuple[input_params, str]] = {
+    "test_fp8e4m3_2d": (
+        (
+            torch.tensor(
+                [[0.5, 1.25, 2.5], [3.5, 4.25, 5.75]],
+                dtype=torch.float8_e4m3fn,
+            ),
+            1,
+            torch.tensor(
+                [[1, 0], [2, 1]],
+                dtype=torch.int64,
+            ),
+        ),
+        "fp8e4m3",
+    ),
+    "test_fp8e5m2_3d": (
+        (
+            torch.tensor(
+                [[[0.5, 1.5], [2.5, 3.5]], [[4.5, 5.5], [6.5, 7.5]]],
+                dtype=torch.float8_e5m2,
+            ),
+            1,
+            torch.tensor(
+                [[[0, 1], [1, 0]], [[1, 0], [0, 1]]],
+                dtype=torch.int64,
+            ),
+        ),
+        "fp8e5m2",
+    ),
+}
 
 
 # INT profile: integer inputs + bool (bool is supported via casts in
@@ -145,6 +175,23 @@ def test_gather_tosa_FP(test_data: input_params):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_fp_fp8)
+def test_gather_tosa_FP_fp8(test_data: tuple[input_params, str]):
+    input_data, tosa_extension = test_data
+    pipeline = TosaPipelineFP[input_params](
+        Gather(),
+        input_data,
+        aten_op=Gather.aten_op,
+        exir_op=Gather.exir_op,
+        transform_passes=[
+            InsertInt32CastsAfterInt64PlaceholdersPass(),
+        ],  # int64 index are not currently supported and need to be cast to int32
+        run_on_tosa_ref_model=False,  # torch.gather() has no eager CPU FP8 implementation here, so eager reference execution fails.
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_int | test_data_fp)
 def test_gather_tosa_INT(test_data: input_params):
     pipeline = TosaPipelineINT[input_params](
diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py
index 1a2f71183bb..3368864564d 100644
--- a/backends/arm/test/ops/test_repeat.py
+++ b/backends/arm/test/ops/test_repeat.py
@@ -85,6 +85,18 @@ def forward(self, x: torch.Tensor):
         (torch.randn(1, 1, 2, 2, dtype=torch.float16),),
     ),
 }
+test_data_suite_fp8 = {
+    "2_x_2_fp8e4m3": lambda: (
+        Repeat((2, 1)),
+        (torch.randn(3, 4, dtype=torch.float32).to(torch.float8_e4m3fn),),
+        "fp8e4m3",
+    ),
+    "4_x_4_fp8e5m2": lambda: (
+        Repeat((1, 2, 3, 2)),
+        (torch.randn(1, 1, 2, 2, dtype=torch.float32).to(torch.float8_e5m2),),
+        "fp8e5m2",
+    ),
+}
 
 
 @common.parametrize(
@@ -102,6 +114,19 @@ def test_repeat_tosa_FP(test_data: Tuple):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_suite_fp8)
+def test_repeat_tosa_FP_fp8(test_data: Tuple):
+    module, test_data, tosa_extension = test_data()
+    pipeline = TosaPipelineFP[input_t1](
+        module,
+        test_data,
+        module.aten_op,
+        exir_op=[],
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_suite)
 def test_repeat_tosa_INT(test_data: Tuple):
     module, test_data = test_data()
diff --git a/backends/arm/test/ops/test_slice.py b/backends/arm/test/ops/test_slice.py
index 090d8abb56a..28c9731a6aa 100644
--- a/backends/arm/test/ops/test_slice.py
+++ b/backends/arm/test/ops/test_slice.py
@@ -50,6 +50,18 @@
         [(0, 1), (0, 5), (3, 5), (4, 10)],
     ),
 }
+test_data_suite_fp8 = {
+    "ones_slice_4_fp8e4m3": lambda: (
+        torch.ones((1, 12, 10, 10), dtype=torch.float32).to(torch.float8_e4m3fn),
+        [(0, 1), (0, 5), (3, 5), (4, 10)],
+        "fp8e4m3",
+    ),
+    "ones_slice_4_fp8e5m2": lambda: (
+        torch.ones((1, 12, 10, 10), dtype=torch.float32).to(torch.float8_e5m2),
+        [(0, 1), (0, 5), (3, 5), (4, 10)],
+        "fp8e5m2",
+    ),
+}
 
 
 class Slice(torch.nn.Module):
@@ -72,6 +84,20 @@ def test_slice_tensor_tosa_FP_bf16(test_data: torch.Tensor):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_suite_fp8)
+def test_slice_tensor_tosa_FP_fp8(test_data):
+    input_data, slices, tosa_extension = test_data()
+    pipeline = TosaPipelineFP[input_t1](
+        Slice(),
+        (input_data, slices),
+        aten_op,
+        exir_op,
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.count_tosa_ops({"SLICE": 3})
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_suite)
 def test_slice_tensor_tosa_INT_nchw(test_data: torch.Tensor):
     pipeline = TosaPipelineINT[input_t1](
diff --git a/backends/arm/test/ops/test_view.py b/backends/arm/test/ops/test_view.py
index b1e62c3efef..ce5bf13f2b8 100644
--- a/backends/arm/test/ops/test_view.py
+++ b/backends/arm/test/ops/test_view.py
@@ -86,6 +86,48 @@ def test_view_tosa_FP(test_data: Tuple):
     pipeline.run()
 
 
+class ViewPermuteFP8(torch.nn.Module):
+    def __init__(self, new_shape: tuple[int, ...], dims: tuple[int, ...]):
+        super().__init__()
+        self.new_shape = new_shape
+        self.dims = dims
+
+    def forward(self, x: torch.Tensor):
+        # Use permute to keep the graph lowerable for FP8 tests,
+        # since the mul used in View is not supported with FP8.
+        return x.view(self.new_shape).permute(self.dims)
+
+
+@common.parametrize(
+    "test_data",
+    {
+        "view_permute_fp8e4m3": lambda: (
+            torch.rand((2, 3, 4), dtype=torch.float32).to(torch.float8_e4m3fn),
+            (2, 4, 3),
+            (0, 2, 1),
+            "fp8e4m3",
+        ),
+        "view_permute_fp8e5m2": lambda: (
+            torch.rand((2, 3, 4), dtype=torch.float32).to(torch.float8_e5m2),
+            (2, 4, 3),
+            (0, 2, 1),
+            "fp8e5m2",
+        ),
+    },
+)
+def test_view_tosa_FP_fp8_permute(test_data: Tuple):
+    test_tensor, new_shape, dims, tosa_extension = test_data()
+    pipeline = TosaPipelineFP[input_t1](
+        ViewPermuteFP8(new_shape, dims),
+        (test_tensor,),
+        ["torch.ops.aten.view.default", "torch.ops.aten.permute.default"],
+        exir_op=[],
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.count_tosa_ops({"RESHAPE": 1, "TRANSPOSE": 1})
+    pipeline.run()
+
+
 @common.parametrize("test_data", View.test_suite)
 def test_view_tosa_INT(test_data: Tuple):
     test_tensor, new_shape = test_data()
diff --git a/backends/arm/tosa/dialect/ops/gather.py b/backends/arm/tosa/dialect/ops/gather.py
index 1e1982adae3..49374142cd6 100644
--- a/backends/arm/tosa/dialect/ops/gather.py
+++ b/backends/arm/tosa/dialect/ops/gather.py
@@ -42,6 +42,8 @@ def GATHER(values: torch.Tensor, indices: torch.Tensor) -> torch.Tensor:
         torch.float16,
         torch.float32,
         torch.bfloat16,
+        torch.float8_e4m3fn,
+        torch.float8_e5m2,
     )
     if values.dtype not in allowed_values_dtypes:
         raise TosaValueError(
@@ -57,6 +59,16 @@ def GATHER(values: torch.Tensor, indices: torch.Tensor) -> torch.Tensor:
                 op="GATHER",
             )
     else:
+        required_extension = {
+            torch.bfloat16: "bf16",
+            torch.float8_e4m3fn: "fp8e4m3",
+            torch.float8_e5m2: "fp8e5m2",
+        }.get(values.dtype)
+        if required_extension and not tosa_spec.support_extension(required_extension):
+            raise TosaValueError(
+                f"dtype {values.dtype} requires {required_extension} extension.",
+                op="GATHER",
+            )
         # Support in FP profile, or INT profile via quantization
         if not (tosa_spec.support_float() or tosa_spec.support_integer()):
             raise TosaValueError(
diff --git a/backends/arm/tosa/dialect/ops/pad.py b/backends/arm/tosa/dialect/ops/pad.py
index db2cab6fcfc..3b5628b0ede 100644
--- a/backends/arm/tosa/dialect/ops/pad.py
+++ b/backends/arm/tosa/dialect/ops/pad.py
@@ -33,6 +33,10 @@ def PAD(a: torch.Tensor, padding: List[int | torch.SymInt], *, value):
         supported_dtypes.update({torch.float16, torch.float32})
     if tosa_spec.support_extension("bf16"):
         supported_dtypes.add(torch.bfloat16)
+    if tosa_spec.support_extension("fp8e4m3"):
+        supported_dtypes.add(torch.float8_e4m3fn)
+    if tosa_spec.support_extension("fp8e5m2"):
+        supported_dtypes.add(torch.float8_e5m2)
     if a.dtype not in supported_dtypes:
         raise TosaValueError(
             f"Input tensor dtype {a.dtype} is not supported by the target TOSA specification."
diff --git a/backends/arm/tosa/dialect/ops/slice.py b/backends/arm/tosa/dialect/ops/slice.py
index 553c8dd489e..3406ccf911b 100644
--- a/backends/arm/tosa/dialect/ops/slice.py
+++ b/backends/arm/tosa/dialect/ops/slice.py
@@ -52,6 +52,10 @@ def SLICE(a, start, size):
         supported_dtypes += [torch.float16, torch.float32]
     if tosa_spec.support_extension("bf16"):
         supported_dtypes += [torch.bfloat16]
+    if tosa_spec.support_extension("fp8e4m3"):
+        supported_dtypes += [torch.float8_e4m3fn]
+    if tosa_spec.support_extension("fp8e5m2"):
+        supported_dtypes += [torch.float8_e5m2]
 
     if a.dtype not in supported_dtypes:
         raise TosaValueError(

From 990d9d198ac3aaab4403ed340d14e593ddf10dac Mon Sep 17 00:00:00 2001
From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com>
Date: Thu, 28 May 2026 11:52:24 +0200
Subject: [PATCH 057/103] Arm backend: Add cmsis_nn fallback example (#19768)

Describes how the Ethos-U and Cortex-M backend can be used together to
accelerate e.g. op configurations not supported on Ethos-U55, and common
pitfalls to consider in doing this.


Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 .../ethos_u_cmsis_nn_fallback_example.ipynb   | 262 ++++++++++++++++++
 1 file changed, 262 insertions(+)
 create mode 100644 examples/arm/ethos_u_cmsis_nn_fallback_example.ipynb

diff --git a/examples/arm/ethos_u_cmsis_nn_fallback_example.ipynb b/examples/arm/ethos_u_cmsis_nn_fallback_example.ipynb
new file mode 100644
index 00000000000..0dd8f7045fb
--- /dev/null
+++ b/examples/arm/ethos_u_cmsis_nn_fallback_example.ipynb
@@ -0,0 +1,262 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright 2026 Arm Limited and/or its affiliates.\n",
+    "#\n",
+    "# This source code is licensed under the BSD-style license found in the\n",
+    "# LICENSE file in the root directory of this source tree."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Ethos-U55 with CMSIS-NN fallback example\n",
+    "\n",
+    "This guide demonstrates the current full flow for handling operators which does not lower\n",
+    "to the Ethos-U55 using the Cortex-M backend to make sure they use accelerated CMSIS-NN implementations. \n",
+    "The basic idea is that the Ethos-U backend will reject any nodes which are not supported,\n",
+    "leaving them to be handled by the Cortex-M backend.\n",
+    "\n",
+    "Before you begin: Make sure you have completed the `ethos_u_minimal_example` for a\n",
+    "basic understanding of the Ethos-U backend and have your environment setup. \n",
+    "\n",
+    "\n",
+    "*Some scripts in this notebook produces long output logs: Configuring the 'Customizing Notebook Layout' settings to enable 'Output:scrolling' and setting 'Output:Text Line Limit' makes this more manageable*"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "\n",
+    "The first step is creating a simple model which does not fully lower to the Ethos-U55.\n",
+    "Importantly it is exported with channels_last data, since the Cortex-M backend currently\n",
+    "only supports lowering operators in that data-format.  \n",
+    "\n",
+    "Constraints for the basic operations performed by the Ethos-U55 can be found in the\n",
+    "[Ethos-U Vela repository](https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/blob/main/SUPPORTED_OPS.md?ref_type=heads#ethos-u55-and-ethos-u65-tosa-conv2d-constraints). Note that the listed operators does not map exactly to PyTorch operators, but rather a subset found in\n",
+    "the graph after decompositions in the Ethos-U backend."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from executorch.backends.arm.ethosu import EthosUCompileSpec, EthosUPartitioner\n",
+    "from executorch.backends.arm.quantizer import (\n",
+    "    EthosUQuantizer,\n",
+    "    get_symmetric_quantization_config,\n",
+    ")\n",
+    "from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager\n",
+    "from executorch.exir import (\n",
+    "    EdgeCompileConfig,\n",
+    "    ExecutorchBackendConfig,\n",
+    "    to_edge_transform_and_lower,\n",
+    ")\n",
+    "from executorch.extension.export_util.utils import save_pte_program\n",
+    "from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e\n",
+    "\n",
+    "target = \"ethos-u55-128\"\n",
+    "output_path = \"ethos_u_cmsis_nn_fallback_example.pte\"\n",
+    "\n",
+    "class ToyMixedModule(torch.nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.conv1 = torch.nn.Conv2d(\n",
+    "            in_channels=3,\n",
+    "            out_channels=4,\n",
+    "            kernel_size=3,\n",
+    "            stride=1,\n",
+    "            padding=1,\n",
+    "            bias=False,\n",
+    "        )\n",
+    "        self.conv2 = torch.nn.Conv2d(\n",
+    "            in_channels=4,\n",
+    "            out_channels=1,\n",
+    "            kernel_size=3,\n",
+    "            stride=4,\n",
+    "            padding=1,\n",
+    "            bias=False,\n",
+    "        ) # Stride=4 not supported on Ethos-U55\n",
+    "\n",
+    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
+    "        x = self.conv1(x)\n",
+    "        x = torch.relu(x)\n",
+    "        return self.conv2(x)\n",
+    "\n",
+    "model = ToyMixedModule().eval().to(memory_format=torch.channels_last)\n",
+    "example_inputs = (\n",
+    "    torch.randn(1, 3, 8, 8, dtype=torch.float32).to(memory_format=torch.channels_last),\n",
+    ")\n",
+    "exported_program = torch.export.export(model, example_inputs)\n",
+    "exported_program.module().graph.print_tabular()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Ethos-U lowering\n",
+    "\n",
+    "The Ethos-U lowering of the model is identical to the minimal example, and as expected\n",
+    "the printed graph leaves the regular `torch.nn.Conv2d` with `stride=4` and some quantization/dequantization nodes\n",
+    "outside of the Ethos_u call_delegate operator. \n",
+    "\n",
+    "One important part in this step is that this `torch.nn.Conv2d` with `stride=4` has been quantized to\n",
+    "a format supported by the Cortex-M backend by the Ethos-U quantizer even if it was not\n",
+    "delegated, since the Cortex-M backend will only lower correctly quantized operators. Would there be\n",
+    "a discrepancy, see the [quantizer tutorial](https://github.com/pytorch/executorch/blob/main/examples/arm/quantizer_tutorial.ipynb) for\n",
+    "how to configure more precise quantization."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compile_spec = EthosUCompileSpec(target=target)\n",
+    "quantizer = EthosUQuantizer(compile_spec)\n",
+    "quantizer.set_global(get_symmetric_quantization_config(is_per_channel=True))\n",
+    "\n",
+    "prepared = prepare_pt2e(exported_program.module(), quantizer)\n",
+    "prepared(*example_inputs)\n",
+    "quantized_model = convert_pt2e(prepared)\n",
+    "quantized_exported_program = torch.export.export(quantized_model, example_inputs)\n",
+    "\n",
+    "edge_program_manager = to_edge_transform_and_lower(\n",
+    "    quantized_exported_program,\n",
+    "    partitioner=[EthosUPartitioner(compile_spec)],\n",
+    "    compile_config=EdgeCompileConfig(_check_ir_validity=False),\n",
+    ")\n",
+    "\n",
+    "edge_program_manager.exported_program().graph_module.graph.print_tabular()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cortex-M lowering\n",
+    "\n",
+    "Finally the Cortex-M backend is applied, and the graph is now fully accelerated. The\n",
+    "`cortex_m_kernels` can be spotted in the printed graph."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "edge_program_manager._edge_programs[\"forward\"] = CortexMPassManager(\n",
+    "     edge_program_manager.exported_program()\n",
+    ").transform()\n",
+    "\n",
+    "executorch_program = edge_program_manager.to_executorch(\n",
+    "     config=ExecutorchBackendConfig(extract_delegate_segments=False)\n",
+    ")\n",
+    "save_pte_program(executorch_program, output_path)\n",
+    "\n",
+    "edge_program_manager.exported_program().graph_module.graph.print_tabular()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Build\n",
+    "\n",
+    "The executor runner is built as usual, making sure to link the Cortex-M dependencies. In the available\n",
+    "example executor_runner CMakeFile this is already done, with the Cortex-M kernel and kernel registration libraries\n",
+    "`cortex_m_kernels` and `cortex_m_ops_lib` corresponding to `portable_kernels` and `arm_portable_ops_lib` for the the\n",
+    "unaccelerated portable kernels. For more information about kernel registration, see the\n",
+    "[documentation](https://docs.pytorch.org/executorch/stable/kernel-library-custom-aten-kernel.html).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash \n",
+    "source arm-scratch/setup_path.sh\n",
+    "# Ensure CMake resolves the ExecuTorch checkout root regardless of caller env\n",
+    "export EXECUTORCH_ROOT=$(cd ../.. && pwd)\n",
+    "\n",
+    "# Build example executor runner application to examples/arm/ethos_u_cmsis_nn_fallback_example\n",
+    "cmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n",
+    "      -DCMAKE_BUILD_TYPE=Release \\\n",
+    "      -DET_PTE_FILE_PATH=ethos_u_cmsis_nn_fallback_example.pte \\\n",
+    "      -DTARGET_CPU=cortex-m55 \\\n",
+    "      -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \\\n",
+    "      -DMEMORY_MODE=Shared_Sram \\\n",
+    "      -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n",
+    "      -Bethos_u_cmsis_nn_fallback_example \\\n",
+    "      -S executor_runner/standalone\n",
+    "cmake --build ethos_u_cmsis_nn_fallback_example -j$(nproc) -- arm_executor_runner"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sanity check output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import subprocess\n",
+    "import re\n",
+    "\n",
+    "# Use quantized model in eager mode as reference. By default the executor runner will use 1:s as input.\n",
+    "test_inputs = (torch.ones_like(example_inputs[0]),)\n",
+    "reference_result = quantized_exported_program.module()(*test_inputs).flatten().tolist()\n",
+    "\n",
+    "# Run the lowered .pte file on FVP using helper script and extract the output numbers using regex\n",
+    "fvp_output = subprocess.run(\"../../backends/arm/scripts/run_fvp.sh --elf=ethos_u_cmsis_nn_fallback_example/arm_executor_runner --target=ethos-u55-128\", shell=True, capture_output=True)\n",
+    "lowered_result = [float(x) for x in re.findall(\"-?\\d\\.\\d{6}\" , str(fvp_output.stdout))]\n",
+    "\n",
+    "print(reference_result)\n",
+    "print(lowered_result)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv (3.10.15)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From c505aa534448371146e881b6305349d8143138a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= <mans.nilsson@arm.com>
Date: Thu, 28 May 2026 12:07:30 +0200
Subject: [PATCH 058/103] Xnnpack: Support clone.default with
 skip_dim_order=True (#19797)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the default XNNPACK test config, skip_dim_order=False rewrites
aten.clone.default to dim_order_ops._clone_dim_order.default. That path
is already supported through CloneDimOrderConfig.

Some XNNPACK export flows use skip_dim_order=True, where
aten.clone.default stays as aten.clone.default and is not selected by
the partitioner.

Adds CloneConfig for dim-order-preserving aten.clone.default nodes so
this path is partitioned directly.

This reduces delegate splits in the EdgeTAM mask decoder, where
profiling exports use skip_dim_order=True.


cc @digantdesai @freddan80 @per @zingo @oscarandersson8218
@Sebastian-Larsson @robell @rascani

Signed-off-by: Måns Nilsson <mans.nilsson@arm.com>
---
 backends/xnnpack/operators/op_clone.py        | 19 +++++++++---
 backends/xnnpack/partition/config/__init__.py |  3 ++
 .../partition/config/generic_node_configs.py  | 21 +++++++++++++
 backends/xnnpack/test/ops/test_clone.py       | 30 ++++++++++++++++++-
 4 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/backends/xnnpack/operators/op_clone.py b/backends/xnnpack/operators/op_clone.py
index e4ddf187ecc..c36d750148c 100644
--- a/backends/xnnpack/operators/op_clone.py
+++ b/backends/xnnpack/operators/op_clone.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -13,6 +14,7 @@
     NodeVisitor,
     register_node_visitor,
 )
+from executorch.backends.xnnpack.operators.quant_params import QuantParams
 from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
     XNNCopy,
     XNNGraph,
@@ -25,9 +27,6 @@
 class CloneVisitor(NodeVisitor):
     target = "aten.clone.default"
 
-    def __init__(self, *args) -> None:
-        super().__init__(*args)
-
     def define_node(
         self,
         node: torch.fx.Node,
@@ -35,7 +34,19 @@ def define_node(
         vals_to_ids: Dict[torch.fx.Node, int],
         debug_handle: int,
     ) -> None:
-        self.define_nodes_tensor_inputs_outputs(node, xnn_graph, vals_to_ids)
+        self.define_tensor(
+            node,
+            xnn_graph,
+            vals_to_ids,
+            quant_params=QuantParams.from_outputs(node),
+        )
+        input_node = get_input_node(node, 0)
+        self.define_tensor(
+            input_node,
+            xnn_graph,
+            vals_to_ids,
+            quant_params=QuantParams.from_inputs(input_node, self._exported_program),
+        )
 
         # Sanity check that the input and output dim order are the same. We don't
         # handle dim order conversions yet.
diff --git a/backends/xnnpack/partition/config/__init__.py b/backends/xnnpack/partition/config/__init__.py
index d0a3e94bbc9..c6c54f083d6 100644
--- a/backends/xnnpack/partition/config/__init__.py
+++ b/backends/xnnpack/partition/config/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -23,6 +24,7 @@
     CatConfig,
     CeilConfig,
     ClampConfig,
+    CloneConfig,
     CloneDimOrderConfig,
     ConstantPadConfig,
     CosConfig,
@@ -82,6 +84,7 @@
     BMMConfig,
     CatConfig,
     CeilConfig,
+    CloneConfig,
     CloneDimOrderConfig,
     ConstantPadConfig,
     ConvolutionConfig,
diff --git a/backends/xnnpack/partition/config/generic_node_configs.py b/backends/xnnpack/partition/config/generic_node_configs.py
index f58c8eefdbe..2f45a8bba04 100644
--- a/backends/xnnpack/partition/config/generic_node_configs.py
+++ b/backends/xnnpack/partition/config/generic_node_configs.py
@@ -239,6 +239,27 @@ def supported_precision_types(self) -> List[ConfigPrecisionType]:
         return [ConfigPrecisionType.FP32]
 
 
+class CloneConfig(GenericNodePartitionerConfig):
+    target_name = "clone.default"
+
+    def supported_precision_types(self) -> List[ConfigPrecisionType]:
+        return [ConfigPrecisionType.FP32]
+
+    def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
+        if not self.check_common_constraints(node, ep):
+            return False
+
+        input_meta = node.args[0].meta["val"]
+        output_meta = node.meta["val"]
+        input_dim_order = list(input_meta.dim_order())
+        output_dim_order = list(output_meta.dim_order())
+        if input_dim_order != output_dim_order:
+            why(node, reason="Only dim-order preserving clones are supported.")
+            return False
+
+        return True
+
+
 class ClampConfig(GenericNodePartitionerConfig):
     target_name = "clamp.default"
 
diff --git a/backends/xnnpack/test/ops/test_clone.py b/backends/xnnpack/test/ops/test_clone.py
index 0396b9b2bea..bb995a6cf1e 100644
--- a/backends/xnnpack/test/ops/test_clone.py
+++ b/backends/xnnpack/test/ops/test_clone.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -9,7 +10,8 @@
 import unittest
 
 import torch
-from executorch.backends.xnnpack.test.tester import Tester
+from executorch.backends.xnnpack.test.tester import Tester, ToEdgeTransformAndLower
+from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
 
 
 class TestClone(unittest.TestCase):
@@ -62,6 +64,32 @@ def test_fp32_clone(self):
         inputs = (torch.randn(2, 3, 4, 5),)
         self._test_clone_partitioned(inputs)
 
+    def test_fp32_clone_default_partitions_with_skip_dim_order(self):
+        """Test plain aten.clone.default partitioning without dim-order rewrite."""
+        inputs = (torch.randn(2, 3, 4, 5),)
+        (
+            Tester(self.Clone(), inputs)
+            .export()
+            .check_count({"torch.ops.aten.clone.default": 1})
+            .to_edge_transform_and_lower(
+                ToEdgeTransformAndLower(
+                    edge_compile_config=get_xnnpack_edge_compile_config(
+                        skip_dim_order=True
+                    )
+                )
+            )
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .check_not(
+                [
+                    "executorch_exir_dialects_edge__ops_aten_clone_default",
+                    "executorch_exir_dialects_edge__ops_dim_order_ops__clone_dim_order_default",
+                ]
+            )
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs()
+        )
+
     def test_fp32_clone_2d(self):
         """Test FP32 clone with 2D tensor - should be partitioned"""
         inputs = (torch.randn(10, 20),)

From 94f971911d3ced56f701887d5c0fe3b501baeac4 Mon Sep 17 00:00:00 2001
From: Oscar Andersson <87121123+oscarandersson8218@users.noreply.github.com>
Date: Thu, 28 May 2026 13:32:39 +0200
Subject: [PATCH 059/103] [exir] Materialize alloc shapes in ToOutVarPass
 (#19806)

Fix a dynamic-shape lowering bug in exir.

ConstraintBasedSymShapeEvalPass concretizes TensorSpec metadata, but
ToOutVarPass was still building memory.alloc nodes from symbolic
FakeTensor/tensor_meta shapes. That let symbolic dims leak into the
generated ExecuTorch GraphModule and caused runtime failures when the
lowered module was executed in Python.

Build memory.alloc specs from concrete upper-bounded integer shapes
instead. If an alloc shape is still not concretely bounded, raise a
clear error.

Add an EXIR regression test that exports a dynamic-shape model, runs
ConstraintBasedSymShapeEvalPass + ToOutVarPass, and verifies that
memory.alloc shapes are concrete integers.


cc @digantdesai @freddan80 @per @zingo @mansnils @Sebastian-Larsson
@robell @rascani

---------

Signed-off-by: Oscar Andersson <oscar.andersson@arm.com>
---
 .../arm/test/models/test_torch_functions.py   |  4 --
 exir/passes/__init__.py                       | 28 +++++++----
 exir/tests/test_passes.py                     | 49 +++++++++++++++++++
 3 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/backends/arm/test/models/test_torch_functions.py b/backends/arm/test/models/test_torch_functions.py
index 0ca8d3ac091..c6a4c5580dc 100644
--- a/backends/arm/test/models/test_torch_functions.py
+++ b/backends/arm/test/models/test_torch_functions.py
@@ -97,8 +97,6 @@ def forward(self, *args):
     "test_data",
     test_parameters,
     xfails={
-        "nonzero": "torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode: Could not guard on data-dependent expression Eq(u4, 0). "
-        "Requires dynamic output shape.",
         "topk": "NotImplementedError: No registered serialization name for <class 'torch.return_types.topk'> found",
         "sort": "NotImplementedError: No registered serialization name for <class 'torch.return_types.sort'> found",
     },
@@ -124,8 +122,6 @@ def test_torch_functions_tosa_FP(test_data):
     "test_data",
     test_parameters,
     xfails={
-        "nonzero": "torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode: Could not guard on data-dependent expression Eq(u4, 0). "
-        "Requires dynamic output shape.",
         "topk": "NotImplementedError: No registered serialization name for <class 'torch.return_types.topk'> found",
         "sort": "NotImplementedError: No registered serialization name for <class 'torch.return_types.sort'> found",
     },
diff --git a/exir/passes/__init__.py b/exir/passes/__init__.py
index 9b1b8efe682..ede866549b2 100644
--- a/exir/passes/__init__.py
+++ b/exir/passes/__init__.py
@@ -62,6 +62,7 @@
 
 from executorch.exir.passes.to_device_pass import ToDevicePass
 from executorch.exir.passes.weights_to_outputs_pass import weights_to_outputs_pass
+from executorch.exir.sym_util import eval_shape_upper_bound
 from torch import fx
 from torch._subclasses import FakeTensor
 from torch.fx.passes.infra.pass_base import PassBase, PassResult
@@ -281,31 +282,38 @@ def make_alloc_node(
     Note: tensor_metadata is only used in the case of a Tensor subclass, since
     fakifying a tensor subclass is not supported right now
     """
+
+    def materialize_alloc_spec(
+        shape: Union[torch.Size, Tuple[int, ...], List[int]],
+        dtype: torch.dtype,
+    ) -> memory.AllocSpec:
+        concrete_shape = eval_shape_upper_bound(shape)
+        if any(not isinstance(dim, int) for dim in concrete_shape):
+            raise RuntimeError(
+                "Memory allocator node requires concrete upper-bounded dimensions. "
+                f"Got shape {shape} and evaluated upper bounds {concrete_shape}."
+            )
+        return (tuple(concrete_shape), dtype)
+
     if val is None:
         if tensor_meta is not None:
             assert isinstance(tensor_meta, TensorMetadata)
-            alloc_spec = (tensor_meta.shape, tensor_meta.dtype)
+            alloc_spec = materialize_alloc_spec(tensor_meta.shape, tensor_meta.dtype)
         else:
             raise InternalError(
                 "Memory allocator node needs FakeTensor val or TensorMetadata to proceed"
             )
     elif isinstance(val, FakeTensor):
-        alloc_spec = (val.shape, val.dtype)
+        alloc_spec = materialize_alloc_spec(val.shape, val.dtype)
     else:
         assert isinstance(val, list) or isinstance(val, tuple)
         assert isinstance(tensor_meta, list) or isinstance(tensor_meta, tuple)
         alloc_spec: List[memory.AllocSpec] = []
         for v, t in zip(val, tensor_meta):
             if v is not None:
-                # pyre-fixme[6]: For 1st argument expected
-                #  `Union[List[Tuple[List[int], dtype]], Tuple[List[int], dtype]]` but
-                #  got `Tuple[Size, dtype]`.
-                alloc_spec.append((v.shape, v.dtype))
+                alloc_spec.append(materialize_alloc_spec(v.shape, v.dtype))
             elif t is not None:
-                # pyre-fixme[6]: For 1st argument expected
-                #  `Union[List[Tuple[List[int], dtype]], Tuple[List[int], dtype]]` but
-                #  got `Tuple[Size, dtype]`.
-                alloc_spec.append((t.shape, t.dtype))
+                alloc_spec.append(materialize_alloc_spec(t.shape, t.dtype))
             else:
                 raise InternalError(
                     "Memory allocator node needs FakeTensor val or TensorMetadata to proceed"
diff --git a/exir/tests/test_passes.py b/exir/tests/test_passes.py
index 8a084ba491a..1316dffb828 100644
--- a/exir/tests/test_passes.py
+++ b/exir/tests/test_passes.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -74,6 +75,7 @@
 )
 from executorch.exir.passes.scalar_to_tensor_pass import ScalarToTensorPass
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
+from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
 from executorch.exir.passes.sym_to_tensor_pass import SymToTensorPass
 from executorch.exir.program._program import lift_constant_tensor_pass
 from executorch.exir.schema import TensorShapeDynamism
@@ -1036,6 +1038,53 @@ def test_alloc_node_spec(self) -> None:
         for node in alloc_nodes:
             self.assertTrue(isinstance(node.meta.get("spec", None), TensorSpec))
 
+    def test_to_out_var_dynamic_alloc_uses_concrete_upper_bounds(self) -> None:
+        class DynamicRelu(nn.Module):
+            def forward(self, x):
+                return torch.relu(x)
+
+        eager_model = DynamicRelu()
+        inputs = (torch.randn(2, 4, 8, 3),)
+        dynamic_shapes = {
+            "x": {
+                0: torch.export.Dim("batch", min=0, max=2),
+                2: torch.export.Dim("height", min=0, max=8),
+                3: torch.export.Dim("width", min=0, max=8),
+            }
+        }
+        prog = to_edge(
+            export(
+                eager_model,
+                inputs,
+                dynamic_shapes=dynamic_shapes,
+                strict=True,
+            ),
+            compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
+        )
+        new_prog = prog.transform(
+            [
+                SpecPropPass(),
+                ConstraintBasedSymShapeEvalPass(),
+            ]
+        )
+
+        new_gm_res = ToOutVarPass()(new_prog.exported_program().graph_module)
+        self.assertIsNotNone(new_gm_res)
+        new_gm = new_gm_res.graph_module
+
+        alloc_nodes = []
+        for node in new_gm.graph.nodes:
+            if node.target == memory.alloc:
+                alloc_nodes.append(node)
+
+        self.assertTrue(len(alloc_nodes) > 0)
+        for node in alloc_nodes:
+            alloc_spec = node.args[0]
+            self.assertIsInstance(alloc_spec, tuple)
+            shape, _dtype = alloc_spec
+            for dim in shape:
+                self.assertIsInstance(dim, int)
+
     def test_debug_pass_file_log(self) -> None:
         eager_model = Mul()
         inputs = eager_model.get_random_inputs()

From 5ca3207e1c10d8a8841a80a12fdb65fe89a86294 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?=
 <33344797+martinlsm@users.noreply.github.com>
Date: Thu, 28 May 2026 13:41:23 +0200
Subject: [PATCH 060/103] Arm backend: Update examples/arm/README.md (#19756)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the README concise for setup, run.sh usage, example notebooks,
applications, and helper scripts. Move broader backend documentation
links to the backend README.

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
---
 backends/arm/README.md |   6 +-
 examples/arm/README.md | 206 +++++++++++++----------------------------
 2 files changed, 67 insertions(+), 145 deletions(-)

diff --git a/backends/arm/README.md b/backends/arm/README.md
index 237f2433cb5..8edd3665d44 100644
--- a/backends/arm/README.md
+++ b/backends/arm/README.md
@@ -136,8 +136,10 @@ The delegated Python API flow is:
 For complete examples of that flow, including quantization and target-specific
 compile specs, see:
 
-- `docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md`
-- `docs/source/backends/arm-vgf/tutorials/vgf-getting-started.md`
+- [Arm Ethos-U tutorial](../../docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md)
+- [Arm VGF tutorial](../../docs/source/backends/arm-vgf/tutorials/vgf-getting-started.md)
+- [Arm Cortex-M backend overview](../../docs/source/backends/arm-cortex-m/arm-cortex-m-overview.md)
+- [Ethos-U porting guide](../../examples/arm/ethos-u-porting-guide.md)
 
 Additional examples are available in `examples/arm`.
 
diff --git a/examples/arm/README.md b/examples/arm/README.md
index c5f5bb24862..07aecec51e2 100644
--- a/examples/arm/README.md
+++ b/examples/arm/README.md
@@ -5,175 +5,95 @@ This source code is licensed under the BSD-style license found in the
 LICENSE file in the root directory of this source tree.
 -->
 
-## ExecuTorch for Arm backends Ethos-U, VGF and Cortex-M
+# Examples for Arm backends Ethos-U, VGF and Cortex-M
 
-This project contains scripts to help you setup and run a PyTorch
-model on a Arm backend via ExecuTorch. This backend supports Ethos-U and VGF as 
-targets (using TOSA) but you can also use the Ethos-U example runner as an example
-on Cortex-M if you do not delegate the model.
+This directory contains documentation and scripts to
+help you setup and run a PyTorch model on the Arm backend
+via ExecuTorch.
 
-The main scripts are `setup.sh`, `run.sh` and
-`backends/arm/scripts/aot_arm_compiler.py`.
+## setup.sh
 
-`setup.sh` will install the needed tools and with --root-dir <FOLDER> 
-you can change the path to a scratch folder where it will download and generate build
-artifacts. If supplied, you must also supply the same folder to run.sh with
---scratch-dir=<FOLDER> If not supplied both scripts will use examples/arm/arm-scratch.
+`setup.sh` downloads the Arm cross-compilation toolchain and Corstone FVP
+simulators, installs the Python dependencies for TOSA, Ethos-U Vela, and
+Cortex-M/CMSIS-NN, and generates `setup_path.sh` scripts for adding those tools
+to your environment. Optional flags also install VGF/MLSDK and Vulkan
+dependencies.
 
-`run.sh` can be used to build, run and test a model in an easy way and it will call cmake for you
-and in cases you want to run a simulator it will start it also. The script will call `aot_arm_compiler.py`
-to convert a model and include it in the build/run.
-
-For bare-metal Ethos-U builds `run.sh` configures the standalone
-`examples/arm/executor_runner/standalone` CMake entry point automatically. If
-`--build-dir` is omitted, the script creates and owns a build tree under
-`arm_test/<target>_<build_type>`. Supplying `--build-dir` reuses an existing tree
-(for example a VGF host build or out-of-tree configuration) and `run.sh`
-verifies it exposes the runner options it needs before compiling.
-
-Build and test artifacts are by default placed under the folder arm_test folder
-this can be changed with --et_build_root=<FOLDER>
-
-`aot_arm_compiler.py` is used to convert a Python model or a saved .pt model to a PTE file and is used by `run.sh`
-and other test script but can also be used directly.
-
-
-## Create a PTE file for Arm backends
-
-There is an easy to use example flow to compile your PyTorch model to a PTE file for the Arm backend called `aot_arm_compiler.py`
-that you can use to generate PTE files, it can generate PTE files for the supported targets `-t` or even non delegated (Cortex-M)
-using different memory modes and can both use a python file as input or just use the models from examples/models with `--model_name`.
-It also supports generating Devtools artifacts like BundleIO BPTE files, and ETRecords. Run it with `--help` to check its capabilities.
-
-You point out the model to convert with `--model_name=<MODELNAME/FILE>` It supports running a model from examples/models or models
-from a python file if you just specify `ModelUnderTest` and `ModelInputs` in it.
-
-```
-$ python3 -m backends.arm.scripts.aot_arm_compiler --help
-```
-
-This is how you generate a BundleIO BPTE of a simple add example
+Example to install the default Arm backend dependencies and add them to your current shell:
 
+```bash
+./examples/arm/setup.sh --i-agree-to-the-contained-eula
+source examples/arm/arm-scratch/setup_path.sh
 ```
-$ python3 -m backends.arm.scripts.aot_arm_compiler --model_name=examples/arm/example_modules/add.py --target=ethos-u55-128 --bundleio
-```
-
-The example model used has added two extra variables that is picked up to make this work.
-
-`ModelUnderTest` should be a `torch.nn.module` instance.
-
-`ModelInputs` should be a tuple of inputs to the forward function.
-
-
-You can also use the models from example/models directly by just using the short name e.g.
-
-```
-$ python3 -m backends.arm.scripts.aot_arm_compiler --model_name=mv2 --target=ethos-u55-64
-```
-
-
-`aot_arm_compiler.py` is called from the scripts below so you don't need to, but it can be useful to do by hand in some cases.
 
-## Host VGF example applications
+## run.sh
 
-The Arm examples directory also contains host-side VGF reference flows for
-specific tasks:
+`run.sh` is an end-to-end helper for building and executing an Arm backend
+example. It sources the `setup_path.sh` script generated by `setup.sh`, runs
+`aot_arm_compiler.py` to convert the selected model to a `.pte` or `.bpte`,
+builds the matching runner with CMake, and starts the simulator or runtime for
+the selected target when `--build_only` is not set.
 
-- `examples/arm/image_classification_example_vgf` for DEiT image
-  classification.
-- `examples/arm/super_resolution_example_vgf` for Swin2SR image
-  super-resolution.
-
-
-## ExecuTorch on Arm Ethos-U55/U65 and U85
-
-This example code will help you get going with the Corstone&trade;-300/320 platforms and
-run on the FVP and can be used a starting guide in your porting to your board/HW
-
-We will start from a PyTorch model in python, export it, convert it to a `.pte`
-file - A binary format adopted by ExecuTorch. Then we will take the `.pte`
-model file and embed that with a baremetal application executor_runner. We will
-then take the executor_runner file, which contains not only the `.pte` binary but
-also necessary software components to run standalone on a baremetal system.
-The build flow will pick up the non delegated ops from the generated PTE file and 
-add CPU implementation of them. 
-Lastly, we will run the executor_runner binary on a Corstone&trade;-300/320 FVP Simulator platform.
-
-
-### Example workflow
-
-Below is example workflow to build an application for Ethos-U55/85. The script below requires an internet connection:
-
-```
-# Step [1] - setup necessary tools
-$ cd <EXECUTORCH-ROOT-FOLDER>
-$ ./examples/arm/setup.sh --i-agree-to-the-contained-eula
-
-# Step [2] - Setup path to tools, The `setup.sh` script has generated a script that you need to source every time you restart you shell.
-$ source  examples/arm/arm-scratch/setup_path.sh
+Build and test artifacts are written to `arm_test` by default. Use
+`--et_build_root=<FOLDER>` to choose another build root.
 
-# Step [3] - build and run ExecuTorch and executor_runner baremetal example application
-# on a Corstone(TM)-320 FVP to run a simple PyTorch model from a file.
-$ ./examples/arm/run.sh --model_name=examples/arm/example_modules/add.py --target=ethos-u85-128
-```
-
-The argument `--model_name=<MODEL>` is passed to `aot_arm_compiler.py` so you can use it in the same way
-e.g. you can also use the models from example/models directly in the same way as above.
+For example, after running `setup.sh` and sourcing the generated
+`setup_path.sh`, build and run a model on an Ethos-U85 target with:
 
-```
-$ ./examples/arm/run.sh --model_name=mv2 --target=ethos-u55-64
+```bash
+./examples/arm/run.sh --model_name=examples/arm/example_modules/add.py --target=ethos-u85-128
 ```
 
-The runner will by default set all inputs to "1" and you are supposed to add/change the code
-handling the input for your hardware target to give the model proper input, maybe from your camera
-or mic hardware.
+For bundled input/output and ETDump testing:
 
-While testing you can use the --bundleio flag to use the input from the python model file and
-generate a .bpte instead of a .pte file. This will embed the input example data and reference output
-in the bpte file/data, which is used to verify the model's output. You can also use --etdump to generate
-an ETRecord and a ETDump trace files from your target (they are printed as base64 strings in the serial log).
-
-Just keep in mind that CPU cycles are NOT accurate on the FVP simulator and it can not be used for
-performance measurements, so you need to run on FPGA or actual ASIC to get good results from --etdump.
-As a note the printed NPU cycle numbers are still usable and closer to real values if the timing
-adaptor is setup correctly.
-
-```
-# Build + run with BundleIO and ETDump
-$ ./examples/arm/run.sh --model_name=lstm --target=ethos-u85-128 --bundleio --etdump
+```bash
+./examples/arm/run.sh --model_name=lstm --target=ethos-u85-128 --bundleio --etdump
 ```
 
+For Cortex-M testing, use a Cortex-M target and bundled I/O:
 
-### Ethos-U minimal example
-
-See the jupyter notebook `ethos_u_minimal_example.ipynb` for an explained minimal example of the full flow for running a
-PyTorch module on the EthosUDelegate. The notebook runs directly in some IDE:s s.a. VS Code, otherwise it can be run in
-your browser using
-```
-pip install jupyter
-jupyter notebook ethos_u_minimal_example.ipynb
+```bash
+./examples/arm/run.sh --model_name=mv2 --target=cortex-m55 --bundleio
 ```
 
-## ExecuTorch on ARM Cortex-M
+## Example Contents
 
-For Cortex-M you run the script without delegating e.g `--no_delegate` as the build flow already supports picking up
-the non delegated ops from the generated PTE file and add CPU implementation of them this will work out of the box in
-most cases.
+### Notebook examples
 
-To run mobilenet_v2 on the Cortex-M55 only, without using the Ethos-U try this:
+- [ethos_u_minimal_example.ipynb](ethos_u_minimal_example.ipynb) - Minimal
+  Ethos-U AOT, runtime build, and FVP execution flow.
+- [vgf_minimal_example.ipynb](vgf_minimal_example.ipynb) - Minimal VGF
+  lowering and host execution flow.
+- [cortex_m_mv2_example.ipynb](cortex_m_mv2_example.ipynb) - Cortex-M
+  MobileNetV2 export, quantization, runtime build, and FVP execution flow.
+- [pruning_minimal_example.ipynb](pruning_minimal_example.ipynb) - Model
+  conditioning and pruning flow for Ethos-U85.
+- [quantizer_tutorial.ipynb](quantizer_tutorial.ipynb) - Quantizer tutorial
+  for TOSA, Ethos-U, and VGF quantizers.
 
-```
-$ ./examples/arm/run.sh --model_name=mv2 --target=ethos-u55-128 --no_delegate
-```
+### Application examples
 
+- [image_classification_example_ethos_u](image_classification_example_ethos_u/)
+  - End-to-end DEiT-Tiny image classification flow for Ethos-U, including
+  model fine-tuning, export, bare-metal runtime build, and Corstone-320 FVP
+  execution.
+- [image_classification_example_vgf](image_classification_example_vgf/) -
+  DEiT-Tiny image classification flow for VGF host execution.
+- [super_resolution_example_vgf](super_resolution_example_vgf) - Swin2SR image
+  super-resolution.
+- [example_modules/add.py](example_modules/add.py) - Small external model file
+  usable with `run.sh --model_name=examples/arm/example_modules/add.py`.
 
-### Online Tutorial
+### Utility examples and guides
 
-We also have a [tutorial](https://pytorch.org/executorch/stable/backends-arm-ethos-u) explaining the steps performed in these
-scripts, expected results, possible problems and more. It is a step-by-step guide
-you can follow to better understand this delegate.
+- [ethos-u-porting-guide.md](ethos-u-porting-guide.md) - Notes for adapting
+  the example Ethos-U runtime integration to another target.
+- [export_standalone_tosa_graph.py](export_standalone_tosa_graph.py) -
+  Example of exporting a standalone TOSA graph with multiple outputs.
+- [visualize.py](visualize.py) - Helper used by `run.sh --model_explorer` to
+  visualize TOSA or PTE graphs.
 
-### Project Templates
+## Project Templates
 
 These project templates provide alternative starting points with different toolchains and build systems:
 

From 96b19af7744debd62f8cac2579a03de18069e36d Mon Sep 17 00:00:00 2001
From: Erik Lundell <erik.lundell@arm.com>
Date: Thu, 28 May 2026 14:20:00 +0200
Subject: [PATCH 061/103] Arm backend: Guard empty cmake arg array in
 build_executorch (#19840)

Avoid expanding extra_cmake_args when the array is empty.

Older Bash versions on macOS treat an empty array expansion under set -u
as an unbound variable. Append the extra CMake arguments only when the
array is non-empty so the script behaves the same on Linux and macOS.

Signed-off-by: Erik Lundell <erik.lundell@arm.com>
---
 backends/arm/scripts/build_executorch.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh
index 5ac2674f964..5ebc0eb46b4 100755
--- a/backends/arm/scripts/build_executorch.sh
+++ b/backends/arm/scripts/build_executorch.sh
@@ -96,9 +96,12 @@ cmake_args=(
     -DEXECUTORCH_BUILD_DEVTOOLS=${build_devtools}
     -DEXECUTORCH_BUILD_ARM_ETDUMP=${build_with_etdump}
     -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF
-    "${extra_cmake_args[@]}"
 )
 
+if [[ ${#extra_cmake_args[@]} -gt 0 ]]; then
+    cmake_args+=("${extra_cmake_args[@]}")
+fi
+
 if [[ -n "${target_cpu}" ]]; then
     cmake_args+=(-DTARGET_CPU=${target_cpu})
 fi

From b903c30c046676c8f38df3caef8e4da44ed2b170 Mon Sep 17 00:00:00 2001
From: Erik Lundell <erik.lundell@arm.com>
Date: Thu, 28 May 2026 14:21:37 +0200
Subject: [PATCH 062/103] Arm backend: Fix vgf_quant swin test op-count and
 test vgf models in trunk job. (#19841)

---
 .github/workflows/trunk.yml                  | 1 +
 backends/arm/test/models/test_swin2sr_arm.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 5a6720cdfad..cca1fe5fe45 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -278,6 +278,7 @@ jobs:
       matrix:
         include:
           - test_arm_backend: test_pytest_ops_vkml
+          - test_arm_backend: test_pytest_models_vkml
           - test_arm_backend: test_ootb_tests_vgf
       fail-fast: false
     with:
diff --git a/backends/arm/test/models/test_swin2sr_arm.py b/backends/arm/test/models/test_swin2sr_arm.py
index e4fc6f07950..5fd29943b94 100644
--- a/backends/arm/test/models/test_swin2sr_arm.py
+++ b/backends/arm/test/models/test_swin2sr_arm.py
@@ -42,6 +42,9 @@
     "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 5,
     "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 6,
 }
+swin2sr_vgf_quant_lowered_outer_graph_ops = {
+    "torch.ops.higher_order.executorch_call_delegate": 1,
+}
 
 
 class TinySwin2SR(torch.nn.Module):
@@ -110,7 +113,7 @@ def test_swin2sr_vgf_quant():
         quantize=True,
         run_on_vulkan_runtime=sys.platform == "linux",
     )
-    pipeline.change_args("check_count.exir", swin2sr_int_lowered_outer_graph_ops)
+    pipeline.change_args("check_count.exir", swin2sr_vgf_quant_lowered_outer_graph_ops)
     pipeline.run()
 
 
From acce7cd6f1558132e40edd9a25b12febaf7beb79 Mon Sep 17 00:00:00 2001
From: robert-kalmar <robert.kalmar@nxp.com>
Date: Thu, 28 May 2026 17:00:49 +0200
Subject: [PATCH 063/103] NXP Backend: Force backend (NeutronBackend)
 destructor call before neutronDeInit() (#19795)

### Summary
The `NeutronBackend::destroy` function shall be called before the
Neutron driver's `neutronDeInit()` function to avoid double free. At
this moment the ExecuTorch does not provide means to destroy the backend
or the method outside of the method's desctructor.

### Test plan
With upcomming eIQ Neutron SDK 3.1.2 the nxp-executor-runner crash, so
existing unit tests covers this problem.

cc @JakeStevens @digantdesai @rascani
---
 .../executor_runner/nxp_executor_runner.cpp   | 183 +++++++++---------
 1 file changed, 93 insertions(+), 90 deletions(-)

diff --git a/examples/nxp/executor_runner/nxp_executor_runner.cpp b/examples/nxp/executor_runner/nxp_executor_runner.cpp
index 65f5831e5c5..52d7c778227 100644
--- a/examples/nxp/executor_runner/nxp_executor_runner.cpp
+++ b/examples/nxp/executor_runner/nxp_executor_runner.cpp
@@ -384,71 +384,30 @@ int main(int argc, char* argv[]) {
   torch::executor::MemoryManager memory_manager(
       &method_allocator, &planned_memory, &tmp_allocator);
 
-  Result<torch::executor::Method> method =
-      program->load_method(method_name, &memory_manager);
-  if (!method.ok()) {
-    fprintf(
-        stderr,
-        "Loading of method (%s) failed with status %" PRIu32 "...\n",
-        method_name,
-        (unsigned int)method.error());
-    exit(-1);
-  }
-  printf("Method loaded...\n");
-
-  Error status = Error::Ok;
-  if (!FLAGS_dataset.empty()) {
-    // Go through entire dataset for this model.
-    FLAGS_dataset += "/";
-    while (dataset = readdir(datasetDir)) {
-      if (!strcmp(dataset->d_name, ".") || !strcmp(dataset->d_name, ".."))
-        continue;
-
-      std::vector<std::string> inputsData;
-      inputsData.push_back(FLAGS_dataset + dataset->d_name);
-      // Set input and call inferrence.
-      setInputs(method.get(), inputsData);
-
-      status = method->execute();
-      if (status != Error::Ok) {
-        fprintf(
-            stderr,
-            "Execution of method %s failed with status %" PRIu32 "...\n",
-            method_name,
-            (unsigned int)status);
-        exit(-1);
-      } else {
-        printf("Method executed successfully...\n");
-      }
-
-      // Save outputs in binary files.
-      saveOutputs(method.get(), FLAGS_output, dataset->d_name);
-      // Print result with highest confidence.
-      printOutput(method.get(), FLAGS_output, dataset->d_name);
+  {
+    Result<torch::executor::Method> method =
+        program->load_method(method_name, &memory_manager);
+    if (!method.ok()) {
+      fprintf(
+          stderr,
+          "Loading of method (%s) failed with status %" PRIu32 "...\n",
+          method_name,
+          (unsigned int)method.error());
+      exit(-1);
     }
-    closedir(datasetDir);
-  } else if (!FLAGS_inputs.empty()) {
-    std::vector<std::string> inputPaths;
-
-    // Validate and process inputs and separate into two lists.
-    processInputs(inputPaths, FLAGS_inputs);
-
-    if (std::all_of(inputPaths.begin(), inputPaths.end(), isDirectory)) {
-      // Inputs are in directories - use files in each directory as the inputs.
-      std::vector<std::string> inputsData;
-      for (std::string& inputDir : inputPaths) {
-        datasetDir = opendir(inputDir.c_str());
-        while (dataset = readdir(datasetDir)) {
-          if (!strcmp(dataset->d_name, ".") || !strcmp(dataset->d_name, ".."))
-            continue;
-
-          inputsData.push_back(inputDir + "/" + dataset->d_name);
-        }
-        closedir(datasetDir);
-
-        // Sort inputsData to ensure correct input ordering
-        std::sort(inputsData.begin(), inputsData.end());
-
+    printf("Method loaded...\n");
+
+    Error status = Error::Ok;
+    if (!FLAGS_dataset.empty()) {
+      // Go through entire dataset for this model.
+      FLAGS_dataset += "/";
+      while (dataset = readdir(datasetDir)) {
+        if (!strcmp(dataset->d_name, ".") || !strcmp(dataset->d_name, ".."))
+          continue;
+
+        std::vector<std::string> inputsData;
+        inputsData.push_back(FLAGS_dataset + dataset->d_name);
+        // Set input and call inferrence.
         setInputs(method.get(), inputsData);
 
         status = method->execute();
@@ -463,37 +422,81 @@ int main(int argc, char* argv[]) {
           printf("Method executed successfully...\n");
         }
 
-        if (inputDir.back() == '/')
-          inputDir.pop_back();
-
-        auto pos = inputDir.find_last_of('/');
-        if (pos != std::string::npos)
-          inputDir = inputDir.substr(pos + 1);
-
         // Save outputs in binary files.
-        saveOutputs(method.get(), FLAGS_output, inputDir.c_str());
-        inputsData.clear();
+        saveOutputs(method.get(), FLAGS_output, dataset->d_name);
+        // Print result with highest confidence.
+        printOutput(method.get(), FLAGS_output, dataset->d_name);
       }
-    } else {
-      // Inputs are files.
-      setInputs(method.get(), inputPaths);
-
-      status = method->execute();
-      if (status != Error::Ok) {
-        fprintf(
-            stderr,
-            "Execution of method %s failed with status %" PRIu32 "...\n",
-            method_name,
-            (unsigned int)status);
-        exit(-1);
+      closedir(datasetDir);
+    } else if (!FLAGS_inputs.empty()) {
+      std::vector<std::string> inputPaths;
+
+      // Validate and process inputs and separate into two lists.
+      processInputs(inputPaths, FLAGS_inputs);
+
+      if (std::all_of(inputPaths.begin(), inputPaths.end(), isDirectory)) {
+        // Inputs are in directories - use files in each directory as the
+        // inputs.
+        std::vector<std::string> inputsData;
+        for (std::string& inputDir : inputPaths) {
+          datasetDir = opendir(inputDir.c_str());
+          while (dataset = readdir(datasetDir)) {
+            if (!strcmp(dataset->d_name, ".") || !strcmp(dataset->d_name, ".."))
+              continue;
+
+            inputsData.push_back(inputDir + "/" + dataset->d_name);
+          }
+          closedir(datasetDir);
+
+          // Sort inputsData to ensure correct input ordering
+          std::sort(inputsData.begin(), inputsData.end());
+
+          setInputs(method.get(), inputsData);
+
+          status = method->execute();
+          if (status != Error::Ok) {
+            fprintf(
+                stderr,
+                "Execution of method %s failed with status %" PRIu32 "...\n",
+                method_name,
+                (unsigned int)status);
+            exit(-1);
+          } else {
+            printf("Method executed successfully...\n");
+          }
+
+          if (inputDir.back() == '/')
+            inputDir.pop_back();
+
+          auto pos = inputDir.find_last_of('/');
+          if (pos != std::string::npos)
+            inputDir = inputDir.substr(pos + 1);
+
+          // Save outputs in binary files.
+          saveOutputs(method.get(), FLAGS_output, inputDir.c_str());
+          inputsData.clear();
+        }
       } else {
-        printf("Method executed successfully...\n");
-      }
+        // Inputs are files.
+        setInputs(method.get(), inputPaths);
+
+        status = method->execute();
+        if (status != Error::Ok) {
+          fprintf(
+              stderr,
+              "Execution of method %s failed with status %" PRIu32 "...\n",
+              method_name,
+              (unsigned int)status);
+          exit(-1);
+        } else {
+          printf("Method executed successfully...\n");
+        }
 
-      // Save outputs in binary files.
-      saveOutputs(method.get(), FLAGS_output);
+        // Save outputs in binary files.
+        saveOutputs(method.get(), FLAGS_output);
+      }
     }
-  }
+  } // Destruct the method object before destroying the Neutron Device.
 
   printf("Finished...\n");
 

From 463fbe4407eee8f5f3c70fed1a50f9d8afb206c8 Mon Sep 17 00:00:00 2001
From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com>
Date: Thu, 28 May 2026 18:41:05 +0200
Subject: [PATCH 064/103] Add general Aten lowering pass (#19837)

Adds a simple pass for replacing single Aten ops with corresponding
dialect ops to be reused across multiple backends.

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
---
 backends/transforms/aten_to_dialect_pass.py   | 138 ++++++++++
 backends/transforms/targets.bzl               |  25 ++
 .../test/test_aten_to_dialect_pass.py         | 239 ++++++++++++++++++
 3 files changed, 402 insertions(+)
 create mode 100644 backends/transforms/aten_to_dialect_pass.py
 create mode 100644 backends/transforms/test/test_aten_to_dialect_pass.py

diff --git a/backends/transforms/aten_to_dialect_pass.py b/backends/transforms/aten_to_dialect_pass.py
new file mode 100644
index 00000000000..f31df73bc58
--- /dev/null
+++ b/backends/transforms/aten_to_dialect_pass.py
@@ -0,0 +1,138 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import traceback
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import ClassVar, TypeAlias
+
+import torch
+
+from executorch.backends.xnnpack._passes.xnnpack_pass import ExportPass
+
+from executorch.exir import ExportedProgram
+from torch.fx.node import Target
+from torch.fx.passes.infra.pass_manager import PassResult
+
+
+# Expected type to be returned by substitution functions.
+@dataclass
+class DialectNodeSpec:
+    op: Target
+    args: tuple
+    kwargs: dict = None
+
+
+# Expected type to be used for substitution functions
+SubstitutionFn: TypeAlias = Callable[
+    [torch.fx.Node, torch.export.ExportedProgram], DialectNodeSpec | None
+]
+
+
+class AtenToDialectPass(ExportPass):
+    """
+    General pass to convert ops 1-1 from ATen to a specific dialect.
+
+    Usage:
+        1. Subclass the pass for a specific dialect
+        2. For each ATen target to be substituted, implement a function returning a DialectNodeSpec defining the
+           corresponding dialect op, or None if the substitution does not apply.
+        3. Register each substitution function for the subclass using the decorator register_dialect_substitution
+
+    Only one substitution function can be registered for a given target.
+
+    The pass must be initialized with an exported_program to allow substitution functions to modify placeholders,
+    e.g. if the dialect ops require additional scratch buffers.
+    """
+
+    _DIALECT_SUBSTITUTIONS: ClassVar[dict[Target, SubstitutionFn]] = {}
+
+    def __init__(self, exported_program: ExportedProgram):
+        super().__init__()
+        self.exported_program: ExportedProgram = exported_program
+
+    # Ensure each subclass has its own substitution registry.
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        cls._DIALECT_SUBSTITUTIONS = {}
+
+    @classmethod
+    def register_dialect_substitution(
+        cls, target: Target
+    ) -> Callable[[SubstitutionFn], SubstitutionFn]:
+
+        def decorator(func: SubstitutionFn) -> SubstitutionFn:
+            if target in cls._DIALECT_SUBSTITUTIONS:
+                raise RuntimeError(
+                    f"Multiple substitutions registered for the same target in {cls.__name__} are not allowed."
+                )
+            else:
+                cls._DIALECT_SUBSTITUTIONS[target] = func
+            return func
+
+        return decorator
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        modified = False
+
+        for node in graph_module.graph.nodes:
+            if node.op != "call_function":
+                continue
+
+            substitution_func = self._DIALECT_SUBSTITUTIONS.get(node.target, None)
+            if substitution_func is None:
+                continue
+
+            dialect_node_spec = substitution_func(node, self.exported_program)
+            if dialect_node_spec is None:
+                continue
+
+            modified = True
+            with graph_module.graph.inserting_before(node):
+                dialect_node = graph_module.graph.create_node(
+                    "call_function",
+                    target=dialect_node_spec.op,
+                    args=dialect_node_spec.args,
+                    kwargs=dialect_node_spec.kwargs or {},
+                )
+
+                node.replace_all_uses_with(dialect_node)
+
+                # Keep same meta dict for new node and append new trace
+                dialect_node.meta = node.meta
+                old_stack_trace = dialect_node.meta.get("stack_trace", "")
+                dialect_node.meta["stack_trace"] = (
+                    f"{old_stack_trace}\n{traceback.format_stack()[-2]}"
+                )
+
+                graph_module.graph.erase_node(node)
+
+        if modified:
+            graph_module.graph.eliminate_dead_code()
+            graph_module.recompile()
+            graph_module = super().call(graph_module).graph_module
+
+        return PassResult(graph_module, modified)
+
+    def requires(self, graph_module):
+        self.ops_before = sum(
+            1 for node in graph_module.graph.nodes if node.op == "call_function"
+        )
+        return super().requires(graph_module)
+
+    def ensures(self, graph_module: torch.fx.GraphModule) -> bool:
+        """Ensure that there has only been 1-1 substitution of call_function nodes, i.e. that the number of call_function nodes is preserved after the pass."""
+
+        self.ops_after = sum(
+            1 for node in graph_module.graph.nodes if node.op == "call_function"
+        )
+        if self.ops_after != self.ops_before:
+            raise RuntimeError(
+                f"{self.__class__.__name__} did not preserve the number of call_function nodes: "
+                f"before={self.ops_before}, after={self.ops_after}"
+            )
+
+        return super().ensures(graph_module)
diff --git a/backends/transforms/targets.bzl b/backends/transforms/targets.bzl
index 8c3603e293d..36466ec4aa0 100644
--- a/backends/transforms/targets.bzl
+++ b/backends/transforms/targets.bzl
@@ -176,6 +176,21 @@ def define_common_targets():
         ],
     )
 
+    runtime.python_library(
+        name = "aten_to_dialect_pass",
+        srcs = [
+            "aten_to_dialect_pass.py",
+        ],
+        visibility = [
+            "//executorch/backends/...",
+        ],
+        deps = [
+            "//caffe2:torch",
+            "//executorch/backends/xnnpack/_passes:xnnpack_passes",
+            "//executorch/exir:lib",
+        ],
+    )
+
     runtime.python_library(
         name = "rank_0_to_rank_1",
         srcs = [
@@ -243,6 +258,16 @@ def define_common_targets():
         ],
     )
 
+    runtime.python_test(
+        name = "test_aten_to_dialect_pass",
+        srcs = [
+            "test/test_aten_to_dialect_pass.py",
+        ],
+        deps = [
+            "//caffe2:torch",
+            ":aten_to_dialect_pass",
+        ],
+    )
 
     runtime.python_test(
         name = "test_rank_0_to_rank_1",
diff --git a/backends/transforms/test/test_aten_to_dialect_pass.py b/backends/transforms/test/test_aten_to_dialect_pass.py
new file mode 100644
index 00000000000..80dbf210d72
--- /dev/null
+++ b/backends/transforms/test/test_aten_to_dialect_pass.py
@@ -0,0 +1,239 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import pytest
+import torch
+from executorch.backends.transforms.aten_to_dialect_pass import (
+    AtenToDialectPass,
+    DialectNodeSpec,
+)
+from executorch.backends.transforms.utils import create_constant_placeholder
+from torch.export import ExportedProgram
+from torch.export.graph_signature import InputKind
+from torch.fx import Node
+
+
+class AddModel(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.ops.aten.add.Tensor(x, y)
+
+
+class AddAlphaModel(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.ops.aten.add.Tensor(x, y, alpha=2)
+
+
+def _count_target(graph_module: torch.fx.GraphModule, target) -> int:
+    return sum(
+        1
+        for node in graph_module.graph.nodes
+        if node.op == "call_function" and node.target == target
+    )
+
+
+def _get_target_node(graph_module: torch.fx.GraphModule, target) -> Node:
+    nodes = [
+        node
+        for node in graph_module.graph.nodes
+        if node.op == "call_function" and node.target == target
+    ]
+    assert len(nodes) == 1
+    return nodes[0]
+
+
+def _export_add_model() -> ExportedProgram:
+    return torch.export.export(
+        AddModel().eval(), (torch.randn(2, 3), torch.randn(2, 3)), strict=True
+    )
+
+
+def _export_add_alpha_model() -> ExportedProgram:
+    return torch.export.export(
+        AddAlphaModel().eval(), (torch.randn(2, 3), torch.randn(2, 3)), strict=True
+    )
+
+
+def test_rewrites_node_when_substitution_matches() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+    def replace_add_with_sub(
+        node: Node, exported_program: ExportedProgram
+    ) -> DialectNodeSpec | None:
+        del exported_program
+        return DialectNodeSpec(torch.ops.aten.sub.Tensor, node.args)
+
+    exported_program = _export_add_model()
+    result = _TestAtenToDialectPass(exported_program=exported_program).call(
+        exported_program.graph_module
+    )
+
+    assert result.modified
+    assert _count_target(result.graph_module, torch.ops.aten.add.Tensor) == 0
+    assert _count_target(result.graph_module, torch.ops.aten.sub.Tensor) == 1
+
+
+def test_substitution_can_add_state_dict_placeholder() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+    def replace_add_rhs_with_constant(
+        node: Node, exported_program: ExportedProgram
+    ) -> DialectNodeSpec | None:
+        first_placeholder = next(
+            graph_node
+            for graph_node in node.graph.nodes
+            if graph_node.op == "placeholder"
+        )
+        with node.graph.inserting_before(first_placeholder):
+            const_node = create_constant_placeholder(
+                exp_program=exported_program,
+                graph=node.graph,
+                name="test_constant",
+                kind=InputKind.PARAMETER,
+                data=torch.ones(2, 3),
+            )
+        return DialectNodeSpec(torch.ops.aten.add.Tensor, (node.args[0], const_node))
+
+    exported_program = _export_add_model()
+    result = _TestAtenToDialectPass(exported_program=exported_program).call(
+        exported_program.graph_module
+    )
+
+    assert result.modified
+    assert "test_constant" in exported_program.state_dict
+    assert torch.equal(exported_program.state_dict["test_constant"], torch.ones(2, 3))
+    assert (
+        exported_program.graph_signature.inputs_to_parameters["test_constant"]
+        == "test_constant"
+    )
+    add_node = _get_target_node(result.graph_module, torch.ops.aten.add.Tensor)
+    assert add_node.args[1].name == "test_constant"
+
+    x = torch.full((2, 3), 2.0)
+    y = torch.full((2, 3), 5.0)
+    torch.testing.assert_close(exported_program.module()(x, y), x + torch.ones_like(x))
+
+
+def test_substitution_can_change_kwargs() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+    def replace_add_alpha(
+        node: Node, exported_program: ExportedProgram
+    ) -> DialectNodeSpec | None:
+        del exported_program
+        return DialectNodeSpec(torch.ops.aten.add.Tensor, node.args, {"alpha": 3})
+
+    exported_program = _export_add_alpha_model()
+    result = _TestAtenToDialectPass(exported_program=exported_program).call(
+        exported_program.graph_module
+    )
+
+    assert result.modified
+    add_node = _get_target_node(result.graph_module, torch.ops.aten.add.Tensor)
+    assert add_node.kwargs["alpha"] == 3
+
+    x = torch.full((2, 3), 2.0)
+    y = torch.full((2, 3), 5.0)
+    torch.testing.assert_close(exported_program.module()(x, y), x + 3 * y)
+
+
+def test_preserves_meta_when_substitution_matches() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+    def replace_add_with_sub(
+        node: Node, exported_program: ExportedProgram
+    ) -> DialectNodeSpec | None:
+        del exported_program
+        return DialectNodeSpec(torch.ops.aten.sub.Tensor, node.args)
+
+    exported_program = _export_add_model()
+    add_node = _get_target_node(
+        exported_program.graph_module, torch.ops.aten.add.Tensor
+    )
+    add_node.meta["test_sentinel"] = "kept"
+    add_node.meta["stack_trace"] = "original stack"
+
+    result = _TestAtenToDialectPass(exported_program=exported_program).call(
+        exported_program.graph_module
+    )
+
+    sub_node = _get_target_node(result.graph_module, torch.ops.aten.sub.Tensor)
+    assert sub_node.meta["test_sentinel"] == "kept"
+    assert sub_node.meta["stack_trace"].startswith("original stack\n")
+    assert sub_node.meta["stack_trace"] != "original stack"
+
+
+def test_keeps_node_when_substitution_returns_none() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+    def do_not_replace(
+        node: Node, exported_program: ExportedProgram
+    ) -> DialectNodeSpec | None:
+        del node, exported_program
+        return None
+
+    exported_program = _export_add_model()
+    result = _TestAtenToDialectPass(exported_program=exported_program).call(
+        exported_program.graph_module
+    )
+
+    assert not result.modified
+    assert _count_target(result.graph_module, torch.ops.aten.add.Tensor) == 1
+    assert _count_target(result.graph_module, torch.ops.aten.sub.Tensor) == 0
+
+
+def test_raises_when_duplicate_substitution_is_registered() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+    def first_replace(
+        node: Node, exported_program: ExportedProgram
+    ) -> DialectNodeSpec | None:
+        del exported_program
+        return DialectNodeSpec(torch.ops.aten.sub.Tensor, node.args)
+
+    with pytest.raises(RuntimeError, match="Multiple substitutions registered"):
+
+        @_TestAtenToDialectPass.register_dialect_substitution(torch.ops.aten.add.Tensor)
+        def second_replace(
+            node: Node, exported_program: ExportedProgram
+        ) -> DialectNodeSpec | None:
+            del exported_program
+            return DialectNodeSpec(torch.ops.aten.mul.Tensor, node.args)
+
+
+def test_ensures_raises_when_call_function_count_changes() -> None:
+    class _TestAtenToDialectPass(AtenToDialectPass):
+        pass
+
+    exported_program = _export_add_model()
+    graph_module = exported_program.graph_module
+    test_pass = _TestAtenToDialectPass(exported_program=exported_program)
+    test_pass.requires(graph_module)
+
+    placeholders = [
+        node for node in graph_module.graph.nodes if node.op == "placeholder"
+    ]
+    output_node = next(node for node in graph_module.graph.nodes if node.op == "output")
+    with graph_module.graph.inserting_before(output_node):
+        graph_module.graph.create_node(
+            "call_function",
+            target=torch.ops.aten.sub.Tensor,
+            args=tuple(placeholders),
+            kwargs={},
+        )
+
+    with pytest.raises(RuntimeError, match="did not preserve"):
+        test_pass.ensures(graph_module)

From c8c04e4b6e3aa7b11574374484fb18c404daefc6 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <107070759+kirklandsign@users.noreply.github.com>
Date: Thu, 28 May 2026 09:59:29 -0700
Subject: [PATCH 065/103] Remove `google-java-format` from CI lint
 infrastructure

Differential Revision: D106575515

Pull Request resolved: https://github.com/pytorch/executorch/pull/19831
---
 .ci/docker/common/install_linter.sh |  4 ---
 .github/workflows/lint.yml          | 46 -----------------------------
 2 files changed, 50 deletions(-)

diff --git a/.ci/docker/common/install_linter.sh b/.ci/docker/common/install_linter.sh
index 52d2d262685..4a796a72d54 100755
--- a/.ci/docker/common/install_linter.sh
+++ b/.ci/docker/common/install_linter.sh
@@ -13,7 +13,3 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 # NB: Install all linter dependencies, the caching of lintrunner init could be
 # done after Executorch becomes public
 pip_install -r requirements-lintrunner.txt
-
-# Install google-java-format
-curl -L --retry 3 --retry-all-errors https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64 > /opt/google-java-format
-chmod +x /opt/google-java-format
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index b26247d2333..b21cc527b8d 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -125,49 +125,3 @@ jobs:
     uses: ./.github/workflows/_link_check.yml
     with:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-
-  android-java-format:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-
-      - uses: actions/setup-java@v4
-        with:
-          distribution: 'temurin'
-          java-version: '17'
-
-      - name: Check Java formatting
-        run: |
-          GOOGLE_JAVA_FORMAT_VERSION="1.24.0"
-          curl -sSfL "https://github.com/google/google-java-format/releases/download/v${GOOGLE_JAVA_FORMAT_VERSION}/google-java-format-${GOOGLE_JAVA_FORMAT_VERSION}-all-deps.jar" \
-            -o /tmp/google-java-format.jar
-
-          FILES_NEEDS_FORMAT=$(find extension/android/executorch_android/src/main/java/org/pytorch/executorch \
-                              extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm \
-                              extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations \
-                              extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch \
-                              extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench \
-                              extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench \
-                              -type f -name "*.java" 2>/dev/null | \
-                              xargs -r java -jar /tmp/google-java-format.jar -n)
-
-          if [ -n "$FILES_NEEDS_FORMAT" ]; then
-            echo "Warning: The following files need formatting:"
-            echo "$FILES_NEEDS_FORMAT"
-            echo ""
-            echo "Please use google-java-format from https://github.com/google/google-java-format/releases/"
-            echo ""
-            echo "To fix, run one of these commands:"
-            echo "  # Using xargs (recommended):"
-            echo "  find <paths> -type f -name '*.java' | xargs google-java-format -i"
-            echo ""
-            echo "  # Or format specific files:"
-            echo "$FILES_NEEDS_FORMAT" | while IFS= read -r file; do
-              echo "  google-java-format -i \"$file\""
-            done
-            exit 1
-          fi

From 000d81029005954628a59cf86c292fefe7d04e85 Mon Sep 17 00:00:00 2001
From: Gasoonjia <gasoonjia@icloud.com>
Date: Thu, 28 May 2026 14:04:39 -0700
Subject: [PATCH 066/103] [ET Device Support] Define et_copy runtime h2d and
 d2h copy ops (#19858)

clone https://github.com/pytorch/executorch/pull/18729 due to bot crash
---
 backends/cuda/runtime/shims/tests/targets.bzl |  24 ++
 .../shims/tests/test_op__device_copy.cpp      | 195 ++++++++++++
 kernels/portable/cpu/op__device_copy.cpp      | 154 +++++++++
 kernels/portable/functions.yaml               |  10 +
 kernels/test/op__device_copy_test.cpp         | 297 ++++++++++++++++++
 kernels/test/targets.bzl                      |  14 +-
 shim_et/xplat/executorch/codegen/codegen.bzl  |   1 +
 .../kernels/portable/op_registration_util.bzl |   6 +
 8 files changed, 698 insertions(+), 3 deletions(-)
 create mode 100644 backends/cuda/runtime/shims/tests/test_op__device_copy.cpp
 create mode 100644 kernels/portable/cpu/op__device_copy.cpp
 create mode 100644 kernels/test/op__device_copy_test.cpp

diff --git a/backends/cuda/runtime/shims/tests/targets.bzl b/backends/cuda/runtime/shims/tests/targets.bzl
index b68043f7feb..a54c47e979d 100644
--- a/backends/cuda/runtime/shims/tests/targets.bzl
+++ b/backends/cuda/runtime/shims/tests/targets.bzl
@@ -42,3 +42,27 @@ def define_common_targets():
     cuda_shim_cpp_unittest("aoti_torch_new_tensor_handle")
     cuda_shim_cpp_unittest("aoti_torch_item_bool")
     cuda_shim_cpp_unittest("aoti_torch_assign_tensors_out")
+
+    cpp_unittest(
+        name = "test_op__device_copy",
+        srcs = ["test_op__device_copy.cpp"],
+        deps = [
+            "//executorch/backends/cuda/runtime:cuda_backend",
+            "//executorch/kernels/portable:generated_lib",
+            "//executorch/kernels/portable:generated_lib_headers",
+            "//executorch/kernels/portable/cpu:op__device_copy",
+            "//executorch/runtime/core:device_allocator",
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/core/portable_type:portable_type",
+            "//executorch/runtime/kernel:kernel_runtime_context",
+            "//executorch/runtime/platform:platform",
+        ],
+        external_deps = [
+            ("cuda", None, "cuda-lazy"),
+        ],
+        preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
+        keep_gpu_sections = True,
+        remote_execution = re_test_utils.remote_execution(
+            platform = "gpu-remote-execution",
+        ),
+    )
diff --git a/backends/cuda/runtime/shims/tests/test_op__device_copy.cpp b/backends/cuda/runtime/shims/tests/test_op__device_copy.cpp
new file mode 100644
index 00000000000..4e5c5a099b7
--- /dev/null
+++ b/backends/cuda/runtime/shims/tests/test_op__device_copy.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <cuda_runtime.h>
+#include <executorch/kernels/portable/Functions.h>
+#include <executorch/runtime/core/device_allocator.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/portable_type/tensor_impl.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+#include <executorch/runtime/platform/runtime.h>
+#include <gtest/gtest.h>
+
+#if (defined(__has_feature) && __has_feature(address_sanitizer)) || \
+    defined(__SANITIZE_ADDRESS__)
+#include <sanitizer/lsan_interface.h>
+#define EXECUTORCH_CUDA_DEVICE_COPY_HAS_LSAN_INTERFACE 1
+#else
+#define EXECUTORCH_CUDA_DEVICE_COPY_HAS_LSAN_INTERFACE 0
+#endif
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using executorch::aten::TensorImpl;
+using executorch::runtime::Error;
+using executorch::runtime::get_device_allocator;
+using executorch::runtime::KernelRuntimeContext;
+using executorch::runtime::TensorShapeDynamism;
+using executorch::runtime::etensor::DeviceIndex;
+using executorch::runtime::etensor::DeviceType;
+
+namespace {
+
+struct CudaDeleter {
+  void operator()(void* ptr) const {
+    if (ptr != nullptr) {
+      cudaFree(ptr);
+    }
+  }
+};
+
+using CudaPtr = std::unique_ptr<void, CudaDeleter>;
+
+CudaPtr allocate_cuda(size_t nbytes) {
+  void* ptr = nullptr;
+  const cudaError_t err = cudaMalloc(&ptr, nbytes);
+  EXPECT_EQ(err, cudaSuccess) << "cudaMalloc failed";
+  return CudaPtr(ptr);
+}
+
+bool is_cuda_available() {
+#if EXECUTORCH_CUDA_DEVICE_COPY_HAS_LSAN_INTERFACE
+  __lsan_disable();
+#endif
+  int device_count = 0;
+  const cudaError_t err = cudaGetDeviceCount(&device_count);
+#if EXECUTORCH_CUDA_DEVICE_COPY_HAS_LSAN_INTERFACE
+  __lsan_enable();
+#endif
+  return err == cudaSuccess && device_count > 0;
+}
+
+std::vector<float> copy_cuda_to_host(const void* device_ptr, size_t numel) {
+  std::vector<float> host(numel);
+  const cudaError_t err = cudaMemcpy(
+      host.data(), device_ptr, numel * sizeof(float), cudaMemcpyDeviceToHost);
+  EXPECT_EQ(err, cudaSuccess) << "cudaMemcpy D2H failed";
+  return host;
+}
+
+void copy_host_to_cuda(const std::vector<float>& host, void* device_ptr) {
+  const cudaError_t err = cudaMemcpy(
+      device_ptr,
+      host.data(),
+      host.size() * sizeof(float),
+      cudaMemcpyHostToDevice);
+  EXPECT_EQ(err, cudaSuccess) << "cudaMemcpy H2D failed";
+}
+
+class CudaDeviceCopyOpTest : public ::testing::Test {
+ protected:
+  static void SetUpTestSuite() {
+    executorch::runtime::runtime_init();
+    ASSERT_NE(get_device_allocator(DeviceType::CUDA), nullptr)
+        << "Linking cuda_backend should auto-register the CUDA allocator";
+  }
+
+  void SetUp() override {
+    if (!is_cuda_available()) {
+      GTEST_SKIP() << "CUDA not available, skipping CUDA device copy op tests";
+    }
+  }
+
+  Tensor& op_h2d_copy_out(const Tensor& self, Tensor& out) {
+    return torch::executor::et_copy::_h2d_copy_outf(context_, self, out);
+  }
+
+  Tensor& op_d2h_copy_out(const Tensor& self, Tensor& out) {
+    return torch::executor::et_copy::_d2h_copy_outf(context_, self, out);
+  }
+
+  KernelRuntimeContext context_;
+};
+
+} // namespace
+
+TEST_F(CudaDeviceCopyOpTest, H2dCopyUsesRegisteredCudaAllocator) {
+  std::vector<float> src_data = {1.0f, 2.0f, 3.0f, 4.0f};
+  auto device_data = allocate_cuda(src_data.size() * sizeof(float));
+  ASSERT_NE(device_data.get(), nullptr);
+
+  int32_t sizes[] = {static_cast<int32_t>(src_data.size())};
+  uint8_t dim_order[] = {0};
+  int32_t strides[] = {1};
+
+  TensorImpl src_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      src_data.data(),
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CPU,
+      0);
+  Tensor src(&src_impl);
+
+  TensorImpl dst_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      device_data.get(),
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CUDA,
+      0);
+  Tensor dst(&dst_impl);
+
+  Tensor& result = op_h2d_copy_out(src, dst);
+
+  EXPECT_EQ(context_.failure_state(), Error::Ok);
+  EXPECT_EQ(&result, &dst);
+  EXPECT_EQ(copy_cuda_to_host(device_data.get(), src_data.size()), src_data);
+}
+
+TEST_F(CudaDeviceCopyOpTest, D2hCopyUsesRegisteredCudaAllocator) {
+  const std::vector<float> expected = {5.0f, 6.0f, 7.0f, 8.0f};
+  auto device_data = allocate_cuda(expected.size() * sizeof(float));
+  ASSERT_NE(device_data.get(), nullptr);
+  copy_host_to_cuda(expected, device_data.get());
+
+  std::vector<float> dst_data(expected.size(), 0.0f);
+  int32_t sizes[] = {static_cast<int32_t>(expected.size())};
+  uint8_t dim_order[] = {0};
+  int32_t strides[] = {1};
+
+  TensorImpl src_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      device_data.get(),
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CUDA,
+      0);
+  Tensor src(&src_impl);
+
+  TensorImpl dst_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      dst_data.data(),
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CPU,
+      0);
+  Tensor dst(&dst_impl);
+
+  Tensor& result = op_d2h_copy_out(src, dst);
+
+  EXPECT_EQ(context_.failure_state(), Error::Ok);
+  EXPECT_EQ(&result, &dst);
+  EXPECT_EQ(dst_data, expected);
+}
diff --git a/kernels/portable/cpu/op__device_copy.cpp b/kernels/portable/cpu/op__device_copy.cpp
new file mode 100644
index 00000000000..5e1a51a83be
--- /dev/null
+++ b/kernels/portable/cpu/op__device_copy.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/**
+ * Runtime kernels for et_copy._h2d_copy and et_copy._d2h_copy ops.
+ *
+ * These ops transfer tensor data between CPU and device memory using
+ * the DeviceAllocator interface. The device type is inferred from the
+ * tensor metadata (out.device_type() for H2D, self.device_type() for D2H),
+ * which was set during AOT serialization by PropagateDevicePass.
+ */
+
+#include <executorch/runtime/core/device_allocator.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace torch {
+namespace executor {
+namespace native {
+
+using Tensor = executorch::aten::Tensor;
+using DeviceAllocator = executorch::runtime::DeviceAllocator;
+using Error = executorch::runtime::Error;
+
+/**
+ * Copies tensor data from host (CPU) memory to device memory.
+ *
+ * self: source tensor on CPU
+ * out:  destination tensor on device (memory-planned by runtime)
+ *
+ * The device type and index are inferred from out's TensorImpl metadata.
+ */
+Tensor&
+_h2d_copy_out(KernelRuntimeContext& ctx, const Tensor& self, Tensor& out) {
+  auto device_type = out.unsafeGetTensorImpl()->device_type();
+  auto device_index = out.unsafeGetTensorImpl()->device_index();
+
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      self.unsafeGetTensorImpl()->device_type() ==
+          executorch::runtime::etensor::DeviceType::CPU,
+      InvalidArgument,
+      out,
+      "_h2d_copy: source tensor must be on CPU, got device_type=%d",
+      static_cast<int>(self.unsafeGetTensorImpl()->device_type()));
+
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      device_type != executorch::runtime::etensor::DeviceType::CPU,
+      InvalidArgument,
+      out,
+      "_h2d_copy: destination tensor must be on a non-CPU device");
+
+  auto nbytes = self.nbytes();
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      nbytes == out.nbytes(),
+      InvalidArgument,
+      out,
+      "_h2d_copy: size mismatch: self.nbytes()=%zu, out.nbytes()=%zu",
+      nbytes,
+      out.nbytes());
+
+  DeviceAllocator* allocator =
+      executorch::runtime::get_device_allocator(device_type);
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      allocator != nullptr,
+      NotFound,
+      out,
+      "_h2d_copy: no device allocator registered for device_type=%d",
+      static_cast<int>(device_type));
+
+  Error err = allocator->copy_host_to_device(
+      out.mutable_data_ptr(), self.const_data_ptr(), nbytes, device_index);
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      err == Error::Ok,
+      Internal,
+      out,
+      "_h2d_copy: copy_host_to_device failed");
+
+  return out;
+}
+
+/**
+ * Copies tensor data from device memory to host (CPU) memory.
+ *
+ * self: source tensor on device
+ * out:  destination tensor on CPU (memory-planned by runtime)
+ *
+ * The device type and index are inferred from self's TensorImpl metadata.
+ */
+Tensor&
+_d2h_copy_out(KernelRuntimeContext& ctx, const Tensor& self, Tensor& out) {
+  auto device_type = self.unsafeGetTensorImpl()->device_type();
+  auto device_index = self.unsafeGetTensorImpl()->device_index();
+
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      device_type != executorch::runtime::etensor::DeviceType::CPU,
+      InvalidArgument,
+      out,
+      "_d2h_copy: source tensor must be on a non-CPU device");
+
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      out.unsafeGetTensorImpl()->device_type() ==
+          executorch::runtime::etensor::DeviceType::CPU,
+      InvalidArgument,
+      out,
+      "_d2h_copy: destination tensor must be on CPU, got device_type=%d",
+      static_cast<int>(out.unsafeGetTensorImpl()->device_type()));
+
+  auto nbytes = self.nbytes();
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      nbytes == out.nbytes(),
+      InvalidArgument,
+      out,
+      "_d2h_copy: size mismatch: self.nbytes()=%zu, out.nbytes()=%zu",
+      nbytes,
+      out.nbytes());
+
+  DeviceAllocator* allocator =
+      executorch::runtime::get_device_allocator(device_type);
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      allocator != nullptr,
+      NotFound,
+      out,
+      "_d2h_copy: no device allocator registered for device_type=%d",
+      static_cast<int>(device_type));
+
+  Error err = allocator->copy_device_to_host(
+      out.mutable_data_ptr(), self.const_data_ptr(), nbytes, device_index);
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      err == Error::Ok,
+      Internal,
+      out,
+      "_d2h_copy: copy_device_to_host failed");
+
+  return out;
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
index 620d97d050f..ecf62ee3606 100644
--- a/kernels/portable/functions.yaml
+++ b/kernels/portable/functions.yaml
@@ -1045,6 +1045,16 @@
     - arg_meta: null
       kernel_name: torch::executor::zeros_out
 
+- func: et_copy::_h2d_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::_h2d_copy_out
+
+- func: et_copy::_d2h_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::_d2h_copy_out
+
 - func: dim_order_ops::_empty_dim_order.out(int[] size, *, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
diff --git a/kernels/test/op__device_copy_test.cpp b/kernels/test/op__device_copy_test.cpp
new file mode 100644
index 00000000000..d345642bd37
--- /dev/null
+++ b/kernels/test/op__device_copy_test.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/**
+ * Tests for et_copy._h2d_copy.out and et_copy._d2h_copy.out runtime kernels.
+ *
+ * Uses a MockDeviceAllocator to verify that the kernels correctly call
+ * copy_host_to_device / copy_device_to_host via the DeviceAllocator interface,
+ * and that device type is inferred from tensor metadata.
+ */
+
+#include <gtest/gtest.h>
+
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/runtime/core/device_allocator.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/portable_type/tensor_impl.h>
+#include <executorch/runtime/platform/runtime.h>
+
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using executorch::aten::TensorImpl;
+using executorch::runtime::DeviceAllocator;
+using executorch::runtime::Error;
+using executorch::runtime::get_device_allocator;
+using executorch::runtime::register_device_allocator;
+using executorch::runtime::Result;
+using executorch::runtime::etensor::DeviceIndex;
+using executorch::runtime::etensor::DeviceType;
+
+using TensorShapeDynamism = executorch::runtime::TensorShapeDynamism;
+
+namespace {
+
+class MockDeviceAllocator : public DeviceAllocator {
+ public:
+  Result<void*> allocate(
+      size_t nbytes,
+      DeviceIndex index,
+      size_t alignment = kDefaultAlignment) override {
+    return Error::NotSupported;
+  }
+
+  void deallocate(void* ptr, DeviceIndex index) override {}
+
+  Error copy_host_to_device(
+      void* dst,
+      const void* src,
+      size_t nbytes,
+      DeviceIndex index) override {
+    h2d_call_count_++;
+    last_h2d_nbytes_ = nbytes;
+    last_h2d_device_index_ = index;
+    // Actually copy so we can verify data
+    std::memcpy(dst, src, nbytes);
+    return Error::Ok;
+  }
+
+  Error copy_device_to_host(
+      void* dst,
+      const void* src,
+      size_t nbytes,
+      DeviceIndex index) override {
+    d2h_call_count_++;
+    last_d2h_nbytes_ = nbytes;
+    last_d2h_device_index_ = index;
+    std::memcpy(dst, src, nbytes);
+    return Error::Ok;
+  }
+
+  DeviceType device_type() const override {
+    return DeviceType::CUDA;
+  }
+
+  int h2d_call_count_ = 0;
+  int d2h_call_count_ = 0;
+  size_t last_h2d_nbytes_ = 0;
+  size_t last_d2h_nbytes_ = 0;
+  DeviceIndex last_h2d_device_index_ = -1;
+  DeviceIndex last_d2h_device_index_ = -1;
+};
+
+} // namespace
+
+static MockDeviceAllocator g_mock_cuda;
+
+class OpDeviceCopyTest : public OperatorTest {
+ protected:
+  Tensor& op_h2d_copy_out(const Tensor& self, Tensor& out) {
+    return torch::executor::et_copy::_h2d_copy_outf(context_, self, out);
+  }
+
+  Tensor& op_d2h_copy_out(const Tensor& self, Tensor& out) {
+    return torch::executor::et_copy::_d2h_copy_outf(context_, self, out);
+  }
+
+  static void SetUpTestSuite() {
+    executorch::runtime::runtime_init();
+    if (get_device_allocator(DeviceType::CUDA) == nullptr) {
+      register_device_allocator(&g_mock_cuda);
+    }
+  }
+
+  void SetUp() override {
+    OperatorTest::SetUp();
+    g_mock_cuda.h2d_call_count_ = 0;
+    g_mock_cuda.d2h_call_count_ = 0;
+    g_mock_cuda.last_h2d_nbytes_ = 0;
+    g_mock_cuda.last_d2h_nbytes_ = 0;
+    g_mock_cuda.last_h2d_device_index_ = -1;
+    g_mock_cuda.last_d2h_device_index_ = -1;
+  }
+};
+
+TEST_F(OpDeviceCopyTest, H2dCopyCopiesDataAndCallsAllocator) {
+  // Set up a CPU source tensor with known data.
+  float src_data[] = {1.0f, 2.0f, 3.0f, 4.0f};
+  int32_t sizes[] = {4};
+  uint8_t dim_order[] = {0};
+  int32_t strides[] = {1};
+  TensorImpl src_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      src_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CPU,
+      0);
+  Tensor src(&src_impl);
+
+  // Set up a CUDA destination tensor (simulated with host memory).
+  float dst_data[] = {0.0f, 0.0f, 0.0f, 0.0f};
+  TensorImpl dst_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      dst_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CUDA,
+      0);
+  Tensor dst(&dst_impl);
+
+  Tensor& result = op_h2d_copy_out(src, dst);
+
+  // Verify the allocator was called correctly.
+  EXPECT_EQ(g_mock_cuda.h2d_call_count_, 1);
+  EXPECT_EQ(g_mock_cuda.last_h2d_nbytes_, 4 * sizeof(float));
+  EXPECT_EQ(g_mock_cuda.last_h2d_device_index_, 0);
+
+  // Verify data was copied (mock does a real memcpy).
+  EXPECT_EQ(dst_data[0], 1.0f);
+  EXPECT_EQ(dst_data[1], 2.0f);
+  EXPECT_EQ(dst_data[2], 3.0f);
+  EXPECT_EQ(dst_data[3], 4.0f);
+
+  // Verify return value is the out tensor.
+  EXPECT_EQ(&result, &dst);
+}
+
+TEST_F(OpDeviceCopyTest, D2hCopyCopiesDataAndCallsAllocator) {
+  // Set up a CUDA source tensor with known data.
+  float src_data[] = {5.0f, 6.0f, 7.0f, 8.0f};
+  int32_t sizes[] = {4};
+  uint8_t dim_order[] = {0};
+  int32_t strides[] = {1};
+  TensorImpl src_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      src_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CUDA,
+      0);
+  Tensor src(&src_impl);
+
+  // Set up a CPU destination tensor.
+  float dst_data[] = {0.0f, 0.0f, 0.0f, 0.0f};
+  TensorImpl dst_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      dst_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CPU,
+      0);
+  Tensor dst(&dst_impl);
+
+  Tensor& result = op_d2h_copy_out(src, dst);
+
+  // Verify the allocator was called correctly.
+  EXPECT_EQ(g_mock_cuda.d2h_call_count_, 1);
+  EXPECT_EQ(g_mock_cuda.last_d2h_nbytes_, 4 * sizeof(float));
+  EXPECT_EQ(g_mock_cuda.last_d2h_device_index_, 0);
+
+  // Verify data was copied.
+  EXPECT_EQ(dst_data[0], 5.0f);
+  EXPECT_EQ(dst_data[1], 6.0f);
+  EXPECT_EQ(dst_data[2], 7.0f);
+  EXPECT_EQ(dst_data[3], 8.0f);
+
+  EXPECT_EQ(&result, &dst);
+}
+
+TEST_F(OpDeviceCopyTest, H2dCopyWithDeviceIndex1) {
+  // Verify device_index is correctly forwarded to the allocator.
+  float src_data[] = {1.0f};
+  float dst_data[] = {0.0f};
+  int32_t sizes[] = {1};
+  uint8_t dim_order[] = {0};
+  int32_t strides[] = {1};
+
+  TensorImpl src_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      src_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CPU,
+      0);
+  Tensor src(&src_impl);
+
+  // Device index = 1 (e.g., cuda:1)
+  TensorImpl dst_impl(
+      ScalarType::Float,
+      1,
+      sizes,
+      dst_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CUDA,
+      1);
+  Tensor dst(&dst_impl);
+
+  op_h2d_copy_out(src, dst);
+
+  EXPECT_EQ(g_mock_cuda.h2d_call_count_, 1);
+  EXPECT_EQ(g_mock_cuda.last_h2d_device_index_, 1);
+}
+
+TEST_F(OpDeviceCopyTest, H2dCopyMultidimensionalTensor) {
+  // Test with a 2D tensor [2, 3].
+  float src_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  float dst_data[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
+  int32_t sizes[] = {2, 3};
+  uint8_t dim_order[] = {0, 1};
+  int32_t strides[] = {3, 1};
+
+  TensorImpl src_impl(
+      ScalarType::Float,
+      2,
+      sizes,
+      src_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CPU,
+      0);
+  Tensor src(&src_impl);
+
+  TensorImpl dst_impl(
+      ScalarType::Float,
+      2,
+      sizes,
+      dst_data,
+      dim_order,
+      strides,
+      TensorShapeDynamism::STATIC,
+      DeviceType::CUDA,
+      0);
+  Tensor dst(&dst_impl);
+
+  op_h2d_copy_out(src, dst);
+
+  EXPECT_EQ(g_mock_cuda.h2d_call_count_, 1);
+  EXPECT_EQ(g_mock_cuda.last_h2d_nbytes_, 6 * sizeof(float));
+
+  for (int i = 0; i < 6; ++i) {
+    EXPECT_EQ(dst_data[i], src_data[i]);
+  }
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
index bc51e336cb8..5212d691c5b 100644
--- a/kernels/test/targets.bzl
+++ b/kernels/test/targets.bzl
@@ -1,14 +1,14 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load("@fbsource//xplat/executorch/kernels/test:util.bzl", "codegen_function_header_wrapper", "op_test")
 
-def _common_op_test(name, kernels):
+def _common_op_test(name, kernels, deps = []):
     """
     Defines test targets in format of <kernel>_op_<op-name>_test
     For ATen kernel testing, let's use portable functions.yaml for tested ops.
     """
     for kernel in kernels:
-        deps = [":function_header_wrapper_{}".format(kernel)]
-        op_test(name, kernel_name = kernel, use_kernel_prefix = True, deps = deps)
+        op_deps = [":function_header_wrapper_{}".format(kernel)] + deps
+        op_test(name, kernel_name = kernel, use_kernel_prefix = True, deps = op_deps)
 
 def define_common_targets():
     """Defines targets that should be shared between fbcode and xplat.
@@ -177,6 +177,14 @@ def define_common_targets():
     _common_op_test("op__clone_dim_order_test", ["aten", "portable"])
     _common_op_test("op__conj_physical_test", ["aten", "portable"])
     _common_op_test("op__adaptive_avg_pool2d_test", ["aten", "portable"])
+    _common_op_test(
+        "op__device_copy_test",
+        ["portable"],
+        deps = [
+            "//executorch/runtime/core:device_allocator",
+            "//executorch/runtime/platform:platform",
+        ],
+    )
     _common_op_test("op_abs_test", ["aten", "portable"])
     _common_op_test("op_acos_test", ["aten", "portable"])
     _common_op_test("op_acosh_test", ["aten", "portable"])
diff --git a/shim_et/xplat/executorch/codegen/codegen.bzl b/shim_et/xplat/executorch/codegen/codegen.bzl
index 5ffa7b65a36..318996784a1 100644
--- a/shim_et/xplat/executorch/codegen/codegen.bzl
+++ b/shim_et/xplat/executorch/codegen/codegen.bzl
@@ -535,6 +535,7 @@ def get_portable_lib_deps():
         "//executorch/kernels/portable/cpu:vec_ops",
         "//executorch/kernels/portable/cpu/pattern:all_deps",
         "//executorch/kernels/portable/cpu/util:all_deps",
+        "//executorch/runtime/core:device_allocator",
     ]
 
 def get_optimized_lib_deps():
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
index cc2a0f78c75..479f3913f8f 100644
--- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -1405,6 +1405,12 @@ ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:copy_ops_util",
         ],
     ),
+    op_target(
+        name = "op__device_copy",
+        deps = [
+            "//executorch/runtime/core:device_allocator",
+        ],
+    ),
 )
 
 # Operators that are not listed in `functions.yaml` (i.e., operators listed in

From 42581f1b09167b8dbed119eabd240354bf8f6108 Mon Sep 17 00:00:00 2001
From: Mergen Nachin <mnachin@meta.com>
Date: Thu, 28 May 2026 17:44:19 -0400
Subject: [PATCH 067/103] =?UTF-8?q?Add=20GGUF=20=E2=86=92=20MLX=20export?=
 =?UTF-8?q?=20support=20for=20Gemma=204=2031B=20(#19829)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable loading GGUF files (e.g. Q4_K_M) and exporting to the MLX
backend. Three areas of change:

GGUF loader (gguf_loader.py):
- Add MLX backend support alongside CUDA
- Keep embedding quantized for MLX (QuantizedEmbeddingHandler supports
  quantized gather natively, unlike CUDA's Int4Tensor)
- Fix stale docstring references to Int4TilePackedTo4dTensor/tinygemm

MLX backend (op_helpers.py, patterns.py):
- Accept group_size=16 in parse_dequant_node for GGUF Q6_K tensors
- For group_size < 32, emit DequantizeNode + TransposeNode + AddmmNode
  instead of QuantizedMatmulNode, since MLX Metal kernels are only
  instantiated for group_size >= 32. Weights stay packed as int8 in the
  .pte file and are dequantized on-device at runtime — same strategy
  CUDA/Inductor uses (separate Triton dequant + cuBLAS mm).

Packer (pack_mlx.py):
- Add 16 to supported group sizes so Q6_K IntxUnpackedToInt8Tensor
  passes through to export unchanged

Tests (test_ops.py):
- Add group_size=16 configs for int8, int4, and no-bias variants


Test Plan:

Export and run this model


https://huggingface.co/unsloth/gemma-4-31B-it-GGUF/blob/main/gemma-4-31B-it-Q4_K_M.gguf

On M1 32GB machine (exported on Linux A100)

```
(executorch_dev) mnachin@mnachin-mbp executorch % ./cmake-out/examples/models/gemma4_31b/gemma4_31b_runner \
    --model_path  /Users/mnachin/repos/models/gemma-4-31B-it-GGUF/model.pte \
    --tokenizer_path /Users/mnachin/repos/models/gemma-4-31B-it-HQQ-INT4/tokenizer.json \
    --prompt "Tell me a joke about RAM usage" \
    --max_new_tokens 128 \
    --temperature 0.8
I tokenizers:regex.cpp:27] Registering override fallback regex
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1779926968.603672 54889180 re2.cc:237] Error parsing '((\<pad\>|ool\|\>1\x00\x00\
                                                                                             �\<t|respo|\<tool_call\|\>|\<bos\>|\<\|tool_response\>|\<\|think\|\>|\x0...': invalid UTF-8
I tokenizers:re2_regex.cpp:27] Re2 failed to compile regex: ((\<pad\>|ool\|\>1\x00\x00\
                                                                                       �\<t|respo|\<tool_call\|\>|\<bos\>|\<\|tool_response\>|\<\|think\|\>|\x00\x00\\\<|\<tool_response\|\>|\<mask\>|\<\|\"\|\>|all\|\>j\x00\x00\\|\<channel\|\>|\<\|turn\>|\<turn\|\>|\<\|image\>|\<\|$
I tokenizers:regex_lookahead.cpp:27] Creating PCRE2 regex
I tokenizers:pcre2_regex.cpp:48] PCRE2 UTF-8 validation failed at offset 27: UTF-8 error: byte 2 top bits not 0x80. Retrying without UTF flags.
Loading model...
Prompt tokens: 23
Why did the computer go to therapy?

Because it had too many **unresolved dependencies** and it just couldn't stop **dwelling on the past**... but it forgot everything the moment it took a nap.<turn|>
PyTorchObserver {"prefill_token_per_sec":2.49539,"decode_token_per_sec":0.0880671,"prompt_tokens":23,"generated_tokens":44,"model_load_start_ms":1779926968052,"model_load_end_ms":1779926982494,"inference_start_ms":1779926982497,"inference_end_ms":1779927491333,"prompt_eval_end_ms":1779926991714,"first_token_ms":1779926991714,"aggregate_sampling_time_ms":0,"SCALING_FACTOR_UNITS_PER_SECOND":1000}
```

For reference, here's the this model:
https://huggingface.co/SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4

```
(executorch_dev) mnachin@mnachin-mbp executorch % ./cmake-out/examples/models/gemma4_31b/gemma4_31b_runner \
    --model_path  /Users/mnachin/repos/models/gemma-4-31B-it-HQQ-INT4/model.pte \
    --tokenizer_path /Users/mnachin/repos/models/gemma-4-31B-it-HQQ-INT4/tokenizer.json \
    --prompt "Tell me a joke about RAM usage" \
    --max_new_tokens 128 \
    --temperature 0.8
I tokenizers:regex.cpp:27] Registering override fallback regex
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1779927592.109382 54914733 re2.cc:237] Error parsing '((\<pad\>|ool\|\>1\x00\x00\
                                                                                             �\<t|respo|\<tool_call\|\>|\<bos\>|\<\|tool_response\>|\<\|think\|\>|\x0...': invalid UTF-8
I tokenizers:re2_regex.cpp:27] Re2 failed to compile regex: ((\<pad\>|ool\|\>1\x00\x00\
                                                                                       �\<t|respo|\<tool_call\|\>|\<bos\>|\<\|tool_response\>|\<\|think\|\>|\x00\x00\\\<|\<tool_response\|\>|\<mask\>|\<\|\"\|\>|all\|\>j\x00\x00\\|\<channel\|\>|\<\|turn\>|\<turn\|\>|\<\|image\>|\<\|$
I tokenizers:regex_lookahead.cpp:27] Creating PCRE2 regex
I tokenizers:pcre2_regex.cpp:48] PCRE2 UTF-8 validation failed at offset 27: UTF-8 error: byte 2 top bits not 0x80. Retrying without UTF flags.
Loading model...
Prompt tokens: 23
Why did the computer go to therapy?

Because it had too many **unresolved dependencies** and couldn't stop **dwelling on the past**, but it still couldn't remember why it was there.

***

Alternatively, a shorter one:

**Why was the RAM so stressed?**
Because it had too much on its mind, but it knew that as soon as it slept, it would forget everything.<turn|>
PyTorchObserver {"prefill_token_per_sec":9.11975,"decode_token_per_sec":5.24998,"prompt_tokens":23,"generated_tokens":86,"model_load_start_ms":1779927591719,"model_load_end_ms":1779927603575,"inference_start_ms":1779927603579,"inference_end_ms":1779927622482,"prompt_eval_end_ms":1779927606101,"first_token_ms":1779927606101,"aggregate_sampling_time_ms":0,"SCALING_FACTOR_UNITS_PER_SECOND":1000}
```

There's definitely performance degradation when running GGUF
---
 .github/workflows/mlx.yml                     |  4 +
 backends/mlx/builder/op_helpers.py            |  2 +-
 backends/mlx/patterns.py                      | 79 ++++++++++++++++---
 backends/mlx/test/test_ops.py                 | 14 ++++
 examples/models/gemma4_31b/README.md          |  1 +
 examples/models/gemma4_31b/export.py          |  7 +-
 examples/models/gemma4_31b/gguf_loader.py     | 19 +++--
 examples/models/gemma4_31b/quant/README.md    |  2 -
 examples/models/gemma4_31b/quant/pack_mlx.py  |  6 +-
 .../gemma4_31b/quant/tests/test_pack_mlx.py   | 46 ++++++++++-
 .../gemma4_31b/tests/test_mlx_pipeline.py     | 79 +++++++++++++++++++
 11 files changed, 233 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/mlx.yml b/.github/workflows/mlx.yml
index c4be146f862..027101ba7f0 100644
--- a/.github/workflows/mlx.yml
+++ b/.github/workflows/mlx.yml
@@ -47,6 +47,10 @@ jobs:
 
         ${CONDA_RUN} pip list
 
+        echo "::group::Install Python test requirements"
+        ${CONDA_RUN} pip install gguf
+        echo "::endgroup::"
+
         echo "::group::Build test runners"
         ${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner -j$(( $(sysctl -n hw.ncpu) - 1 ))
         echo "::endgroup::"
diff --git a/backends/mlx/builder/op_helpers.py b/backends/mlx/builder/op_helpers.py
index 40e71e0bdab..7740546cc2c 100644
--- a/backends/mlx/builder/op_helpers.py
+++ b/backends/mlx/builder/op_helpers.py
@@ -334,7 +334,7 @@ def parse_dequant_node(
     if len(non_one) != 1:
         return None
     quantized_dim, group_size = non_one[0]
-    if group_size not in [32, 64, 128]:
+    if group_size not in [16, 32, 64, 128]:
         return None
 
     # TODO: MLX supports 3, 5, and 7, but we need to figure out the
diff --git a/backends/mlx/patterns.py b/backends/mlx/patterns.py
index 29e5e326c69..5f74cbea643 100644
--- a/backends/mlx/patterns.py
+++ b/backends/mlx/patterns.py
@@ -15,6 +15,7 @@
 
 from __future__ import annotations
 
+import os
 from typing import Any, List, Optional, Tuple
 
 import torch
@@ -37,6 +38,7 @@
 )
 from executorch.backends.mlx.serialization.mlx_graph_schema import (
     AddIntNode,
+    AddmmNode,
     AddNode,
     AsTypeNode,
     DequantizeNode,
@@ -52,6 +54,7 @@
     SubtractIntNode,
     SymSizeNode,
     TakeNode,
+    TransposeNode,
 )
 from torch.export.exported_program import ExportedProgram
 from torch.fx.node import Node
@@ -883,6 +886,18 @@ def maybe_create(
             out_dtype=out_dtype,
         )
 
+    # MLX's quantized_matmul Metal kernels are only instantiated for
+    # group_size in {32, 64, 128}. For smaller group sizes (e.g. GGUF
+    # Q6_K with group_size=16), emit DequantizeNode + matmul instead.
+    # Weights stay packed in the .pte file; dequantized on-device.
+    # This non-fused path is significantly slower and must be opted in
+    # via ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS=1.
+    _MIN_FUSED_GROUP_SIZE = 32
+
+    @staticmethod
+    def _allow_non_fused() -> bool:
+        return os.environ.get("ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS", "0") == "1"
+
     def __call__(self, P: MLXProgramBuilder, n: Node) -> Slot:
         assert n == self.head
 
@@ -908,19 +923,59 @@ def __call__(self, P: MLXProgramBuilder, n: Node) -> Slot:
         x_dtype = x_node.meta["val"].dtype
         needs_cast = self.out_dtype != x_dtype
 
-        P.emit(
-            QuantizedMatmulNode(
-                x=P.slot_to_tid(x_slot),
-                w=P.slot_to_tid(w),
-                scales=P.slot_to_tid(scale_slot),
-                out=P.slot_to_tid(out),
-                biases=P.slot_to_tid(biases),
-                group_size=self.group_size,
-                bits=self.bits,
-                mode="affine",
-                transpose=True,
+        if self.group_size >= self._MIN_FUSED_GROUP_SIZE:
+            P.emit(
+                QuantizedMatmulNode(
+                    x=P.slot_to_tid(x_slot),
+                    w=P.slot_to_tid(w),
+                    scales=P.slot_to_tid(scale_slot),
+                    out=P.slot_to_tid(out),
+                    biases=P.slot_to_tid(biases),
+                    group_size=self.group_size,
+                    bits=self.bits,
+                    mode="affine",
+                    transpose=True,
+                )
             )
-        )
+        else:
+            if not self._allow_non_fused():
+                raise ValueError(
+                    f"Quantized linear with group_size={self.group_size} requires "
+                    f"the non-fused dequantize+matmul path, which is significantly "
+                    f"slower than the fused QuantizedMatmulNode (group_size >= 32). "
+                    f"Set ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS=1 to allow this."
+                )
+            out_scalar_type = torch_dtype_to_scalar_type(self.out_dtype)
+            _, w_deq = P.make_tmp_slot()
+            P.emit(
+                DequantizeNode(
+                    w=P.slot_to_tid(w),
+                    scales=P.slot_to_tid(scale_slot),
+                    out=P.slot_to_tid(w_deq),
+                    biases=P.slot_to_tid(biases),
+                    group_size=self.group_size,
+                    bits=self.bits,
+                    mode="affine",
+                    dtype=out_scalar_type,
+                )
+            )
+            _, w_t = P.make_tmp_slot()
+            P.emit(
+                TransposeNode(
+                    x=P.slot_to_tid(w_deq),
+                    out=P.slot_to_tid(w_t),
+                    perm=[1, 0],
+                )
+            )
+            P.emit(
+                AddmmNode(
+                    mat1=P.slot_to_tid(x_slot),
+                    mat2=P.slot_to_tid(w_t),
+                    out=P.slot_to_tid(out),
+                )
+            )
+            # DequantizeNode already produces the correct dtype.
+            needs_cast = False
 
         if has_bias:
             P.emit(
diff --git a/backends/mlx/test/test_ops.py b/backends/mlx/test/test_ops.py
index 4471610519e..45ea024f0e8 100644
--- a/backends/mlx/test/test_ops.py
+++ b/backends/mlx/test/test_ops.py
@@ -24,6 +24,7 @@
 See README.md in this directory for full documentation.
 """
 
+import os
 from typing import Callable, Dict, List, Optional, Tuple
 
 import torch
@@ -5621,8 +5622,21 @@ def get_test_configs(cls) -> List["QuantizedLinearTest"]:
             cls(group_size=128),
             cls(qdtype=torch.int2),
             cls(qdtype=torch.int8),
+            # group_size=16: exercises the non-fused dequantize+matmul path
+            # (requires ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS=1).
+            cls(qdtype=torch.int8, group_size=16),
+            cls(qdtype=torch.int4, group_size=16),
+            cls(qdtype=torch.int8, group_size=16, bias=False),
         ]
 
+    def generate_test_files(self, verbose=False):
+        if self.group_size < 32:
+            os.environ["ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS"] = "1"
+        try:
+            return super().generate_test_files(verbose=verbose)
+        finally:
+            os.environ.pop("ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS", None)
+
     def create_model(self) -> nn.Module:
         model = LinearModel(self.in_features, self.out_features, bias=self.bias)
         model = model.to(self.dtype)
diff --git a/examples/models/gemma4_31b/README.md b/examples/models/gemma4_31b/README.md
index da4aa893079..c6ac10748d8 100644
--- a/examples/models/gemma4_31b/README.md
+++ b/examples/models/gemma4_31b/README.md
@@ -15,6 +15,7 @@ both export and eager inference:
 |---|---|---|
 | `quantize_and_save.py` | bf16 HF checkpoint → quantized checkpoint (one-time) | ~30 GB CPU |
 | `export.py --prequantized <dir>` | quantized checkpoint → `model.pte` + `model.ptd` | ~24 GB CPU + CUDA for packing |
+| `export.py --gguf <file> [--backend mlx]` | GGUF file (Q4_K_M, etc.) → `model.pte` + `model.ptd` | ~24 GB CPU |
 | `inference.py --prequantized <dir>` | quantized checkpoint → eager generation under `torch.compile` | ~24 GB GPU |
 | `inference.py --gguf <file>` | GGUF file (Q4_K_M, etc.) → eager generation | ~24 GB GPU |
 | `export.py --model-dir <hf>` | one-shot bf16 → quantize → export (no intermediate file) | ~30 GB CPU + CUDA for packing |
diff --git a/examples/models/gemma4_31b/export.py b/examples/models/gemma4_31b/export.py
index 046e365947b..bd648f534b5 100644
--- a/examples/models/gemma4_31b/export.py
+++ b/examples/models/gemma4_31b/export.py
@@ -443,7 +443,12 @@ def main() -> None:
             backend=args.backend,
         )
 
-    export_and_lower(model, config, args.output_dir, backend=args.backend)
+    if args.gguf and args.backend == "mlx":
+        os.environ["ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS"] = "1"
+    try:
+        export_and_lower(model, config, args.output_dir, backend=args.backend)
+    finally:
+        os.environ.pop("ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS", None)
 
 
 if __name__ == "__main__":
diff --git a/examples/models/gemma4_31b/gguf_loader.py b/examples/models/gemma4_31b/gguf_loader.py
index 3e50991e553..35dddb5a0dc 100644
--- a/examples/models/gemma4_31b/gguf_loader.py
+++ b/examples/models/gemma4_31b/gguf_loader.py
@@ -12,6 +12,7 @@
 
 Usage:
     model, config = load_gguf_model("model.gguf", backend="cuda")
+    model, config = load_gguf_model("model.gguf", backend="mlx")
 """
 
 from typing import Optional
@@ -104,10 +105,11 @@ def load_gguf_model(
     Streams tensors one at a time for low peak memory.
 
     GGUF ties ``embed_tokens`` and ``lm_head`` into a single Q4_K tensor.
-    We untie them: the embedding is dequantized to bf16 (``nn.Embedding``
-    needs gather, which ``Int4TilePackedTo4dTensor`` does not support),
-    while ``lm_head`` keeps the original Q4_K quantization (``nn.Linear``
-    matmul via tinygemm).
+    We untie them so ``lm_head`` keeps the original Q4_K quantization.
+    On CUDA, the embedding is dequantized to bf16 because ``Int4Tensor``
+    does not support the gather op that ``nn.Embedding`` requires.  On
+    MLX, the embedding stays quantized — ``QuantizedEmbeddingHandler``
+    handles quantized gather natively.
 
     Returns ``(model, config)``.
     """
@@ -120,8 +122,12 @@ def load_gguf_model(
         from executorch.examples.models.gemma4_31b.quant import DEFAULT_CUDA_PACKERS
 
         packers = DEFAULT_CUDA_PACKERS
+    elif backend == "mlx":
+        from executorch.examples.models.gemma4_31b.quant import DEFAULT_MLX_PACKERS
+
+        packers = DEFAULT_MLX_PACKERS
     else:
-        raise ValueError(f"Unsupported backend: {backend!r}. Supported: 'cuda'.")
+        raise ValueError(f"Unsupported backend: {backend!r}. Supported: 'cuda', 'mlx'.")
 
     config = Gemma4_31BConfig(max_seq_len=max_seq_len)
 
@@ -143,7 +149,8 @@ def load_gguf_model(
 
         if model_key == "embed_tokens.weight" and isinstance(result, Int4Tensor):
             embed_quant = result
-            result = dequantize_weight(result, torch.bfloat16)
+            if backend == "cuda":
+                result = dequantize_weight(result, torch.bfloat16)
 
         pack_one(model, model_key, result, packers)
 
diff --git a/examples/models/gemma4_31b/quant/README.md b/examples/models/gemma4_31b/quant/README.md
index 2eacced4387..92ddbf97243 100644
--- a/examples/models/gemma4_31b/quant/README.md
+++ b/examples/models/gemma4_31b/quant/README.md
@@ -50,5 +50,3 @@ The format is compatible with torchao's `save_pretrained` / `load_pretrained`.
 
 - `pack_metal.py` — Metal backend packer.
 - `gguf.py` — extend with Q5_K, Q8_0 GGUF quant types.
-- Upstream `Int4TilePackedTo4dTensor.from_int4_tensor()` to torchao
-  to replace the manual conversion in `pack_int4_for_cuda`.
diff --git a/examples/models/gemma4_31b/quant/pack_mlx.py b/examples/models/gemma4_31b/quant/pack_mlx.py
index 63aeca426a8..d627c9c437c 100644
--- a/examples/models/gemma4_31b/quant/pack_mlx.py
+++ b/examples/models/gemma4_31b/quant/pack_mlx.py
@@ -22,7 +22,7 @@
 
 from .pack import ModulePackerFn, pack_model  # noqa: F401
 
-_MLX_SUPPORTED_GROUP_SIZES = (128, 64, 32)
+_MLX_SUPPORTED_GROUP_SIZES = (128, 64, 32, 16)
 
 
 # ---------------------------------------------------------------------------
@@ -126,7 +126,9 @@ def pack_for_mlx(module: nn.Module, weights: dict[str, torch.Tensor]) -> None:
     default dispatch produces the ``dequantize_affine → linear`` pattern
     MLX expects.  Regroups to a compatible group_size when needed (e.g.
     per-axis group_size=5376 → group_size=128) since MLX's
-    ``parse_dequant_node`` only accepts group_size in {32, 64, 128}.
+    ``parse_dequant_node`` only accepts group_size in {16, 32, 64, 128}.
+    Group sizes ≥ 32 use the fused ``QuantizedMatmulNode``; group_size=16
+    (e.g. GGUF Q6_K) falls back to ``DequantizeNode`` + matmul at export.
     """
     from torchao.quantization import IntxUnpackedToInt8Tensor
     from torchao.quantization.quantize_.workflows.int4.int4_tensor import Int4Tensor
diff --git a/examples/models/gemma4_31b/quant/tests/test_pack_mlx.py b/examples/models/gemma4_31b/quant/tests/test_pack_mlx.py
index ffb2e0e2dd3..2e6310b9c10 100644
--- a/examples/models/gemma4_31b/quant/tests/test_pack_mlx.py
+++ b/examples/models/gemma4_31b/quant/tests/test_pack_mlx.py
@@ -146,7 +146,7 @@ def test_regroup_preserves_dequant(self):
 
 class TestMlxGroupSize(unittest.TestCase):
     def test_passthrough(self):
-        for gs in (32, 64, 128):
+        for gs in (16, 32, 64, 128):
             self.assertEqual(_mlx_group_size(gs, 256), gs)
 
     def test_regroup_5376(self):
@@ -157,7 +157,49 @@ def test_regroup_256(self):
 
     def test_rejects_indivisible(self):
         with self.assertRaises(ValueError):
-            _mlx_group_size(48, 48)
+            _mlx_group_size(7, 7)
+
+
+class TestPackLinearGroupSize16(unittest.TestCase):
+    """Packing group_size=16 weights (GGUF Q6_K) preserves semantics."""
+
+    def _make_gs16_tensor(self, N=64, K=128):
+        from torchao.quantization import IntxUnpackedToInt8Tensor
+
+        return IntxUnpackedToInt8Tensor(
+            qdata=torch.randint(-32, 31, (N, K), dtype=torch.int8),
+            scale=torch.randn(N, K // 16, dtype=torch.bfloat16),
+            zero_point=torch.zeros(N, K // 16, dtype=torch.int8),
+            target_dtype=torch.int8,
+            block_size=(1, 16),
+            dtype=torch.bfloat16,
+            activation_quantization=None,
+        )
+
+    def test_dequant_preserves_values(self):
+        """Packing preserves the dequantized weight values."""
+        w = self._make_gs16_tensor(64, 128)
+        before = dequantize_weight(w, torch.float32)
+
+        module = nn.Linear(128, 64, bias=False)
+        pack_for_mlx(module, {"weight": w})
+        after = dequantize_weight(module.weight.data, torch.float32)
+
+        self.assertTrue(
+            torch.allclose(before, after, atol=1e-5),
+            f"max diff: {(before - after).abs().max():.6g}",
+        )
+
+    def test_forward_produces_valid_output(self):
+        """Packed gs=16 weight produces finite output in a linear forward."""
+        w = self._make_gs16_tensor(64, 128)
+        module = nn.Linear(128, 64, bias=False)
+        pack_for_mlx(module, {"weight": w})
+
+        x = torch.randn(1, 128, dtype=torch.bfloat16)
+        out = torch.nn.functional.linear(x, module.weight.data.dequantize())
+        self.assertEqual(out.shape, torch.Size([1, 64]))
+        self.assertFalse(torch.isnan(out).any())
 
 
 class TestPackEmbeddingForMlx(unittest.TestCase):
diff --git a/examples/models/gemma4_31b/tests/test_mlx_pipeline.py b/examples/models/gemma4_31b/tests/test_mlx_pipeline.py
index 0e62ab88e4b..37f61fddb0f 100644
--- a/examples/models/gemma4_31b/tests/test_mlx_pipeline.py
+++ b/examples/models/gemma4_31b/tests/test_mlx_pipeline.py
@@ -244,5 +244,84 @@ def test_export_to_pte(self):
             self.assertTrue(os.path.exists(os.path.join(out_dir, "model.pte")))
 
 
+class TestGgufMlxPipeline(unittest.TestCase):
+    """Test GGUF → MLX loading path with synthetic Q6_K-like tensors."""
+
+    def test_load_gguf_model_mlx_backend(self):
+        """gguf_loader.load_gguf_model accepts backend='mlx'."""
+        try:
+            import gguf  # noqa: F401
+        except ModuleNotFoundError:
+            self.skipTest("gguf package not installed")
+
+        from executorch.examples.models.gemma4_31b.gguf_loader import load_gguf_model
+
+        # Will fail on missing file, but NOT on "Unsupported backend".
+        with self.assertRaisesRegex((FileNotFoundError, OSError, RuntimeError), ".*"):
+            load_gguf_model("/nonexistent.gguf", backend="mlx")
+
+    def test_mlx_backend_rejects_unknown(self):
+        from executorch.examples.models.gemma4_31b.gguf_loader import load_gguf_model
+
+        with self.assertRaisesRegex(ValueError, "Unsupported backend"):
+            load_gguf_model("/nonexistent.gguf", backend="tpu")
+
+    def test_gs16_packing_preserves_values(self):
+        """Q6_K-like weight (gs=16) preserves dequantized values after packing."""
+        from executorch.examples.models.gemma4_31b.quant.pack_mlx import pack_for_mlx
+        from executorch.examples.models.gemma4_31b.quant.quantize import (
+            dequantize_weight,
+        )
+        from torchao.quantization import IntxUnpackedToInt8Tensor
+
+        w = IntxUnpackedToInt8Tensor(
+            qdata=torch.randint(-32, 31, (64, 128), dtype=torch.int8),
+            scale=torch.randn(64, 8, dtype=torch.bfloat16),
+            zero_point=torch.zeros(64, 8, dtype=torch.int8),
+            target_dtype=torch.int8,
+            block_size=(1, 16),
+            dtype=torch.bfloat16,
+            activation_quantization=None,
+        )
+        before = dequantize_weight(w, torch.float32)
+
+        module = nn.Linear(128, 64, bias=False)
+        pack_for_mlx(module, {"weight": w})
+        after = dequantize_weight(module.weight.data, torch.float32)
+
+        self.assertTrue(
+            torch.allclose(before, after, atol=1e-5),
+            f"max diff: {(before - after).abs().max():.6g}",
+        )
+
+    def test_embedding_packing_preserves_values(self):
+        """MLX embedding packing preserves dequantized weight values."""
+        from executorch.examples.models.gemma4_31b.quant.pack_mlx import pack_for_mlx
+        from executorch.examples.models.gemma4_31b.quant.quantize import (
+            dequantize_weight,
+        )
+        from torchao.quantization import IntxUnpackedToInt8Tensor
+
+        w = IntxUnpackedToInt8Tensor(
+            qdata=torch.randint(-8, 7, (256, 128), dtype=torch.int8),
+            scale=torch.randn(256, 4, dtype=torch.bfloat16),
+            zero_point=torch.zeros(256, 4, dtype=torch.bfloat16),
+            target_dtype=torch.int4,
+            block_size=(1, 32),
+            dtype=torch.bfloat16,
+            activation_quantization=None,
+        )
+        before = dequantize_weight(w, torch.float32)
+
+        module = nn.Embedding(256, 128)
+        pack_for_mlx(module, {"weight": w})
+        after = dequantize_weight(module.weight.data, torch.float32)
+
+        self.assertTrue(
+            torch.allclose(before, after, atol=1e-5),
+            f"max diff: {(before - after).abs().max():.6g}",
+        )
+
+
 if __name__ == "__main__":
     unittest.main()

From 9596866371dbabf763de063a5ab2fa00c5c3fe2e Mon Sep 17 00:00:00 2001
From: Siddartha Pothapragada <sidart@meta.com>
Date: Thu, 28 May 2026 17:38:40 -0700
Subject: [PATCH 068/103] Add ASR module and LoRA/dataFiles instrumentation
 tests (#19859)

Adds two new Android instrumentation test suites covering previously
untested API surfaces, completing feature testing coverage for OKR 3.2.

AsrModuleInstrumentationTest (18 tests): constructor validation,
lifecycle (close idempotency, use-after-close), transcribe validation,
and AsrTranscribeConfig builder/validation.

LlmLoraInstrumentationTest (13 tests): dataFiles constructor variants,
LlmModuleConfig with dataPath, invalid data file error handling,
baseline equivalence, and config builder validation.

  ## Test plan
  - [x] `./gradlew :executorch_android:connectedAndroidTest
-Pandroid.testInstrumentationRunnerArguments.class=org.pytorch.executor
  ch.AsrModuleInstrumentationTest`
  - [x] `./gradlew :executorch_android:connectedAndroidTest
-Pandroid.testInstrumentationRunnerArguments.class=org.pytorch.executor
  ch.LlmLoraInstrumentationTest`
  - [x] Verify all 31 new tests pass on emulator (API 34 x86_64)
  - [x] Verify existing tests are unaffected
---
 .../AsrModuleInstrumentationTest.kt           | 260 ++++++++++++++++
 .../executorch/LlmLoraInstrumentationTest.kt  | 291 ++++++++++++++++++
 2 files changed, 551 insertions(+)
 create mode 100644 extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/AsrModuleInstrumentationTest.kt
 create mode 100644 extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmLoraInstrumentationTest.kt

diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/AsrModuleInstrumentationTest.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/AsrModuleInstrumentationTest.kt
new file mode 100644
index 00000000000..fe8a168e406
--- /dev/null
+++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/AsrModuleInstrumentationTest.kt
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+package org.pytorch.executorch
+
+import androidx.test.ext.junit.runners.AndroidJUnit4
+import java.io.File
+import java.io.IOException
+import org.apache.commons.io.FileUtils
+import org.junit.Assert.assertEquals
+import org.junit.Assert.assertFalse
+import org.junit.Assert.assertTrue
+import org.junit.Assert.fail
+import org.junit.Assume.assumeNotNull
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.pytorch.executorch.TestFileUtils.getTestFilePath
+import org.pytorch.executorch.extension.asr.AsrCallback
+import org.pytorch.executorch.extension.asr.AsrModule
+import org.pytorch.executorch.extension.asr.AsrTranscribeConfig
+
+/**
+ * Instrumentation tests for [AsrModule], [AsrTranscribeConfig], and [AsrCallback].
+ *
+ * Tests cover:
+ * - Constructor validation (invalid model/tokenizer/preprocessor paths)
+ * - AsrTranscribeConfig builder and validation
+ * - Lifecycle (close idempotency, use-after-close)
+ * - Transcribe validation (invalid WAV path)
+ *
+ * The test fixture is the TinyStories-110M LLM model, NOT an ASR model, so functional transcription
+ * tests are not possible. Tests that require a valid AsrModule instance handle the case where
+ * nativeCreate fails (stories.pte lacks encoder/text_decoder methods).
+ */
+@RunWith(AndroidJUnit4::class)
+class AsrModuleInstrumentationTest {
+
+  // ─── Constructor validation ─────────────────────────────────────────────────
+
+  @Test(timeout = 30_000)
+  fun testInvalidModelPathThrows() {
+    try {
+      AsrModule("/nonexistent/model.pte", "/nonexistent/tokenizer")
+      fail("Should throw for invalid model path")
+    } catch (_: IllegalArgumentException) {
+      // Expected: require(modelFile.canRead() && modelFile.isFile)
+    }
+  }
+
+  @Test(timeout = 30_000)
+  fun testInvalidTokenizerPathThrows() {
+    val modelFile = provisionModelFile()
+    assumeNotNull("Test resource $MODEL_FILE_NAME not available", modelFile)
+    try {
+      AsrModule(modelFile!!.absolutePath, "/nonexistent/tokenizer")
+      fail("Should throw for invalid tokenizer path")
+    } catch (_: IllegalArgumentException) {
+      // Expected: require(tokenizerFile.exists())
+    }
+  }
+
+  @Test(timeout = 30_000)
+  fun testInvalidPreprocessorPathThrows() {
+    val modelFile = provisionModelFile()
+    val tokenizerFile = provisionTokenizerFile()
+    assumeNotNull("Test resource $MODEL_FILE_NAME not available", modelFile)
+    assumeNotNull("Test resource $TOKENIZER_FILE_NAME not available", tokenizerFile)
+    try {
+      AsrModule(
+          modelFile!!.absolutePath,
+          tokenizerFile!!.absolutePath,
+          preprocessorPath = "/nonexistent/preprocessor.pte",
+      )
+      fail("Should throw for invalid preprocessor path")
+    } catch (_: IllegalArgumentException) {
+      // Expected: require(preprocessorFile.canRead() && preprocessorFile.isFile)
+    }
+  }
+
+  @Test(timeout = 30_000)
+  fun testNonAsrModelFailsGracefully() {
+    val modelFile = provisionModelFile()
+    val tokenizerFile = provisionTokenizerFile()
+    assumeNotNull("Test resource $MODEL_FILE_NAME not available", modelFile)
+    assumeNotNull("Test resource $TOKENIZER_FILE_NAME not available", tokenizerFile)
+    try {
+      val module = AsrModule(modelFile!!.absolutePath, tokenizerFile!!.absolutePath)
+      // If construction succeeds (model was accepted), verify basic state
+      assertTrue("Module should be valid after construction", module.isValid)
+      module.close()
+    } catch (_: ExecutorchRuntimeException) {
+      // Expected: nativeCreate returns 0 for non-ASR model
+    } catch (_: RuntimeException) {
+      // Also acceptable: native layer rejects the model
+    }
+  }
+
+  // ─── Lifecycle ──────────────────────────────────────────────────────────────
+
+  @Test(timeout = 30_000)
+  fun testCloseIsIdempotent() {
+    val module = tryCreateAsrModule() ?: return
+    module.close()
+    module.close()
+    module.close()
+    assertFalse("isValid must be false after close", module.isValid)
+  }
+
+  @Test(timeout = 30_000)
+  fun testLoadAfterCloseThrows() {
+    val module = tryCreateAsrModule() ?: return
+    module.close()
+    try {
+      module.load()
+      fail("load() after close() must throw IllegalStateException")
+    } catch (_: IllegalStateException) {
+      // Expected
+    }
+  }
+
+  @Test(timeout = 30_000)
+  fun testTranscribeAfterCloseThrows() {
+    val module = tryCreateAsrModule() ?: return
+    module.close()
+    try {
+      module.transcribe("/some/audio.wav")
+      fail("transcribe() after close() must throw IllegalStateException")
+    } catch (_: IllegalStateException) {
+      // Expected
+    }
+  }
+
+  @Test(timeout = 30_000)
+  fun testIsValidAndIsLoadedState() {
+    val module = tryCreateAsrModule() ?: return
+    assertTrue("Module should be valid after construction", module.isValid)
+    module.close()
+    assertFalse("Module should not be valid after close", module.isValid)
+    assertFalse("Module should not be loaded after close", module.isLoaded)
+  }
+
+  // ─── Transcribe validation ──────────────────────────────────────────────────
+
+  @Test(timeout = 30_000)
+  fun testTranscribeInvalidWavPathThrows() {
+    val module = tryCreateAsrModule() ?: return
+    try {
+      module.transcribe("/nonexistent/audio.wav")
+      fail("transcribe() with invalid WAV path must throw")
+    } catch (_: IllegalArgumentException) {
+      // Expected: require(wavFile.canRead() && wavFile.isFile)
+    } finally {
+      module.close()
+    }
+  }
+
+  // ─── AsrTranscribeConfig ────────────────────────────────────────────────────
+
+  @Test
+  fun testConfigDefaults() {
+    val config = AsrTranscribeConfig()
+    assertEquals(128L, config.maxNewTokens)
+    assertEquals(0.0f, config.temperature, 0.0f)
+    assertEquals(0L, config.decoderStartTokenId)
+  }
+
+  @Test
+  fun testConfigBuilder() {
+    val config =
+        AsrTranscribeConfig.Builder()
+            .setMaxNewTokens(256)
+            .setTemperature(0.7f)
+            .setDecoderStartTokenId(50258)
+            .build()
+    assertEquals(256L, config.maxNewTokens)
+    assertEquals(0.7f, config.temperature, 0.001f)
+    assertEquals(50258L, config.decoderStartTokenId)
+  }
+
+  @Test
+  fun testConfigCustomValues() {
+    val config = AsrTranscribeConfig(maxNewTokens = 64, temperature = 0.5f, decoderStartTokenId = 1)
+    assertEquals(64L, config.maxNewTokens)
+    assertEquals(0.5f, config.temperature, 0.001f)
+    assertEquals(1L, config.decoderStartTokenId)
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigZeroMaxNewTokensThrows() {
+    AsrTranscribeConfig(maxNewTokens = 0)
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigNegativeMaxNewTokensThrows() {
+    AsrTranscribeConfig(maxNewTokens = -1)
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigNegativeTemperatureThrows() {
+    AsrTranscribeConfig(temperature = -0.1f)
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigBuilderZeroMaxNewTokensThrows() {
+    AsrTranscribeConfig.Builder().setMaxNewTokens(0).build()
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigBuilderNegativeTemperatureThrows() {
+    AsrTranscribeConfig.Builder().setTemperature(-1.0f).build()
+  }
+
+  @Test
+  fun testConfigDataClassEquality() {
+    val a = AsrTranscribeConfig(maxNewTokens = 100, temperature = 0.5f, decoderStartTokenId = 42)
+    val b = AsrTranscribeConfig(maxNewTokens = 100, temperature = 0.5f, decoderStartTokenId = 42)
+    assertEquals(a, b)
+    assertEquals(a.hashCode(), b.hashCode())
+  }
+
+  // ─── Helpers ────────────────────────────────────────────────────────────────
+
+  @Throws(IOException::class)
+  private fun provisionModelFile(): File? {
+    val pteFile = File(getTestFilePath(MODEL_FILE_NAME))
+    val stream = javaClass.getResourceAsStream(MODEL_FILE_NAME) ?: return null
+    stream.use { FileUtils.copyInputStreamToFile(it, pteFile) }
+    return pteFile
+  }
+
+  @Throws(IOException::class)
+  private fun provisionTokenizerFile(): File? {
+    val tokenizerFile = File(getTestFilePath(TOKENIZER_FILE_NAME))
+    val stream = javaClass.getResourceAsStream(TOKENIZER_FILE_NAME) ?: return null
+    stream.use { FileUtils.copyInputStreamToFile(it, tokenizerFile) }
+    return tokenizerFile
+  }
+
+  private fun tryCreateAsrModule(): AsrModule? {
+    val modelFile = provisionModelFile()
+    val tokenizerFile = provisionTokenizerFile()
+    assumeNotNull("Test resource $MODEL_FILE_NAME not available", modelFile)
+    assumeNotNull("Test resource $TOKENIZER_FILE_NAME not available", tokenizerFile)
+    return try {
+      AsrModule(modelFile!!.absolutePath, tokenizerFile!!.absolutePath)
+    } catch (_: RuntimeException) {
+      // nativeCreate may reject non-ASR models — skip lifecycle tests in that case
+      null
+    }
+  }
+
+  companion object {
+    private const val MODEL_FILE_NAME = "/stories.pte"
+    private const val TOKENIZER_FILE_NAME = "/tokenizer.bin"
+  }
+}
diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmLoraInstrumentationTest.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmLoraInstrumentationTest.kt
new file mode 100644
index 00000000000..a8d35b09de2
--- /dev/null
+++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmLoraInstrumentationTest.kt
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+package org.pytorch.executorch
+
+import androidx.test.ext.junit.runners.AndroidJUnit4
+import java.io.File
+import java.io.IOException
+import org.apache.commons.io.FileUtils
+import org.junit.After
+import org.junit.Assert.assertTrue
+import org.junit.Assert.fail
+import org.junit.Before
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.pytorch.executorch.TestFileUtils.getTestFilePath
+import org.pytorch.executorch.extension.llm.LlmCallback
+import org.pytorch.executorch.extension.llm.LlmModule
+import org.pytorch.executorch.extension.llm.LlmModuleConfig
+
+/**
+ * Instrumentation tests for LlmModule's LoRA / dataFiles constructor paths.
+ *
+ * LoRA adapters are loaded at construction time via the `dataFiles` parameter or
+ * `LlmModuleConfig.dataPath`. These tests verify that:
+ * 1. The dataFiles constructor variants produce a functional module
+ * 2. LlmModuleConfig with dataPath integrates correctly
+ * 3. Invalid data file paths are handled gracefully
+ * 4. Empty vs null dataFiles behave identically to no-data constructors
+ *
+ * Uses TinyStories-110M; no LoRA adapter fixture is available so functional LoRA tests
+ * (output-changes-with-adapter) are not possible.
+ */
+@RunWith(AndroidJUnit4::class)
+class LlmLoraInstrumentationTest {
+
+  private var llmModule: LlmModule? = null
+
+  @Before
+  @Throws(IOException::class)
+  fun setUp() {
+    val pteFile = File(getTestFilePath(MODEL_FILE_NAME))
+    requireNotNull(javaClass.getResourceAsStream(MODEL_FILE_NAME)) {
+          "Test resource $MODEL_FILE_NAME not found; did android_test_setup.sh run?"
+        }
+        .use { FileUtils.copyInputStreamToFile(it, pteFile) }
+
+    val tokenizerFile = File(getTestFilePath(TOKENIZER_FILE_NAME))
+    requireNotNull(javaClass.getResourceAsStream(TOKENIZER_FILE_NAME)) {
+          "Test resource $TOKENIZER_FILE_NAME not found; did android_test_setup.sh run?"
+        }
+        .use { FileUtils.copyInputStreamToFile(it, tokenizerFile) }
+  }
+
+  @After
+  fun tearDown() {
+    llmModule?.close()
+    llmModule = null
+  }
+
+  // ─── dataFiles constructor variants ─────────────────────────────────────────
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testConstructorWithEmptyDataFilesList() {
+    llmModule =
+        LlmModule(
+            LlmModule.MODEL_TYPE_TEXT,
+            getTestFilePath(MODEL_FILE_NAME),
+            getTestFilePath(TOKENIZER_FILE_NAME),
+            0.0f,
+            emptyList<String>(),
+        )
+    val tokens = generateAndCollect(llmModule!!)
+    assertTrue("Module with empty dataFiles should generate tokens", tokens.isNotEmpty())
+  }
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testConstructorWithNullDataPath() {
+    llmModule =
+        LlmModule(
+            LlmModule.MODEL_TYPE_TEXT,
+            getTestFilePath(MODEL_FILE_NAME),
+            getTestFilePath(TOKENIZER_FILE_NAME),
+            0.0f,
+            null as String?,
+        )
+    val tokens = generateAndCollect(llmModule!!)
+    assertTrue("Module with null dataPath should generate tokens", tokens.isNotEmpty())
+  }
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testConstructorWithDataFilesAndBosEos() {
+    llmModule =
+        LlmModule(
+            LlmModule.MODEL_TYPE_TEXT,
+            getTestFilePath(MODEL_FILE_NAME),
+            getTestFilePath(TOKENIZER_FILE_NAME),
+            0.0f,
+            emptyList<String>(),
+            0,
+            0,
+        )
+    val tokens = generateAndCollect(llmModule!!)
+    assertTrue("Module with dataFiles+BOS/EOS should generate tokens", tokens.isNotEmpty())
+  }
+
+  // ─── LlmModuleConfig with dataPath ──────────────────────────────────────────
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testLlmModuleConfigNoDataPath() {
+    val config =
+        LlmModuleConfig.create()
+            .modulePath(getTestFilePath(MODEL_FILE_NAME))
+            .tokenizerPath(getTestFilePath(TOKENIZER_FILE_NAME))
+            .temperature(0.0f)
+            .build()
+    llmModule = LlmModule(config)
+    val tokens = generateAndCollect(llmModule!!)
+    assertTrue("Module via config with no dataPath should generate tokens", tokens.isNotEmpty())
+  }
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testLlmModuleConfigWithNullDataPath() {
+    val config =
+        LlmModuleConfig.create()
+            .modulePath(getTestFilePath(MODEL_FILE_NAME))
+            .tokenizerPath(getTestFilePath(TOKENIZER_FILE_NAME))
+            .temperature(0.0f)
+            .dataPath(null)
+            .build()
+    llmModule = LlmModule(config)
+    val tokens = generateAndCollect(llmModule!!)
+    assertTrue("Module via config with null dataPath should generate tokens", tokens.isNotEmpty())
+  }
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testLlmModuleConfigWithLoadMode() {
+    val config =
+        LlmModuleConfig.create()
+            .modulePath(getTestFilePath(MODEL_FILE_NAME))
+            .tokenizerPath(getTestFilePath(TOKENIZER_FILE_NAME))
+            .temperature(0.0f)
+            .loadMode(LlmModuleConfig.LOAD_MODE_FILE)
+            .build()
+    llmModule = LlmModule(config)
+    val tokens = generateAndCollect(llmModule!!)
+    assertTrue("Module via config with LOAD_MODE_FILE should generate tokens", tokens.isNotEmpty())
+  }
+
+  // ─── Invalid data file paths ────────────────────────────────────────────────
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testInvalidDataFilePathThrowsOnConstruction() {
+    try {
+      llmModule =
+          LlmModule(
+              LlmModule.MODEL_TYPE_TEXT,
+              getTestFilePath(MODEL_FILE_NAME),
+              getTestFilePath(TOKENIZER_FILE_NAME),
+              0.0f,
+              listOf("/nonexistent/lora_weights.bin"),
+          )
+      // dataFiles are passed to native initHybrid — invalid paths should cause
+      // construction to fail. If we reach here, the native layer didn't validate.
+      llmModule!!.close()
+      fail("Construction should have thrown for invalid data file path")
+    } catch (e: RuntimeException) {
+      assertTrue(
+          "Exception message should be non-empty",
+          e.message != null && e.message!!.isNotEmpty(),
+      )
+    }
+  }
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testMultipleInvalidDataFilePathsThrowOnConstruction() {
+    try {
+      llmModule =
+          LlmModule(
+              LlmModule.MODEL_TYPE_TEXT,
+              getTestFilePath(MODEL_FILE_NAME),
+              getTestFilePath(TOKENIZER_FILE_NAME),
+              0.0f,
+              listOf("/nonexistent/a.bin", "/nonexistent/b.bin"),
+          )
+      llmModule!!.close()
+      fail("Construction should have thrown for invalid data file paths")
+    } catch (e: RuntimeException) {
+      assertTrue(
+          "Exception message should be non-empty",
+          e.message != null && e.message!!.isNotEmpty(),
+      )
+    }
+  }
+
+  // ─── Baseline equivalence ───────────────────────────────────────────────────
+
+  @Test(timeout = MAX_TEST_TIMEOUT_MS)
+  fun testEmptyDataFilesMatchesNoDataConstructor() {
+    val moduleNoData =
+        LlmModule(getTestFilePath(MODEL_FILE_NAME), getTestFilePath(TOKENIZER_FILE_NAME), 0.0f)
+    val moduleEmptyList =
+        LlmModule(
+            LlmModule.MODEL_TYPE_TEXT,
+            getTestFilePath(MODEL_FILE_NAME),
+            getTestFilePath(TOKENIZER_FILE_NAME),
+            0.0f,
+            emptyList<String>(),
+        )
+
+    try {
+      val tokensNoData = generateAndCollect(moduleNoData)
+      val tokensEmptyList = generateAndCollect(moduleEmptyList)
+
+      assertTrue("Both constructors should produce tokens", tokensNoData.isNotEmpty())
+      assertTrue("Both constructors should produce tokens", tokensEmptyList.isNotEmpty())
+    } finally {
+      moduleNoData.close()
+      moduleEmptyList.close()
+    }
+  }
+
+  // ─── LlmModuleConfig builder validation ─────────────────────────────────────
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigBuilderMissingModulePathThrows() {
+    LlmModuleConfig.create().tokenizerPath("/some/tokenizer.bin").build()
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigBuilderMissingTokenizerPathThrows() {
+    LlmModuleConfig.create().modulePath("/some/model.pte").build()
+  }
+
+  @Test(expected = IllegalArgumentException::class)
+  fun testConfigBuilderInvalidLoadModeThrows() {
+    LlmModuleConfig.create()
+        .modulePath("/some/model.pte")
+        .tokenizerPath("/some/tokenizer.bin")
+        .loadMode(99)
+        .build()
+  }
+
+  @Test
+  fun testConfigBuilderAllLoadModes() {
+    val modes =
+        listOf(
+            LlmModuleConfig.LOAD_MODE_FILE,
+            LlmModuleConfig.LOAD_MODE_MMAP,
+            LlmModuleConfig.LOAD_MODE_MMAP_USE_MLOCK,
+            LlmModuleConfig.LOAD_MODE_MMAP_USE_MLOCK_IGNORE_ERRORS,
+        )
+    for (mode in modes) {
+      val config =
+          LlmModuleConfig.create()
+              .modulePath("/some/model.pte")
+              .tokenizerPath("/some/tokenizer.bin")
+              .loadMode(mode)
+              .build()
+      assertTrue("Config should accept load mode $mode", config.loadMode == mode)
+    }
+  }
+
+  // ─── Helpers ────────────────────────────────────────────────────────────────
+
+  private fun generateAndCollect(module: LlmModule): List<String> {
+    val collector = mutableListOf<String>()
+    module.generate(
+        TEST_PROMPT,
+        SEQ_LEN,
+        object : LlmCallback {
+          override fun onResult(result: String) {
+            collector.add(result)
+          }
+        },
+    )
+    return collector
+  }
+
+  companion object {
+    private const val MODEL_FILE_NAME = "/stories.pte"
+    private const val TOKENIZER_FILE_NAME = "/tokenizer.bin"
+    private const val TEST_PROMPT = "Once"
+    private const val SEQ_LEN = 16
+    private const val MAX_TEST_TIMEOUT_MS = 120_000L
+  }
+}

From 4de16d0ad24339f52f784c8e35297e702fb7675e Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Thu, 28 May 2026 19:43:41 -0700
Subject: [PATCH 069/103] Add shared fusion infrastructure and QuantFusionPass
 (#19724)

Differential Revision: D105728137

Pull Request resolved: https://github.com/pytorch/executorch/pull/19724
---
 backends/cadence/aot/compiler_funcs.py        |  30 +++
 backends/cadence/aot/pass_utils.py            |  17 ++
 backends/cadence/aot/quantizer/BUCK           |  15 ++
 .../cadence/aot/quantizer/pattern_utils.py    | 207 ++++++++++++++++++
 backends/cadence/aot/quantizer/patterns.py    |  18 +-
 backends/cadence/aot/quantizer/utils.py       |   4 +-
 6 files changed, 289 insertions(+), 2 deletions(-)
 create mode 100644 backends/cadence/aot/quantizer/pattern_utils.py

diff --git a/backends/cadence/aot/compiler_funcs.py b/backends/cadence/aot/compiler_funcs.py
index 02dcde7fd39..cec3cb7d016 100644
--- a/backends/cadence/aot/compiler_funcs.py
+++ b/backends/cadence/aot/compiler_funcs.py
@@ -14,6 +14,7 @@
 import torch
 from torch._inductor.decomposition import remove_decompositions
 from torch.fx import GraphModule
+from torch.fx.passes.infra.pass_base import PassBase, PassResult
 from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e, prepare_qat_pt2e
 from torchao.quantization.pt2e.quantizer import Quantizer
 
@@ -607,3 +608,32 @@ def sink_input_dequant_through_transparent_ops(
         graph_module.recompile()
 
     return modified
+
+
+class QuantFusionPass(PassBase):
+    """
+    Iterates patterns, finds anchor ops in the converted graph, and calls
+    pattern.fuse() to replace dq-op-q subgraphs with fused ops.
+    """
+
+    def __init__(self, patterns: Sequence[object]) -> None:
+        super().__init__()
+        self.patterns = patterns
+
+    def call(self, graph_module: GraphModule) -> Optional[PassResult]:
+        changed = False
+        for pattern in self.patterns:
+            pattern_changed = False
+            for target in pattern.anchor_ops():  # pyre-ignore[16]
+                for node in graph_module.graph.find_nodes(
+                    op="call_function", target=target
+                ):
+                    result = pattern.fuse(graph_module, node)  # pyre-ignore[16]
+                    if result is not None:
+                        changed = True
+                        pattern_changed = True
+            if pattern_changed:
+                graph_module.graph.eliminate_dead_code()
+        if changed:
+            graph_module.recompile()
+        return PassResult(graph_module, changed)
diff --git a/backends/cadence/aot/pass_utils.py b/backends/cadence/aot/pass_utils.py
index ab42ef43d56..091605e94ec 100644
--- a/backends/cadence/aot/pass_utils.py
+++ b/backends/cadence/aot/pass_utils.py
@@ -212,3 +212,20 @@ def nodes_not_adjacent_in_gm(
 def none_throws(x: Optional[PassResult]) -> PassResult:
     assert x is not None
     return x
+
+
+def replace_with_op(
+    gm: torch.fx.GraphModule,
+    insert_after: torch.fx.Node,
+    replacement_op: torch._ops.OpOverload,
+    args: tuple,  # pyre-ignore[2]
+    kwargs: dict,  # pyre-ignore[2]
+    node_to_replace: torch.fx.Node,
+) -> torch.fx.Node:
+    """Insert ``replacement_op`` after ``insert_after`` and replace all uses of
+    ``node_to_replace`` with the new node."""
+    with gm.graph.inserting_after(insert_after):
+        new_node = gm.graph.call_function(replacement_op, args, kwargs)
+    new_node.meta = node_to_replace.meta
+    node_to_replace.replace_all_uses_with(new_node)
+    return new_node
diff --git a/backends/cadence/aot/quantizer/BUCK b/backends/cadence/aot/quantizer/BUCK
index 34fec2556f8..c2ec3e3a1f6 100644
--- a/backends/cadence/aot/quantizer/BUCK
+++ b/backends/cadence/aot/quantizer/BUCK
@@ -14,6 +14,21 @@ fbcode_target(_kind = runtime.python_library,
     ],
 )
 
+fbcode_target(_kind = runtime.python_library,
+    name = "pattern_utils",
+    srcs = [
+        "pattern_utils.py",
+    ],
+    typing = True,
+    deps = [
+        ":utils",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler_utils",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:utils",
+    ],
+)
+
 fbcode_target(_kind = runtime.python_library,
     name = "patterns",
     srcs = [
diff --git a/backends/cadence/aot/quantizer/pattern_utils.py b/backends/cadence/aot/quantizer/pattern_utils.py
new file mode 100644
index 00000000000..25ff363ecc9
--- /dev/null
+++ b/backends/cadence/aot/quantizer/pattern_utils.py
@@ -0,0 +1,207 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import operator
+from typing import Any
+
+import torch
+from executorch.backends.cadence.aot.pass_utils import get_arg, replace_with_op
+from executorch.backends.cadence.aot.quantizer.utils import (
+    copy_node_metadata,
+    create_zero_bias_int32,
+    quantize_tensor_multiplier,
+)
+from executorch.backends.cadence.aot.utils import is_depthwise_conv
+from torch import fx
+from torch._ops import OpOverload
+
+DQ_PER_TENSOR: OpOverload = torch.ops.quantized_decomposed.dequantize_per_tensor.default
+Q_PER_TENSOR: OpOverload = torch.ops.quantized_decomposed.quantize_per_tensor.default
+
+
+def insert_node_with_meta(
+    gm: fx.GraphModule,
+    op: OpOverload,
+    args: tuple[Any, ...],
+    kwargs: dict[str, Any] | None,
+    insert_before: fx.Node,
+    like_node: fx.Node,
+) -> fx.Node:
+    """Create a new node and populate its FakeTensor metadata.
+
+    Inserts ``op(*args, **kwargs)`` before ``insert_before``, runs the op
+    under ``like_node``'s fake_mode to compute ``meta["val"]``, and copies
+    remaining metadata from ``like_node``.
+    """
+    with gm.graph.inserting_before(insert_before):
+        node = gm.graph.call_function(op, args, kwargs or {})
+    assert "val" in like_node.meta
+    fake_mode = like_node.meta["val"].fake_mode
+    assert fake_mode is not None
+
+    def _resolve(x: Any) -> Any:
+        return x.meta["val"] if isinstance(x, fx.Node) else x
+
+    fake_args = tuple(_resolve(a) for a in args)
+    fake_kwargs = {k: _resolve(v) for k, v in (kwargs or {}).items()}
+    with fake_mode:
+        node.meta["val"] = op(*fake_args, **fake_kwargs)
+    copy_node_metadata(node, like_node)
+    return node
+
+
+def find_quant_user(node: fx.Node) -> fx.Node | None:
+    """Find the first quantize_per_tensor user of ``node``, traversing through getitem."""
+    users = list(node.users)
+    if not users:
+        return None
+    user = users[0]
+    if user.target is operator.getitem:
+        if user.args[1] == 0:
+            users = list(user.users)
+            if not users:
+                return None
+            user = users[0]
+        else:
+            return None
+    if user.target == Q_PER_TENSOR:
+        return user
+    return None
+
+
+def fuse_conv(
+    pattern: object,
+    gm: fx.GraphModule,
+    conv_node: fx.Node,
+    dq_input: fx.Node,
+    dq_weight: fx.Node,
+    quant_node: fx.Node,
+) -> fx.Node:
+    """Fuse a dq->conv->q chain into a single quantized conv op."""
+    dq_bias = None
+    if len(conv_node.args) > 2 and conv_node.args[2] is not None:
+        bias_arg = conv_node.args[2]
+        assert isinstance(bias_arg, fx.Node)
+        dq_bias = bias_arg if bias_arg.target == DQ_PER_TENSOR else None
+    weight_scale = get_arg(dq_weight, "scale", float)
+    input_scale = get_arg(dq_input, "scale", float)
+    bias_scale = input_scale * weight_scale
+    if dq_bias is not None:
+        bias_q = get_arg(dq_bias, "input", fx.Node)
+    else:
+        # Cadence quantized conv ops require a non-optional bias argument.
+        weight_node = get_arg(dq_weight, "input", fx.Node)
+        with gm.graph.inserting_before(conv_node):
+            bias_q = create_zero_bias_int32(gm, weight_node, bias_scale)
+    requantize_scale = bias_scale / get_arg(quant_node, "scale", float)
+    requantize_scale_t = torch.tensor([requantize_scale])
+    out_multiplier, out_shift = quantize_tensor_multiplier(requantize_scale_t)
+    args = (
+        get_arg(dq_input, "input", fx.Node),
+        get_arg(dq_weight, "input", fx.Node),
+        bias_q,
+    )
+    groups = get_arg(conv_node, "groups", int)
+    kwargs = {
+        "stride": get_arg(conv_node, "stride", list[int]),
+        "padding": get_arg(conv_node, "padding", list[int]),
+        "dilation": get_arg(conv_node, "dilation", list[int]),
+        "groups": groups,
+        "input_zero_point": get_arg(dq_input, "zero_point", int),
+        "weight_zero_point": get_arg(dq_weight, "zero_point", int),
+        "bias_scale": bias_scale,
+        "out_scale": get_arg(quant_node, "scale", float),
+        "out_zero_point": get_arg(quant_node, "zero_point", int),
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
+    }
+    replacement_op = pattern.replacement_op()  # pyre-ignore[16]
+    if replacement_op == torch.ops.cadence.quantized_conv1d_ncl.per_tensor:
+        input_node = get_arg(dq_input, "input", fx.Node)
+        assert len(input_node.meta["val"].shape) >= 2
+        in_channels = input_node.meta["val"].shape[1]
+        if is_depthwise_conv(groups, in_channels):
+            replacement_op = torch.ops.cadence.quantized_depthwise_conv1d_ncl.per_tensor
+    return replace_with_op(gm, conv_node, replacement_op, args, kwargs, quant_node)
+
+
+def fuse_linear(
+    gm: fx.GraphModule,
+    dq_input: fx.Node,
+    dq_weight: fx.Node,
+    dq_bias: fx.Node | None,
+    quant_node: fx.Node,
+    op_node: fx.Node,
+    replacement_op: OpOverload,
+    weight_q: fx.Node | None = None,
+) -> fx.Node:
+    """Fuse a dq->linear->q chain into a single quantized linear op."""
+    assert op_node.target in (
+        torch.ops.aten.linear.default,
+        torch.ops.aten.addmm.default,
+    ), f"Expected linear/addmm, got {op_node.target}"
+    weight_scale = get_arg(dq_weight, "scale", float)
+    input_scale = get_arg(dq_input, "scale", float)
+    bias_scale = input_scale * weight_scale
+    requantize_scale = bias_scale / get_arg(quant_node, "scale", float)
+    requantize_scale_t = torch.tensor([requantize_scale])
+    out_multiplier, out_shift = quantize_tensor_multiplier(requantize_scale_t)
+    if dq_bias is not None:
+        bias_q = get_arg(dq_bias, "input", fx.Node)
+    else:
+        # Cadence quantized linear ops require a non-optional bias argument.
+        weight_node = get_arg(dq_weight, "input", fx.Node)
+        with gm.graph.inserting_before(op_node):
+            bias_q = create_zero_bias_int32(gm, weight_node, bias_scale)
+    final_weight = (
+        weight_q if weight_q is not None else get_arg(dq_weight, "input", fx.Node)
+    )
+    args = (get_arg(dq_input, "input", fx.Node), final_weight, bias_q)
+    kwargs = {
+        "src_zero_point": get_arg(dq_input, "zero_point", int),
+        "weight_zero_point": get_arg(dq_weight, "zero_point", int),
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
+        "out_zero_point": get_arg(quant_node, "zero_point", int),
+        "offset": None,
+    }
+    return replace_with_op(gm, op_node, replacement_op, args, kwargs, quant_node)
+
+
+def fuse_matmul(
+    gm: fx.GraphModule,
+    anchor_node: fx.Node,
+    dq0: fx.Node,
+    dq1: fx.Node,
+    quant_node: fx.Node,
+    replacement_op: OpOverload,
+) -> fx.Node:
+    """Fuse a dq->matmul->q chain into a single quantized matmul op."""
+    assert anchor_node.target in (
+        torch.ops.aten.bmm.default,
+        torch.ops.aten.matmul.default,
+    ), f"Expected bmm/matmul, got {anchor_node.target}"
+    scale0 = get_arg(dq0, "scale", float)
+    scale1 = get_arg(dq1, "scale", float)
+    requantize_scale = (scale0 * scale1) / get_arg(quant_node, "scale", float)
+    requantize_scale_t = torch.tensor([requantize_scale])
+    out_multiplier, out_shift = quantize_tensor_multiplier(requantize_scale_t)
+    args = (
+        get_arg(dq0, "input", fx.Node),
+        get_arg(dq0, "zero_point", int),
+        get_arg(dq1, "input", fx.Node),
+        get_arg(dq1, "zero_point", int),
+        None,
+    )
+    kwargs = {
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
+        "out_zero_point": get_arg(quant_node, "zero_point", int),
+        "transposed": False,
+    }
+    return replace_with_op(gm, anchor_node, replacement_op, args, kwargs, quant_node)
diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index 54c01227d07..e1f44b8ce5c 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -9,7 +9,7 @@
 import operator
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import List, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import torch
 from executorch.backends.cadence.aot.quantizer.utils import get_bias_qparams
@@ -79,6 +79,22 @@ def replacement_op(self) -> OpOverload:
         """
         pass
 
+    def anchor_ops(self) -> tuple[OpOverload, ...]:
+        return tuple(self.partition_types())
+
+    def fuse(
+        self,
+        gm: fx.GraphModule,
+        anchor_node: fx.Node,
+    ) -> Optional[fx.Node]:
+        """Replace the dq→op→q subgraph around ``anchor_node`` with a fused op.
+
+        Called by ``QuantFusionPass`` for each node matching ``anchor_ops()``.
+        Returns the new fused node on success, or ``None`` to skip this match.
+        Subclasses override to implement pattern-specific fusion logic.
+        """
+        return None
+
 
 class AddmmPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
diff --git a/backends/cadence/aot/quantizer/utils.py b/backends/cadence/aot/quantizer/utils.py
index 51182a4ce92..f5773938f0a 100644
--- a/backends/cadence/aot/quantizer/utils.py
+++ b/backends/cadence/aot/quantizer/utils.py
@@ -118,7 +118,9 @@ def create_zero_bias_int32(
     bias_scale: float,
 ) -> fx.Node:
     """
-    Creates a zero bias tensor with the shape of weight[0]
+    Creates a zero bias tensor with the shape of weight[0].
+    Caller is responsible for setting the graph insertion point
+    (e.g. ``with gm.graph.inserting_before(node):``).
     """
     try:
         attr_node = getattr(graph_module, weight_node.target)

From 007570a970b0d3d1188b887fae2fd276970499f5 Mon Sep 17 00:00:00 2001
From: Martin Pavella <martin.pavella@nxp.com>
Date: Fri, 29 May 2026 08:58:13 +0200
Subject: [PATCH 070/103] NXP backend: Enable `aten.upsample_bilinear2d` with
 new Neutron flow. (#19793)

### Summary
Enable `aten.upsample_bilinear2d` with new Neutron flow.

### Test plan
Unit tests provided.


cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 .../upsample_bilinear2d_converter.py          | 102 +++++--
 .../test_convert_upsample_bilinear2d.py       | 283 +++++++++++++++++-
 2 files changed, 353 insertions(+), 32 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_bilinear2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_bilinear2d_converter.py
index 33d97dff642..1183ef494b5 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_bilinear2d_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_bilinear2d_converter.py
@@ -4,11 +4,13 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import torch
 
 from executorch.backends.nxp.backend.data_format import DataFormat, NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.edge_helper import node_has_well_defined_shape
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
+    is_not_qdq_node,
     NodeConverter,
 )
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options.resize_bilinear_options import (
@@ -16,12 +18,35 @@
 )
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
+from torch.fx.passes.infra.partitioner import Partition
 from torch.nn import Parameter
 
 
 # noinspection SpellCheckingInspection
 class UpsampleBilinear2DConverter(NodeConverter):
 
+    @classmethod
+    def supports_partitioning_result(
+        cls,
+        node: Node,
+        partition_list: list[Partition],
+        custom_delegation_options: CustomDelegationOptions,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+    ) -> bool:
+        input_shape = node.all_input_nodes[0].meta["val"].shape
+        output_shape = node.meta["val"].shape
+        is_alone_in_partition = cls.is_node_alone_in_partition(
+            node, partition_list, filter_fn=is_not_qdq_node
+        )
+
+        if is_alone_in_partition and input_shape == output_shape:
+            # The operator is a no-op, so the Neutron Converter will skip it. If it's the only node in the
+            #  partition, the graph would end up empty.
+            return False
+
+        return True
+
     @staticmethod
     def _is_supported_in_IR(
         node: Node,
@@ -36,6 +61,14 @@ def _is_supported_in_IR(
                 " format. Please report this."
             )
 
+        # The conversion requires the output shape to be known and static.
+        if not node_has_well_defined_shape(node):
+            return False
+
+        if len(node.meta["val"].shape) != 4:
+            # Unexpected case. The input should always be 4D.
+            return False
+
         return True
 
     @staticmethod
@@ -45,38 +78,58 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        # Neutron requires static shapes.
-        #  neutron-converter/src/OperatorC/UpsamplePlugin.cpp?at=NEUTRON_SOFTWARE_2.2.3#74
-        if not node_has_well_defined_shape(node):
-            return False
-
-        if len(node.meta["val"].shape) != 4:
-            # Unexpected case. The input should always be 4D.
-            return False
-
-        # The tensors here use the channels first format (NCHW).
+        # The tensors are always 4D and use the channels first format (NCHW).
         _, in_c, in_h, in_w = node.all_input_nodes[0].meta["val"].shape
         _, _, out_h, out_w = node.meta["val"].shape
 
-        # Neutron supports only the doubling and quadrupleing of both height and width at the same time.
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#778
-        supported_scales = [2, 4]
-        if not any(
-            in_h * scale == out_h and in_w * scale == out_w
-            for scale in supported_scales
-        ):
-            return False
-
-        # Neutron requires the input channels to be a multiple of `num_macs`.
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#777
-        if in_c % neutron_target_spec.get_num_macs() != 0:
-            return False
+        if custom_delegation_options.use_new_flow_neutron_c:
+            # Requirements specified by the new Neutron flow documentation.
+
+            if not NodeConverter.uses_quantization_type_for_io(
+                node,
+                supported_types=[torch.int8, torch.uint8],
+                input_indices=[0],
+                output_indices=[0],
+            ):
+                return False
+
+            supported_scales = [1, 2, 4, 8]
+            align_corners = node.args[2]
+            if align_corners:
+                if in_h == 1 or in_w == 1:
+                    return False  # Avoid division by 0.
+                h_scale = (out_h - 1) / (in_h - 1)
+                w_scale = (out_w - 1) / (in_w - 1)
+            else:
+                h_scale = out_h / in_h
+                w_scale = out_w / in_w
+
+            # The H and W scales don't need to be equal, but both must be supported.
+            if (h_scale not in supported_scales) or (w_scale not in supported_scales):
+                return False
+
+        else:
+            # Requirements of the old Neutron flow.
+
+            # Neutron supports only the doubling and quadrupleing of both height and width at the same time.
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#778
+            supported_scales = [2, 4]
+            if not any(
+                in_h * scale == out_h and in_w * scale == out_w
+                for scale in supported_scales
+            ):
+                return False
+
+            # Neutron requires the input channels to be a multiple of `num_macs`.
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#777
+            if in_c % neutron_target_spec.get_num_macs() != 0:
+                return False
 
         return True
 
     def convert(self, node: Node):
         """Convert the `aten.upsample_bilinear2d.vec` operator to Neutron IR `ResizeBilinear`.
-        The schema is:
+        The ExecuTorch schema is:
         aten::upsample_bilinear2d.vec(
             Tensor input,
             SymInt[]? output_size,
@@ -109,6 +162,7 @@ def convert(self, node: Node):
         #  and the second one is what NeutronIR uses when `align_corners == False and half_pixel_centers == True`.
         # https://github.com/tensorflow/tensorflow/blob/v2.20.0/tensorflow/lite/kernels/internal/reference/resize_bilinear.h#L82-L88
         # https://github.com/tensorflow/tensorflow/blob/v2.20.0/tensorflow/lite/kernels/internal/reference/resize_bilinear.h#L172-L180
+        # Also, the new Neutron flow requires that `align_corners` and `half_pixel_centers` are not True simultainiously.
         align_corners = node.args[2]
         half_pixel_centers = not align_corners
         t_op.builtin_options = ResizeBilinear(align_corners, half_pixel_centers)
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py
index 5663eea9cc3..2d2f9845fa3 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py
@@ -4,12 +4,15 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+
+# noinspection PyUnusedImports
 import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
@@ -17,7 +20,17 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.model_output_comparator import (
+    AllCloseOutputComparator,
+)
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    AddTensor,
+    ExecutorchDelegateCall,
+    UpsampleBilinear2D,
+)
+from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
 @pytest.fixture(autouse=True)
@@ -26,23 +39,25 @@ def reseed_model_per_test_run():
     np.random.seed(23)
 
 
-# noinspection PyProtectedMember
-ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate
-UpsampleBilinear2D = exir_ops.edge.aten.upsample_bilinear2d.vec
-
-
 class UpsampleBilinearModule(torch.nn.Module):
 
-    def __init__(self, size=None, scale=None):
+    def __init__(self, size=None, scale=None, **kwargs):
         super().__init__()
         self.upsample = torch.nn.Upsample(
-            size=size, scale_factor=scale, mode="bilinear"
+            size=size, scale_factor=scale, mode="bilinear", **kwargs
         )
 
     def forward(self, x):
         return self.upsample(x)
 
 
+class UpsampleBilinearAddModule(UpsampleBilinearModule):
+
+    def forward(self, x):
+        x = super().forward(x)
+        return x + x
+
+
 @pytest.mark.parametrize(
     "input_shape, size",
     [
@@ -185,3 +200,255 @@ def test_convert_upsample_bilinear2d__no_delegation__unsupported_size(
     # Make sure the `upsample` was NOT delegated (size != double of input).
     assert not graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
     assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleBilinear2D])
+
+
+class TestUpsampleBilinear2DNewNeutronFlow:
+    # TODO Use quantized dataset and `atol=1` in the tests.
+
+    # noinspection PyMethodMayBeStatic
+    def assert_delegated(
+        self,
+        model,
+        input_shape,
+        mocker,
+        use_qat=False,
+        atol=None,
+        expected_delegated_ops=None,
+    ):
+        if expected_delegated_ops is None:
+            expected_delegated_ops = {UpsampleBilinear2D: 1}
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops=expected_delegated_ops,
+            expected_non_delegated_ops={},
+        )
+
+        # Cover also negative values to thoroughly test the operator.
+        dataset_creator = RandomDatasetCreator(low=-2, high=2)
+
+        kwargs = {"atol": atol} if atol is not None else {}
+        output_comparator = AllCloseOutputComparator(**kwargs)
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset_creator,
+            output_comparator,
+            use_qat=use_qat,
+            use_new_flow_neutron_c=True,  # Use the new flow.
+        )
+
+    # noinspection PyMethodMayBeStatic
+    def assert_not_delegated(self, model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleBilinear2D])
+
+    def test__qat__align_corners(self, mocker, use_qat):
+        align_corners = True
+        input_shape = (1, 2, 3, 4)
+        output_size = (5, 7)
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        atol = 0.015  # ~= output scale -> single bit error.
+        self.assert_delegated(model, input_shape, mocker, use_qat=use_qat, atol=atol)
+
+    def test__qat__not_align_corners(self, mocker, use_qat):
+        align_corners = False
+        input_shape = (1, 2, 3, 4)
+        output_size = (6, 8)
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        atol = 0.015  # ~= output scale -> single bit error.
+        self.assert_delegated(model, input_shape, mocker, use_qat=use_qat, atol=atol)
+
+    @pytest.mark.parametrize(
+        "input_shape, output_size",
+        [
+            pytest.param((1, 2, 3, 4), (6, 8), id="batch=1, scale_h=scale_w=2"),
+            pytest.param(
+                (3, 3, 3, 5),
+                (6, 5),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 3, 4), (3, 16), id="batch=2, scale_h=1, scale_w=4"),
+            pytest.param((2, 2, 3, 4), (24, 8), id="batch=2, scale_h=8, scale_w=2"),
+        ],
+    )
+    def test__not_align_corners__output_size(self, mocker, input_shape, output_size):
+        align_corners = False
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        atol = 0.016  # ~= output scale -> single bit error.
+        self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    def test__not_align_corners__output_size__unsupported(self):
+        align_corners = False
+        input_shape = (1, 2, 3, 4)
+        output_size = (9, 12)  # scale = (3, 3)
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.parametrize(
+        "input_shape, scale",
+        [
+            pytest.param((1, 2, 3, 4), (2, 2), id="batch=1, scale_h=scale_w=2"),
+            pytest.param(
+                (3, 3, 3, 5),
+                (2, 1),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 3, 4), (4, 1), id="batch=2, scale_h=4, scale_w=1"),
+            pytest.param((2, 2, 3, 4), (2, 8), id="batch=2, scale_h=2, scale_w=8"),
+        ],
+    )
+    def test__not_align_corners__scales(self, mocker, input_shape, scale):
+        align_corners = False
+        model = UpsampleBilinearModule(scale=scale, align_corners=align_corners)
+        atol = 0.016  # ~= output scale -> single bit error.
+        self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    def test__not_align_corners__scales__unsupported(self):
+        align_corners = False
+        input_shape = (1, 2, 3, 4)
+        scale = (3, 3)
+        model = UpsampleBilinearModule(scale=scale, align_corners=align_corners)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.parametrize(
+        "input_shape, output_size",
+        [
+            pytest.param((1, 2, 4, 5), (7, 9), id="batch=1, scale_h=scale_w=2"),
+            pytest.param(
+                (1, 3, 3, 5),
+                (5, 5),
+                id="batch=1, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 4, 5), (4, 17), id="batch=2, scale_h=1, scale_w=4"),
+            pytest.param((1, 2, 4, 5), (25, 9), id="batch=1, scale_h=8, scale_w=2"),
+        ],
+    )
+    def test__align_corners__output_size(self, mocker, input_shape, output_size):
+        align_corners = True
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        atol = 0.016  # ~= output scale -> single bit error.
+        self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    @pytest.mark.parametrize(
+        "input_shape, output_size",
+        [
+            pytest.param(
+                (2, 2, 4, 5), (25, 9), id="batch=2, scale_h=8, scale_w=2"
+            ),  # Error ~= 0.47
+            pytest.param(
+                (3, 3, 3, 5),
+                (5, 5),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),  # Error ~= 3.7
+        ],
+    )
+    def test__align_corners__output_size__incorrect_output(
+        self, mocker, input_shape, output_size
+    ):
+        align_corners = True
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        atol = 0.45  # Huge tolerance (still not enough to pass).
+        with pytest.raises(AssertionError):
+            self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    def test__align_corners__output_size__unsupported(self):
+        align_corners = True
+        input_shape = (1, 2, 3, 4)
+        output_size = (6, 8)  # Neutron scale = (5/2, 7/3)
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__align_corners__output_size__input_size_equal_to_one(self):
+        align_corners = True
+        input_shape = (1, 2, 1, 1)  # Neutron scale computation would divide by zero.
+        output_size = (2, 2)
+        model = UpsampleBilinearModule(size=output_size, align_corners=align_corners)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.parametrize(
+        "input_shape, scale",
+        [
+            # The PyTorch scales are "weird" because the "Neutron scales" are computed differently.
+            # The fractions correspond to "nice" Neutron scales (1, 2, 4, or 8).
+            pytest.param(
+                (1, 2, 4, 5),
+                (7 / 4, 9 / 5),
+                id="batch=1, scale_h=7/4, scale_w=9/5 (Neutron scales = (2, 2)",
+            ),
+            pytest.param(
+                (1, 3, 3, 5),
+                (5 / 3, 1),
+                id="batch=1, scale_h=5/3, scale_w=1 (Neutron scales = (2, 1))",
+            ),
+            pytest.param(
+                (2, 2, 4, 5),
+                (1, 17 / 5),
+                id="batch=2, scale_h=1, scale_w=17/5 (Neutron scales = (1, 4))",
+            ),
+            pytest.param(
+                (1, 2, 4, 5),
+                (25 / 4, 9 / 5),
+                id="batch=1, scale_h=25/4, scale_w=9/5 (Neutron scales = (8, 2))",
+            ),
+        ],
+    )
+    def test__align_corners__scales(self, mocker, input_shape, scale):
+        align_corners = True
+        model = UpsampleBilinearModule(scale=scale, align_corners=align_corners)
+        atol = 0.016  # ~= output scale -> single bit error.
+        self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    @pytest.mark.parametrize(
+        "input_shape, scale",
+        [
+            pytest.param(
+                (2, 2, 4, 5),
+                (25 / 4, 9 / 5),
+                id="batch=3, scale_h=25/4, scale_w=9/5 (Neutron scales = (8, 2))",
+            ),  # Error ~= 0.47
+            pytest.param(
+                (3, 3, 3, 5),
+                (5 / 3, 1),
+                id="batch=3, scale_h=5/3, scale_w=1 (Neutron scales = (2, 1))",
+            ),  # Error ~= 3.7
+        ],
+    )
+    def test__align_corners__scales__incorrect_output(self, mocker, input_shape, scale):
+        align_corners = True
+        model = UpsampleBilinearModule(scale=scale, align_corners=align_corners)
+        atol = 0.45  # Huge tolerance (still not enough to pass).
+        with pytest.raises(AssertionError):
+            self.assert_delegated(model, input_shape, mocker, atol=atol)
+
+    def test__align_corners__scales__unsupported(self):
+        align_corners = True
+        input_shape = (1, 2, 3, 4)
+        scale = (2, 2)  # Neutron scale = (5/2, 7/3)
+        model = UpsampleBilinearModule(scale=scale, align_corners=align_corners)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__noop__alone_in_partition__not_delegated(self):
+        input_shape = (1, 2, 3, 4)
+        scale = 1
+        model = UpsampleBilinearModule(scale=scale)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__noop__not_alone_in_partition__delegated(self, mocker):
+        input_shape = (1, 2, 3, 4)
+        scale = 1
+        model = UpsampleBilinearAddModule(scale=scale)
+        self.assert_delegated(
+            model,
+            input_shape,
+            mocker,
+            expected_delegated_ops={UpsampleBilinear2D: 1, AddTensor: 1},
+        )

From c72bc872a652c2197e954287bb62f0ebd0a69d75 Mon Sep 17 00:00:00 2001
From: Martin Pavella <martin.pavella@nxp.com>
Date: Fri, 29 May 2026 09:00:32 +0200
Subject: [PATCH 071/103] NXP backend: Enable `aten.upsample_nearest2d` with
 new Neutron flow. (#19796)

### Summary
NXP backend: Enable `aten.upsample_nearest2d` with new Neutron flow.

### Test plan
Unit tests provided.


cc @robert-kalmar @JakeStevens @digantdesai @rascani
---
 .../upsample_nearest2d_converter.py           | 110 ++++++++++----
 .../test_convert_upsample_nearest2d.py        | 141 +++++++++++++++++-
 2 files changed, 220 insertions(+), 31 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_nearest2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_nearest2d_converter.py
index 1ddc71425ef..6e18a7bfe67 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_nearest2d_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_nearest2d_converter.py
@@ -4,11 +4,13 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import torch
 
 from executorch.backends.nxp.backend.data_format import DataFormat, NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.edge_helper import node_has_well_defined_shape
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
+    is_not_qdq_node,
     NodeConverter,
 )
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options.resize_nearest_neighbor_options import (
@@ -16,12 +18,37 @@
 )
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
+from torch.fx.passes.infra.partitioner import Partition
 from torch.nn import Parameter
 
+HeightScale = float
+WidthScale = float
+
 
 # noinspection SpellCheckingInspection
 class UpsampleNearest2DConverter(NodeConverter):
 
+    @classmethod
+    def supports_partitioning_result(
+        cls,
+        node: Node,
+        partition_list: list[Partition],
+        custom_delegation_options: CustomDelegationOptions,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+    ) -> bool:
+        h_scale, w_scale = cls._get_effective_scales(node)
+        is_alone_in_partition = cls.is_node_alone_in_partition(
+            node, partition_list, filter_fn=is_not_qdq_node
+        )
+
+        if is_alone_in_partition and h_scale == w_scale == 1:
+            # The operator is a no-op, so the Neutron Converter will skip it. If it's the only node in the
+            #  partition, the graph would end up empty.
+            return False
+
+        return True
+
     @staticmethod
     def _is_supported_in_IR(
         node: Node,
@@ -36,6 +63,14 @@ def _is_supported_in_IR(
                 " format. Please report this."
             )
 
+        # The conversion requires the output shape to be known and static.
+        if not node_has_well_defined_shape(node):
+            return False
+
+        if len(node.meta["val"].shape) != 4:
+            # Unexpected case. The input should always be 4D.
+            return False
+
         return True
 
     @staticmethod
@@ -45,39 +80,62 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        # Neutron requires static shapes.
-        #  neutron-converter/src/OperatorC/UpsamplePlugin.cpp?at=NEUTRON_SOFTWARE_2.2.3#74
-        if not node_has_well_defined_shape(node):
-            return False
-
-        if len(node.meta["val"].shape) != 4:
-            # Unexpected case. The input should always be 4D.
-            return False
-
-        # The tensors here use the channels first format (NCHW).
+        # The tensors are always 4D and use the channels first format (NCHW).
         _, in_c, in_h, in_w = node.all_input_nodes[0].meta["val"].shape
         _, _, out_h, out_w = node.meta["val"].shape
 
-        # Neutron supports only the doubling and quadrupleing of both height and width at the same time.
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#768
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#778
-        supported_scales = [2, 4]
-        if not any(
-            in_h * scale == out_h and in_w * scale == out_w
-            for scale in supported_scales
-        ):
-            return False
-
-        # Neutron requires the input channels to be a multiple of `num_macs`.
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#767
-        if in_c % neutron_target_spec.get_num_macs() != 0:
-            return False
+        if custom_delegation_options.use_new_flow_neutron_c:
+            # Requirements specified by the new Neutron flow documentation.
+
+            if not NodeConverter.uses_quantization_type_for_io(
+                node,
+                supported_types=[torch.int8, torch.uint8],
+                input_indices=[0],
+                output_indices=[0],
+            ):
+                return False
+
+            supported_scales = [1, 2, 4, 8]
+            h_scale, w_scale = UpsampleNearest2DConverter._get_effective_scales(node)
+            # The H and W scales don't need to be equal but both must be supported.
+            if (h_scale not in supported_scales) or (w_scale not in supported_scales):
+                return False
+
+        else:
+            # Requirements of the old Neutron flow.
+
+            # Neutron supports only the doubling and quadrupleing of both height and width at the same time.
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#768
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#778
+            supported_scales = [2, 4]
+            if not any(
+                in_h * scale == out_h and in_w * scale == out_w
+                for scale in supported_scales
+            ):
+                return False
+
+            # Neutron requires the input channels to be a multiple of `num_macs`.
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#767
+            if in_c % neutron_target_spec.get_num_macs() != 0:
+                return False
 
         return True
 
+    @staticmethod
+    def _get_effective_scales(node: Node) -> tuple[HeightScale, WidthScale]:
+        # Neutron supports variants where `align_corners=False` and `align_corners=True`. ExecuTorch doesn't have this
+        #  parameter. Its behavior is equivalent to `align_corners=False`. Hence, the scale calculation corresponds to
+        #  the `align_corners=False` case in the Neutron documentation.
+        _, _, in_h, in_w = node.all_input_nodes[0].meta["val"].shape
+        _, _, out_h, out_w = node.meta["val"].shape
+        h_scale = out_h / in_h
+        w_scale = out_w / in_w
+
+        return h_scale, w_scale
+
     def convert(self, node: Node):
         """Convert the `aten.upsample_nearest2d.vec` operator to Neutron IR `ResizeNearestNeighbor`.
-        The schema is:
+        The ExecuTorch schema is:
             aten::upsample_nearest2d.vec(
                 Tensor input,
                 SymInt[]? output_size,
@@ -90,6 +148,8 @@ def convert(self, node: Node):
         x = t_op.tmp_inputs[0]
         y = t_op.tmp_outputs[0]
 
+        # Neutron supports variants where `align_corners=False` and `align_corners=True`. ExecuTorch doesn't have this
+        #  parameter. Its behavior is equivalent to `align_corners=False` and `half_pixel_centers=False`.
         t_op.builtin_options = ResizeNearestNeighbor(False, False)
 
         # The `aten.upsample_nearest2d` can use either the `size` attribute or the `scale_factor` to define the output
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py
index 3d9ec84dec9..27d1ac718a0 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py
@@ -4,12 +4,15 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+
+# noinspection PyUnusedImports
 import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
@@ -17,7 +20,14 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    AddTensor,
+    ExecutorchDelegateCall,
+    UpsampleNearest2D,
+)
+from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
 @pytest.fixture(autouse=True)
@@ -26,11 +36,6 @@ def reseed_model_per_test_run():
     np.random.seed(23)
 
 
-# noinspection PyProtectedMember
-ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate
-UpsampleNearest2D = exir_ops.edge.aten.upsample_nearest2d.vec
-
-
 class UpsampleNearestModule(torch.nn.Module):
 
     def __init__(self, size=None, scale=None):
@@ -41,6 +46,13 @@ def forward(self, x):
         return self.upsample(x)
 
 
+class UpsampleNearestAddModule(UpsampleNearestModule):
+
+    def forward(self, x):
+        x = super().forward(x)
+        return x + x
+
+
 @pytest.mark.parametrize(
     "input_shape, size",
     [
@@ -181,3 +193,120 @@ def test_convert_upsample_nearest2d__no_delegation__unsupported_size(input_shape
     # Make sure the `upsample` was NOT delegated (size != double of input).
     assert not graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
     assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleNearest2D])
+
+
+class TestUpsampleNearest2DNewNeutronFlow:
+
+    # noinspection PyMethodMayBeStatic
+    def assert_delegated(
+        self,
+        model,
+        input_shape,
+        mocker,
+        use_qat=False,
+        expected_delegated_ops=None,
+    ):
+        if expected_delegated_ops is None:
+            expected_delegated_ops = {UpsampleNearest2D: 1}
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops=expected_delegated_ops,
+            expected_non_delegated_ops={},
+        )
+
+        # Cover also negative values to thoroughly test the operator.
+        dataset_creator = RandomDatasetCreator(low=-2, high=2)
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset_creator,
+            use_qat=use_qat,
+            use_new_flow_neutron_c=True,  # Use the new flow.
+        )
+
+    # noinspection PyMethodMayBeStatic
+    def assert_not_delegated(self, model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleNearest2D])
+
+    def test__qat(self, mocker, use_qat):
+        input_shape = (1, 2, 3, 4)
+        output_size = (6, 8)
+        model = UpsampleNearestModule(size=output_size)
+        self.assert_delegated(model, input_shape, mocker, use_qat=use_qat)
+
+    @pytest.mark.parametrize(
+        "input_shape, output_size",
+        [
+            pytest.param((1, 2, 3, 4), (6, 8), id="batch=1, scale_h=scale_w=2"),
+            pytest.param((1, 2, 3, 3), 6, id="batch=1, scale_h=scale_w=2, scalar size"),
+            pytest.param(
+                (3, 3, 3, 5),
+                (6, 5),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 3, 4), (3, 16), id="batch=2, scale_h=1, scale_w=4"),
+            pytest.param((2, 2, 3, 4), (24, 8), id="batch=2, scale_h=8, scale_w=2"),
+        ],
+    )
+    def test__output_size(self, mocker, input_shape, output_size):
+        model = UpsampleNearestModule(size=output_size)
+        self.assert_delegated(model, input_shape, mocker)
+
+    def test__output_size__unsupported(self):
+        input_shape = (1, 2, 3, 4)
+        output_size = (9, 12)  # scale = (3, 3)
+        model = UpsampleNearestModule(size=output_size)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.parametrize(
+        "input_shape, scale",
+        [
+            pytest.param((1, 2, 3, 4), (2, 2), id="batch=1, scale_h=scale_w=2"),
+            pytest.param(
+                (1, 2, 3, 4), 4, id="batch=1, scale_h=scale_w=4, scalar scale"
+            ),
+            pytest.param(
+                (3, 3, 3, 5),
+                (2, 1),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 3, 4), (4, 1), id="batch=2, scale_h=4, scale_w=1"),
+            pytest.param((2, 2, 3, 4), (2, 8), id="batch=2, scale_h=2, scale_w=8"),
+        ],
+    )
+    def test__scales(self, mocker, input_shape, scale):
+        model = UpsampleNearestModule(scale=scale)
+        self.assert_delegated(model, input_shape, mocker)
+
+    def test__scales__unsupported(self):
+        input_shape = (1, 2, 3, 4)
+        scale = (3, 3)
+        model = UpsampleNearestModule(scale=scale)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__noop__alone_in_partition__not_delegated(self):
+        input_shape = (1, 2, 3, 4)
+        scale = 1
+        model = UpsampleNearestModule(scale=scale)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__noop__not_alone_in_partition__delegated(self, mocker):
+        input_shape = (1, 2, 3, 4)
+        scale = 1
+        model = UpsampleNearestAddModule(scale=scale)
+        self.assert_delegated(
+            model,
+            input_shape,
+            mocker,
+            expected_delegated_ops={UpsampleNearest2D: 1, AddTensor: 1},
+        )

From 501d6415437eae895531d3783bf622f6ccb56f40 Mon Sep 17 00:00:00 2001
From: Erik Lundell <erik.lundell@arm.com>
Date: Fri, 29 May 2026 09:38:52 +0200
Subject: [PATCH 072/103] Arm backend: Fix bug causing empty partition reports
 (#19842)

logger.level was used to determine whether to
add the partition_report.txt FileHandler to the logger. This value is
not est by logging.setBasicConfig,
and defaults to 0. This caused empty reports to be output when
intermediate path was set and logging was > info

Instead, use .getEffectiveLevel()

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Erik Lundell <erik.lundell@arm.com>
---
 backends/arm/tosa/partitioner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backends/arm/tosa/partitioner.py b/backends/arm/tosa/partitioner.py
index d93e212c314..37b9cd7cc2a 100644
--- a/backends/arm/tosa/partitioner.py
+++ b/backends/arm/tosa/partitioner.py
@@ -550,7 +550,10 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
         partition_tags = {tag: self.delegation_spec for tag in tags}
 
         tag_constant_data(exported_program)
-        if self.intermediate_path is not None and logger.level <= logging.INFO:
+        if (
+            self.intermediate_path is not None
+            and logger.getEffectiveLevel() <= logging.INFO
+        ):
             intermediate_path = Path(self.intermediate_path)
             intermediate_path.mkdir(parents=True, exist_ok=True)
             file_handler = logging.FileHandler(

From ea37954cd7eeec168608010f8faaaa6c9ccfa6bc Mon Sep 17 00:00:00 2001
From: Tom Allsop <72802373+tom-arm@users.noreply.github.com>
Date: Fri, 29 May 2026 09:58:02 +0100
Subject: [PATCH 073/103] Arm backend: Add BF16 layer tests for Qwen (#19767)

* Add layers that run in BF16 in the HF model

Change-Id: If75434db138059f3a433a70abda3f3e26f6dd3b6

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

---------

Signed-off-by: Tom Allsop <tom.allsop@arm.com>
---
 .../models/Qwen3_VL/test_qwen3_vl_layers.py   | 48 ++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_layers.py b/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_layers.py
index 77b2739167a..f1ffe35b14e 100644
--- a/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_layers.py
+++ b/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_layers.py
@@ -33,7 +33,7 @@
     Qwen3VLVisionRotaryEmbedding,
 )
 
-input_t = Tuple[torch.Tensor, ...]
+input_t = Tuple[torch.Tensor | int, ...]
 
 
 def _make_qwen3_vl_2b_instruct_layer_config():
@@ -99,6 +99,19 @@ def prepare_model_and_inputs(cls):
         raise NotImplementedError
 
 
+def _to_bfloat16(
+    model: torch.nn.Module, inputs: input_t
+) -> tuple[torch.nn.Module, input_t]:
+    return model.to(torch.bfloat16), tuple(
+        (
+            x.to(torch.bfloat16)
+            if isinstance(x, torch.Tensor) and x.is_floating_point()
+            else x
+        )
+        for x in inputs
+    )
+
+
 class Qwen3VLVisionMLPModel(Qwen3VLTestModule):
     def __init__(self, config) -> None:
         super().__init__()
@@ -442,6 +455,18 @@ class Qwen3VLTestCase:
 
 VGF_NO_QUANT_TEST_CASES: dict[str, Qwen3VLTestCase] = TOSA_FP_TEST_CASES
 
+TOSA_BF16_TEST_CASES: dict[str, Qwen3VLTestCase] = {
+    "vision_mlp": TOSA_FP_TEST_CASES["vision_mlp"],
+    "vision_patch_embed": TOSA_FP_TEST_CASES["vision_patch_embed"],
+    "vision_rotary_embedding": TOSA_FP_TEST_CASES["vision_rotary_embedding"],
+    "vision_rotary_apply": TOSA_FP_TEST_CASES["vision_rotary_apply"],
+    "vision_attention": TOSA_FP_TEST_CASES["vision_attention"],
+    "vision_block": TOSA_FP_TEST_CASES["vision_block"],
+    "vision_patch_merger": TOSA_FP_TEST_CASES["vision_patch_merger"],
+    "text_rms_norm": TOSA_FP_TEST_CASES["text_rms_norm"],
+    "qk_norm": TOSA_FP_TEST_CASES["qk_norm"],
+}
+
 
 @common.parametrize(
     "test_case",
@@ -460,6 +485,27 @@ def test_qwen3_vl_tosa_FP(test_case: Qwen3VLTestCase):
         pipeline.run()
 
 
+@common.parametrize(
+    "test_case",
+    TOSA_BF16_TEST_CASES,
+)
+def test_qwen3_vl_tosa_FP_bf16(test_case: Qwen3VLTestCase):
+    model, inputs = test_case.model_cls.prepare_model_and_inputs()
+    model, inputs = _to_bfloat16(model, inputs)
+    with torch.no_grad():
+        pipeline = TosaPipelineFP[input_t](
+            model,
+            inputs,
+            aten_op=[],
+            exir_op=[],
+            transform_passes=list(test_case.transform_passes),
+            tosa_extensions=["bf16"],
+            atol=1e-2,
+            rtol=1e-2,
+        )
+        pipeline.run()
+
+
 @common.SkipIfNoModelConverter
 @common.parametrize(
     "test_case",

From f6be9851aa90b373a212d4eab24614d561c44c43 Mon Sep 17 00:00:00 2001
From: Xingguo Li <100689130+xingguo01@users.noreply.github.com>
Date: Fri, 29 May 2026 10:01:03 +0100
Subject: [PATCH 074/103] LLM support: improve VGF export and calibration
 pipeline (#19157)

This is stacked on top of
https://github.com/pytorch/executorch/pull/19029
- make non-KV-cache example inputs match the static export window
- fix PT2E calibration flow for padded prefixes
  and optional LM-Eval tasks
- update SmolLM2 export settings used by the VGF PT2E workflow
- Fix rope_theta in 135M_config.json to align with Hugging face
  model config

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Xingguo Li <xingguo.li@arm.com>
Co-authored-by: Zingo Andersen <zingo.andersen@arm.com>
---
 examples/models/llama/eval_llama_lib.py      |  94 +++++++++----
 examples/models/llama/evaluate/eager_eval.py |   8 +-
 examples/models/llama/model.py               |  23 +++-
 extension/llm/export/builder.py              | 131 +++++++++++++------
 4 files changed, 183 insertions(+), 73 deletions(-)

diff --git a/examples/models/llama/eval_llama_lib.py b/examples/models/llama/eval_llama_lib.py
index 23d00ff8c15..b562a2b3c70 100644
--- a/examples/models/llama/eval_llama_lib.py
+++ b/examples/models/llama/eval_llama_lib.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -46,9 +47,13 @@ def __init__(
         use_kv_cache: bool = False,
         generate_full_logits: bool = False,
         enable_dynamic_shape: bool = True,
+        device: Optional[str] = None,
     ):
         super().__init__(
-            model=model, tokenizer=tokenizer, max_seq_length=max_seq_length
+            model=model,
+            tokenizer=tokenizer,
+            max_seq_length=max_seq_length,
+            device=device,
         )
         self._model = model.to(self.device)
         self._use_kv_cache = use_kv_cache
@@ -57,30 +62,70 @@ def __init__(
 
     def _model_call(self, inps):
         if self._use_kv_cache:
-            if not self._enable_dynamic_shape:
-                # graph module exported without dynamic shape won't work with a different shape.
-                # And we have to do single token prefill here.
-                result_logits = []
-                for pos in range(inps.shape[-1]):
-                    pos_tensor = torch.tensor([pos], dtype=torch.int64)
-                    logits = self._model(
-                        inps[:, pos : pos + 1], {"input_pos": pos_tensor}
-                    )
-                    result_logits.append(logits)
-                if self._generate_full_logits:
-                    return torch.cat(result_logits, dim=1)
-                else:
-                    return torch.stack(result_logits, dim=1)
-            else:
-                pos_tensor = torch.tensor([0], dtype=torch.int64, device=self.device)
-                # Batch process the whole sequence.
-                logits = self._model(
-                    inps[:, : self._max_seq_length], {"input_pos": pos_tensor}
-                )
-                return logits
+            return self._model_call_kv_cache(inps)
+        return self._model_call_no_kv_cache(inps)
 
-        else:
-            return self._model(inps)
+    def _model_call_kv_cache(self, inps):
+        if self._enable_dynamic_shape:
+            pos_tensor = torch.tensor([0], dtype=torch.int64, device=self.device)
+            return self._model(
+                inps[:, : self._max_seq_length], {"input_pos": pos_tensor}
+            )
+
+        # graph module exported without dynamic shape won't work with a different shape.
+        # And we have to do single token prefill here.
+        result_logits = []
+        for pos in range(inps.shape[-1]):
+            pos_tensor = torch.tensor([pos], dtype=torch.int64)
+            logits = self._model(inps[:, pos : pos + 1], {"input_pos": pos_tensor})
+            result_logits.append(logits)
+        if self._generate_full_logits:
+            return torch.cat(result_logits, dim=1)
+        return torch.stack(result_logits, dim=1)
+
+    def _model_call_no_kv_cache(self, inps):
+        # lm-eval expects logits shaped [batch, seq, vocab]. In the non-KV path,
+        # some exported graphs (when generate_full_logits=False) return only
+        # last-position logits [batch, vocab], so reconstruct per-position
+        # logits by running prefix calls.
+        if not self._enable_dynamic_shape and not self._generate_full_logits:
+            raise ValueError(
+                "Static non-KV lm-eval requires generate_full_logits=True "
+                "so logits can be read from the last non-pad token."
+            )
+
+        if self._generate_full_logits:
+            return self._model(self._pad_to_max_len(inps))
+
+        result_logits = []
+        seq_len = inps.shape[-1]
+        for pos in range(min(seq_len, self._max_seq_length)):
+            prefix = self._pad_to_max_len(inps[:, : pos + 1])
+            logits = self._model(prefix)
+            if logits.dim() == 3:
+                logits = logits[:, -1, :]
+            result_logits.append(logits)
+
+        return torch.stack(result_logits, dim=1)
+
+    def _pad_to_max_len(self, tokens: torch.Tensor) -> torch.Tensor:
+        if self._enable_dynamic_shape:
+            return tokens
+        token_len = tokens.shape[-1]
+        if token_len > self._max_seq_length:
+            return tokens[:, : self._max_seq_length]
+        if token_len == self._max_seq_length:
+            return tokens
+
+        pad_len = self._max_seq_length - token_len
+        pad_token = getattr(self._tokenizer, "pad_id", self._tokenizer.eos_id)
+        pad = torch.full(
+            (tokens.shape[0], pad_len),
+            pad_token,
+            dtype=tokens.dtype,
+            device=tokens.device,
+        )
+        return torch.cat((tokens, pad), dim=-1)
 
     def _model_generate(self, context, max_length, eos_token_id):
         raise Exception("unimplemented")
@@ -219,6 +264,7 @@ def gen_eval_wrapper(
             tokenizer=tokenizer,
             max_seq_length=llm_config.export.max_seq_length,
             use_kv_cache=llm_config.model.use_kv_cache,
+            generate_full_logits=llm_config.debug.generate_full_logits,
             enable_dynamic_shape=llm_config.model.enable_dynamic_shape,
         )
     else:
diff --git a/examples/models/llama/evaluate/eager_eval.py b/examples/models/llama/evaluate/eager_eval.py
index 9d5d7ad447b..5c129e1c250 100644
--- a/examples/models/llama/evaluate/eager_eval.py
+++ b/examples/models/llama/evaluate/eager_eval.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -28,12 +29,13 @@ def __init__(
         tokenizer: Union[SentencePieceTokenizer, Tiktoken, HuggingFaceTokenizer],
         max_seq_length: Optional[int] = None,
         use_kv_cache: bool = False,
+        device: Optional[str] = None,
     ):
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        super().__init__(device=device, pretrained="gpt2")
+        resolved_device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        super().__init__(device=resolved_device, pretrained="gpt2")
         self._model = model
         self._tokenizer = tokenizer
-        self._device = torch.device(device)
+        self._device = torch.device(resolved_device)
         self._max_seq_length = 2048 if max_seq_length is None else max_seq_length
         self._use_kv_cache = use_kv_cache
 
diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py
index f02621b66b2..8ae146dda0f 100644
--- a/examples/models/llama/model.py
+++ b/examples/models/llama/model.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -285,11 +286,25 @@ def get_example_inputs(self):
         if self.use_kv_cache:
             return self.get_example_inputs_kvcache_sdpa()
         else:
-            return (
-                torch.tensor(
-                    [[1, 2, 3]], dtype=torch.long
-                ),  # tokens, with kv cache our input token length is always just 1 token.
+            max_seq_len = getattr(self.llm_config.export, "max_seq_length", 3)
+            # Preserve the historical three-token example input as the minimum.
+            max_seq_len = max(3, int(max_seq_len))
+            max_len = max_seq_len - 1 if self.enable_dynamic_shape else max_seq_len
+            backend = self.llm_config.backend
+            token_dtype = (
+                torch.int32
+                if (
+                    backend.ethosu.enabled
+                    or backend.tosa.enabled
+                    or backend.vgf.enabled
+                )
+                else torch.long
             )
+            example_tokens = torch.arange(max_len, dtype=token_dtype).unsqueeze(0)
+            vocab_size = int(getattr(self.model_.params, "vocab_size", 0))
+            if vocab_size > 1:
+                example_tokens = example_tokens % (vocab_size - 1) + 1
+            return (example_tokens,)
 
     # assumption is the custom op doesnt support dynamic shape right now. It might but its untested so lets first get static shape working
     def get_example_inputs_kvcache_sdpa(self):
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index c25c1190990..5928e40dc4d 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -256,6 +256,35 @@ def run_canonical_optimizations(self):
             assert res.graph_module is not None, "Pass returned None"
             self.pre_autograd_graph_module = res.graph_module
 
+    def _check_calibration_prefix_options(self) -> None:
+        if (
+            not self.use_kv_cache
+            and not self.enable_dynamic_shape
+            and not self.generate_full_logits
+        ):
+            raise ValueError(
+                "Static non-KV calibration with padded prefixes requires "
+                "generate_full_logits so calibration can sample the last "
+                "non-pad token position."
+            )
+
+    def _prepare_calibration_prefix(
+        self, token_list: List[int], pos: int, max_len: int, pad_token: int
+    ) -> Tuple[torch.Tensor, int]:
+        prefix_tokens = list(token_list[: pos + 1])
+        logits_token_pos = min(len(prefix_tokens), max_len) - 1
+
+        if self.enable_dynamic_shape:
+            prefix_tokens = prefix_tokens[:max_len]
+        elif len(prefix_tokens) < max_len:
+            prefix_tokens.extend([pad_token] * (max_len - len(prefix_tokens)))
+        else:
+            prefix_tokens = prefix_tokens[:max_len]
+
+        input_dtype = self.example_inputs[0].dtype
+        prefix = torch.tensor(prefix_tokens, dtype=input_dtype).unsqueeze(0)
+        return prefix, logits_token_pos
+
     def pt2e_calibrate(
         self,
         prepared_module,
@@ -266,39 +295,41 @@ def pt2e_calibrate(
         tokenizer_path,
     ):
         logging.info("Run calibration...")
-        try:
-            from executorch.examples.models.llama.eval_llama_lib import (
-                GraphModuleEvalWrapper,
-            )
-            from lm_eval.evaluator import simple_evaluate
-        except ImportError:
-            raise ImportError(
-                "Please install the llm eval dependency via examples/models/llama/install_requirements.sh"
-            )
-
+        self._check_calibration_prefix_options()
         tokenizer = get_tokenizer(tokenizer_path)
 
         def calibrate_template(
             module: torch.fx.GraphModule, tokenizer, prompts: str, max_len: int
         ):
             # TODO: change criteria & support batch inputs if necessary
-            pos = torch.tensor(0, dtype=torch.int64)
+            pos = 0
             token_list = tokenizer.encode(prompts, bos=True, eos=False)
 
+            pad_token = getattr(tokenizer, "pad_id", tokenizer.eos_id)
+
             with torch.no_grad():
                 while token_list[-1] != tokenizer.eos_id and pos < max_len:
-                    logits = module(
-                        torch.full((1, 1), token_list[pos]),
-                        {"input_pos": torch.tensor((pos,))},
-                    )
+                    logits_token_pos = -1
+                    if self.use_kv_cache:
+                        logits = module(
+                            torch.full((1, 1), token_list[pos]),
+                            {"input_pos": torch.tensor((pos,))},
+                        )
+                    else:
+                        prefix, logits_token_pos = self._prepare_calibration_prefix(
+                            token_list, pos, max_len, pad_token
+                        )
+                        logits = module(prefix)
+
                     pos += 1
                     if pos >= len(token_list):
                         if self.generate_full_logits:
-                            token_list.append(
-                                torch.argmax(logits[:, -1], dim=-1).item()
-                            )
+                            next_token = torch.argmax(
+                                logits[:, logits_token_pos], dim=-1
+                            ).item()
                         else:
-                            token_list.append(torch.argmax(logits[:], dim=-1).item())
+                            next_token = torch.argmax(logits[:], dim=-1).item()
+                        token_list.append(next_token)
 
         calibrate_template(
             module=prepared_module,
@@ -307,26 +338,41 @@ def calibrate_template(
             max_len=calibration_seq_length,
         )
 
-        eval_wrapper = GraphModuleEvalWrapper(
-            model=prepared_module,
-            tokenizer=tokenizer,
-            max_seq_length=calibration_seq_length,
-            use_kv_cache=self.use_kv_cache,
-            generate_full_logits=self.generate_full_logits,
-            enable_dynamic_shape=self.enable_dynamic_shape,
-        )
+        if calibration_tasks:
+            try:
+                from executorch.examples.models.llama.eval_llama_lib import (
+                    GraphModuleEvalWrapper,
+                )
+                from lm_eval.evaluator import simple_evaluate
+            except ImportError:
+                raise ImportError(
+                    "Please install the llm eval dependency via examples/models/llama/install_requirements.sh"
+                )
 
-        # Evaluate the model
-        with torch.no_grad():
-            eval_results = simple_evaluate(
-                model=eval_wrapper,
-                tasks=calibration_tasks,
-                limit=calibration_limit,
+            eval_wrapper = GraphModuleEvalWrapper(
+                model=prepared_module,
+                tokenizer=tokenizer,
+                max_seq_length=calibration_seq_length,
+                use_kv_cache=self.use_kv_cache,
+                generate_full_logits=self.generate_full_logits,
+                enable_dynamic_shape=self.enable_dynamic_shape,
+                # The exported graph can contain ops like aten.full.default
+                # without explicit device, which default to CPU and can
+                # trigger device-mismatch errors when lm_eval runs on CUDA.
+                # Calibrate on CPU for stability.
+                device="cpu",
             )
 
-        for task, res in eval_results["results"].items():
-            print(f"{task}: {res}")
-        logging.info("Calibration finish...")
+            with torch.no_grad():
+                eval_results = simple_evaluate(
+                    model=eval_wrapper,
+                    tasks=calibration_tasks,
+                    limit=calibration_limit,
+                )
+
+            for task, res in eval_results["results"].items():
+                print(f"{task}: {res}")
+            logging.info("Calibration finish...")
 
     def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManager":
         """
@@ -351,18 +397,19 @@ def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManage
                 assert (
                     self.pre_autograd_graph_module is not None
                 ), "Please run export() first"
+                if self.calibration_tasks and self.calibration_limit is None:
+                    logging.warning(
+                        "calibration_tasks provided without calibration_limit; "
+                        "lm-eval will run the full task dataset during "
+                        "calibration."
+                    )
                 m = prepare_pt2e(
                     self.pre_autograd_graph_module,  # pyre-ignore[6]
                     composed_quantizer,
                 )
-                logging.info(
-                    f"Calibrating with tasks: {self.calibration_tasks}, limit: {self.calibration_limit}, calibration_data: {self.calibration_data}, tokenizer_path: {self.tokenizer_path}, seq_length: {self.calibration_seq_length}"
-                )
                 # Calibrate
                 if (
-                    self.calibration_tasks is not None
-                    and self.calibration_limit is not None
-                    and self.calibration_seq_length is not None
+                    self.calibration_seq_length is not None
                     and self.calibration_data is not None
                     and self.tokenizer_path is not None
                 ):

From 1494535ba2d391c274a225dd03b2d81c429944c8 Mon Sep 17 00:00:00 2001
From: Michiel Olieslagers
 <44864547+Michiel-Olieslagers@users.noreply.github.com>
Date: Fri, 29 May 2026 10:03:49 +0100
Subject: [PATCH 075/103] Arm backend: Fix VKML install bug for macOS. (#19612)

Change-Id: Id97fcb787369b62aecd4a0be27132ff4a0785fcf

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Michiel Olieslagers <michiel.olieslagers@arm.com>
---
 backends/arm/scripts/vulkan_utils.sh | 31 +++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/backends/arm/scripts/vulkan_utils.sh b/backends/arm/scripts/vulkan_utils.sh
index c8b169c0c3d..520c244c6fb 100644
--- a/backends/arm/scripts/vulkan_utils.sh
+++ b/backends/arm/scripts/vulkan_utils.sh
@@ -71,6 +71,9 @@ function install_vulkan_sdk_macos() {
     fi
 
     log_step "vulkan" "Extracting Vulkan SDK installer"
+    rm -rf \
+        "vulkansdk-macOS-${vulkan_sdk_version}.app" \
+        "vulkansdk-macos-${vulkan_sdk_version}.app"
     unzip -q -o "${vulkan_sdk_zip_file}"
 
     local vulkan_sdk_app_path=""
@@ -91,15 +94,33 @@ function install_vulkan_sdk_macos() {
 
     local install_root="$(cd "${root_dir}" && pwd)/${vulkan_sdk_base_dir}/${vulkan_sdk_version}"
     mkdir -p "${install_root}"
-    local vulkan_sdk_root="${root_dir}/${vulkan_sdk_base_dir}"
 
     log_step "vulkan" "Installing Vulkan SDK (${vulkan_sdk_version}) to ${install_root}"
-    ${vulkan_sdk_installer} --root "${install_root}" --accept-licenses --default-answer --confirm-command install
+    "${vulkan_sdk_installer}" --root "${install_root}" --accept-licenses --default-answer --confirm-command install
+}
+
+function validate_vulkan_sdk_installation() {
+    if [[ ! -d "${root_dir}/${vulkan_sdk_bin_dir}" ]]; then
+        return 1
+    fi
+
+    vulkan_sdk_bin_path="$(cd "${root_dir}/${vulkan_sdk_bin_dir}" && pwd)"
+    if [[ ! -x "${vulkan_sdk_bin_path}/glslc" ]]; then
+        return 1
+    fi
+
+    "${vulkan_sdk_bin_path}/glslc" --version > /dev/null 2>&1
 }
 
 function setup_vulkan_sdk() {
     cd "${root_dir}"
 
+    if validate_vulkan_sdk_installation; then
+        log_step "vulkan" "Reusing Vulkan SDK at ${root_dir}/${vulkan_sdk_base_dir}/${vulkan_sdk_version}"
+        log_step "vulkan" "Vulkan SDK validation (glslc) succeeded"
+        return
+    fi
+
     if [[ "${os_name}" == "Darwin" ]]; then
         install_vulkan_sdk_macos
     else
@@ -117,11 +138,11 @@ function setup_vulkan_sdk() {
         exit 1
     fi
 
-    if ${vulkan_sdk_bin_path}/glslc --version > /dev/null 2>&1; then
+    if "${vulkan_sdk_bin_path}/glslc" --version > /dev/null 2>&1; then
         log_step "vulkan" "Vulkan SDK validation (glslc) succeeded"
     else
         log_step "vulkan" "Error: Vulkan SDK validation failed"
-        ${vulkan_sdk_bin_path}/glslc --version
+        "${vulkan_sdk_bin_path}/glslc" --version
         exit 1
     fi
 }
@@ -143,7 +164,7 @@ function setup_path_vulkan() {
     vulkan_sdk_arch_root="$(cd "${vulkan_sdk_arch_root}" && pwd)"
     vulkan_sdk_bin_path="$(cd "${vulkan_sdk_bin_dir}" && pwd)"
 
-    append_env_in_setup_path PATH ${vulkan_sdk_bin_path}
+    append_env_in_setup_path PATH "${vulkan_sdk_bin_path}"
     if [[ "${OS:-}" == "Darwin" ]]; then
         prepend_env_in_setup_path DYLD_LIBRARY_PATH "${vulkan_sdk_arch_root}/lib"
         local moltenvk_icd_path="${vulkan_sdk_arch_root}/share/vulkan/icd.d/MoltenVK_icd.json"

From 513a4eaef4411325ae537beb44fe33eaf75205c3 Mon Sep 17 00:00:00 2001
From: Yufeng Shi <yufeng.shi@arm.com>
Date: Fri, 29 May 2026 10:05:33 +0100
Subject: [PATCH 076/103] Arm backend: Avoid running passes with no matching
 target ops (#19839)

Add ArmPass.should_run_pass() as a reusable early-exit hook before
  call() starts the normal ExportPass retracing path. The default hook
  returns true, preserving existing behavior for ArmPass subclasses.

  Introduce ArmOpTargetedPass for passes that only transform a known
  set of operator targets. It implements should_run_pass() by scanning
  the current graph and nested GraphModules for matching target
  operators. If no matching target operator is found, the pass returns
  an unmodified PassResult.

  For passes that already gate transformations with
  allowed_to_transform(), allow the target pre-scan to apply the same
  check before deciding whether the pass needs to run. This avoids
  running TFA passes when all matching target nodes are marked as
  disallowed.

  The should_run_pass() hook and ArmOpTargetedPass pre-scan avoid
  rebuilding graphs for decomposition and rewrite passes that cannot
  affect the current graph. The speedup is most visible on large models.

  Single-run paired benchmarks on Arm backend model tests
  across FP32, INT, VGF no-quant, and VGF quant variants:

  | Model       | E2E avg | Pass-manager avg |
  |-------------|--------:|-----------------:|
  | T5-small    | +30.5%  | +47.5%           |
  | DeepLabV3   | +12.9%  | +49.8%           |
  | Wav2Letter  | +16.9%  | +51.2%           |
  | InceptionV3 | +22.2%  | +46.5%           |
  | MobileNetV2 | +22.2%  | +52.5%           |
  | MobileNetV3 | +29.9%  | +54.6%           |

  Model rows are unweighted averages over successful variants.
  Unweighted average across 23 successful model/target variants:
  E2E speedup: +22.4%
  Pass-manager speedup: +50.5%

Change-Id: Iaa09638473a1d6d1e2ce98f5a0e3fc3a14378143


cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils
@Sebastian-Larsson @robell @rascani

Signed-off-by: Yufeng Shi <yufeng.shi@arm.com>
Co-authored-by: Erik Lundell <erik.lundell@arm.com>
---
 backends/arm/_passes/__init__.py              |   2 +-
 .../arm/_passes/accumulate_index_put_pass.py  |   8 +-
 backends/arm/_passes/arm_pass.py              |  99 +++++++++++-
 .../arm/_passes/canonicalize_gather_pass.py   |   8 +-
 backends/arm/_passes/conv1d_unsqueeze_pass.py |   7 +-
 .../_passes/convert_expand_copy_to_repeat.py  |   7 +-
 .../_passes/convert_full_like_to_full_pass.py |   9 +-
 .../convert_permute_singleton_to_view_pass.py |   7 +-
 .../arm/_passes/convert_squeezes_to_view.py   |  13 +-
 backends/arm/_passes/convert_to_clamp_pass.py |  10 +-
 backends/arm/_passes/decompose_acosh_pass.py  |   7 +-
 .../decompose_adaptive_avg_pool2d_pass.py     |   8 +-
 .../_passes/decompose_add_sub_alpha_pass.py   |   7 +-
 backends/arm/_passes/decompose_addmm_pass.py  |   7 +-
 .../_passes/decompose_as_strided_copy_pass.py |   7 +-
 .../_passes/decompose_asin_and_acos_pass.py   |   7 +-
 backends/arm/_passes/decompose_asinh_pass.py  |   7 +-
 backends/arm/_passes/decompose_atan_pass.py   |   7 +-
 backends/arm/_passes/decompose_atanh_pass.py  |   7 +-
 .../arm/_passes/decompose_avg_pool2d_pass.py  |  10 +-
 backends/arm/_passes/decompose_cosh_pass.py   |   7 +-
 .../decompose_cosine_similarity_pass.py       |   8 +-
 backends/arm/_passes/decompose_div_pass.py    |   9 +-
 .../arm/_passes/decompose_div_tensor_mode.py  |  10 +-
 backends/arm/_passes/decompose_elu_pass.py    |  13 +-
 backends/arm/_passes/decompose_erfinv_pass.py |   7 +-
 backends/arm/_passes/decompose_expm1_pass.py  |   7 +-
 .../_passes/decompose_floor_divide_pass.py    |   7 +-
 backends/arm/_passes/decompose_gelu_pass.py   |   7 +-
 backends/arm/_passes/decompose_glu_pass.py    |   7 +-
 .../_passes/decompose_grouped_conv_pass.py    |   9 +-
 .../decompose_index_select_to_gather_pass.py  |   8 +-
 .../decompose_index_tensor_to_gather_pass.py  |   8 +-
 .../arm/_passes/decompose_int_pow_pass.py     |   7 +-
 .../arm/_passes/decompose_leaky_relu_pass.py  |   8 +-
 .../decompose_linalg_vector_norm_pass.py      |  10 +-
 backends/arm/_passes/decompose_log1p_pass.py  |   7 +-
 backends/arm/_passes/decompose_logit_pass.py  |  10 +-
 .../arm/_passes/decompose_masked_fill_pass.py |   7 +-
 .../decompose_maxpool2d_with_dilation_pass.py |   7 +-
 .../arm/_passes/decompose_meandim_pass.py     |  18 ++-
 backends/arm/_passes/decompose_ne_pass.py     |   7 +-
 .../_passes/decompose_permute_for_u55_pass.py |   7 +-
 .../arm/_passes/decompose_remainder_pass.py   |  13 +-
 backends/arm/_passes/decompose_round_pass.py  |  10 +-
 .../_passes/decompose_select_scatter_pass.py  |   7 +-
 backends/arm/_passes/decompose_sign_pass.py   |   7 +-
 backends/arm/_passes/decompose_sinh_pass.py   |   7 +-
 .../_passes/decompose_slice_scatter_pass.py   |   7 +-
 .../arm/_passes/decompose_softmax_pass.py     |   9 +-
 backends/arm/_passes/decompose_sqrt_pass.py   |   9 +-
 .../decompose_strided_slice_copy_pass.py      |   8 +-
 backends/arm/_passes/decompose_sum_pass.py    |  13 +-
 backends/arm/_passes/decompose_tan_pass.py    |   7 +-
 .../decompose_tosa_unsupported_clamp_pass.py  |   7 +-
 backends/arm/_passes/decompose_tril_pass.py   |   9 +-
 .../decompose_unfold_to_gather_pass.py        |  10 +-
 backends/arm/_passes/decompose_var_pass.py    |  16 +-
 .../decompose_where_scalar_other_pass.py      |  12 +-
 .../decorate_fp32_to_int32_casting_pass.py    |   7 +-
 .../_passes/fuse_consecutive_concat_shapes.py |   7 +-
 backends/arm/_passes/insert_const_shapes.py   |   8 +-
 .../_passes/insert_data_layout_casts_pass.py  |   8 +-
 .../arm/_passes/insert_dynamic_padding.py     |  13 +-
 ...malize_index_put_bool_index_tensor_pass.py |   7 +-
 .../normalize_index_put_none_indices_pass.py  |   7 +-
 .../arm/_passes/promote_bool_operands_pass.py |   8 +-
 backends/arm/_passes/remove_noop_pass.py      |  19 +--
 .../arm/_passes/rewrite_avg_pool2d_pass.py    |   8 +-
 .../rewrite_bool_bitwise_to_logical_pass.py   |   7 +-
 ...ewrite_high_rank_singleton_permute_pass.py |   7 +-
 .../arm/_passes/rewrite_index_put_pass.py     |   7 +-
 .../rewrite_inplace_arithmetic_pass.py        |   6 +-
 .../_passes/rewrite_le_lt_to_ge_gt_pass.py    |   6 +-
 .../arm/_passes/rewrite_max_pool2d_pass.py    |   7 +-
 backends/arm/_passes/rewrite_pad.py           |   8 +-
 backends/arm/_passes/rewrite_slice.py         |   7 +-
 .../test/passes/test_arm_op_targeted_pass.py  | 150 ++++++++++++++++++
 78 files changed, 593 insertions(+), 294 deletions(-)
 create mode 100644 backends/arm/test/passes/test_arm_op_targeted_pass.py

diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
index 20bddf17793..3e881fdb9ef 100644
--- a/backends/arm/_passes/__init__.py
+++ b/backends/arm/_passes/__init__.py
@@ -5,7 +5,7 @@
 
 
 from . import arm_pass_utils  # noqa
-from .arm_pass import ArmPass  # noqa  # usort: skip
+from .arm_pass import ArmOpTargetedPass, ArmPass  # noqa  # usort: skip
 from .accumulate_index_put_pass import AccumulateIndexPutPass  # noqa
 from .broadcast_args_pass import BroadcastArgsPass  # noqa
 from .canonicalize_gather_pass import CanonicalizeGatherPass  # noqa
diff --git a/backends/arm/_passes/accumulate_index_put_pass.py b/backends/arm/_passes/accumulate_index_put_pass.py
index 1194e08e2d8..9aa0457b0c7 100644
--- a/backends/arm/_passes/accumulate_index_put_pass.py
+++ b/backends/arm/_passes/accumulate_index_put_pass.py
@@ -6,7 +6,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_index_tensor_to_gather_pass import (
     DecomposeIndexTensorToGatherPass,
 )
@@ -32,7 +32,7 @@ def get_ops(op):
     raise RuntimeError(f"Can't get index_put decomposition for op {op}")
 
 
-class AccumulateIndexPutPass(ArmPass):
+class AccumulateIndexPutPass(ArmOpTargetedPass):
     """This pass adjusts the values arg when the accumulate arg is set to true
     for the index_put op.
     """
@@ -41,9 +41,11 @@ class AccumulateIndexPutPass(ArmPass):
         DecomposeIndexTensorToGatherPass,
         RewriteIndexPutPass,
     }
+    target_ops = aten_ops + edge_ops
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (aten_ops + edge_ops) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         source, indices, values = args[:3]
diff --git a/backends/arm/_passes/arm_pass.py b/backends/arm/_passes/arm_pass.py
index add0f3aeb20..1b4fc677d18 100644
--- a/backends/arm/_passes/arm_pass.py
+++ b/backends/arm/_passes/arm_pass.py
@@ -7,6 +7,7 @@
 import copy
 import traceback
 from abc import abstractmethod
+from collections.abc import Collection
 from typing import Any, List, Optional, Set, Type
 
 import torch
@@ -14,7 +15,7 @@
 from executorch.backends.arm.tosa.mapping import TosaSpecialDtype
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue
-from torch.fx import GraphModule
+from torch.fx import GraphModule, Node
 from torch.fx.passes.infra.pass_base import PassResult
 from torch.utils import _pytree as pytree
 
@@ -191,3 +192,99 @@ def call_scalar(self, value: int | float, meta: NodeMetadata | dict[str, Any]):
             meta=meta,
             updated=True,
         )
+
+    def should_run_pass(self, graph_module: GraphModule) -> bool:
+        """Return whether this pass should run on the graph module.
+
+        Subclasses can override this to cheaply skip the pass before
+        ``call()`` starts the normal ``ExportPass`` retracing path.
+
+        Args:
+            graph_module (GraphModule): The graph module to inspect.
+
+        Returns:
+            bool: True when the pass should run.
+
+        """
+        return True
+
+    def __call__(self, graph_module: GraphModule) -> PassResult | None:
+        self.requires(graph_module)
+        if not self.should_run_pass(graph_module):
+            self.ensures(graph_module)
+            return PassResult(graph_module, False)
+        res = self.call(graph_module)
+        self.ensures(graph_module)
+        return res
+
+
+class ArmOpTargetedPass(ArmPass):
+    """Base class for passes that only transform selected operators.
+
+    Subclasses set ``target_ops`` to the call_function targets they can
+    transform. If the current graph and nested control-flow subgraphs do not
+    contain any target, the pass returns immediately without paying the default
+    ExportPass retracing cost.
+
+    Set ``check_allowed_to_transform`` to ``True`` when the target pre-scan
+    should also apply ``allowed_to_transform()`` to matching target nodes. This
+    is useful for TFA passes whose ``call_operator()`` leaves disallowed target
+    nodes unchanged. If all matching targets are disallowed, the pass can
+    return before entering the normal ``ExportPass`` path.
+
+    """
+
+    target_ops: Collection[Any] = ()
+    check_allowed_to_transform = False
+
+    def has_target_node(self, graph_module: GraphModule) -> bool:
+        """Return whether the graph module tree contains a target node.
+
+        Args:
+            graph_module (GraphModule): The graph module tree to inspect.
+
+        Returns:
+            bool: True if a matching call_function node is present.
+
+        """
+        visited_graph_modules = set()
+
+        def target_node_can_trigger_pass(node: Node) -> bool:
+            if not self.check_allowed_to_transform:
+                return True
+            if self.allowed_to_transform(node.meta):
+                return True
+            return False
+
+        def graph_has_target(module: GraphModule) -> bool:
+            if id(module) in visited_graph_modules:
+                return False
+            visited_graph_modules.add(id(module))
+
+            for target in self.target_ops:
+                for node in module.graph.find_nodes(
+                    op="call_function",
+                    target=target,
+                    sort=False,
+                ):
+                    if target_node_can_trigger_pass(node):
+                        return True
+
+            return any(
+                isinstance(child, GraphModule) and graph_has_target(child)
+                for child in module.children()
+            )
+
+        return graph_has_target(graph_module)
+
+    def should_run_pass(self, graph_module: GraphModule) -> bool:
+        """Return whether this pass has a target node to transform.
+
+        Args:
+            graph_module (GraphModule): The graph module tree to inspect.
+
+        Returns:
+            bool: True when a matching target node is present.
+
+        """
+        return self.has_target_node(graph_module)
diff --git a/backends/arm/_passes/canonicalize_gather_pass.py b/backends/arm/_passes/canonicalize_gather_pass.py
index 23886111b18..aaa77ce4002 100644
--- a/backends/arm/_passes/canonicalize_gather_pass.py
+++ b/backends/arm/_passes/canonicalize_gather_pass.py
@@ -6,12 +6,12 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class CanonicalizeGatherPass(ArmPass):
+class CanonicalizeGatherPass(ArmOpTargetedPass):
     """Canonicalize gather so it can be lowered to TOSA.GATHER via the backend
     dialect.
 
@@ -40,10 +40,10 @@ class CanonicalizeGatherPass(ArmPass):
 
     _passes_required_after: Set[Type[ExportPass]] = set()
 
-    _TARGET_OPS = {exir_ops.edge.aten.gather.default}
+    target_ops = {exir_ops.edge.aten.gather.default}
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._TARGET_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         # edge.aten.gather.default: (x, dim, index) with kw-only sparse_grad
diff --git a/backends/arm/_passes/conv1d_unsqueeze_pass.py b/backends/arm/_passes/conv1d_unsqueeze_pass.py
index cf1e884e05b..f81ef33e2d1 100644
--- a/backends/arm/_passes/conv1d_unsqueeze_pass.py
+++ b/backends/arm/_passes/conv1d_unsqueeze_pass.py
@@ -8,7 +8,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 
 from executorch.backends.arm._passes.rewrite_conv_pass import RewriteConvPass
 from executorch.backends.arm._passes.size_adjust_input_pass import SizeAdjustInputPass
@@ -17,7 +17,7 @@
 from executorch.exir.pass_base import ExportPass
 
 
-class Conv1dUnsqueezePass(ArmPass):
+class Conv1dUnsqueezePass(ArmOpTargetedPass):
     """This pass is used to change conv1d ops into conv2d since TOSA only
     supports 2d and 3d convolution.
 
@@ -34,9 +34,10 @@ class Conv1dUnsqueezePass(ArmPass):
         RewriteConvPass,
         SizeAdjustInputPass,
     }
+    target_ops = (exir_ops.edge.aten.convolution.default,)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op != exir_ops.edge.aten.convolution.default:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
         stride = list(args[3])
         if len(stride) != 1:
diff --git a/backends/arm/_passes/convert_expand_copy_to_repeat.py b/backends/arm/_passes/convert_expand_copy_to_repeat.py
index 69056cb47f4..430dc70bd0c 100644
--- a/backends/arm/_passes/convert_expand_copy_to_repeat.py
+++ b/backends/arm/_passes/convert_expand_copy_to_repeat.py
@@ -9,7 +9,7 @@
 
 import torch
 
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.unsqueeze_before_repeat_pass import (
     UnsqueezeBeforeRepeatPass,
 )
@@ -51,7 +51,7 @@ def calculate_multiples(args):
     return multiples, expanded_rank != len(input_shape)
 
 
-class ConvertExpandCopyToRepeatPass(ArmPass):
+class ConvertExpandCopyToRepeatPass(ArmOpTargetedPass):
     """Replace expand copy with repeat since it is a repeat that can only repeat
     singleton dimensions.
     """
@@ -60,9 +60,10 @@ class ConvertExpandCopyToRepeatPass(ArmPass):
 
     expand_copy = exir_ops.edge.aten.expand_copy.default
     repeat = exir_ops.edge.aten.repeat.default
+    target_ops = (expand_copy,)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op != self.expand_copy:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         multiples, changes_rank = calculate_multiples(args)
diff --git a/backends/arm/_passes/convert_full_like_to_full_pass.py b/backends/arm/_passes/convert_full_like_to_full_pass.py
index 1e26f24250a..f7a94424228 100644
--- a/backends/arm/_passes/convert_full_like_to_full_pass.py
+++ b/backends/arm/_passes/convert_full_like_to_full_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.fuse_constant_ops_pass import (
     ComputeConstantOpsAOTPass,
 )
@@ -14,7 +14,7 @@
 from executorch.exir.pass_base import ExportPass
 
 
-class ConvertFullLikeToFullPass(ArmPass):
+class ConvertFullLikeToFullPass(ArmOpTargetedPass):
     """Convert edge aten full_like to full.
 
     As per the full_like PyTorch documentation, `torch.full_like(input,
@@ -35,11 +35,10 @@ class ConvertFullLikeToFullPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {ComputeConstantOpsAOTPass}
+    target_ops = (exir_ops.edge.aten.full_like.default,)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in [
-            exir_ops.edge.aten.full_like.default,
-        ]:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         tensor = args[0].data
diff --git a/backends/arm/_passes/convert_permute_singleton_to_view_pass.py b/backends/arm/_passes/convert_permute_singleton_to_view_pass.py
index 7447cf037bc..0ed5f92f91d 100644
--- a/backends/arm/_passes/convert_permute_singleton_to_view_pass.py
+++ b/backends/arm/_passes/convert_permute_singleton_to_view_pass.py
@@ -6,7 +6,7 @@
 
 from typing import Sequence, Set, Tuple, Type
 
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -20,7 +20,7 @@
 )
 
 
-class ConvertPermuteSingletonToViewPass(ArmPass):
+class ConvertPermuteSingletonToViewPass(ArmOpTargetedPass):
     """Replace permutations that only move singleton axes with a reshape.
 
     Examples:
@@ -34,9 +34,10 @@ class ConvertPermuteSingletonToViewPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = _PERMUTE_TARGETS
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in _PERMUTE_TARGETS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         input_tensor = args[0].data
diff --git a/backends/arm/_passes/convert_squeezes_to_view.py b/backends/arm/_passes/convert_squeezes_to_view.py
index 2058c3407e3..b79e38cdf10 100644
--- a/backends/arm/_passes/convert_squeezes_to_view.py
+++ b/backends/arm/_passes/convert_squeezes_to_view.py
@@ -6,7 +6,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.fuse_view_copy_transform_pass import (
     FuseViewCopyTransformPass,
 )
@@ -14,7 +14,7 @@
 from executorch.exir.pass_base import ExportPass
 
 
-class ConvertSqueezesToViewPass(ArmPass):
+class ConvertSqueezesToViewPass(ArmOpTargetedPass):
     """Replaces squeeze/unsqueeze operators with view.
 
     These are simply special cases of the view op, so removing them gives us
@@ -23,12 +23,13 @@ class ConvertSqueezesToViewPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {FuseViewCopyTransformPass}
+    target_ops = (
+        exir_ops.edge.aten.squeeze_copy.dims,
+        exir_ops.edge.aten.unsqueeze_copy.default,
+    )
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in [
-            exir_ops.edge.aten.squeeze_copy.dims,
-            exir_ops.edge.aten.unsqueeze_copy.default,
-        ]:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         x = args[0]
diff --git a/backends/arm/_passes/convert_to_clamp_pass.py b/backends/arm/_passes/convert_to_clamp_pass.py
index effb46f25c4..6273759aa55 100644
--- a/backends/arm/_passes/convert_to_clamp_pass.py
+++ b/backends/arm/_passes/convert_to_clamp_pass.py
@@ -1,11 +1,11 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 from typing import Set, Tuple, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 
 from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
     QuantizeClampArgumentsPass,
@@ -29,11 +29,13 @@ def get_clamp_params(op, args) -> Tuple[float | None, float | None]:
         raise ValueError(f"Getting clamp parameters for op {op} is not implemented.")
 
 
-class ConvertToClampPass(ArmPass):
+class ConvertToClampPass(ArmOpTargetedPass):
     _passes_required_after: Set[Type[ExportPass]] = {QuantizeClampArgumentsPass}
+    target_ops = edge_operators
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in edge_operators or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         return super().call_operator(
diff --git a/backends/arm/_passes/decompose_acosh_pass.py b/backends/arm/_passes/decompose_acosh_pass.py
index 3ce6d73abc3..3c2cac45e75 100644
--- a/backends/arm/_passes/decompose_acosh_pass.py
+++ b/backends/arm/_passes/decompose_acosh_pass.py
@@ -6,7 +6,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_sqrt_pass import DecomposeSqrtPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass  # noqa
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
@@ -21,7 +21,7 @@
 edge_acosh_op = exir_ops.edge.aten.acosh.default
 
 
-class DecomposeAcoshPass(ArmPass):
+class DecomposeAcoshPass(ArmOpTargetedPass):
     """Decomposes acosh to supported TOSA-operations.
 
     This decomposition is based on the mathematical identity:
@@ -36,10 +36,11 @@ class DecomposeAcoshPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
         MatchArgDtypePass,
     }
+    target_ops = (edge_acosh_op,)
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
 
-        if op is not edge_acosh_op:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py b/backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py
index eda9dd28bf9..58fcf69cd8f 100644
--- a/backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py
+++ b/backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py
@@ -8,7 +8,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_avg_pool2d_pass import (
     DecomposeAvgPool2dPass,
 )
@@ -36,7 +36,7 @@ def _get_decomposition(op) -> tuple:
     raise RuntimeError(f"Unable to get decomposition for op {op}")
 
 
-class DecomposeAdaptiveAvgPool2dPass(ArmPass):
+class DecomposeAdaptiveAvgPool2dPass(ArmOpTargetedPass):
     """Decomposes AdaptiveAvgPool2d into AvgPool2d operations.
 
     An input tensor of shape (N, C, H, W) is transformed into an output tensor
@@ -47,9 +47,11 @@ class DecomposeAdaptiveAvgPool2dPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {DecomposeAvgPool2dPass}
+    target_ops = edge_ops + aten_ops
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op not in (edge_ops + aten_ops) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta, updated)
 
         avg_pool2d_op, slice_op, cat_op = _get_decomposition(op)
diff --git a/backends/arm/_passes/decompose_add_sub_alpha_pass.py b/backends/arm/_passes/decompose_add_sub_alpha_pass.py
index d7db9c5bcf9..30903fbd3d8 100644
--- a/backends/arm/_passes/decompose_add_sub_alpha_pass.py
+++ b/backends/arm/_passes/decompose_add_sub_alpha_pass.py
@@ -9,7 +9,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -55,13 +55,14 @@ def _should_decompose(alpha) -> bool:
     return False
 
 
-class DecomposeAddSubAlphaPass(ArmPass):
+class DecomposeAddSubAlphaPass(ArmOpTargetedPass):
     """Rewrite add/sub with alpha into a mul followed by add/sub."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = _ADD_OPS + _SUB_OPS
 
     def call_operator(self, op, args, kwargs, meta, updated: bool | None = False):
-        if op not in _ADD_OPS + _SUB_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         alpha = kwargs.get("alpha", 1)
diff --git a/backends/arm/_passes/decompose_addmm_pass.py b/backends/arm/_passes/decompose_addmm_pass.py
index d1368602d5d..d198e1a3b64 100644
--- a/backends/arm/_passes/decompose_addmm_pass.py
+++ b/backends/arm/_passes/decompose_addmm_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
 from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass  # noqa
@@ -41,7 +41,7 @@ def get_ops(op):
         raise ValueError(f"Unsupported operator: {op}")
 
 
-class DecomposeAddmmPass(ArmPass):
+class DecomposeAddmmPass(ArmOpTargetedPass):
     """Decomposes the addmm operator into tensor multiplication and addition."""
 
     _passes_required_after: Set[Type[ExportPass]] = {
@@ -49,9 +49,10 @@ class DecomposeAddmmPass(ArmPass):
         MatchArgRanksPass,
         MatchArgDtypePass,
     }
+    target_ops = (edge_addmm, aten_addmm)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in [edge_addmm, aten_addmm] or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         input, mat1, mat2 = args
diff --git a/backends/arm/_passes/decompose_as_strided_copy_pass.py b/backends/arm/_passes/decompose_as_strided_copy_pass.py
index a60d1b19fd9..c8c2a200bd8 100644
--- a/backends/arm/_passes/decompose_as_strided_copy_pass.py
+++ b/backends/arm/_passes/decompose_as_strided_copy_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm.common.as_strided_utils import (
     contiguous_strides,
     maybe_static_sequence,
@@ -18,7 +18,7 @@
 from executorch.exir.pass_base import ExportPass
 
 
-class DecomposeAsStridedCopyPass(ArmPass):
+class DecomposeAsStridedCopyPass(ArmOpTargetedPass):
     """Replace contiguous `aten.as_strided_copy` with `aten.view_copy`.
 
     The TOSA backend only supports the contiguous-as-strided case where the stride matches
@@ -31,6 +31,7 @@ class DecomposeAsStridedCopyPass(ArmPass):
 
     _EDGE_OPS = (exir_ops.edge.aten.as_strided_copy.default,)
     _ATEN_OPS = (torch.ops.aten.as_strided_copy.default,)
+    target_ops = _EDGE_OPS + _ATEN_OPS
 
     def _extract_args(
         self, args: Tuple[object, ...], kwargs: dict
@@ -76,7 +77,7 @@ def _extract_args(
         return size_tuple, stride_tuple, storage_offset
 
     def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False):
-        if op not in (*self._EDGE_OPS, *self._ATEN_OPS):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         extracted = self._extract_args(args, kwargs)
diff --git a/backends/arm/_passes/decompose_asin_and_acos_pass.py b/backends/arm/_passes/decompose_asin_and_acos_pass.py
index 707e6ec070d..5e0cfd66c32 100644
--- a/backends/arm/_passes/decompose_asin_and_acos_pass.py
+++ b/backends/arm/_passes/decompose_asin_and_acos_pass.py
@@ -10,7 +10,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_full_like_to_full_pass import (
     ConvertFullLikeToFullPass,
 )
@@ -48,7 +48,7 @@ def get_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get decomposition for op {op}")
 
 
-class DecomposeAsinAndAcosPass(ArmPass):
+class DecomposeAsinAndAcosPass(ArmOpTargetedPass):
     """This pass decomposes asin and acos into a rational approximation for
     small values and a transformed rational approximation for large values.
 
@@ -71,6 +71,7 @@ class DecomposeAsinAndAcosPass(ArmPass):
         MatchArgDtypePass,
         ReplaceScalarWithTensorByProfilePass,
     }
+    target_ops = edge_asin_op + edge_acos_op
 
     def _build_polynomial(
         self, coefficients: list[float], variable: torch.Tensor, meta: dict[str, str]
@@ -116,7 +117,7 @@ def _combine_branches(
         )
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_asin_op + edge_acos_op):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_asinh_pass.py b/backends/arm/_passes/decompose_asinh_pass.py
index 822b793d203..5f31c5efedc 100644
--- a/backends/arm/_passes/decompose_asinh_pass.py
+++ b/backends/arm/_passes/decompose_asinh_pass.py
@@ -6,7 +6,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_sqrt_pass import DecomposeSqrtPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
@@ -21,7 +21,7 @@
 edge_asinh_op = (exir_ops.edge.aten.asinh.default,)
 
 
-class DecomposeAsinhPass(ArmPass):
+class DecomposeAsinhPass(ArmOpTargetedPass):
     """Decomposes asinh to supported TOSA-operations.
 
     This decomposition is based on the mathematical identity:
@@ -36,9 +36,10 @@ class DecomposeAsinhPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
         MatchArgDtypePass,
     }
+    target_ops = edge_asinh_op
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in edge_asinh_op:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_atan_pass.py b/backends/arm/_passes/decompose_atan_pass.py
index a7ca90e7b43..cd33504c972 100644
--- a/backends/arm/_passes/decompose_atan_pass.py
+++ b/backends/arm/_passes/decompose_atan_pass.py
@@ -7,7 +7,7 @@
 from math import pi
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
@@ -40,7 +40,7 @@ def _get_atan_ops(op):
     )
 
 
-class DecomposeAtanPass(ArmPass):
+class DecomposeAtanPass(ArmOpTargetedPass):
     """Decomposes the atan operator into a rational (Padé) approximation."""
 
     _passes_required_after: Set[Type[ExportPass]] = {
@@ -49,6 +49,7 @@ class DecomposeAtanPass(ArmPass):
         MatchArgDtypePass,
         ReplaceScalarWithTensorByProfilePass,
     }
+    target_ops = (edge_atan,)
 
     def _rational_approximation(self, z, ops, meta):
         """Creates a (2,1) Padé approximation for atan(x) on [-1, 1]."""
@@ -77,7 +78,7 @@ def _rational_approximation(self, z, ops, meta):
         return super().call_operator(op_mul, (z, prod), {}, meta, updated=True)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op is not edge_atan:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_atanh_pass.py b/backends/arm/_passes/decompose_atanh_pass.py
index 014da39d7bd..c542b94f30d 100644
--- a/backends/arm/_passes/decompose_atanh_pass.py
+++ b/backends/arm/_passes/decompose_atanh_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
@@ -33,7 +33,7 @@ def _get_atanh_ops(op):
     )
 
 
-class DecomposeAtanhPass(ArmPass):
+class DecomposeAtanhPass(ArmOpTargetedPass):
     """Decomposes the atanh operator into primitive ops.
 
     atanh(x) = 0.5 * log((1 + x) / (1 - x))
@@ -46,9 +46,10 @@ class DecomposeAtanhPass(ArmPass):
         MatchArgDtypePass,
         ReplaceScalarWithTensorByProfilePass,
     }
+    target_ops = (edge_atanh,)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op is not edge_atanh:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_avg_pool2d_pass.py b/backends/arm/_passes/decompose_avg_pool2d_pass.py
index 8fcbcd35b5e..eb30a7600d8 100644
--- a/backends/arm/_passes/decompose_avg_pool2d_pass.py
+++ b/backends/arm/_passes/decompose_avg_pool2d_pass.py
@@ -7,7 +7,7 @@
 from typing import Any, Set, Type
 
 import torch
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.fuse_constant_ops_pass import (
     ComputeConstantOpsAOTPass,
 )
@@ -96,13 +96,13 @@ def _get_avgpool_post_pad(
     return [pad_w, post_w, pad_h, post_h], [0, 0]
 
 
-class DecomposeAvgPool2dPass(ArmPass):
+class DecomposeAvgPool2dPass(ArmOpTargetedPass):
     _passes_required_after: Set[Type[ExportPass]] = {ComputeConstantOpsAOTPass}
+    target_ops = edge_avg_pool2d + aten_avg_pool2d
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (
-            edge_avg_pool2d + aten_avg_pool2d
-        ) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         pad_op, avgpool_op, mul_op = get_decomposition(op)
diff --git a/backends/arm/_passes/decompose_cosh_pass.py b/backends/arm/_passes/decompose_cosh_pass.py
index 70d4247d9e0..96c73b6cdf2 100644
--- a/backends/arm/_passes/decompose_cosh_pass.py
+++ b/backends/arm/_passes/decompose_cosh_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
@@ -19,7 +19,7 @@
 edge_cosh = exir_ops.edge.aten.cosh.default
 
 
-class DecomposeCoshPass(ArmPass):
+class DecomposeCoshPass(ArmOpTargetedPass):
     """
     This pass replaces the cosh operator with a sequence of TOSA-equivalent operations that
     compute the hyperbolic cosine using the formula:
@@ -34,9 +34,10 @@ class DecomposeCoshPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
         MatchArgDtypePass,
     }
+    target_ops = (edge_cosh,)
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op is not edge_cosh:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_cosine_similarity_pass.py b/backends/arm/_passes/decompose_cosine_similarity_pass.py
index 6ceb50fdf55..b9e11a68174 100644
--- a/backends/arm/_passes/decompose_cosine_similarity_pass.py
+++ b/backends/arm/_passes/decompose_cosine_similarity_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_full_like_to_full_pass import (
     ConvertFullLikeToFullPass,
 )
@@ -19,7 +19,7 @@
 torch_cosine_similarity = (torch.ops.aten.cosine_similarity.default,)
 
 
-class DecomposeCosineSimilarityPass(ArmPass):
+class DecomposeCosineSimilarityPass(ArmOpTargetedPass):
     """Decomposition of aten.cosine_similarity.
 
     Example:
@@ -42,9 +42,11 @@ class DecomposeCosineSimilarityPass(ArmPass):
         ConvertFullLikeToFullPass,
         InsertTableOpsPass,
     }
+    target_ops = torch_cosine_similarity
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in torch_cosine_similarity or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         x1, x2 = args[0], args[1]
diff --git a/backends/arm/_passes/decompose_div_pass.py b/backends/arm/_passes/decompose_div_pass.py
index 651e58a563c..be4d91cd30c 100644
--- a/backends/arm/_passes/decompose_div_pass.py
+++ b/backends/arm/_passes/decompose_div_pass.py
@@ -8,7 +8,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -28,7 +28,7 @@ def get_div_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get div decomposition for op {op}")
 
 
-class DecomposeDivPass(ArmPass):
+class DecomposeDivPass(ArmOpTargetedPass):
     """This pass decomposes div into a mul and a reciprocal node.
 
     Example:
@@ -40,11 +40,10 @@ class DecomposeDivPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {InsertTableOpsPass}
+    target_ops = edge_div_ops + aten_div_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_div_ops + aten_div_ops) or not self.allowed_to_transform(
-            meta
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         reciprocal_op, mul_op = get_div_decomposition(op)
diff --git a/backends/arm/_passes/decompose_div_tensor_mode.py b/backends/arm/_passes/decompose_div_tensor_mode.py
index 774557b816f..cc5440b4e5b 100644
--- a/backends/arm/_passes/decompose_div_tensor_mode.py
+++ b/backends/arm/_passes/decompose_div_tensor_mode.py
@@ -7,7 +7,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_div_pass import DecomposeDivPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -42,7 +42,7 @@ def _get_opset(op):
     raise RuntimeError(f"div.Tensor_mode not supported for op {op}")
 
 
-class DecomposeDivTensorModePass(ArmPass):
+class DecomposeDivTensorModePass(ArmOpTargetedPass):
     """Rewrites aten.div.Tensor_mode into.
 
     Example:
@@ -57,11 +57,11 @@ class DecomposeDivTensorModePass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivPass}
+    target_ops = edge_div_mode_ops + aten_div_mode_ops
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (
-            edge_div_mode_ops + aten_div_mode_ops
-        ) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         opset = _get_opset(op)
diff --git a/backends/arm/_passes/decompose_elu_pass.py b/backends/arm/_passes/decompose_elu_pass.py
index 548a508d914..5f94968ad79 100644
--- a/backends/arm/_passes/decompose_elu_pass.py
+++ b/backends/arm/_passes/decompose_elu_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -71,13 +71,15 @@ def _get_elu_parameters(op, args, kwargs):
     return alpha, scale, input_scale
 
 
-class ConvertEluFamilyToEluPass(ArmPass):
+class ConvertEluFamilyToEluPass(ArmOpTargetedPass):
     """Convert SELU/CELU ops to equivalent parameterized ELU ops."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = selu_ops + celu_ops
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in selu_ops + celu_ops or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         input_ = args[0]
@@ -96,7 +98,7 @@ def call_operator(self, op, args, kwargs, meta):
         )
 
 
-class DecomposeEluPass(ArmPass):
+class DecomposeEluPass(ArmOpTargetedPass):
     """A transformation pass that decomposes unsupported 'aten.elu' operations
     into a combination of supported TOSA-equivalent operations.
 
@@ -119,9 +121,10 @@ class DecomposeEluPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = edge_elu_family_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in edge_elu_family_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_erfinv_pass.py b/backends/arm/_passes/decompose_erfinv_pass.py
index 747209d943e..07f874f9d97 100644
--- a/backends/arm/_passes/decompose_erfinv_pass.py
+++ b/backends/arm/_passes/decompose_erfinv_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_full_like_to_full_pass import (
     ConvertFullLikeToFullPass,
 )
@@ -48,7 +48,7 @@ def get_erfinv_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get erfinv decomposition for op {op}")
 
 
-class DecomposeErfinvPass(ArmPass):
+class DecomposeErfinvPass(ArmOpTargetedPass):
     """Decomposes `aten.erfinv` using the same *initial-guess* approximation as
     the PyTorch CPU scalar `calc_erfinv`, with a guarded Newton refinement step
     to improve numerical accuracy (especially for fp16).
@@ -127,9 +127,10 @@ class DecomposeErfinvPass(ArmPass):
         MatchArgDtypePass,
         ReplaceScalarWithTensorByProfilePass,
     }
+    target_ops = edge_erfinv_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in edge_erfinv_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_expm1_pass.py b/backends/arm/_passes/decompose_expm1_pass.py
index c1cb0b83166..6898b9fafb2 100644
--- a/backends/arm/_passes/decompose_expm1_pass.py
+++ b/backends/arm/_passes/decompose_expm1_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_div_pass import DecomposeDivPass
 from executorch.backends.arm._passes.decompose_int_pow_pass import DecomposeIntPowPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
@@ -55,7 +55,7 @@ def _get_expm1_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get expm1 decomposition for op {op}")
 
 
-class DecomposeExpm1Pass(ArmPass):
+class DecomposeExpm1Pass(ArmOpTargetedPass):
     """A transformation pass that decomposes unsupported 'aten.expm1' operations
     into a combination of supported TOSA-equivalent operations.
 
@@ -87,9 +87,10 @@ class DecomposeExpm1Pass(ArmPass):
         MatchArgDtypePass,
         MatchArgRanksPass,
     }
+    target_ops = edge_expm1_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in edge_expm1_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_floor_divide_pass.py b/backends/arm/_passes/decompose_floor_divide_pass.py
index 20e63f48023..d8f451f8af6 100644
--- a/backends/arm/_passes/decompose_floor_divide_pass.py
+++ b/backends/arm/_passes/decompose_floor_divide_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_div_tensor_mode import (
     DecomposeDivTensorModePass,
 )
@@ -47,15 +47,16 @@ def get_floor_divide_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get floor_div decomposition for op {op}")
 
 
-class DecomposeFloorDividePass(ArmPass):
+class DecomposeFloorDividePass(ArmOpTargetedPass):
     """Decomposes aten.floor_divide into aten.div.Tensor_mode with
     rounding_mode="floor".
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivTensorModePass}
+    target_ops = edge_floor_divide_ops + aten_floor_divide_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_floor_divide_ops + aten_floor_divide_ops):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         (div_op, full_op) = get_floor_divide_decomposition(op)
diff --git a/backends/arm/_passes/decompose_gelu_pass.py b/backends/arm/_passes/decompose_gelu_pass.py
index 7815b5fa44f..85f0b77df21 100644
--- a/backends/arm/_passes/decompose_gelu_pass.py
+++ b/backends/arm/_passes/decompose_gelu_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
 from executorch.backends.arm._passes.fuse_constant_ops_pass import (
     ComputeConstantOpsAOTPass,
@@ -42,7 +42,7 @@ def _get_gelu_ops(op) -> tuple:
     raise RuntimeError(f"Can't get GeLU decomposition ops for op {op}")
 
 
-class DecomposeGeluPass(ArmPass):
+class DecomposeGeluPass(ArmOpTargetedPass):
     """This pass decomposes the GELU operator into primitive ops. Aiming to
     adhere closely to the reference implementations built into ExecuTorch.
     Including using the same pre-calculated constants.
@@ -88,9 +88,10 @@ class DecomposeGeluPass(ArmPass):
         MatchArgDtypePass,
         MatchArgRanksPass,
     }
+    target_ops = torch_gelu + edge_gelu
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in torch_gelu + edge_gelu:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
         if self._is_quantized_meta(meta):
             # If quantized, node should be replace by table op
diff --git a/backends/arm/_passes/decompose_glu_pass.py b/backends/arm/_passes/decompose_glu_pass.py
index 68efaedd784..5927174a776 100644
--- a/backends/arm/_passes/decompose_glu_pass.py
+++ b/backends/arm/_passes/decompose_glu_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -39,13 +39,14 @@ def get_ops(op):
         raise ValueError(f"Unsupported operator: {op}")
 
 
-class DecomposeGluPass(ArmPass):
+class DecomposeGluPass(ArmOpTargetedPass):
     """Decomposes the GLU operator into hadamard product and sigmoid."""
 
     _passes_required_after: Set[Type[ExportPass]] = {InsertTableOpsPass}
+    target_ops = (edge_glu, aten_glu)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in [edge_glu, aten_glu] or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         hadamard_prod, sigmoid, slice_op = get_ops(op)
diff --git a/backends/arm/_passes/decompose_grouped_conv_pass.py b/backends/arm/_passes/decompose_grouped_conv_pass.py
index ed0adbe83d7..3fb68bc5aef 100644
--- a/backends/arm/_passes/decompose_grouped_conv_pass.py
+++ b/backends/arm/_passes/decompose_grouped_conv_pass.py
@@ -7,7 +7,7 @@
 from typing import Literal, Protocol, Set, Type, TypeGuard
 
 import torch
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.conv1d_unsqueeze_pass import Conv1dUnsqueezePass
 from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -24,7 +24,7 @@ class _PerChannelQuantArgs(Protocol):
     per_channel: Literal[True]
 
 
-class DecomposeGroupedConvPass(ArmPass):
+class DecomposeGroupedConvPass(ArmOpTargetedPass):
     """Splits a grouped convolution which is not supported by TOSA into multiple
     convolutions using slice->conv->cat.
 
@@ -47,6 +47,11 @@ class DecomposeGroupedConvPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {Conv1dUnsqueezePass}
+    target_ops = (
+        exir_ops.edge.aten.convolution.default,
+        torch.ops.aten.conv_transpose2d.input,
+        torch.ops.aten.conv2d.default,
+    )
 
     @staticmethod
     def _get_decomposition(op):
diff --git a/backends/arm/_passes/decompose_index_select_to_gather_pass.py b/backends/arm/_passes/decompose_index_select_to_gather_pass.py
index 5947e8c5499..be0d4dbb07c 100644
--- a/backends/arm/_passes/decompose_index_select_to_gather_pass.py
+++ b/backends/arm/_passes/decompose_index_select_to_gather_pass.py
@@ -8,7 +8,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
     ConvertExpandCopyToRepeatPass,
 )
@@ -38,7 +38,7 @@ def _get_index_select_decomposition(op):
     raise RuntimeError(f"Can't get index_select decomposition for op {op}")
 
 
-class DecomposeIndexSelectToGatherPass(ArmPass):
+class DecomposeIndexSelectToGatherPass(ArmOpTargetedPass):
     """Decompose edge index_select into a single backend TOSA gather.
 
     index_select(x, dim, index) semantics:
@@ -67,12 +67,12 @@ class DecomposeIndexSelectToGatherPass(ArmPass):
         ConvertSqueezesToViewPass,
     }
 
-    _TARGET_OPS = {
+    target_ops = {
         exir_ops.edge.aten.index_select.default,
     }
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._TARGET_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         x, dim, index = args
diff --git a/backends/arm/_passes/decompose_index_tensor_to_gather_pass.py b/backends/arm/_passes/decompose_index_tensor_to_gather_pass.py
index 037c9977fa6..93db9f9d434 100644
--- a/backends/arm/_passes/decompose_index_tensor_to_gather_pass.py
+++ b/backends/arm/_passes/decompose_index_tensor_to_gather_pass.py
@@ -9,7 +9,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import meta_without_qparams
 from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
     ConvertExpandCopyToRepeatPass,
@@ -75,7 +75,7 @@ def _broadcast_shape(
     return out
 
 
-class DecomposeIndexTensorToGatherPass(ArmPass):
+class DecomposeIndexTensorToGatherPass(ArmOpTargetedPass):
     """Decompose edge.aten.index.Tensor into backend TOSA gather (+ basic
     arith).
 
@@ -165,7 +165,7 @@ class DecomposeIndexTensorToGatherPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
     }
 
-    _TARGET_OPS = {
+    target_ops = {
         exir_ops.edge.aten.index.Tensor,
     }
 
@@ -246,7 +246,7 @@ def _compute_index_tensor_params(self, x, m, index_shapes):
         return x_data, S, W, K, C, trailing, lin_scales
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._TARGET_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         assert (
diff --git a/backends/arm/_passes/decompose_int_pow_pass.py b/backends/arm/_passes/decompose_int_pow_pass.py
index a31a9415e23..5147d23b68c 100644
--- a/backends/arm/_passes/decompose_int_pow_pass.py
+++ b/backends/arm/_passes/decompose_int_pow_pass.py
@@ -6,12 +6,12 @@
 
 from typing import Optional, Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class DecomposeIntPowPass(ArmPass):
+class DecomposeIntPowPass(ArmOpTargetedPass):
     """Replaces pow with integer exponent with a series of multiplications.
 
     Only handles pow.Tensor_Scalar and not pow.Tensor_Tensor. Needs to be run
@@ -20,6 +20,7 @@ class DecomposeIntPowPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (exir_ops.edge.aten.pow.Tensor_Scalar,)
 
     @staticmethod
     def _get_decomposable_integer_exponent(exp) -> Optional[int]:
@@ -34,7 +35,7 @@ def _get_decomposable_integer_exponent(exp) -> Optional[int]:
         return None
 
     def call_operator(self, op, args, kwargs, meta):
-        if op != exir_ops.edge.aten.pow.Tensor_Scalar:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_leaky_relu_pass.py b/backends/arm/_passes/decompose_leaky_relu_pass.py
index eb8b5bda61a..e2f9852d7f9 100644
--- a/backends/arm/_passes/decompose_leaky_relu_pass.py
+++ b/backends/arm/_passes/decompose_leaky_relu_pass.py
@@ -8,7 +8,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -33,7 +33,7 @@ def _get_leaky_relu_ops(op) -> tuple:
         raise RuntimeError(f"Can't get decomposition ops for op {op}")
 
 
-class DecomposeLeakyReLUPass(ArmPass):
+class DecomposeLeakyReLUPass(ArmOpTargetedPass):
     """This pass decomposes Leaky ReLU into primitive operations.
     LeakyReLU(x,slope) = max(0,x) + slope * min(0,x)
 
@@ -47,9 +47,11 @@ class DecomposeLeakyReLUPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = edge_ops + torch_ops
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_ops + torch_ops) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         x = args[0]
diff --git a/backends/arm/_passes/decompose_linalg_vector_norm_pass.py b/backends/arm/_passes/decompose_linalg_vector_norm_pass.py
index 8b165658c37..1604d861030 100644
--- a/backends/arm/_passes/decompose_linalg_vector_norm_pass.py
+++ b/backends/arm/_passes/decompose_linalg_vector_norm_pass.py
@@ -6,13 +6,13 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_sqrt_pass import DecomposeSqrtPass
 from executorch.backends.arm._passes.decompose_sum_pass import DecomposeSumPass
 from executorch.exir.pass_base import ExportPass
 
 
-class DecomposeLinalgVectorNormPass(ArmPass):
+class DecomposeLinalgVectorNormPass(ArmOpTargetedPass):
     """This pass decomposes aten.linalg_vector_norm.default into more primitive
     ops. We need to add this pass before quantization for graph annotation. By
     default, aten.linalg_vector_norm op is decomposed during legalization to
@@ -40,11 +40,11 @@ class DecomposeLinalgVectorNormPass(ArmPass):
     }
 
     torch_linalg_vector_norm = (torch.ops.aten.linalg_vector_norm.default,)
+    target_ops = torch_linalg_vector_norm
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self.torch_linalg_vector_norm or not self.allowed_to_transform(
-            meta
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         # Extract inputs and optional arguments.
diff --git a/backends/arm/_passes/decompose_log1p_pass.py b/backends/arm/_passes/decompose_log1p_pass.py
index b5cb8659140..7cc5f8cec9c 100644
--- a/backends/arm/_passes/decompose_log1p_pass.py
+++ b/backends/arm/_passes/decompose_log1p_pass.py
@@ -6,7 +6,7 @@
 import logging
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
@@ -17,7 +17,7 @@
 from executorch.exir.pass_base import ExportPass
 
 
-class DecomposeLog1pPass(ArmPass):
+class DecomposeLog1pPass(ArmOpTargetedPass):
     """Decompose log1p into a small polynomial with a log fallback for larger
     inputs.
     """
@@ -32,6 +32,7 @@ class DecomposeLog1pPass(ArmPass):
     _supported_ops = {
         exir_ops.edge.aten.log1p.default,
     }
+    target_ops = _supported_ops
 
     def _poly(self, x, meta):
         # 6-term Taylor: x - x^2/2 + x^3/3 - x^4/4 + x^5/5 - x^6/6
@@ -63,7 +64,7 @@ def _poly(self, x, meta):
         return acc
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._supported_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_logit_pass.py b/backends/arm/_passes/decompose_logit_pass.py
index fa82ff4f579..9f9f4744fd0 100644
--- a/backends/arm/_passes/decompose_logit_pass.py
+++ b/backends/arm/_passes/decompose_logit_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
@@ -50,7 +50,7 @@ def get_ops(op):
         raise ValueError(f"Unsupported operator: {op}")
 
 
-class DecomposeLogitPass(ArmPass):
+class DecomposeLogitPass(ArmOpTargetedPass):
     """Decomposes the `logit` operator into a sequence of primitive operations.
 
     If `eps` is provided, the input tensor `x` is first clamped to the range
@@ -78,15 +78,13 @@ class DecomposeLogitPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
     }
 
-    _TARGET_OPS = {
+    target_ops = {
         edge_logit,
         aten_logit,
     }
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in DecomposeLogitPass._TARGET_OPS or not self.allowed_to_transform(
-            meta
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         X = args[0]
diff --git a/backends/arm/_passes/decompose_masked_fill_pass.py b/backends/arm/_passes/decompose_masked_fill_pass.py
index 748aee3fc49..dfb85da7742 100644
--- a/backends/arm/_passes/decompose_masked_fill_pass.py
+++ b/backends/arm/_passes/decompose_masked_fill_pass.py
@@ -8,7 +8,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_full_like_to_full_pass import (
     ConvertFullLikeToFullPass,
 )
@@ -34,7 +34,7 @@ def _get_decomposition(op) -> tuple:
     raise RuntimeError(f"Unable to get decomposition for op {op}")
 
 
-class DecomposeMaskedFillPass(ArmPass):
+class DecomposeMaskedFillPass(ArmOpTargetedPass):
     """Masked fill takes in a boolean mask, a tensor and a scalar value.
 
     Fills the tensor with the scalar value according to the boolean mask.
@@ -43,9 +43,10 @@ class DecomposeMaskedFillPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {ConvertFullLikeToFullPass}
+    target_ops = aten_ops + edge_ops
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op not in (*aten_ops, *edge_ops):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         x, mask, scalar = args
diff --git a/backends/arm/_passes/decompose_maxpool2d_with_dilation_pass.py b/backends/arm/_passes/decompose_maxpool2d_with_dilation_pass.py
index 72fe53d57b9..7729b755113 100644
--- a/backends/arm/_passes/decompose_maxpool2d_with_dilation_pass.py
+++ b/backends/arm/_passes/decompose_maxpool2d_with_dilation_pass.py
@@ -9,7 +9,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.size_adjust_input_pass import SizeAdjustInputPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -47,7 +47,7 @@ def _pack_dimension(
     return packed_dim_size, padding + extra_padding, output_size
 
 
-class DecomposeMaxPool2dPass(ArmPass):
+class DecomposeMaxPool2dPass(ArmOpTargetedPass):
     """Decompose dilated max_pool2d (EXIR edge ops) into space-to-batch ->
     maxpool -> batch-to-space.
     """
@@ -55,10 +55,11 @@ class DecomposeMaxPool2dPass(ArmPass):
     _passes_required_after: Set[Type[ExportPass]] = {
         SizeAdjustInputPass,
     }
+    target_ops = EDGE_MAXPOOL2D
 
     def call_operator(self, op, args, kwargs, meta):
         # Only intercept EXIR edge max_pool2d ops
-        if op not in EDGE_MAXPOOL2D:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         # detect whether indices variant
diff --git a/backends/arm/_passes/decompose_meandim_pass.py b/backends/arm/_passes/decompose_meandim_pass.py
index c7d3bc0a04d..e1175d5ba1b 100644
--- a/backends/arm/_passes/decompose_meandim_pass.py
+++ b/backends/arm/_passes/decompose_meandim_pass.py
@@ -8,7 +8,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
 from executorch.backends.arm._passes.decompose_sum_pass import DecomposeSumPass
 from executorch.backends.arm._passes.fuse_constant_ops_pass import (
@@ -69,7 +69,7 @@ def get_quantization(op):
     return None
 
 
-class DecomposeMeanDimPass(ArmPass):
+class DecomposeMeanDimPass(ArmOpTargetedPass):
     """Decomposes a meandim into sum + mul (1/N).
 
     Each reduction dimension is handled via REDUCE_SUM followed by
@@ -94,6 +94,13 @@ class DecomposeMeanDimPass(ArmPass):
         DecomposeSumPass,
         SizeAdjustInputPass,
     }
+    target_ops = (
+        exir_ops.edge.aten.mean.dim,
+        torch.ops.aten.mean.dim,
+        exir_ops.edge.aten.mean.default,
+        torch.ops.aten.mean.default,
+    )
+    check_allowed_to_transform = True
 
     def __init__(self, graph_module, tosa_spec, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -101,12 +108,7 @@ def __init__(self, graph_module, tosa_spec, *args, **kwargs):
         self._tosa_spec = tosa_spec
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op not in (
-            exir_ops.edge.aten.mean.dim,
-            torch.ops.aten.mean.dim,
-            exir_ops.edge.aten.mean.default,
-            torch.ops.aten.mean.default,
-        ) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta, updated)
 
         x = get_node_arg(args, 0)
diff --git a/backends/arm/_passes/decompose_ne_pass.py b/backends/arm/_passes/decompose_ne_pass.py
index 95dfc0e1179..4dfcf6ad934 100644
--- a/backends/arm/_passes/decompose_ne_pass.py
+++ b/backends/arm/_passes/decompose_ne_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -38,7 +38,7 @@ def get_ne_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get ne decomposition for op {op}")
 
 
-class DecomposeNotEqualPass(ArmPass):
+class DecomposeNotEqualPass(ArmOpTargetedPass):
     """A transformation pass that decomposes unsupported `aten.ne` operations
     into a combination of supported TOSA-equivalent operations.
 
@@ -57,9 +57,10 @@ class DecomposeNotEqualPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = edge_ne_ops + aten_ne_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_ne_ops + aten_ne_ops) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         lhs, rhs = args
diff --git a/backends/arm/_passes/decompose_permute_for_u55_pass.py b/backends/arm/_passes/decompose_permute_for_u55_pass.py
index ceed25f97ec..a9e8beef1cd 100644
--- a/backends/arm/_passes/decompose_permute_for_u55_pass.py
+++ b/backends/arm/_passes/decompose_permute_for_u55_pass.py
@@ -11,7 +11,7 @@
 
 import torch
 import tosa_serializer as ts
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.rewrite_slice import RewriteSlicePass
 from executorch.backends.arm.arm_vela import vela_compile
 from executorch.backends.arm.tosa.mapping import map_dtype
@@ -20,7 +20,7 @@
 from executorch.exir.pass_base import ExportPass
 
 
-class DecomposePermuteForU55Pass(ArmPass):
+class DecomposePermuteForU55Pass(ArmOpTargetedPass):
     """Decompose U55 permutes into shape-safe permutes for large tensor shapes.
 
     Ethos-U55 has transpose shape constraints based on rank-dependent
@@ -36,6 +36,7 @@ class DecomposePermuteForU55Pass(ArmPass):
         exir_ops.edge.aten.permute.default,
         exir_ops.edge.aten.permute_copy.default,
     )
+    target_ops = _PERMUTE_OPS
     _SLICE_OP = exir_ops.edge.aten.slice_copy.Tensor
     _CAT_OP = exir_ops.edge.aten.cat.default
     _MAX_PRODUCT = 2**16
@@ -323,7 +324,7 @@ def recurse(current, depth: int):
         return recurse(input_node, 0)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._PERMUTE_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         spec = get_context_spec()
diff --git a/backends/arm/_passes/decompose_remainder_pass.py b/backends/arm/_passes/decompose_remainder_pass.py
index 38185b85149..af22cad1624 100644
--- a/backends/arm/_passes/decompose_remainder_pass.py
+++ b/backends/arm/_passes/decompose_remainder_pass.py
@@ -6,7 +6,7 @@
 from typing import Dict, Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_div_tensor_mode import (
     DecomposeDivTensorModePass,
 )
@@ -41,7 +41,7 @@
 }
 
 
-class DecomposeRemainderPass(ArmPass):
+class DecomposeRemainderPass(ArmOpTargetedPass):
     """
     Decompose the remainder operation into primitive arithmetic:
         remainder(x, y) -> x - floor_div(x, y) * y
@@ -49,15 +49,10 @@ class DecomposeRemainderPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivTensorModePass}
+    target_ops = tuple(_decomposition_ops)
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        supported_ops = (
-            exir_ops.edge.aten.remainder.Scalar,
-            exir_ops.edge.aten.remainder.Tensor,
-            torch.ops.aten.remainder.Scalar,
-            torch.ops.aten.remainder.Tensor,
-        )
-        if op not in supported_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
         # Keep scalar remainder opaque during transform-for-annotation so the
         # quantizer can wrap the original op directly. In the backend pipeline,
diff --git a/backends/arm/_passes/decompose_round_pass.py b/backends/arm/_passes/decompose_round_pass.py
index 9319394d986..476f75d6b56 100644
--- a/backends/arm/_passes/decompose_round_pass.py
+++ b/backends/arm/_passes/decompose_round_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass
@@ -46,7 +46,7 @@ def _get_round_decomposition_ops(op) -> tuple[Op, Op, Op, Op, Op, Op, Op]:
     raise RuntimeError(f"Can't get round decomposition ops for op {op}")
 
 
-class DecomposeRoundPass(ArmPass):
+class DecomposeRoundPass(ArmOpTargetedPass):
     """
     For inputs >= 0, round(x) is equivalent to floor(x + 0.5), and for inputs < 0,
     round(x) is equivalent to ceil(x - 0.5). This pass decomposes the round operation into
@@ -63,15 +63,13 @@ class DecomposeRoundPass(ArmPass):
 
     _passes_required_after: Set[Type[ExportPass]] = set()
 
-    _TARGET_OPS = {
+    target_ops = {
         exir_ops.edge.aten.round.default,
         torch.ops.aten.round.default,
     }
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op not in DecomposeRoundPass._TARGET_OPS or not self.allowed_to_transform(
-            meta
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta, updated)
         x = args[0]
         input_dtype = x.node.meta["val"].dtype
diff --git a/backends/arm/_passes/decompose_select_scatter_pass.py b/backends/arm/_passes/decompose_select_scatter_pass.py
index 4b4db8d208c..129e9f05961 100644
--- a/backends/arm/_passes/decompose_select_scatter_pass.py
+++ b/backends/arm/_passes/decompose_select_scatter_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_int64_const_ops_to_int32 import (
     ConvertInt64ConstOpsToInt32Pass,
 )
@@ -44,7 +44,7 @@ def get_select_scatter_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get select_scatter decomposition for op {op}")
 
 
-class DecomposeSelectScatterPass(ArmPass):
+class DecomposeSelectScatterPass(ArmOpTargetedPass):
     """select_scatter is decomposed into other ops during export, however this
     is only suppported for the fp profile and for the int profile we need to
     decompose it here.
@@ -65,9 +65,10 @@ class DecomposeSelectScatterPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
         ConvertInt64ConstOpsToInt32Pass,
     }
+    target_ops = edge_scatter_ops + aten_scatter_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_scatter_ops + aten_scatter_ops):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated=False)
 
         (
diff --git a/backends/arm/_passes/decompose_sign_pass.py b/backends/arm/_passes/decompose_sign_pass.py
index 111d1ca5ee3..8f7fda8729b 100644
--- a/backends/arm/_passes/decompose_sign_pass.py
+++ b/backends/arm/_passes/decompose_sign_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -44,15 +44,16 @@ def get_ops(op):
         raise ValueError(f"Unsupported operator: {op}")
 
 
-class DecomposeSignPass(ArmPass):
+class DecomposeSignPass(ArmOpTargetedPass):
     """Decomposes the sign operator into a sequence of operations that are
     supported by the Arm backend.
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (edge_sign, aten_sign)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_sign, aten_sign) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         gt_op, lt_op, where_op, neg_op, mul_op, add_op = get_ops(op)
diff --git a/backends/arm/_passes/decompose_sinh_pass.py b/backends/arm/_passes/decompose_sinh_pass.py
index 71ac0a34f08..053b378af83 100644
--- a/backends/arm/_passes/decompose_sinh_pass.py
+++ b/backends/arm/_passes/decompose_sinh_pass.py
@@ -6,7 +6,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.match_arg_dtype_pass import MatchArgDtypePass
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
@@ -21,7 +21,7 @@
 edge_sinh = exir_ops.edge.aten.sinh.default
 
 
-class DecomposeSinhPass(ArmPass):
+class DecomposeSinhPass(ArmOpTargetedPass):
     """A decomposition pass that decomposes Sinh operations into a combination
     of supported TOSA-equivalent operations (MI).
 
@@ -39,9 +39,10 @@ class DecomposeSinhPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
         MatchArgDtypePass,
     }
+    target_ops = (edge_sinh,)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op is not edge_sinh:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_slice_scatter_pass.py b/backends/arm/_passes/decompose_slice_scatter_pass.py
index 24cdfeb96a5..edf030f9701 100644
--- a/backends/arm/_passes/decompose_slice_scatter_pass.py
+++ b/backends/arm/_passes/decompose_slice_scatter_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.accumulate_index_put_pass import (
     AccumulateIndexPutPass,
 )
@@ -53,7 +53,7 @@ def _fixup_end(end, dim_size: int) -> int:
     return max(0, min(e, dim_size))
 
 
-class DecomposeSliceScatterPass(ArmPass):
+class DecomposeSliceScatterPass(ArmOpTargetedPass):
     """
     Decompose slice_scatter into:
       - Fast path (step == 1): slice_copy + cat (contiguous update), or
@@ -71,9 +71,10 @@ class DecomposeSliceScatterPass(ArmPass):
         AccumulateIndexPutPass,
         RewriteIndexPutPass,
     }
+    target_ops = edge_slice_scatter_ops + aten_slice_scatter_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (edge_slice_scatter_ops + aten_slice_scatter_ops):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         (
diff --git a/backends/arm/_passes/decompose_softmax_pass.py b/backends/arm/_passes/decompose_softmax_pass.py
index cb05b7c4b0c..d30137c0460 100644
--- a/backends/arm/_passes/decompose_softmax_pass.py
+++ b/backends/arm/_passes/decompose_softmax_pass.py
@@ -7,7 +7,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.backends.arm._passes.decompose_sum_pass import DecomposeSumPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -56,7 +56,7 @@ def _get_logsoftmax_ops(op) -> tuple:
     raise RuntimeError(f"Can't get logsoftmax decomposition ops for op {op}")
 
 
-class DecomposeSoftmaxPass(ArmPass):
+class DecomposeSoftmaxPass(ArmOpTargetedPass):
     """This pass decomposes log_softmax or softmax into more primitive ops.
 
     Example:
@@ -77,6 +77,7 @@ class DecomposeSoftmaxPass(ArmPass):
         DecomposeSumPass,
         InsertTableOpsPass,
     }
+    target_ops = torch_softmax + edge_softmax
 
     def __init__(self, skip_safe_softmax: bool = False, **kwargs):
         super().__init__(**kwargs)
@@ -84,9 +85,7 @@ def __init__(self, skip_safe_softmax: bool = False, **kwargs):
         self._warned_safe_softmax = False
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in torch_softmax + edge_softmax or not self.allowed_to_transform(
-            meta
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         if self._skip_safe_softmax and op == torch.ops.aten._safe_softmax.default:
diff --git a/backends/arm/_passes/decompose_sqrt_pass.py b/backends/arm/_passes/decompose_sqrt_pass.py
index 86e5d6681bd..ce5a5b6d2a4 100644
--- a/backends/arm/_passes/decompose_sqrt_pass.py
+++ b/backends/arm/_passes/decompose_sqrt_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Tuple, Type, Union
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -27,15 +27,14 @@ def get_sqrt_decomposition(op) -> Union[Tuple, torch._ops.OpOverload]:
     raise RuntimeError(f"Can't get sqrt decomposition for op {op}")
 
 
-class DecomposeSqrtPass(ArmPass):
+class DecomposeSqrtPass(ArmOpTargetedPass):
     _passes_required_after: Set[Type[ExportPass]] = {InsertTableOpsPass}
+    target_ops = edge_sqrt_ops + aten_sqrt_ops
 
     def call_operator(self, op, args, kwargs, meta):
         """Decomposes `sqrt(x)` into `pow(x, 0.5)` for backend support."""
 
-        if op not in (edge_sqrt_ops + aten_sqrt_ops) or not self.allowed_to_transform(
-            meta
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_strided_slice_copy_pass.py b/backends/arm/_passes/decompose_strided_slice_copy_pass.py
index 71cc618ed9c..91606dd0bd6 100644
--- a/backends/arm/_passes/decompose_strided_slice_copy_pass.py
+++ b/backends/arm/_passes/decompose_strided_slice_copy_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -42,7 +42,7 @@ def _fixup_end(end, dim_size):
     return max(0, min(e, dim_size))
 
 
-class DecomposeStridedSliceCopyPass(ArmPass):
+class DecomposeStridedSliceCopyPass(ArmOpTargetedPass):
     """Decompose edge.aten.slice_copy.Tensor with non-unit step into supported
     ops.
 
@@ -61,10 +61,10 @@ class DecomposeStridedSliceCopyPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
-    _TARGET_OPS = {exir_ops.edge.aten.slice_copy.Tensor}
+    target_ops = {exir_ops.edge.aten.slice_copy.Tensor}
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._TARGET_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         # Only handle the non-unit-step case; leave unit-step to existing lowering.
diff --git a/backends/arm/_passes/decompose_sum_pass.py b/backends/arm/_passes/decompose_sum_pass.py
index 3076510533e..e134ea6abc7 100644
--- a/backends/arm/_passes/decompose_sum_pass.py
+++ b/backends/arm/_passes/decompose_sum_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -24,7 +24,7 @@ def _get_sum_decomp(op):
             raise RuntimeError("Unvalid op in DecomposeSumPass")
 
 
-class DecomposeSumPass(ArmPass):
+class DecomposeSumPass(ArmOpTargetedPass):
     """In Pytorch, the default behaviour of for example Tensor.sum is to squeeze
     the dimension that is summed (keep_dim = False). However, in TOSA,
     REDUCE_SUM always preserves the rank of the input (keep_dim = True). To get
@@ -44,12 +44,13 @@ class DecomposeSumPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (
+        exir_ops.edge.aten.sum.dim_IntList,
+        torch.ops.aten.sum.dim_IntList,
+    )
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in [
-            exir_ops.edge.aten.sum.dim_IntList,
-            torch.ops.aten.sum.dim_IntList,
-        ]:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         match len(args):
diff --git a/backends/arm/_passes/decompose_tan_pass.py b/backends/arm/_passes/decompose_tan_pass.py
index 87b347dbbad..2d655a9937d 100644
--- a/backends/arm/_passes/decompose_tan_pass.py
+++ b/backends/arm/_passes/decompose_tan_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass, DecomposeDivPass
+from executorch.backends.arm._passes import ArmOpTargetedPass, DecomposeDivPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -13,13 +13,14 @@
 edge_tan_op = exir_ops.edge.aten.tan.default
 
 
-class DecomposeTanPass(ArmPass):
+class DecomposeTanPass(ArmOpTargetedPass):
     """Decomposes tan to sin/cos."""
 
     _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivPass}
+    target_ops = (edge_tan_op,)
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op != edge_tan_op:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
         # Skip quantized tan - it is decomposed as one single table op
         if self._is_quantized_meta(meta):
diff --git a/backends/arm/_passes/decompose_tosa_unsupported_clamp_pass.py b/backends/arm/_passes/decompose_tosa_unsupported_clamp_pass.py
index 2410ce503a7..12dcd06388c 100644
--- a/backends/arm/_passes/decompose_tosa_unsupported_clamp_pass.py
+++ b/backends/arm/_passes/decompose_tosa_unsupported_clamp_pass.py
@@ -6,12 +6,12 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class DecomposeTOSAUnsupportedClampPass(ArmPass):
+class DecomposeTOSAUnsupportedClampPass(ArmOpTargetedPass):
     """Rewrite TOSA unsupported clamp into min/max chain since TOSA lacks int32
     clamp support and only supports scalar min/max values.
     """
@@ -23,6 +23,7 @@ class DecomposeTOSAUnsupportedClampPass(ArmPass):
         torch.ops.aten.clamp.default,
         torch.ops.aten.clamp.Tensor,
     }
+    target_ops = _supported_ops
 
     def _ensure_tensor(
         self,
@@ -54,7 +55,7 @@ def call_operator(self, op, args, kwargs, meta):
             torch.ops.aten.clamp.Tensor,
         }
 
-        if op not in self._supported_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         # Only rewrite scalar clamp for int32
diff --git a/backends/arm/_passes/decompose_tril_pass.py b/backends/arm/_passes/decompose_tril_pass.py
index 3101b24e95b..9108208e73d 100644
--- a/backends/arm/_passes/decompose_tril_pass.py
+++ b/backends/arm/_passes/decompose_tril_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
 from executorch.backends.arm._passes.fuse_constant_ops_pass import (
     ComputeConstantOpsAOTPass,
@@ -44,7 +44,7 @@ def _get_ops(op):
     raise RuntimeError(f"Unable to get decomposition ops for {op}")
 
 
-class DecomposeTrilPass(ArmPass):
+class DecomposeTrilPass(ArmOpTargetedPass):
     """Tril decomposition.
 
     Decomposition:
@@ -54,11 +54,10 @@ class DecomposeTrilPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {ComputeConstantOpsAOTPass}
+    target_ops = (torch.ops.aten.tril.default,)
 
     def call_operator(self, op, args, kwargs, meta):
-        handled_ops = [torch.ops.aten.tril.default]
-
-        if op not in handled_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         x = args[0]
diff --git a/backends/arm/_passes/decompose_unfold_to_gather_pass.py b/backends/arm/_passes/decompose_unfold_to_gather_pass.py
index d0e3897080a..950290b3b83 100644
--- a/backends/arm/_passes/decompose_unfold_to_gather_pass.py
+++ b/backends/arm/_passes/decompose_unfold_to_gather_pass.py
@@ -9,7 +9,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.replace_scalar_with_tensor_pass import (
     ReplaceScalarWithTensorByProfilePass,
 )
@@ -29,7 +29,7 @@ def _get_unfold_copy_decomposition(op) -> tuple:
 
     """
 
-    if op in DecomposeUnfoldToGatherPass._TARGET_OPS:
+    if op in DecomposeUnfoldToGatherPass.target_ops:
         return (
             exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
             exir_ops.edge.aten.view_copy.default,
@@ -45,7 +45,7 @@ def _get_unfold_copy_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get unfold_copy decomposition for op {op}")
 
 
-class DecomposeUnfoldToGatherPass(ArmPass):
+class DecomposeUnfoldToGatherPass(ArmOpTargetedPass):
     """Decompose unfold_copy with backend tosa.GATHER as the core op, plus other
     TOSA-supported ops to build indices and materialize the output layout.
 
@@ -93,7 +93,7 @@ class DecomposeUnfoldToGatherPass(ArmPass):
         ReplaceScalarWithTensorByProfilePass,
     }
 
-    _TARGET_OPS = {
+    target_ops = {
         exir_ops.edge.aten.unfold_copy.default,
     }
 
@@ -147,7 +147,7 @@ def _compute_unfold_copy_params(
         return (x_val, C, S, K, U, UC, pre, post, P, Q, needs_bool_cast)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._TARGET_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         x, dim, size, step = args
diff --git a/backends/arm/_passes/decompose_var_pass.py b/backends/arm/_passes/decompose_var_pass.py
index fcf61cf5129..90ea80b6b47 100644
--- a/backends/arm/_passes/decompose_var_pass.py
+++ b/backends/arm/_passes/decompose_var_pass.py
@@ -8,7 +8,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
 from executorch.backends.arm._passes.decompose_meandim_pass import DecomposeMeanDimPass
 from executorch.backends.arm._passes.decompose_sum_pass import DecomposeSumPass
@@ -37,7 +37,7 @@ def get_var_decomposition(op) -> tuple:
     raise RuntimeError(f"Can't get var decomposition for op {op}")
 
 
-class DecomposeVarPass(ArmPass):
+class DecomposeVarPass(ArmOpTargetedPass):
     """
     This pass decomposes var.correction and var.dim into smaller ops (see https://pytorch.org/docs/stable/generated/torch.var.html)
 
@@ -56,13 +56,15 @@ class DecomposeVarPass(ArmPass):
         DecomposeMeanDimPass,
         DecomposeSumPass,
     }
+    target_ops = (
+        exir_ops.edge.aten.var.correction,
+        torch.ops.aten.var.correction,
+        torch.ops.aten.var.dim,
+    )
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (
-            exir_ops.edge.aten.var.correction,
-            torch.ops.aten.var.correction,
-            torch.ops.aten.var.dim,
-        ) or not self.allowed_to_transform(meta):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta)
 
         x = args[0]
diff --git a/backends/arm/_passes/decompose_where_scalar_other_pass.py b/backends/arm/_passes/decompose_where_scalar_other_pass.py
index a125a6355cb..8b4b27c8ce2 100644
--- a/backends/arm/_passes/decompose_where_scalar_other_pass.py
+++ b/backends/arm/_passes/decompose_where_scalar_other_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -27,20 +27,18 @@ def _get_where_scalar_other_decomposition(op):
     raise RuntimeError(f"Can't get where.ScalarOther decomposition for op {op}")
 
 
-class DecomposeWhereScalarOtherPass(ArmPass):
+class DecomposeWhereScalarOtherPass(ArmOpTargetedPass):
     """Decompose where.ScalarOther into where.self with a tensorized scalar."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
 
-    _TARGET_OPS = {
+    target_ops = {
         exir_ops.edge.aten.where.ScalarOther,
     }
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if (
-            op not in DecomposeWhereScalarOtherPass._TARGET_OPS
-            or not self.allowed_to_transform(meta)
-        ):
+        if op not in self.target_ops or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta, updated)
 
         condition, self_tensor, other_scalar = args
diff --git a/backends/arm/_passes/decorate_fp32_to_int32_casting_pass.py b/backends/arm/_passes/decorate_fp32_to_int32_casting_pass.py
index b856df8e060..3ddd1358035 100644
--- a/backends/arm/_passes/decorate_fp32_to_int32_casting_pass.py
+++ b/backends/arm/_passes/decorate_fp32_to_int32_casting_pass.py
@@ -7,7 +7,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -26,7 +26,7 @@ def _get_decorated_ops(op):
         raise RuntimeError(f"Can't get decorated ops for op {op}")
 
 
-class DecorateFp32toInt32CastingPass(ArmPass):
+class DecorateFp32toInt32CastingPass(ArmOpTargetedPass):
     """To lower pytorch fp32 -> int32 casting to TOSA, we need to transform the
     value with Ceil, Floor, and Where.
 
@@ -47,9 +47,10 @@ class DecorateFp32toInt32CastingPass(ArmPass):
     targets = [
         exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
     ]
+    target_ops = targets
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self.targets:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         input = get_node_arg(args, 0)
diff --git a/backends/arm/_passes/fuse_consecutive_concat_shapes.py b/backends/arm/_passes/fuse_consecutive_concat_shapes.py
index 8a02697d57c..fc2d46d3c12 100644
--- a/backends/arm/_passes/fuse_consecutive_concat_shapes.py
+++ b/backends/arm/_passes/fuse_consecutive_concat_shapes.py
@@ -6,12 +6,12 @@
 from typing import Any
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import NodeMetadata, ProxyValue
 
 
-class FuseConsecutiveConcatShapesPass(ArmPass):
+class FuseConsecutiveConcatShapesPass(ArmOpTargetedPass):
     """This pass fuses consecutive tosa.CONCAT_SHAPE operations into a single
     tosa.CONCAT_SHAPE operation with a flattened list of input shapes. E.g.
     tosa.CONCAT_SHAPE([shape1, tosa.CONCAT_SHAPE([shape2, shape3]), shape4])
@@ -24,6 +24,7 @@ class FuseConsecutiveConcatShapesPass(ArmPass):
     """
 
     _passes_required_after = set()
+    target_ops = (exir_ops.backend.tosa.CONCAT_SHAPE.default,)
 
     def _to_proxy_value(
         self, arg: ProxyValue | torch.fx.Node | Any
@@ -42,7 +43,7 @@ def call_operator(
         meta: NodeMetadata,
         updated: bool | None = False,
     ) -> ProxyValue:
-        if op != exir_ops.backend.tosa.CONCAT_SHAPE.default:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
         arg_list = args[0]
         new_arg_list: list[Any] = []
diff --git a/backends/arm/_passes/insert_const_shapes.py b/backends/arm/_passes/insert_const_shapes.py
index 059731857b4..c916438eb09 100644
--- a/backends/arm/_passes/insert_const_shapes.py
+++ b/backends/arm/_passes/insert_const_shapes.py
@@ -5,12 +5,12 @@
 
 from typing import Any, Optional
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm.tosa.dialect.shape import meta_has_shape_mark
 from executorch.exir.dialects._ops import ops as exir_ops
 
 
-class InsertConstShapesPass(ArmPass):
+class InsertConstShapesPass(ArmOpTargetedPass):
     """Materialize literal shape arguments as CONST_SHAPE nodes.
 
     This pass targets ops such as `aten.view_copy` and `aten.repeat` whose shape
@@ -21,7 +21,7 @@ class InsertConstShapesPass(ArmPass):
     """
 
     _passes_required_after = set()
-    targeted_ops = {
+    target_ops = {
         exir_ops.edge.aten.view_copy.default,
         exir_ops.edge.aten.repeat.default,
     }
@@ -41,7 +41,7 @@ def _is_shape_arg(arg: Any) -> bool:
         )
 
     def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False):
-        if op not in self.targeted_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
         if any(InsertConstShapesPass._is_shape_arg(arg) for arg in args):
             new_args = []
diff --git a/backends/arm/_passes/insert_data_layout_casts_pass.py b/backends/arm/_passes/insert_data_layout_casts_pass.py
index b760baef6e8..07a2d186895 100644
--- a/backends/arm/_passes/insert_data_layout_casts_pass.py
+++ b/backends/arm/_passes/insert_data_layout_casts_pass.py
@@ -6,13 +6,13 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm.tosa.specification import get_context_spec
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, NodeMetadata
 
 
-class InsertDataLayoutCastsPass(ArmPass):
+class InsertDataLayoutCastsPass(ArmOpTargetedPass):
     """Insert casts around data layout operators when their dtype is not
     supported by the active TOSA specification.
 
@@ -45,7 +45,7 @@ class InsertDataLayoutCastsPass(ArmPass):
         exir_ops.edge.aten.slice_copy.Tensor,
         exir_ops.edge.aten.flip.default,
     }
-    targeted_ops = _concat_ops | _single_input_ops
+    target_ops = _concat_ops | _single_input_ops
 
     _fp_to_int_map = {
         torch.float16: torch.int16,
@@ -60,7 +60,7 @@ class InsertDataLayoutCastsPass(ArmPass):
     }
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self.targeted_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if op in self._concat_ops:
diff --git a/backends/arm/_passes/insert_dynamic_padding.py b/backends/arm/_passes/insert_dynamic_padding.py
index ea03e231ae8..61a5ebd09ca 100644
--- a/backends/arm/_passes/insert_dynamic_padding.py
+++ b/backends/arm/_passes/insert_dynamic_padding.py
@@ -7,14 +7,14 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm.tosa.dialect.shape import is_shape_op_node
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, ProxyValue
 
 
-class InsertDynamicPaddingPass(ArmPass):
+class InsertDynamicPaddingPass(ArmOpTargetedPass):
     """This pass rewrites conv operations with padding to use an explicit pad
     operator before the conv2d operation and setting the padding to zero in the
     conv2d operator. E.g. conv2d(x, weight, bias, stride, padding, dilation)
@@ -27,6 +27,10 @@ class InsertDynamicPaddingPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (
+        exir_ops.backend.tosa.CONV2D.default,
+        exir_ops.backend.tosa.DEPTHWISE_CONV2D.default,
+    )
 
     def _is_dynamic_padding(
         self, padding: ProxyValue | list[int] | tuple[int, ...]
@@ -39,10 +43,7 @@ def _is_dynamic_padding(
         )
 
     def call_operator(self, op, args, kwargs, meta, updated=False) -> ProxyValue:
-        if op not in (
-            exir_ops.backend.tosa.CONV2D.default,
-            exir_ops.backend.tosa.DEPTHWISE_CONV2D.default,
-        ):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
         padding = args[4]
         if not self._is_dynamic_padding(padding):
diff --git a/backends/arm/_passes/normalize_index_put_bool_index_tensor_pass.py b/backends/arm/_passes/normalize_index_put_bool_index_tensor_pass.py
index 9377eaec2fe..badc58b06fb 100644
--- a/backends/arm/_passes/normalize_index_put_bool_index_tensor_pass.py
+++ b/backends/arm/_passes/normalize_index_put_bool_index_tensor_pass.py
@@ -6,13 +6,13 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.rewrite_index_put_pass import RewriteIndexPutPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class NormalizeIndexPutBoolIndexTensorPass(ArmPass):
+class NormalizeIndexPutBoolIndexTensorPass(ArmOpTargetedPass):
     """Normalize  single boolean mask index_put scalar to where.
     In the general case, boolean masks are complex and data dependent. The simple case
     x[mask] = scalar
@@ -30,6 +30,7 @@ class NormalizeIndexPutBoolIndexTensorPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {RewriteIndexPutPass}
+    target_ops = (exir_ops.edge.aten.index_put.default,)
 
     def __init__(self):
         super().__init__()
@@ -57,7 +58,7 @@ def _is_valid_bool_mask(
         return True
 
     def call_operator(self, op, args, kwargs, meta, updated: bool | None = False):
-        if op not in (exir_ops.edge.aten.index_put.default,):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         destination, indices_tensor_list, data = args[:3]
diff --git a/backends/arm/_passes/normalize_index_put_none_indices_pass.py b/backends/arm/_passes/normalize_index_put_none_indices_pass.py
index 7aaace641b0..3afc9732b02 100644
--- a/backends/arm/_passes/normalize_index_put_none_indices_pass.py
+++ b/backends/arm/_passes/normalize_index_put_none_indices_pass.py
@@ -4,13 +4,13 @@
 # LICENSE file in the root directory of this source tree.
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.rewrite_index_put_pass import RewriteIndexPutPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class NormalizeIndexPutNoneIndicesPass(ArmPass):
+class NormalizeIndexPutNoneIndicesPass(ArmOpTargetedPass):
     """Normalize index_put with None:s in the indices_tensor list by moving
     None-indexed dims to the channel dimensions (*C_j in RewriteIndexPutPass
     teminology) by permutating the destination and data tensors. A None-index
@@ -41,6 +41,7 @@ class NormalizeIndexPutNoneIndicesPass(ArmPass):
     """
 
     _passes_required_after: Set[Type[ExportPass]] = {RewriteIndexPutPass}
+    target_ops = (exir_ops.edge.aten.index_put.default,)
 
     def __init__(self):
         super().__init__()
@@ -67,7 +68,7 @@ def _get_data_dim_order(
             return destination_dim_order
 
     def call_operator(self, op, args, kwargs, meta, updated: bool | None = False):
-        if op not in (exir_ops.edge.aten.index_put.default,):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         destination, indices_tensor_list, data = args[:3]
diff --git a/backends/arm/_passes/promote_bool_operands_pass.py b/backends/arm/_passes/promote_bool_operands_pass.py
index 4d02646e30a..8e162ded1bd 100644
--- a/backends/arm/_passes/promote_bool_operands_pass.py
+++ b/backends/arm/_passes/promote_bool_operands_pass.py
@@ -11,19 +11,19 @@
 
 import torch
 
-from executorch.backends.arm._passes.arm_pass import ArmPass
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class PromoteBoolOperandsPass(ArmPass):
+class PromoteBoolOperandsPass(ArmOpTargetedPass):
     """Promote boolean operands to the appropriate integer dtype for unsupported
     ops.
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
 
-    targeted_ops = {
+    target_ops = {
         exir_ops.edge.aten.bitwise_and.Tensor,
         exir_ops.edge.aten.bitwise_or.Tensor,
         exir_ops.edge.aten.bitwise_xor.Tensor,
@@ -31,7 +31,7 @@ class PromoteBoolOperandsPass(ArmPass):
     }
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self.targeted_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         original_dtypes = [arg.data.dtype for arg in args]
diff --git a/backends/arm/_passes/remove_noop_pass.py b/backends/arm/_passes/remove_noop_pass.py
index c7fe469c8b8..5fafc848003 100644
--- a/backends/arm/_passes/remove_noop_pass.py
+++ b/backends/arm/_passes/remove_noop_pass.py
@@ -8,7 +8,7 @@
 import logging
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
@@ -16,19 +16,20 @@
 logger = logging.getLogger(__name__)
 
 
-class RemoveNoopPass(ArmPass):
+class RemoveNoopPass(ArmOpTargetedPass):
     """Remove no-ops from graph_module."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (
+        exir_ops.edge.dim_order_ops._clone_dim_order.default,
+        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+        exir_ops.edge.aten.alias_copy.default,
+        exir_ops.edge.aten.copy.default,
+        exir_ops.edge.aten.detach_copy.default,
+    )
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in (
-            exir_ops.edge.dim_order_ops._clone_dim_order.default,
-            exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
-            exir_ops.edge.aten.alias_copy.default,
-            exir_ops.edge.aten.copy.default,
-            exir_ops.edge.aten.detach_copy.default,
-        ):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         input_dtype = args[0].data.dtype
diff --git a/backends/arm/_passes/rewrite_avg_pool2d_pass.py b/backends/arm/_passes/rewrite_avg_pool2d_pass.py
index bf81505d923..6427b571218 100644
--- a/backends/arm/_passes/rewrite_avg_pool2d_pass.py
+++ b/backends/arm/_passes/rewrite_avg_pool2d_pass.py
@@ -6,7 +6,7 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import to_2tuple
 from executorch.backends.arm.constants import NHWC_INVERSE_ORDER, NHWC_ORDER
 from executorch.backends.arm.operators.operator_validation_utils import (
@@ -18,11 +18,11 @@
 from .fuse_constant_ops_pass import ComputeConstantOpsAOTPass
 
 
-class RewriteAvgPool2dPass(ArmPass):
+class RewriteAvgPool2dPass(ArmOpTargetedPass):
     """Rewrite aten.avg_pool2d calls to TOSA AVG_POOL2D op."""
 
     # Target the original avg_pool2d operator
-    targeted_ops = {exir_ops.edge.aten.avg_pool2d.default}
+    target_ops = {exir_ops.edge.aten.avg_pool2d.default}
     _passes_required_after: Set[Type[ExportPass]] = {
         ComputeConstantOpsAOTPass,
     }
@@ -30,7 +30,7 @@ class RewriteAvgPool2dPass(ArmPass):
     def call_operator(self, op, args, kwargs, meta, updated=False):
 
         # Only rewrite avg_pool2d
-        if op not in self.targeted_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         x = args[0]
diff --git a/backends/arm/_passes/rewrite_bool_bitwise_to_logical_pass.py b/backends/arm/_passes/rewrite_bool_bitwise_to_logical_pass.py
index 8c6bf6f39ec..962bdbbaf6e 100644
--- a/backends/arm/_passes/rewrite_bool_bitwise_to_logical_pass.py
+++ b/backends/arm/_passes/rewrite_bool_bitwise_to_logical_pass.py
@@ -7,12 +7,12 @@
 from typing import Set, Type
 
 import torch
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class RewriteBoolBitwiseToLogicalPass(ArmPass):
+class RewriteBoolBitwiseToLogicalPass(ArmOpTargetedPass):
     """Rewrites ``aten.bitwise_*`` on boolean tensors to ``aten.logical_*``.
 
     TOSA ``bitwise_*`` does not support boolean inputs. On boolean tensors,
@@ -32,9 +32,10 @@ class RewriteBoolBitwiseToLogicalPass(ArmPass):
         exir_ops.edge.aten.bitwise_xor.Tensor: exir_ops.edge.aten.logical_xor.default,
         exir_ops.edge.aten.bitwise_xor.Scalar: exir_ops.edge.aten.logical_xor.default,
     }
+    target_ops = tuple(_TARGET_TO_LOGICAL)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._TARGET_TO_LOGICAL:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if meta["val"].dtype == torch.bool:
diff --git a/backends/arm/_passes/rewrite_high_rank_singleton_permute_pass.py b/backends/arm/_passes/rewrite_high_rank_singleton_permute_pass.py
index 1c0bac0ba9c..40a7935f050 100644
--- a/backends/arm/_passes/rewrite_high_rank_singleton_permute_pass.py
+++ b/backends/arm/_passes/rewrite_high_rank_singleton_permute_pass.py
@@ -5,12 +5,12 @@
 
 from typing import Sequence, Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class RewriteHighRankSingletonPermutePass(ArmPass):
+class RewriteHighRankSingletonPermutePass(ArmOpTargetedPass):
     """Rewrite high-rank permute via a lower-rank permute when singleton dims
     allow it.
 
@@ -30,6 +30,7 @@ class RewriteHighRankSingletonPermutePass(ArmPass):
         exir_ops.edge.aten.permute.default,
         exir_ops.edge.aten.permute_copy.default,
     )
+    target_ops = _PERMUTE_OPS
 
     @staticmethod
     def _extract_permutation(permutation_arg: object) -> tuple[int, ...] | None:
@@ -46,7 +47,7 @@ def _normalize_permutation(
         return tuple(dim % rank for dim in permutation)
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in self._PERMUTE_OPS:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
         if len(args) < 2:
             return super().call_operator(op, args, kwargs, meta)
diff --git a/backends/arm/_passes/rewrite_index_put_pass.py b/backends/arm/_passes/rewrite_index_put_pass.py
index c0898673fd7..8f2ab4bb830 100644
--- a/backends/arm/_passes/rewrite_index_put_pass.py
+++ b/backends/arm/_passes/rewrite_index_put_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
     ConvertExpandCopyToRepeatPass,
 )
@@ -31,7 +31,7 @@ def calculate_data_stride(destination_shape: list[int]) -> list[int]:
     return data_strides
 
 
-class RewriteIndexPutPass(ArmPass):
+class RewriteIndexPutPass(ArmOpTargetedPass):
     """
     This pass transforms index_put with arguments
         - destination, of shape (*K_i, *C_j)
@@ -69,6 +69,7 @@ def __init__(self):
         FuseViewCopyTransformPass,
         ConvertExpandCopyToRepeatPass,
     }
+    target_ops = (exir_ops.edge.aten.index_put.default,)
 
     def _calculate_flat_indices(
         self,
@@ -121,7 +122,7 @@ def _calculate_flat_indices(
         )
 
     def call_operator(self, op, args, kwargs, meta, updated: bool | None = False):
-        if op not in (exir_ops.edge.aten.index_put.default,):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         destination, indices_tensor_list, data = args[:3]
diff --git a/backends/arm/_passes/rewrite_inplace_arithmetic_pass.py b/backends/arm/_passes/rewrite_inplace_arithmetic_pass.py
index f5a484343c5..72683b353ce 100644
--- a/backends/arm/_passes/rewrite_inplace_arithmetic_pass.py
+++ b/backends/arm/_passes/rewrite_inplace_arithmetic_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -23,10 +23,12 @@
 }
 
 
-class RewriteInplaceArithmeticPass(ArmPass):
+class RewriteInplaceArithmeticPass(ArmOpTargetedPass):
     """Rewrite inplace arithmetic ops into functional equivalents."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = tuple(OP_MAP)
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
         if not self.allowed_to_transform(meta):
diff --git a/backends/arm/_passes/rewrite_le_lt_to_ge_gt_pass.py b/backends/arm/_passes/rewrite_le_lt_to_ge_gt_pass.py
index 9119567b7aa..c73279e65d0 100644
--- a/backends/arm/_passes/rewrite_le_lt_to_ge_gt_pass.py
+++ b/backends/arm/_passes/rewrite_le_lt_to_ge_gt_pass.py
@@ -7,7 +7,7 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -19,10 +19,12 @@
 }
 
 
-class RewriteLeLtToGeGtPass(ArmPass):
+class RewriteLeLtToGeGtPass(ArmOpTargetedPass):
     """Rewrite le/lt into ge/gt with swapped inputs."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = tuple(OP_MAP)
+    check_allowed_to_transform = True
 
     def call_operator(self, op, args, kwargs, meta):
         if not self.allowed_to_transform(meta):
diff --git a/backends/arm/_passes/rewrite_max_pool2d_pass.py b/backends/arm/_passes/rewrite_max_pool2d_pass.py
index 8a59f2bd4ac..8debb322a6d 100644
--- a/backends/arm/_passes/rewrite_max_pool2d_pass.py
+++ b/backends/arm/_passes/rewrite_max_pool2d_pass.py
@@ -5,7 +5,7 @@
 
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.backends.arm._passes.arm_pass_utils import to_2tuple
 from executorch.backends.arm.constants import NHWC_INVERSE_ORDER, NHWC_ORDER
 from executorch.backends.arm.operators.operator_validation_utils import (
@@ -17,13 +17,14 @@
 edge_max_pool2d_ops = (exir_ops.edge.aten.max_pool2d.default,)
 
 
-class RewriteMaxPool2dPass(ArmPass):
+class RewriteMaxPool2dPass(ArmOpTargetedPass):
     """Rewrite max_pool2d ops to TOSA MAX_POOL2D."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = edge_max_pool2d_ops
 
     def call_operator(self, op, args, kwargs, meta):
-        if op not in edge_max_pool2d_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         x = args[0]
diff --git a/backends/arm/_passes/rewrite_pad.py b/backends/arm/_passes/rewrite_pad.py
index 40523fb559a..250fccab38b 100644
--- a/backends/arm/_passes/rewrite_pad.py
+++ b/backends/arm/_passes/rewrite_pad.py
@@ -8,18 +8,18 @@
 
 import torch
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 
-class RewritePadPass(ArmPass):
+class RewritePadPass(ArmOpTargetedPass):
     """Rewrite constant_pad_nd operator to TOSA Pad operator with constant
     mode.
     """
 
     _passes_required_after: Set[Type[ExportPass]] = set()
-    targeted_ops = {
+    target_ops = {
         exir_ops.edge.aten.constant_pad_nd.default,
         exir_ops.edge.aten.pad.default,
     }
@@ -145,7 +145,7 @@ def _rewrite_non_constant_pad(
         return output
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op not in self.targeted_ops:
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         if op == exir_ops.edge.aten.constant_pad_nd.default:
diff --git a/backends/arm/_passes/rewrite_slice.py b/backends/arm/_passes/rewrite_slice.py
index c0f6e1b6573..2aab2e16539 100644
--- a/backends/arm/_passes/rewrite_slice.py
+++ b/backends/arm/_passes/rewrite_slice.py
@@ -4,7 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 from typing import Set, Type
 
-from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes import ArmOpTargetedPass
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, ProxyValue
@@ -12,10 +12,11 @@
 from torch import SymInt
 
 
-class RewriteSlicePass(ArmPass):
+class RewriteSlicePass(ArmOpTargetedPass):
     """Rewrite slice operations with step of 1 to TOSA slice operators."""
 
     _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (exir_ops.edge.aten.slice_copy.Tensor,)
 
     def _fixup_start(self, start, input_shape, dim) -> int:
         """Convert negative and out-of-bounds start indices to valid positive
@@ -29,7 +30,7 @@ def _fixup_start(self, start, input_shape, dim) -> int:
         return idx
 
     def call_operator(self, op, args, kwargs, meta, updated=False) -> ProxyValue:
-        if op not in (exir_ops.edge.aten.slice_copy.Tensor,):
+        if op not in self.target_ops:
             return super().call_operator(op, args, kwargs, meta, updated)
 
         if len(args) == 5 and args[4] != 1:
diff --git a/backends/arm/test/passes/test_arm_op_targeted_pass.py b/backends/arm/test/passes/test_arm_op_targeted_pass.py
new file mode 100644
index 00000000000..5c213d4c4b9
--- /dev/null
+++ b/backends/arm/test/passes/test_arm_op_targeted_pass.py
@@ -0,0 +1,150 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import operator
+from typing import Set, Type
+
+import torch
+from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass
+from executorch.backends.arm._passes.arm_pass_manager import ArmPassManager
+from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
+from executorch.backends.arm.tosa.compile_spec import TosaCompileSpec
+from executorch.backends.arm.tosa.specification import TosaSpecification
+from executorch.exir.pass_base import ExportPass
+from torch.fx import Graph, GraphModule
+from torch.fx.passes.infra.pass_base import PassResult
+
+
+TARGET_OP = torch.ops.aten.add.Tensor
+OTHER_OP = operator.add
+
+
+def create_graph_module(target=OTHER_OP, disallow_tfa: bool = False) -> GraphModule:
+    graph = Graph()
+    lhs = graph.placeholder("lhs")
+    rhs = graph.placeholder("rhs")
+    lhs.meta["val"] = torch.randn(2, 3)
+    rhs.meta["val"] = torch.randn(2, 3)
+    node = graph.call_function(target, (lhs, rhs))
+    node.meta["val"] = torch.randn(2, 3)
+    if disallow_tfa:
+        node.meta[DISALLOW_TFA_META_KEY] = True
+    graph.output(node)
+    return GraphModule(torch.nn.Module(), graph)
+
+
+def create_test_pass_manager() -> ArmPassManager:
+    compile_spec = TosaCompileSpec(
+        TosaSpecification.create_from_string("TOSA-1.00+INT")
+    )
+    return ArmPassManager(compile_spec)
+
+
+def run_single_pass(graph_module: GraphModule, test_pass: ExportPass) -> PassResult:
+    pass_manager = create_test_pass_manager()
+    pass_manager.add_pass(test_pass)
+    return pass_manager(graph_module)
+
+
+class DummyTargetedPass(ArmOpTargetedPass):
+    _passes_required_after: Set[Type[ExportPass]] = set()
+    target_ops = (TARGET_OP,)
+    check_allowed_to_transform = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.call_operator_count = 0
+
+    def call_operator(self, op, args, kwargs, meta):
+        self.call_operator_count += 1
+        return super().call_operator(op, args, kwargs, meta)
+
+
+class InsertTargetPass(ExportPass):
+    def call(self, graph_module: GraphModule) -> PassResult:
+        graph = graph_module.graph
+        placeholders = [node for node in graph.nodes if node.op == "placeholder"]
+        output = next(node for node in graph.nodes if node.op == "output")
+
+        with graph.inserting_before(output):
+            target_node = graph.call_function(
+                TARGET_OP,
+                (placeholders[0], placeholders[1]),
+            )
+            target_node.meta["val"] = torch.randn(2, 3)
+        output.args = (target_node,)
+        graph.lint()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
+
+
+class CondModule(torch.nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        def true_branch(arg: torch.Tensor) -> torch.Tensor:
+            return arg + 1
+
+        def false_branch(arg: torch.Tensor) -> torch.Tensor:
+            return arg - 1
+
+        return torch.cond(x.sum() > 0, true_branch, false_branch, [x])
+
+
+def test_skips_when_target_is_absent() -> None:
+    graph_module = create_graph_module()
+    targeted_pass = DummyTargetedPass()
+
+    result = run_single_pass(graph_module, targeted_pass)
+
+    assert result is not None
+    assert result.graph_module is graph_module
+    assert not result.modified
+    assert targeted_pass.call_operator_count == 0
+
+
+def test_runs_when_target_is_present() -> None:
+    graph_module = create_graph_module(TARGET_OP)
+    targeted_pass = DummyTargetedPass()
+
+    result = run_single_pass(graph_module, targeted_pass)
+
+    assert result is not None
+    assert result.modified
+    assert targeted_pass.call_operator_count == 1
+
+
+def test_skips_tfa_disallowed_target() -> None:
+    graph_module = create_graph_module(TARGET_OP, disallow_tfa=True)
+    targeted_pass = DummyTargetedPass(tfa_pass=True)
+
+    result = run_single_pass(graph_module, targeted_pass)
+
+    assert result is not None
+    assert result.graph_module is graph_module
+    assert not result.modified
+    assert targeted_pass.call_operator_count == 0
+
+
+def test_runs_when_previous_pass_creates_target() -> None:
+    graph_module = create_graph_module()
+    pass_manager = create_test_pass_manager()
+    targeted_pass = DummyTargetedPass()
+    pass_manager.add_pass(InsertTargetPass())
+    pass_manager.add_pass(targeted_pass)
+    result = pass_manager(graph_module)
+
+    assert result.modified
+    assert targeted_pass.call_operator_count == 1
+
+
+def test_runs_when_target_is_present_in_nested_submodule() -> None:
+    exported_program = torch.export.export(CondModule(), (torch.randn(2, 3),))
+    graph_module = exported_program.graph_module
+    targeted_pass = DummyTargetedPass()
+
+    result = run_single_pass(graph_module, targeted_pass)
+
+    assert result is not None
+    assert result.modified
+    assert targeted_pass.call_operator_count > 0

From ad4d19057d0184ba7aa72d3355a2365dd8a8cc09 Mon Sep 17 00:00:00 2001
From: George Gekov <george.gekov@arm.com>
Date: Mon, 11 May 2026 17:17:20 +0100
Subject: [PATCH 077/103] Arm backend: Fix Smollm2 model test

- Export & lower the smollm2 via extensions/llm/export_llm
- Build the arm_executor_runner application
- Fix the propagation of select_ops_list in the CMakeLists.txt
- Test the application runs on FVP in fast mode

Signed-off-by: George Gekov <george.gekov@arm.com>
Change-Id: I8acd87c2f5c3e6b5b189bb987ceccfe4877e2254
---
 backends/arm/scripts/build_executorch.sh    |  3 ++
 backends/arm/test/test_arm_backend.sh       | 38 ++++++++++++++++++---
 examples/arm/executor_runner/CMakeLists.txt |  1 -
 examples/arm/run.sh                         |  2 +-
 4 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh
index 5ebc0eb46b4..362fc4d40bf 100755
--- a/backends/arm/scripts/build_executorch.sh
+++ b/backends/arm/scripts/build_executorch.sh
@@ -97,6 +97,9 @@ cmake_args=(
     -DEXECUTORCH_BUILD_ARM_ETDUMP=${build_with_etdump}
     -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF
 )
+if ((${#extra_cmake_args[@]})); then
+      cmake_args+=("${extra_cmake_args[@]}")
+fi
 
 if [[ ${#extra_cmake_args[@]} -gt 0 ]]; then
     cmake_args+=("${extra_cmake_args[@]}")
diff --git a/backends/arm/test/test_arm_backend.sh b/backends/arm/test/test_arm_backend.sh
index be48d7ad234..26f30974a9c 100755
--- a/backends/arm/test/test_arm_backend.sh
+++ b/backends/arm/test/test_arm_backend.sh
@@ -302,11 +302,41 @@ test_deit_e2e_ethos_u() {
 test_model_smollm2_135M() {
     echo "${TEST_SUITE_NAME}: Test SmolLM2-135M on Ethos-U85"
 
-    # Build common libs once
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --build_libs
-
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=smollm2 --extra_flags="-DEXECUTORCH_SELECT_OPS_LIST=dim_order_ops::_to_dim_order_copy.out" --specify_ethosu_scratch
+    backends/arm/scripts/build_executorch.sh
 
+    # Build pte for smollm2
+    python -m extension.llm.export.export_llm \
+        base.model_class=smollm2 \
+        base.params=examples/models/smollm2/135M_config.json \
+        debug.verbose=True model.enable_dynamic_shape=False quantization.pt2e_quantize="ethosu_8a8w" \
+        backend.ethosu.enabled=True backend.ethosu.target="ethos-u85-256" backend.ethosu.memory_mode=Dedicated_Sram_384KB
+
+    # Build the arm_executor_runner application, pre-loading the pte in the DDR for faster linking
+    local pte_addr="0x76000000"
+    backends/arm/scripts/build_executor_runner.sh \
+      --et_build_root="${et_root_dir}/arm_test" \
+      --pte="${pte_addr}" \
+      --build_type=Release \
+      --target=ethos-u85-256 \
+      --system_config=Ethos_U85_SYS_DRAM_Mid \
+      --memory_mode=Dedicated_Sram_384KB \
+      --ethosu_tools_dir="${scratch_dir}" \
+      --toolchain=arm-none-eabi-gcc \
+      --extra_build_flags="-DET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=0x20000" \
+      --select_ops_list="dim_order_ops::_to_dim_order_copy.out" 
+
+
+    # Deploy the application on the FVP in fast mode
+    FVP_Corstone_SSE-320 -C mps4_board.subsystem.ethosu.num_macs=256 \
+        -C mps4_board.visualisation.disable-visualisation=1 \
+        -C vis_hdlcd.disable_visualisation=1 \
+        -C mps4_board.telnetterminal0.start_telnet=0 \
+        -C mps4_board.uart0.out_file='-' \
+        -C mps4_board.uart0.shutdown_on_eot=1 \
+        -a "${et_root_dir}"/arm_test/ethos-u85-256_${pte_addr}/cmake-out/arm_executor_runner \
+        -C mps4_board.subsystem.ethosu.extra_args="--fast" \
+        --data smollm2.pte@"${pte_addr}"
+    
     echo "${TEST_SUITE_NAME}: PASS"
 }
 
diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt
index d84947a75ad..88050a2ae77 100644
--- a/examples/arm/executor_runner/CMakeLists.txt
+++ b/examples/arm/executor_runner/CMakeLists.txt
@@ -349,7 +349,6 @@ elseif(FOUND_OPS_IN_FILE)
     "gen_oplist:  EXECUTORCH_SELECT_OPS_MODEL=${ET_PTE_FILE_PATH} is used to auto generate ops from"
   )
 else()
-  set(EXECUTORCH_SELECT_OPS_LIST "")
   set(EXECUTORCH_SELECT_OPS_MODEL "")
   message(
     "gen_oplist: No non delagated ops was found in ${ET_PTE_FILE_PATH} no ops added to build"
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
index cfbcae2dbad..3ef4b0b829b 100755
--- a/examples/arm/run.sh
+++ b/examples/arm/run.sh
@@ -659,7 +659,7 @@ configure_ethosu_scratch_if_requested() {
         return
     fi
     local scratch_size
-    scratch_size=$(get_ethosu_scratch_size "$pte_path" || true)
+    scratch_size=$(get_ethosu_scratch_size "$pte_path" | tail -n 1)
     if [[ -z "${scratch_size}" ]]; then
         echo "WARNING: Failed to derive Ethos-U scratch size from ${pte_path}" >&2
         return

From b0441b50be603a6312c6857d359e47b049fd67c7 Mon Sep 17 00:00:00 2001
From: George Gekov <george.gekov@arm.com>
Date: Fri, 29 May 2026 11:15:47 +0100
Subject: [PATCH 078/103] Change python to python3 in shell script

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 backends/arm/test/test_arm_backend.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/arm/test/test_arm_backend.sh b/backends/arm/test/test_arm_backend.sh
index 26f30974a9c..1cb9e135d00 100755
--- a/backends/arm/test/test_arm_backend.sh
+++ b/backends/arm/test/test_arm_backend.sh
@@ -305,7 +305,7 @@ test_model_smollm2_135M() {
     backends/arm/scripts/build_executorch.sh
 
     # Build pte for smollm2
-    python -m extension.llm.export.export_llm \
+    python3 -m extension.llm.export.export_llm \
         base.model_class=smollm2 \
         base.params=examples/models/smollm2/135M_config.json \
         debug.verbose=True model.enable_dynamic_shape=False quantization.pt2e_quantize="ethosu_8a8w" \

From cf6daa9b1cb354de33528cb3eff1ccbe443ad2df Mon Sep 17 00:00:00 2001
From: Jacob Stevens <stevens.jacob1492@gmail.com>
Date: Fri, 29 May 2026 09:46:24 -0400
Subject: [PATCH 079/103] Add short function support (#19846)

Summary:
Currently, __builtin_FUNCTION is used opportunistically if it exists.


However, for heavily templated code, this results in extremely long
string which adds .rodata which can be wasteful on embedded targets.


This commit adds an override which uses the shorter __FUNCTION__ even if
__bultin_FUNCTION exists and exposes as a BUCK constraint.

Integration into CMake intentially left out for now.

Differential Revision: D106668077
---
 runtime/executor/targets.bzl | 10 ++++++++--
 runtime/platform/compiler.h  | 17 +++++++++++++---
 runtime/platform/targets.bzl |  4 ++++
 tools/buck/constraints/BUCK  | 38 ++++++++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/runtime/executor/targets.bzl b/runtime/executor/targets.bzl
index 90f8d0221e9..81d0a58667f 100644
--- a/runtime/executor/targets.bzl
+++ b/runtime/executor/targets.bzl
@@ -16,8 +16,14 @@ def _program_preprocessor_flags():
     if enable_verification == "false":
         return ["-DET_ENABLE_PROGRAM_VERIFICATION=0"]
     elif enable_verification == "true":
-        # Enabled by default.
-        return []
+        # Enabled by default; allow opt-out via constraint
+        if not runtime.is_oss:
+            return select({
+                "DEFAULT": [],
+                "fbsource//xplat/executorch/tools/buck/constraints:executorch-program-verification-disabled": ["-DET_ENABLE_PROGRAM_VERIFICATION=0"],
+            })
+        else:
+            return []
     else:
         fail("executorch.enable_program_verification must be one of 'true' or 'false'; saw '" +
              enable_verification + "'")
diff --git a/runtime/platform/compiler.h b/runtime/platform/compiler.h
index edd340d1fb0..692d590f44c 100644
--- a/runtime/platform/compiler.h
+++ b/runtime/platform/compiler.h
@@ -138,8 +138,14 @@
 #define __has_builtin(x) (0)
 #endif
 
-#if __has_builtin(__builtin_strrchr)
+#if defined(__FILE_NAME__)
+/// __FILE_NAME__ provides just the filename at
+/// compile time, avoiding embedding full paths in the binary
+#define ET_SHORT_FILENAME __FILE_NAME__
+#elif __has_builtin(__builtin_strrchr)
 /// Name of the source file without a directory string.
+/// Note: This approach embeds the full path in .rodata even though only the
+/// basename is used at runtime. __FILE_NAME__ is preferred when available.
 #define ET_SHORT_FILENAME (__builtin_strrchr("/" __FILE__, '/') + 1)
 #else
 #define ET_SHORT_FILENAME __FILE__
@@ -152,12 +158,17 @@
 #define ET_LINE __LINE__
 #endif // __has_builtin(__builtin_LINE)
 
-#if __has_builtin(__builtin_FUNCTION)
+#if defined(ET_USE_BUILTIN_FUNCTION_NAME) && ET_USE_BUILTIN_FUNCTION_NAME == 0
+/// __FUNCTION__ provides a short undecorated name, saving .rodata space
+/// compared to __builtin_FUNCTION() which includes the full signature
+/// (namespace, parameters, return type).
+#define ET_FUNCTION __FUNCTION__
+#elif __has_builtin(__builtin_FUNCTION)
 /// Name of the current function as a const char[].
 #define ET_FUNCTION __builtin_FUNCTION()
 #else
 #define ET_FUNCTION __FUNCTION__
-#endif // __has_builtin(__builtin_FUNCTION)
+#endif
 
 // As of G3 RJ-2024.3 toolchain, zu format specifier is not supported for Xtensa
 #if defined(__XTENSA__)
diff --git a/runtime/platform/targets.bzl b/runtime/platform/targets.bzl
index 65d92b134d6..63b8cb553ef 100644
--- a/runtime/platform/targets.bzl
+++ b/runtime/platform/targets.bzl
@@ -116,5 +116,9 @@ def define_common_targets():
         exported_headers = [
             "compiler.h",
         ],
+        exported_preprocessor_flags = select({
+            "DEFAULT": [],
+            "fbsource//xplat/executorch/tools/buck/constraints:executorch-builtin-function-name-disabled": ["-DET_USE_BUILTIN_FUNCTION_NAME=0"],
+        }) if not runtime.is_oss else [],
         visibility = ["PUBLIC"],
     )
diff --git a/tools/buck/constraints/BUCK b/tools/buck/constraints/BUCK
index b558bb9e4a4..49fbaabe06f 100644
--- a/tools/buck/constraints/BUCK
+++ b/tools/buck/constraints/BUCK
@@ -61,3 +61,41 @@ fb_native.constraint_value(
     constraint_setting = ":executorch-event-tracer",
     visibility = ["PUBLIC"],
 )
+
+fb_native.config_setting(
+    name = "executorch-program-verification-disabled",
+    constraint_values = [
+        ":program-verification-disabled",
+    ],
+    visibility = ["PUBLIC"],
+)
+
+fb_native.constraint_setting(
+    name = "executorch-program-verification",
+    visibility = ["PUBLIC"],
+)
+
+fb_native.constraint_value(
+    name = "program-verification-disabled",
+    constraint_setting = ":executorch-program-verification",
+    visibility = ["PUBLIC"],
+)
+
+fb_native.config_setting(
+    name = "executorch-builtin-function-name-disabled",
+    constraint_values = [
+        ":builtin-function-name-disabled",
+    ],
+    visibility = ["PUBLIC"],
+)
+
+fb_native.constraint_setting(
+    name = "executorch-builtin-function-name",
+    visibility = ["PUBLIC"],
+)
+
+fb_native.constraint_value(
+    name = "builtin-function-name-disabled",
+    constraint_setting = ":executorch-builtin-function-name",
+    visibility = ["PUBLIC"],
+)

From 88faab264734e7c6b4640d30485ebafa717189a1 Mon Sep 17 00:00:00 2001
From: Jacob Stevens <stevens.jacob1492@gmail.com>
Date: Fri, 29 May 2026 09:46:37 -0400
Subject: [PATCH 080/103] Opportunistically use __FILE_NAME__ to get filename
 (#19834) (#19834)

Summary:

The current approach use __FILE__ and opportunistically trims it if the
utility is available.

However, the long name is still stored in .rodata

This can contribute some memory on embedded platforms.


Instead, first try __FILE_NAME__

Differential Revision: D106587633

From 84c0484d15c9bc96e05384a93e9ee174e81351fe Mon Sep 17 00:00:00 2001
From: SS-JIA <ssjia@meta.com>
Date: Fri, 29 May 2026 13:30:30 -0400
Subject: [PATCH 081/103] Fix ghstack merge bot failing to parse PR stack
 header

Summary:

ghstack 0.15.0 changed the header URL in PR bodies from
`Stack from [ghstack](https://github.com/ezyang/ghstack)` to
`Stack from [ghstack](https://github.com/ezyang/ghstack/tree/0.15.0)`.

The exact string match in `propose_ghstack_orig_pr.py` no longer matched,
causing every ghstack_land workflow run to fail since May 14. Use
`startswith("Stack from [ghstack]")` instead to be resilient to URL changes.

Test Plan:

Verified the new pattern matches both the old format
(`https://github.com/ezyang/ghstack`) and the new format
(`https://github.com/ezyang/ghstack/tree/0.15.0`).

This PR was authored with the help of Claude.

Reviewers:
---
 .github/scripts/propose_ghstack_orig_pr.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/scripts/propose_ghstack_orig_pr.py b/.github/scripts/propose_ghstack_orig_pr.py
index 3abcc6cdcf9..f41e03f18ff 100644
--- a/.github/scripts/propose_ghstack_orig_pr.py
+++ b/.github/scripts/propose_ghstack_orig_pr.py
@@ -52,12 +52,9 @@ def extract_stack_from_body(pr_body: str) -> List[int]:
     """
 
     prs = []
-    ghstack_begin = (
-        "Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom):"
-    )
     ghstack_begin_seen = False
     for line in pr_body.splitlines():
-        if ghstack_begin in line:
+        if line.startswith("Stack from [ghstack]"):
             ghstack_begin_seen = True
         if not ghstack_begin_seen:
             continue

From d1c80af479dba2040444959e6b9e7264abbcf377 Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm26340.ftw0.facebook.com>
Date: Fri, 29 May 2026 07:29:56 -0700
Subject: [PATCH 082/103] [ET-VK][tests][1/N] Report disabled delegate tests as
 executed

Pull Request resolved: https://github.com/pytorch/executorch/pull/19867

Some environments preserve stale failure state when tests are reported through unittest skip results. This switches currently disabled Vulkan delegate coverage to a local decorator so those tests stay discoverable, log their disabled reason, and produce an executed result.

ghstack-source-id: 387629544
@exported-using-ghexport

Differential Revision: [D106732141](https://our.internmc.facebook.com/intern/diff/D106732141/)
---
 backends/vulkan/test/test_vulkan_delegate.py | 41 ++++++++++++++------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 7c9f31b720c..ff709618259 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -7,6 +7,7 @@
 # pyre-unsafe
 
 import ctypes
+import functools
 import unittest
 from typing import Tuple
 
@@ -42,6 +43,24 @@
     pass
 
 
+def disable_test(reason):
+    """Disable a test while still reporting it as executed.
+
+    Some test runners do not handle skipped results consistently, so this keeps
+    disabled tests visible in logs without using unittest.skip.
+    """
+
+    def decorator(fn):
+        @functools.wraps(fn)
+        def wrapper(*args, **kwargs):
+            print(f"DISABLED_TEST: {fn.__qualname__}: {reason}")
+            return None
+
+        return wrapper
+
+    return decorator
+
+
 def lower_module(
     model: torch.nn.Module, sample_inputs: Tuple[torch.Tensor], dynamic_shapes=None
 ) -> EdgeProgramManager:
@@ -743,7 +762,7 @@ def forward(self, x):
 
         self.lower_module_and_test_output(model, sample_inputs)
 
-    @unittest.skip(
+    @disable_test(
         "Currently this test is failing due to weird partitioning because the eq scalar"
         "operator is not supported yet. Re-enable when the operator is supported."
     )
@@ -810,7 +829,7 @@ def forward(self, x):
 
         self.lower_module_and_test_output(module, sample_inputs)
 
-    @unittest.skip(
+    @disable_test(
         "Reduce shader does not support multiple reduction axes at the moment"
     )
     def test_vulkan_backend_sum_dim_list(self):
@@ -831,7 +850,7 @@ def forward(self, x):
             sample_inputs,
         )
 
-    @unittest.skip(
+    @disable_test(
         "Reduce shader does not support multiple reduction axes at the moment"
     )
     def test_vulkan_backend_sum(self):
@@ -1028,7 +1047,7 @@ def forward(self, x):
             sample_inputs,
         )
 
-    @unittest.skip("layer norm compute shader not working with swiftshader")
+    @disable_test("layer norm compute shader not working with swiftshader")
     def test_vulkan_backend_native_layer_norm(self):
         class NativeLayerNormModule(torch.nn.Module):
             def __init__(self):
@@ -1459,7 +1478,7 @@ def forward(self, x):
             sample_inputs,
         )
 
-    @unittest.skip(
+    @disable_test(
         "Softmax shader with shared memory does not work with swiftshader due to potential swiftshader bug"
     )
     def test_vulkan_backend_softmax(self):
@@ -1480,7 +1499,7 @@ def forward(self, x):
             sample_inputs,
         )
 
-    @unittest.skip(
+    @disable_test(
         "Softmax shader with shared memory does not work with swiftshader due to potential swiftshader bug"
     )
     def test_vulkan_backend_logsoftmax(self):
@@ -1512,7 +1531,7 @@ def forward(self, x):
 
         self.lower_unary_module_and_test_output(GeluModule())
 
-    @unittest.skip(
+    @disable_test(
         "Reduce shader does not support multiple reduction axes at the moment"
     )
     def test_vulkan_backend_mean(self):
@@ -2364,7 +2383,7 @@ def apply_quantization(self):
             quantized_linear_module_gemm, sample_inputs_gemm, atol=1e-2, rtol=1e-2
         )
 
-    @unittest.skip("Cannot run on swiftshader due to no integer dot product support")
+    @disable_test("Cannot run on swiftshader due to no integer dot product support")
     def test_vulkan_backend_xnnpack_pt2e_quantized_linear_sequence(self):
         """
         Test a sequence of linear layers quantized with XNNPACK quantization config.
@@ -2439,7 +2458,7 @@ def forward(self, x):
             rtol=1e-1,
         )
 
-    @unittest.skip("Cannot run on swiftshader due to no integer dot product support")
+    @disable_test("Cannot run on swiftshader due to no integer dot product support")
     def test_vulkan_backend_xnnpack_pt2e_quantized_conv_sequence(self):
         """
         Test a sequence of convolution layers quantized with PT2E quantization.
@@ -2530,7 +2549,7 @@ def forward(self, x):
             rtol=1e-1,
         )
 
-    @unittest.skip("Cannot run on swiftshader due to no integer dot product support")
+    @disable_test("Cannot run on swiftshader due to no integer dot product support")
     def test_vulkan_backend_xnnpack_pt2e_quantized_conv_sequence_all_reduced(self):
         """
         Test a sequence of convolution layers quantized with PT2E quantization.
@@ -2610,7 +2629,7 @@ def forward(self, x):
             rtol=1e-1,
         )
 
-    @unittest.skip("Cannot run on swiftshader due to no 8-bit int support")
+    @disable_test("Cannot run on swiftshader due to no 8-bit int support")
     def test_vulkan_backend_torchao_8da4w_quantized_linear(self):
         """
         Test TorchAO 8da4w quantization (int8 dynamic activation + int4 weight) with Vulkan backend.

From 915a82d4235c92930b7670c19d4f006852ba6e00 Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm26340.ftw0.facebook.com>
Date: Fri, 29 May 2026 07:30:02 -0700
Subject: [PATCH 083/103] [devtools][tests][4/N] Report disabled inspector
 tests as executed

Applies the same disabled-test treatment as the prior diffs in this stack to the devtools inspector tests. Some test runners preserve stale failure state when tests report through unittest skip results, so this replaces the conditionally disabled coverage with a local decorator that keeps the tests discoverable, logs their disabled reason, and produces an executed result.

Adds a disable_if decorator that mirrors unittest.skipIf (evaluating the condition at decoration time) and converts the three Windows-gated test cases to use it.

Differential Revision: [D106736354](https://our.internmc.facebook.com/intern/diff/D106736354/)


ghstack-source-id: 387629542
Pull-Request: https://github.com/pytorch/executorch/pull/19874
---
 devtools/inspector/tests/inspector_test.py | 29 +++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/devtools/inspector/tests/inspector_test.py b/devtools/inspector/tests/inspector_test.py
index b33c5b37164..4c59190650c 100644
--- a/devtools/inspector/tests/inspector_test.py
+++ b/devtools/inspector/tests/inspector_test.py
@@ -7,6 +7,7 @@
 # pyre-unsafe
 
 import copy
+import functools
 import os
 import random
 import statistics
@@ -90,6 +91,28 @@ def forward(self, indices: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
 ETRECORD_PATH = "unittest_etrecord_path"
 
 
+def disable_if(condition, reason):
+    """Disable a test when condition is true, still reporting it as executed.
+
+    Conditional analogue of unittest.skipIf that keeps disabled tests visible in
+    logs instead of producing a skipped result, which some test runners handle
+    inconsistently.
+    """
+
+    def decorator(fn):
+        if not condition:
+            return fn
+
+        @functools.wraps(fn)
+        def wrapper(*args, **kwargs):
+            print(f"DISABLED_TEST: {fn.__qualname__}: {reason}")
+            return None
+
+        return wrapper
+
+    return decorator
+
+
 # TODO: write an E2E test: create an inspector instance, mock just the file reads, and then verify the external correctness
 class TestInspector(unittest.TestCase):
     def test_perf_data(self) -> None:
@@ -1504,7 +1527,7 @@ def test_calculate_numeric_gap_with_edge_dialect_exported_program_name(self):
             self.assertIsInstance(df, pd.DataFrame)
             self.assertEqual(len(df), 1)
 
-    @unittest.skipIf(sys.platform.startswith("win"), "Skipping on Windows")
+    @disable_if(sys.platform.startswith("win"), "Skipping on Windows")
     def test_transformer_block_xnnpack_numeric_gap_within_tolerance(self):
         """
         Test that the numeric gap between AOT and runtime intermediate outputs
@@ -1693,7 +1716,7 @@ def forward(
                         f"Stack trace for {op_name} doesn't contain file info",
                     )
 
-    @unittest.skipIf(sys.platform.startswith("win"), "Skipping on Windows")
+    @disable_if(sys.platform.startswith("win"), "Skipping on Windows")
     def test_intermediate_tensor_comparison_with_torch_export(self):
         """Test intermediate tensor comparison using torch.export.export and to_edge_transform_and_lower.
 
@@ -1840,7 +1863,7 @@ def _gen_random_runtime_output(
     ) -> List[Union[None, List[torch.Tensor], bool, float, int, str, torch.Tensor]]:
         return [torch.randn(RAW_DATA_SIZE)]
 
-    @unittest.skipIf(sys.platform.startswith("win"), "Skipping on Windows")
+    @disable_if(sys.platform.startswith("win"), "Skipping on Windows")
     def test_disable_debug_handle_validation_with_symbolic_shapes(self):
         """
         Test that demonstrates the issue with symbolic shape related nodes losing from_node info

From 10e2eecfb63a14781554aa1e3dae83c19929e46b Mon Sep 17 00:00:00 2001
From: SS-JIA <ssjia@meta.com>
Date: Fri, 29 May 2026 15:29:54 -0400
Subject: [PATCH 084/103] Skip AOTI tests on macOS CI and bump job timeout to
 120 min

Summary:
AOTI tests (llama3_2_vision and select extension/llm tests) hang
indefinitely on macOS CI runners after the PyTorch 2.12 pin update.
The hang is in native C/C++ code (inductor compilation / dlopen),
which prevents faulthandler from producing a traceback. Diagnosis
is ongoing in #19886.

Skip the affected tests and bump the macOS job timeout from the
default 90 to 120 minutes to add margin (observed completion at
~79 min with skips applied).

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .ci/scripts/unittest-macos-cmake.sh | 15 +++++++++++++--
 .github/workflows/_unittest.yml     |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/.ci/scripts/unittest-macos-cmake.sh b/.ci/scripts/unittest-macos-cmake.sh
index 43eb1f21c3c..48f072a0cc1 100755
--- a/.ci/scripts/unittest-macos-cmake.sh
+++ b/.ci/scripts/unittest-macos-cmake.sh
@@ -12,8 +12,19 @@ set -eux
 export TORCHINDUCTOR_CACHE_DIR="$(mktemp -d "${RUNNER_TEMP:-/tmp}/torchinductor_cache_XXXXXX")"
 trap 'rm -rf "${TORCHINDUCTOR_CACHE_DIR}"' EXIT
 
-# Run pytest with coverage
-${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
+# TODO(SS-JIA): AOTI tests hang on macOS CI runners — the thread blocks in
+# native C/C++ code (dlopen / inductor compilation) so faulthandler cannot
+# even produce a traceback. Diagnosis ongoing in #19886.
+AOTI_SKIPS=(
+  --ignore=examples/models/llama3_2_vision/preprocess/test_preprocess.py
+  --ignore=examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
+  --ignore=examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
+  --deselect=extension/llm/modules/test/test_position_embeddings.py::TilePositionalEmbeddingTest::test_tile_positional_embedding_aoti
+  --deselect=extension/llm/modules/test/test_position_embeddings.py::TiledTokenPositionalEmbeddingTest::test_tiled_token_positional_embedding_aoti
+  --deselect=extension/llm/modules/test/test_attention.py::AttentionTest::test_attention_aoti
+)
+
+${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml "${AOTI_SKIPS[@]}"
 # Run gtest
 LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \
 ${CONDA_RUN} test/run_oss_cpp_tests.sh
diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml
index 15c87bd79e4..a253857d2c0 100644
--- a/.github/workflows/_unittest.yml
+++ b/.github/workflows/_unittest.yml
@@ -49,6 +49,7 @@ jobs:
       python-version: '3.11'
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
       script: |
         set -eux
         # This is needed to get the prebuilt PyTorch wheel from S3

From 29c18def8be12f6915b5c6b0fab435105c4fb6d2 Mon Sep 17 00:00:00 2001
From: Jacob Szwejbka <jakeszwe@meta.com>
Date: Fri, 29 May 2026 15:20:29 -0700
Subject: [PATCH 085/103] Use uint64_t for FlatTensor segment end

Differential Revision: D106710218

Pull Request resolved: https://github.com/pytorch/executorch/pull/19860
---
 .../flat_tensor/flat_tensor_data_map.cpp      | 41 ++++++++++++++++---
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/extension/flat_tensor/flat_tensor_data_map.cpp b/extension/flat_tensor/flat_tensor_data_map.cpp
index 48684da1239..845778f45c2 100644
--- a/extension/flat_tensor/flat_tensor_data_map.cpp
+++ b/extension/flat_tensor/flat_tensor_data_map.cpp
@@ -21,6 +21,8 @@
 #include <executorch/runtime/core/span.h>
 #include <executorch/runtime/platform/compiler.h>
 
+#include <cinttypes>
+
 using executorch::runtime::Error;
 using executorch::runtime::FreeableBuffer;
 using executorch::runtime::Result;
@@ -52,7 +54,7 @@ Result<const flat_tensor_flatbuffer::NamedData*> get_named_data(
         flatbuffers::Offset<flat_tensor_flatbuffer::NamedData>>* named_data,
     const flatbuffers::Vector<
         flatbuffers::Offset<flat_tensor_flatbuffer::DataSegment>>* segments,
-    size_t segment_end_offset) {
+    uint64_t segment_end_offset) {
   // Linear search by name.
   if (named_data == nullptr) {
     return Error::NotFound;
@@ -81,19 +83,34 @@ Result<const flat_tensor_flatbuffer::NamedData*> get_named_data(
               static_cast<uint64_t>(segments->Get(segment_index)->offset()),
               static_cast<uint64_t>(segments->Get(segment_index)->size()),
               &seg_end) &&
-              seg_end <= static_cast<uint64_t>(segment_end_offset),
+              seg_end <= segment_end_offset,
           InvalidExternalData,
           "Invalid segment offset %" PRIu64
           " is larger than the segment_base_offset + segment_data_size %" PRIu64
           "; malformed PTD file.",
           segments->Get(segment_index)->offset(),
-          static_cast<uint64_t>(segment_end_offset));
+          segment_end_offset);
       return found;
     }
   }
   return Error::NotFound;
 }
 
+Result<uint64_t> get_segment_end_offset(const FlatTensorHeader& header) {
+  uint64_t segment_end_offset = 0;
+  ET_CHECK_OR_RETURN_ERROR(
+      !c10::add_overflows(
+          header.segment_base_offset,
+          header.segment_data_size,
+          &segment_end_offset),
+      InvalidExternalData,
+      "segment_base_offset %" PRIu64 " + segment_data_size %" PRIu64
+      " overflows uint64_t; malformed PTD file.",
+      header.segment_base_offset,
+      header.segment_data_size);
+  return segment_end_offset;
+}
+
 Result<const TensorLayout> create_tensor_layout(
     const flat_tensor_flatbuffer::TensorLayout* tensor_layout) {
   ScalarType scalar_type =
@@ -111,11 +128,15 @@ Result<const TensorLayout> create_tensor_layout(
 
 ET_NODISCARD Result<const TensorLayout> FlatTensorDataMap::get_tensor_layout(
     executorch::aten::string_view key) const {
+  Result<uint64_t> segment_end_offset = get_segment_end_offset(header_);
+  if (!segment_end_offset.ok()) {
+    return segment_end_offset.error();
+  }
   Result<const flat_tensor_flatbuffer::NamedData*> named_data = get_named_data(
       key,
       flat_tensor_->named_data(),
       flat_tensor_->segments(),
-      header_.segment_base_offset + header_.segment_data_size);
+      segment_end_offset.get());
   if (!named_data.ok()) {
     return named_data.error();
   }
@@ -124,11 +145,15 @@ ET_NODISCARD Result<const TensorLayout> FlatTensorDataMap::get_tensor_layout(
 
 ET_NODISCARD Result<FreeableBuffer> FlatTensorDataMap::get_data(
     executorch::aten::string_view key) const {
+  Result<uint64_t> segment_end_offset = get_segment_end_offset(header_);
+  if (!segment_end_offset.ok()) {
+    return segment_end_offset.error();
+  }
   Result<const flat_tensor_flatbuffer::NamedData*> named_data = get_named_data(
       key,
       flat_tensor_->named_data(),
       flat_tensor_->segments(),
-      header_.segment_base_offset + header_.segment_data_size);
+      segment_end_offset.get());
   if (!named_data.ok()) {
     return named_data.error();
   }
@@ -148,11 +173,15 @@ ET_NODISCARD Error FlatTensorDataMap::load_data_into(
     ET_UNUSED executorch::aten::string_view key,
     ET_UNUSED void* buffer,
     ET_UNUSED size_t size) const {
+  Result<uint64_t> segment_end_offset = get_segment_end_offset(header_);
+  if (!segment_end_offset.ok()) {
+    return segment_end_offset.error();
+  }
   Result<const flat_tensor_flatbuffer::NamedData*> named_data = get_named_data(
       key,
       flat_tensor_->named_data(),
       flat_tensor_->segments(),
-      header_.segment_base_offset + header_.segment_data_size);
+      segment_end_offset.get());
   if (!named_data.ok()) {
     return named_data.error();
   }

From 0e6b67ed9620e435fe387e90c12aa284be2e7a71 Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Fri, 29 May 2026 15:27:59 -0700
Subject: [PATCH 086/103] Add fuse() to QuantizationPatterns (#19726)

Differential Revision: D105728156

Pull Request resolved: https://github.com/pytorch/executorch/pull/19726
---
 backends/cadence/aot/quantizer/BUCK        |   2 +
 backends/cadence/aot/quantizer/patterns.py | 264 ++++++++++++++++++++-
 2 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/backends/cadence/aot/quantizer/BUCK b/backends/cadence/aot/quantizer/BUCK
index c2ec3e3a1f6..956bf700bd7 100644
--- a/backends/cadence/aot/quantizer/BUCK
+++ b/backends/cadence/aot/quantizer/BUCK
@@ -36,8 +36,10 @@ fbcode_target(_kind = runtime.python_library,
     ],
     typing = True,
     deps = [
+        ":pattern_utils",
         ":utils",
         "//caffe2:torch",
+        "//executorch/backends/cadence/aot:pass_utils",
     ],
 )
 
diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index e1f44b8ce5c..bf7ca3ef567 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -12,8 +12,19 @@
 from typing import List, Optional, Tuple, Union
 
 import torch
-from executorch.backends.cadence.aot.quantizer.utils import get_bias_qparams
-
+from executorch.backends.cadence.aot.pass_utils import get_arg, replace_with_op
+from executorch.backends.cadence.aot.quantizer.pattern_utils import (
+    DQ_PER_TENSOR,
+    find_quant_user,
+    fuse_conv,
+    fuse_linear,
+    fuse_matmul,
+    insert_node_with_meta,
+)
+from executorch.backends.cadence.aot.quantizer.utils import (
+    check_out_zero_point_is_min_range,
+    get_bias_qparams,
+)
 from torch import fx
 from torch._ops import OpOverload
 from torchao.quantization.pt2e.quantizer import (
@@ -131,6 +142,41 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_linear.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        assert anchor_node.target == torch.ops.aten.addmm.default
+        # addmm(bias, input, weight)
+        bias_node = anchor_node.args[0]
+        assert isinstance(bias_node, fx.Node)
+        dq_input = get_arg(anchor_node, "mat1", fx.Node)
+        if dq_input.target != DQ_PER_TENSOR:
+            return None
+        dq_weight = get_arg(anchor_node, "mat2", fx.Node)
+        if dq_weight.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        dq_bias = bias_node if bias_node.target == DQ_PER_TENSOR else None
+        weight_q = get_arg(dq_weight, "input", fx.Node)
+        transposed = insert_node_with_meta(
+            gm,
+            torch.ops.aten.transpose.int,
+            (weight_q, 0, 1),
+            None,
+            anchor_node,
+            weight_q,
+        )
+        return fuse_linear(
+            gm,
+            dq_input,
+            dq_weight,
+            dq_bias,
+            quant_node,
+            anchor_node,
+            self.replacement_op(),
+            weight_q=transposed,
+        )
+
 
 class AddPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -169,6 +215,33 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_add.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        # Skip if alpha kwarg is present — changes add semantics.
+        if anchor_node.kwargs:
+            return None
+        dq0 = anchor_node.args[0]
+        if not isinstance(dq0, fx.Node) or dq0.target != DQ_PER_TENSOR:
+            return None
+        dq1 = anchor_node.args[1]
+        if not isinstance(dq1, fx.Node) or dq1.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        args = (
+            get_arg(dq0, "input", fx.Node),
+            get_arg(dq0, "scale", float),
+            get_arg(dq0, "zero_point", int),
+            get_arg(dq1, "input", fx.Node),
+            get_arg(dq1, "scale", float),
+            get_arg(dq1, "zero_point", int),
+            get_arg(quant_node, "scale", float),
+            get_arg(quant_node, "zero_point", int),
+        )
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, {}, quant_node
+        )
+
 
 # This is a base class for Add+ReLU fusion, since it can be used with two different relu aten ops
 class AddReluBasePattern(QuantizationPattern):
@@ -212,6 +285,46 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_add.per_tensor
 
+    def anchor_ops(self) -> tuple[OpOverload, ...]:
+        return (torch.ops.aten.add.Tensor,)
+
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        add_users = list(anchor_node.users)
+        if len(add_users) != 1:
+            return None
+        relu_node = add_users[0]
+        if relu_node.target != self.partition_types()[1]:
+            return None
+        if len(anchor_node.kwargs) > 0:
+            return None
+        dq0 = anchor_node.args[0]
+        if not isinstance(dq0, fx.Node) or dq0.target != DQ_PER_TENSOR:
+            return None
+        dq1 = anchor_node.args[1]
+        if not isinstance(dq1, fx.Node) or dq1.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(relu_node)
+        if quant_node is None:
+            return None
+        if not check_out_zero_point_is_min_range(
+            get_arg(quant_node, "zero_point", int),
+            get_arg(quant_node, "dtype", torch.dtype),
+        ):
+            return None
+        args = (
+            get_arg(dq0, "input", fx.Node),
+            get_arg(dq0, "scale", float),
+            get_arg(dq0, "zero_point", int),
+            get_arg(dq1, "input", fx.Node),
+            get_arg(dq1, "scale", float),
+            get_arg(dq1, "zero_point", int),
+            get_arg(quant_node, "scale", float),
+            get_arg(quant_node, "zero_point", int),
+        )
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, {}, quant_node
+        )
+
 
 # Add + regular relu op fusion
 class AddReluPattern0(AddReluBasePattern):
@@ -250,6 +363,18 @@ def replacement_op(self) -> OpOverload:
         # we just need to change the name of the op
         return torch.ops.cadence.quantized_matmul.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq0 = anchor_node.args[0]
+        if not isinstance(dq0, fx.Node) or dq0.target != DQ_PER_TENSOR:
+            return None
+        dq1 = anchor_node.args[1]
+        if not isinstance(dq1, fx.Node) or dq1.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        return fuse_matmul(gm, anchor_node, dq0, dq1, quant_node, self.replacement_op())
+
 
 class CatPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -299,6 +424,25 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.aten.cat.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        cat_inputs = anchor_node.args[0]
+        if not isinstance(cat_inputs, (list, tuple)) or not cat_inputs:
+            return None
+        inputs_q = []
+        for inp in cat_inputs:
+            if not isinstance(inp, fx.Node) or inp.target != DQ_PER_TENSOR:
+                return None
+            inputs_q.append(get_arg(inp, "input", fx.Node))
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        dim = get_arg(anchor_node, "dim", int)
+        args = (inputs_q,)
+        kwargs = {"dim": dim}
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, kwargs, quant_node
+        )
+
 
 class Conv1dPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -341,6 +485,18 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_conv1d_ncl.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq_input = anchor_node.args[0]
+        if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+            return None
+        dq_weight = anchor_node.args[1]
+        if not isinstance(dq_weight, fx.Node) or dq_weight.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        return fuse_conv(self, gm, anchor_node, dq_input, dq_weight, quant_node)
+
 
 class Conv2dPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -383,6 +539,18 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_conv2d_nchw.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq_input = anchor_node.args[0]
+        if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+            return None
+        dq_weight = anchor_node.args[1]
+        if not isinstance(dq_weight, fx.Node) or dq_weight.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        return fuse_conv(self, gm, anchor_node, dq_input, dq_weight, quant_node)
+
 
 class LayerNormPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -421,6 +589,61 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_layer_norm.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq_input = anchor_node.args[0]
+        if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        scale = get_arg(dq_input, "scale", float)
+        zero_point = get_arg(dq_input, "zero_point", int)
+        normalized_shape = anchor_node.args[1]
+        assert isinstance(normalized_shape, list)
+        weight = (
+            anchor_node.args[2]
+            if len(anchor_node.args) > 2 and anchor_node.args[2]
+            else None
+        )
+        bias = (
+            anchor_node.args[3]
+            if len(anchor_node.args) > 3 and anchor_node.args[3]
+            else None
+        )
+        input_q = get_arg(dq_input, "input", fx.Node)
+        # Default weight=1 and bias=0 must be float32 — cadence::quantized_layer_norm
+        # expects float affine parameters, not quantized values.
+        if not weight:
+            weight = insert_node_with_meta(
+                gm,
+                torch.ops.aten.full.default,
+                (normalized_shape, 1),
+                {"dtype": torch.float32},
+                anchor_node,
+                input_q,
+            )
+        if not bias:
+            bias = insert_node_with_meta(
+                gm,
+                torch.ops.aten.full.default,
+                (normalized_shape, 0),
+                {"dtype": torch.float32},
+                anchor_node,
+                input_q,
+            )
+        args = (input_q, scale, zero_point)
+        kwargs = {
+            "normalized_shape": normalized_shape,
+            "weight": weight,
+            "bias": bias,
+            "eps": get_arg(anchor_node, "eps", float),
+            "output_scale": get_arg(quant_node, "scale", float),
+            "output_zero_point": get_arg(quant_node, "zero_point", int),
+        }
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, kwargs, quant_node
+        )
+
 
 class LinearPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -463,6 +686,31 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_linear.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq_input = anchor_node.args[0]
+        if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+            return None
+        dq_weight = anchor_node.args[1]
+        if not isinstance(dq_weight, fx.Node) or dq_weight.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        dq_bias: fx.Node | None = None
+        if len(anchor_node.args) > 2:
+            bias_arg = anchor_node.args[2]
+            if isinstance(bias_arg, fx.Node) and bias_arg.target == DQ_PER_TENSOR:
+                dq_bias = bias_arg
+        return fuse_linear(
+            gm,
+            dq_input,
+            dq_weight,
+            dq_bias,
+            quant_node,
+            anchor_node,
+            self.replacement_op(),
+        )
+
 
 class MatmulPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -488,6 +736,18 @@ def replacement_op(self) -> OpOverload:
         # TODO: T240804887 This is actually a per-tensor variant, we just need to change the name of the op
         return torch.ops.cadence.quantized_matmul.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq0 = anchor_node.args[0]
+        if not isinstance(dq0, fx.Node) or dq0.target != DQ_PER_TENSOR:
+            return None
+        dq1 = anchor_node.args[1]
+        if not isinstance(dq1, fx.Node) or dq1.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        return fuse_matmul(gm, anchor_node, dq0, dq1, quant_node, self.replacement_op())
+
 
 class MaxPool2dPattern(QuantizationPattern):
     """

From 5395f2084ee1ef1243ad30309cc7c74b93e9f683 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Fri, 29 May 2026 16:56:01 -0700
Subject: [PATCH 087/103] [MLX][Gemma4] Add turbo quant support (#19866)

Add TurboQuant TQ4 KV cache to the MLX backend, exposed on gemma4_31b
via --turboquant. Compresses full-attention KV cache from bf16 to a
4-bit codebook + per-vector norms, letting Gemma 4 31B-IT scale to very
long contexts. Sliding-window layers are unchanged.

What's in the PR

  New cache subclass:
    - backends/mlx/llm/turboquant_cache.py: MLXTurboQuantKVCache,
      a drop-in subclass of TurboQuantKVCache.

  Three custom ops + Metal kernels:
    - mlx::tq4_compress (model_ops/tq4_compress.py): bucketize +
      cast(uint8) + nibble-pack in one kernel.
    - mlx::tq_norm (model_ops/tq_norm.py): L2 norm with simd_sum
      cross-lane reduction in fp32 registers; bf16 in / bf16 out.
    - mlx::tq_dequant (model_ops/tq_dequant.py): unpack + centroid
      gather + multiply-by-norm in one kernel.

  Per-op tests:
    - test_tq4_compress.py, test_tq_norm.py, test_tq_dequant.py

  Wiring:
    - examples/models/gemma4_31b/mlx_source_transformations.py:
    - examples/models/gemma4_31b/export.py: --turboquant CLI flag
    - examples/models/gemma4_31b/README.md: TurboQuant subsection.

Perf on M4 Max 64GB Ram:

```
 2K prompt:
    bf16 cache:        prefill 189.7 tok/s,  decode 17.4 tok/s
    TurboQuant cache:  prefill 187.7 tok/s,  decode 16.9 tok/s

  8K prompt:
    bf16 cache:        prefill 170.0 tok/s,  decode 17.1 tok/s
    TurboQuant cache:  prefill 166.0 tok/s,  decode 11.9 tok/s
```

For TQ, max context length is set to 64K. On bf16 cache, max context
length is 10K.

TODO: why does decode slow more for TQ than bf16?
---
 .github/workflows/mlx.yml                     |  12 +
 backends/mlx/builder/op_helpers.py            | 112 +++++
 backends/mlx/llm/turboquant_cache.py          | 243 +++++++++++
 backends/mlx/model_ops/test_tq4_compress.py   | 183 ++++++++
 backends/mlx/model_ops/test_tq_dequant.py     | 166 ++++++++
 backends/mlx/model_ops/test_tq_norm.py        | 150 +++++++
 backends/mlx/model_ops/tq4_compress.py        | 189 +++++++++
 backends/mlx/model_ops/tq_dequant.py          | 216 ++++++++++
 backends/mlx/model_ops/tq_norm.py             | 170 ++++++++
 backends/mlx/test/op_test_runner.cpp          |  12 +
 backends/mlx/test/test_ops.py                 | 396 ++++++++++++++++++
 backends/mlx/test/test_utils.py               |   5 +
 examples/models/gemma4_31b/README.md          |  18 +
 examples/models/gemma4_31b/export.py          |  44 +-
 .../gemma4_31b/mlx_source_transformations.py  |  73 +++-
 15 files changed, 1961 insertions(+), 28 deletions(-)
 create mode 100644 backends/mlx/llm/turboquant_cache.py
 create mode 100644 backends/mlx/model_ops/test_tq4_compress.py
 create mode 100644 backends/mlx/model_ops/test_tq_dequant.py
 create mode 100644 backends/mlx/model_ops/test_tq_norm.py
 create mode 100644 backends/mlx/model_ops/tq4_compress.py
 create mode 100644 backends/mlx/model_ops/tq_dequant.py
 create mode 100644 backends/mlx/model_ops/tq_norm.py

diff --git a/.github/workflows/mlx.yml b/.github/workflows/mlx.yml
index 027101ba7f0..c51f126dbe6 100644
--- a/.github/workflows/mlx.yml
+++ b/.github/workflows/mlx.yml
@@ -80,6 +80,18 @@ jobs:
         ${CONDA_RUN} python -m executorch.backends.mlx.model_ops.test_gated_delta_rule run -v
         echo "::endgroup::"
 
+        echo "::group::Run tq_norm op tests"
+        ${CONDA_RUN} python -m executorch.backends.mlx.model_ops.test_tq_norm run -v
+        echo "::endgroup::"
+
+        echo "::group::Run tq4_compress op tests"
+        ${CONDA_RUN} python -m executorch.backends.mlx.model_ops.test_tq4_compress run -v
+        echo "::endgroup::"
+
+        echo "::group::Run tq_dequant op tests"
+        ${CONDA_RUN} python -m executorch.backends.mlx.model_ops.test_tq_dequant run -v
+        echo "::endgroup::"
+
   test-mlx-qwen35-moe:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
diff --git a/backends/mlx/builder/op_helpers.py b/backends/mlx/builder/op_helpers.py
index 7740546cc2c..be199f75340 100644
--- a/backends/mlx/builder/op_helpers.py
+++ b/backends/mlx/builder/op_helpers.py
@@ -17,6 +17,7 @@
 
 if TYPE_CHECKING:
     from executorch.backends.mlx.builder.program_builder import MLXProgramBuilder
+    from executorch.backends.mlx.serialization.mlx_graph_schema import IntOrVid
 
 # When True, always serialize the biases tensor for quantized ops.
 # When False, use init-time computation when zero_point is all zeros,
@@ -173,6 +174,117 @@ def emit_lifted_constant(P: "MLXProgramBuilder", value, dtype: torch.dtype) -> S
     return slot
 
 
+def emit_shape(
+    P: "MLXProgramBuilder",
+    node: Node,
+    slot: Slot,
+    *,
+    end_dim: "Optional[int]" = None,
+) -> "list[IntOrVid]":
+    """Return the shape of ``node`` as a list of ``IntOrVid``.
+
+    Each static dim becomes a literal ``IntOrVid``; each dynamic dim
+    emits a ``SymSizeNode`` against ``slot`` and is wrapped via
+    ``P.to_int_or_vid``.
+
+    Args:
+        P: program builder.
+        node: FX node whose shape to walk (must have ``meta['val']``).
+        slot: slot corresponding to ``node`` (used as the
+            ``SymSize`` source for any dynamic dim).
+        end_dim: stop index (exclusive). ``None`` means the full ndim.
+            Negative values index from the end (e.g. ``-1`` is "all
+            leading dims, drop the last").
+
+    Returns:
+        ``list[IntOrVid]`` of length ``end_dim`` (after normalization).
+    """
+    from executorch.backends.mlx.serialization.mlx_graph_schema import (
+        IntOrVid,
+        SymSizeNode,
+    )
+
+    shape = node.meta["val"].shape
+    ndim = len(shape)
+    if end_dim is None:
+        end_dim = ndim
+    elif end_dim < 0:
+        end_dim += ndim
+
+    out: "list[IntOrVid]" = []
+    for dim_idx in range(end_dim):
+        s = shape[dim_idx]
+        if isinstance(s, int):
+            out.append(IntOrVid.from_literal(int(s)))
+        else:
+            _, d_val = P.make_tmp_value_slot()
+            P.emit(
+                SymSizeNode(
+                    a=P.slot_to_tid(slot),
+                    dim=dim_idx,
+                    out=P.slot_to_vid(d_val),
+                )
+            )
+            out.append(P.to_int_or_vid(d_val))
+    return out
+
+
+def emit_product(
+    P: "MLXProgramBuilder",
+    dims: "list[IntOrVid]",
+) -> "IntOrVid":
+    """Multiplicative reduction over a list of ``IntOrVid`` values.
+
+    Folds all literal entries AOT into a single static product, then
+    emits ``MultiplyIntNode`` only for the dynamic entries (and one
+    final node combining the static product with the dynamic accumulator
+    when both contribute).
+
+    Args:
+        P: program builder.
+        dims: list of ``IntOrVid``. May be empty (returns
+            ``IntOrVid.from_literal(1)``), all literals, or a mix.
+
+    Returns:
+        An ``IntOrVid`` representing the product. Always literal when
+        every entry is literal (or ``dims`` is empty).
+    """
+    from executorch.backends.mlx.serialization.mlx_graph_schema import (
+        IntOrVid,
+        MultiplyIntNode,
+    )
+
+    static_product = 1
+    dynamic_dims: "list[IntOrVid]" = []
+    for d in dims:
+        if d.is_vid:
+            dynamic_dims.append(d)
+        else:
+            static_product *= d.literal
+
+    if not dynamic_dims:
+        return IntOrVid.from_literal(static_product)
+
+    acc = dynamic_dims[0]
+    for d in dynamic_dims[1:]:
+        _, acc_val = P.make_tmp_value_slot()
+        P.emit(MultiplyIntNode(a=acc, b=d, out=P.slot_to_vid(acc_val)))
+        acc = P.to_int_or_vid(acc_val)
+
+    if static_product == 1:
+        return acc
+
+    _, final_val = P.make_tmp_value_slot()
+    P.emit(
+        MultiplyIntNode(
+            a=IntOrVid.from_literal(static_product),
+            b=acc,
+            out=P.slot_to_vid(final_val),
+        )
+    )
+    return P.to_int_or_vid(final_val)
+
+
 def emit_quantized_biases(
     P: "MLXProgramBuilder",
     zero_point_key: str,
diff --git a/backends/mlx/llm/turboquant_cache.py b/backends/mlx/llm/turboquant_cache.py
new file mode 100644
index 00000000000..7f2109ba074
--- /dev/null
+++ b/backends/mlx/llm/turboquant_cache.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+TurboQuant TQ4 KV cache for the MLX backend.
+
+Subclass of the backend-agnostic
+``extension/llm/modules/turboquant/kv_cache.py::TurboQuantKVCache``.
+
+The cache stores K and V in **rotated space** (post-multiplied by R^T)
+as nibble-packed uint8 codebook indices plus per-vector bf16 norms.
+SDPA runs in rotated space and undoes the rotation on the output side
+(both Q and output rotations are ``T_q × D²``, much smaller than
+applying the inverse rotation to K/V which would be ``T_kv × D²``).
+
+Reference:
+    TurboQuant: Online Vector Quantization with Near-optimal
+    Distortion Rate. arXiv:2504.19874 (ICLR 2026).
+"""
+
+from typing import Optional, Tuple
+
+# Register the MLX custom ops used by this cache.
+import executorch.backends.mlx.custom_ops  # noqa: F401  mlx::custom_sdpa, mlx::kv_cache_update
+import executorch.backends.mlx.model_ops.tq4_compress  # noqa: F401  mlx::tq4_compress
+import executorch.backends.mlx.model_ops.tq_dequant  # noqa: F401  mlx::tq_dequant
+import executorch.backends.mlx.model_ops.tq_norm  # noqa: F401  mlx::tq_norm
+
+import torch
+
+from executorch.extension.llm.modules.turboquant.kv_cache import (
+    TurboQuantKVCache as _SharedTurboQuantKVCache,
+)
+
+
+class TurboQuantKVCache(_SharedTurboQuantKVCache):
+    """
+    TurboQuant TQ4 KV cache, MLX-backend variant.
+
+    Drop-in replacement for ``backends/mlx/llm/cache.py::KVCache``.
+
+    Args:
+        max_batch_size: Must be 1 (TQ4 is batch=1 only).
+        max_context_length: Maximum sequence length.
+        n_heads: Number of KV heads.
+        head_dim: Per-head dimension. Must be even and a multiple of 64.
+        enable_dynamic_shape: Accepted for interface parity; ignored.
+        dtype: Compute dtype (bf16). Used for pre-cast buffers.
+        bits: Quantization bits (must be 4).
+        seed: RNG seed for the orthogonal rotation matrix.
+    """
+
+    def __init__(
+        self,
+        max_batch_size: int,
+        max_context_length: int,
+        n_heads: int,
+        head_dim: int,
+        enable_dynamic_shape: bool,
+        dtype: torch.dtype = torch.bfloat16,
+        bits: int = 4,
+        seed: int = 42,
+    ):
+        if max_batch_size != 1:
+            raise ValueError(
+                f"TurboQuantKVCache only supports max_batch_size=1, "
+                f"got {max_batch_size}"
+            )
+        if bits != 4:
+            raise ValueError(
+                f"TurboQuantKVCache only supports bits=4 "
+                f"(16-entry codebook), got bits={bits}"
+            )
+        # MLX-backend Metal kernels need ``head_dim % 64 == 0``: ``tq_norm``
+        # uses 32 SIMD lanes (so D must be a multiple of 32), and
+        # ``tq_dequant`` packs 2 dims per byte across 32 lanes (so D must
+        # be a multiple of 64). Take the stricter constraint here.
+        if head_dim % 64 != 0:
+            raise ValueError(
+                f"TurboQuantKVCache requires head_dim to be "
+                f"a multiple of 64 (Metal SIMD + 4-bit pack constraint), "
+                f"got {head_dim}"
+            )
+        super().__init__(
+            n_heads=n_heads,
+            head_dim=head_dim,
+            max_seq_len=max_context_length,
+            bits=bits,
+            seed=seed,
+        )
+        self.max_batch_size = max_batch_size
+        self.max_context_length = max_context_length
+        self.enable_dynamic_shape = enable_dynamic_shape
+
+        # Replace parent's fp32 ``rotation`` and ``centroids`` buffers
+        # with compute-dtype versions in-place. Avoids a per-call
+        # ``_to_copy`` cast in the lowered graph at every use site.
+        # Parent's ``_decompress`` (testing-only) is the sole consumer
+        # of these as fp32 and is not called at runtime.
+        self.register_buffer(
+            "rotation",
+            self.rotation.to(dtype).contiguous(),
+            persistent=False,
+        )
+        self.register_buffer(
+            "centroids",
+            self.centroids.to(dtype).contiguous(),
+            persistent=False,
+        )
+        # Pre-cast eps for the divide-by-zero guard in _compress.
+        self.register_buffer(
+            "norm_eps",
+            torch.tensor(1e-10, dtype=dtype),
+            persistent=False,
+        )
+
+    def _compress(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Compress ``(1, H, T, D)`` → packed ``(1, H, T, D//2)`` u8 +
+        norms ``(1, H, T, 1)`` bf16.
+
+        The L2-norm reduction uses ``mlx::tq_norm`` (one Metal kernel
+        with fp32 sum-of-squares in registers via ``simd_sum``); the
+        bucketize + nibble-pack tail uses ``mlx::tq4_compress`` (one
+        Metal kernel for both steps).
+        """
+        orig_shape = x.shape
+        flat = x.reshape(-1, self.head_dim)
+
+        norms = torch.ops.mlx.tq_norm(flat)
+        normalized = flat / (norms + self.norm_eps)
+        rotated = normalized @ self.rotation_T
+        packed = torch.ops.mlx.tq4_compress(rotated, self.boundaries)
+
+        return (
+            packed.reshape(*orig_shape[:-1], self.half_dim),
+            norms.reshape(*orig_shape[:-1], 1),
+        )
+
+    def update(
+        self,
+        input_pos,
+        k_val: torch.Tensor,
+        v_val: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Compress + write K/V at ``input_pos``, return the full
+        compressed cache buffers.
+
+        Accepts ``input_pos`` as either a ``(T,)`` LongTensor of
+        positions or a Python int / SymInt ``start_pos``. Writes go
+        through ``mlx::kv_cache_update`` (matching the non-TQ
+        ``MLXKVCache`` path) which lowers to a tighter in-place
+        scatter than ``index_copy_`` would.
+        """
+        if isinstance(input_pos, torch.Tensor):
+            start_pos = input_pos[0].item()
+            seq_len = k_val.size(2)
+            torch._check(seq_len == v_val.size(2))
+            torch._check(start_pos >= 0)
+            torch._check(start_pos + seq_len <= self.max_context_length)
+        else:
+            start_pos = input_pos
+
+        k_packed, k_norms = self._compress(k_val)
+        v_packed, v_norms = self._compress(v_val)
+
+        torch.ops.mlx.kv_cache_update(self.k_packed, k_packed, start_pos)
+        torch.ops.mlx.kv_cache_update(self.k_norms, k_norms, start_pos)
+        torch.ops.mlx.kv_cache_update(self.v_packed, v_packed, start_pos)
+        torch.ops.mlx.kv_cache_update(self.v_norms, v_norms, start_pos)
+
+        # Slices on the return create new graph nodes so the same node
+        # is not both BUFFER_MUTATION and USER_OUTPUT.
+        return (
+            self.k_packed[:, :, :, :],
+            self.k_norms[:, :, :, :],
+            self.v_packed[:, :, :, :],
+            self.v_norms[:, :, :, :],
+        )
+
+    # forward() is inherited from the parent (delegates to update).
+
+    def sdpa(
+        self,
+        query: torch.Tensor,
+        start_pos,
+        scale: Optional[float] = None,
+    ) -> torch.Tensor:
+        """SDPA over the compressed cache.
+
+        Runs attention in rotated space:
+          1. Q_rot = Q @ R^T               (T_q x D^2)
+          2. K_rot, V_rot = tq_dequant(...) (rotated-space K/V)
+          3. out_rot = custom_sdpa(Q_rot, K_rot, V_rot, ...)
+          4. out = out_rot @ R              (T_q x D^2)
+
+        Since R is orthogonal, score = (Q·R^T)·(K·R^T)^T = Q·K^T, so
+        attention is invariant under matched rotation of Q and K. The
+        ``T_kv x D^2`` inverse-rotation matmul on K/V is replaced with
+        two ``T_q x D^2`` matmuls (Q and output).
+
+        Args:
+            query: ``(B, H_q, T_q, D)`` bf16.
+            start_pos: int or SymInt — absolute position of the first
+                query token.
+            scale: 1/sqrt(D) if None.
+
+        Returns:
+            ``(B, H_q, T_q, D)`` bf16 attention output, in original
+            (un-rotated) space.
+        """
+        seq_len = query.size(2)
+        end_pos = start_pos + seq_len
+        torch._check(start_pos >= 0)
+        torch._check(end_pos <= self.max_context_length)
+
+        q_rot = query @ self.rotation_T
+
+        k_packed_live = self.k_packed[:, :, :end_pos, :]
+        k_norms_live = self.k_norms[:, :, :end_pos, :]
+        v_packed_live = self.v_packed[:, :, :end_pos, :]
+        v_norms_live = self.v_norms[:, :, :end_pos, :]
+
+        # TODO: optimize with a fused dequant + SDPA
+        k_rot = torch.ops.mlx.tq_dequant(k_packed_live, k_norms_live, self.centroids)
+        v_rot = torch.ops.mlx.tq_dequant(v_packed_live, v_norms_live, self.centroids)
+
+        out_rot = torch.ops.mlx.custom_sdpa(
+            q_rot,
+            k_rot,
+            v_rot,
+            start_pos,
+            None,  # attn_mask
+            0.0,  # dropout_p
+            True,  # is_causal
+            scale,
+        )
+
+        return out_rot @ self.rotation
diff --git a/backends/mlx/model_ops/test_tq4_compress.py b/backends/mlx/model_ops/test_tq4_compress.py
new file mode 100644
index 00000000000..c2aaa13afa7
--- /dev/null
+++ b/backends/mlx/model_ops/test_tq4_compress.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Tests for ``mlx::tq4_compress``.
+
+Verifies the fused Metal kernel produces byte-exact output vs the
+eager Python implementation across head_dim values used by TurboQuant.
+
+Usage::
+
+    python -m executorch.backends.mlx.model_ops.test_tq4_compress run
+    python -m executorch.backends.mlx.model_ops.test_tq4_compress run -v
+    python -m executorch.backends.mlx.model_ops.test_tq4_compress run --rebuild
+"""
+
+from typing import List, Tuple
+
+import executorch.backends.mlx.model_ops.tq4_compress  # noqa: F401
+
+import torch
+import torch.nn as nn
+
+from executorch.backends.mlx.test.test_utils import OpTestCase
+
+
+class TQ4CompressModel(nn.Module):
+    """``values → packed`` via ``mlx::tq4_compress``.
+
+    Boundaries are stored as a buffer so the model is exportable
+    without feeding them as a graph input.
+    """
+
+    def __init__(self, head_dim: int, dtype: torch.dtype = torch.bfloat16):
+        super().__init__()
+        # 15 sorted thresholds (4-bit codebook).
+        self.register_buffer(
+            "boundaries",
+            torch.linspace(-0.2, 0.2, 15, dtype=dtype),
+        )
+
+    def forward(self, values: torch.Tensor) -> torch.Tensor:
+        return torch.ops.mlx.tq4_compress(values, self.boundaries)
+
+
+class TQ4CompressTest(OpTestCase):
+    """Byte-exact comparison vs eager bucketize + nibble-pack."""
+
+    name = "tq4_compress"
+    rtol = 0.0
+    atol = 0.0
+
+    def __init__(
+        self,
+        batch_size: int = 1,
+        n_heads: int = 8,
+        seq_len: int = 4,
+        head_dim: int = 128,
+        dtype: torch.dtype = torch.bfloat16,
+    ):
+        self.batch_size = batch_size
+        self.n_heads = n_heads
+        self.seq_len = seq_len
+        self.head_dim = head_dim
+        self.dtype = dtype
+
+        parts = [
+            "tq4_compress",
+            f"b{batch_size}",
+            f"h{n_heads}",
+            f"t{seq_len}",
+            f"d{head_dim}",
+        ]
+        if dtype != torch.bfloat16:
+            parts.append(str(dtype).split(".")[-1])
+        self.name = "_".join(parts)
+
+    @classmethod
+    def get_test_configs(cls) -> List["TQ4CompressTest"]:
+        return [
+            # head_dim=128 (Qwen3.5 MoE / Gemma 4 sliding)
+            cls(seq_len=1, head_dim=128),
+            cls(seq_len=8, head_dim=128),
+            cls(seq_len=64, head_dim=128),
+            cls(n_heads=1, seq_len=1, head_dim=128),
+            # head_dim=256 (Gemma 4 sliding-attention)
+            cls(head_dim=256),
+            cls(seq_len=16, head_dim=256),
+            # head_dim=512 (Gemma 4 31B full-attention)
+            cls(n_heads=4, seq_len=4, head_dim=512),
+            cls(n_heads=4, seq_len=64, head_dim=512),
+            # Smaller D for sanity
+            cls(head_dim=64, n_heads=2, seq_len=4),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return TQ4CompressModel(head_dim=self.head_dim, dtype=self.dtype).to(self.dtype)
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        # Activation-scale values; the kernel is byte-exact regardless
+        # of magnitude as long as values fall within the bucketize
+        # comparison range.
+        values = torch.randn(
+            self.batch_size,
+            self.n_heads,
+            self.seq_len,
+            self.head_dim,
+            dtype=self.dtype,
+        ) * (1.0 / (self.head_dim**0.5))
+        return (values,)
+
+
+if __name__ == "__main__":  # noqa: C901
+    import argparse
+    import sys
+
+    from executorch.backends.mlx.test.test_utils import rebuild_op_test_runner
+
+    parser = argparse.ArgumentParser(description="Test mlx::tq4_compress op")
+    parser.add_argument(
+        "action",
+        choices=["generate", "compare", "run", "list"],
+        help="Action: generate (export), compare (check outputs), run (full), list (show configs)",
+    )
+    parser.add_argument("--verbose", "-v", action="store_true")
+    parser.add_argument(
+        "--rebuild", action="store_true", help="Rebuild C++ runner first"
+    )
+    parser.add_argument(
+        "--config", type=str, default=None, help="Run specific config by name"
+    )
+    args = parser.parse_args()
+
+    if args.rebuild and not rebuild_op_test_runner(verbose=args.verbose):
+        sys.exit(1)
+
+    configs = TQ4CompressTest.get_test_configs()
+
+    if args.action == "list":
+        for cfg in configs:
+            print(f"  {cfg.name}")
+        sys.exit(0)
+
+    if args.config:
+        configs = [c for c in configs if c.name == args.config]
+        if not configs:
+            print(f"No config matching '{args.config}'")
+            sys.exit(1)
+
+    passed = 0
+    failed = 0
+    failed_names: List[str] = []
+
+    for test in configs:
+        if args.action == "generate":
+            pte_path, _, _ = test.generate_test_files(verbose=args.verbose)
+            print(f"Generated: {pte_path}")
+        elif args.action == "compare":
+            actual_path = test.get_test_dir() / "actual_output.bin"
+            ok, msg = test.compare_with_actual(actual_path)
+            print(f"{'✓' if ok else '✗'} {test.name}: {msg}")
+            if ok:
+                passed += 1
+            else:
+                failed += 1
+                failed_names.append(test.name)
+        elif args.action == "run":
+            ok = test.run_test(verbose=args.verbose)
+            if ok:
+                passed += 1
+            else:
+                failed += 1
+                failed_names.append(test.name)
+
+    if args.action in ("run", "compare"):
+        print(f"\nPassed: {passed}, Failed: {failed}")
+        if failed_names:
+            print(f"Failed: {', '.join(failed_names)}")
+        sys.exit(0 if failed == 0 else 1)
diff --git a/backends/mlx/model_ops/test_tq_dequant.py b/backends/mlx/model_ops/test_tq_dequant.py
new file mode 100644
index 00000000000..07d9deb895a
--- /dev/null
+++ b/backends/mlx/model_ops/test_tq_dequant.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Tests for ``mlx::tq_dequant``.
+
+Verifies the fused unpack + gather + multiply Metal kernel matches
+the eager reference at head_dim values used by TurboQuant
+(D ∈ {128, 256, 512}). Output is byte-exact — no fp32 promotion in
+either path.
+
+Usage::
+
+    python -m executorch.backends.mlx.model_ops.test_tq_dequant run
+    python -m executorch.backends.mlx.model_ops.test_tq_dequant run -v
+    python -m executorch.backends.mlx.model_ops.test_tq_dequant run --rebuild
+"""
+
+from typing import List, Tuple
+
+import executorch.backends.mlx.model_ops.tq_dequant  # noqa: F401
+
+import torch
+import torch.nn as nn
+
+from executorch.backends.mlx.test.test_utils import OpTestCase
+
+
+class TQDequantModel(nn.Module):
+    """``packed, norms, centroids → unrotated``."""
+
+    def forward(
+        self,
+        packed: torch.Tensor,
+        norms: torch.Tensor,
+        centroids: torch.Tensor,
+    ) -> torch.Tensor:
+        return torch.ops.mlx.tq_dequant(packed, norms, centroids)
+
+
+class TQDequantTest(OpTestCase):
+    """Byte-exact comparison vs eager unpack + gather + multiply."""
+
+    name = "tq_dequant"
+    rtol = 0.0
+    atol = 0.0
+
+    def __init__(
+        self,
+        batch_size: int = 1,
+        n_heads: int = 8,
+        seq_len: int = 4,
+        head_dim: int = 128,
+    ):
+        self.batch_size = batch_size
+        self.n_heads = n_heads
+        self.seq_len = seq_len
+        self.head_dim = head_dim
+        self.half_dim = head_dim // 2
+        self.name = f"tq_dequant_b{batch_size}_h{n_heads}_t{seq_len}_d{head_dim}"
+
+    @classmethod
+    def get_test_configs(cls) -> List["TQDequantTest"]:
+        return [
+            # head_dim=128 (Qwen3.5 MoE / Gemma 4 sliding)
+            cls(seq_len=1, head_dim=128),
+            cls(seq_len=8, head_dim=128),
+            cls(seq_len=64, head_dim=128),
+            cls(n_heads=1, seq_len=1, head_dim=128),
+            # head_dim=256 (Gemma 4 sliding-attention)
+            cls(seq_len=4, head_dim=256),
+            cls(seq_len=16, head_dim=256),
+            # head_dim=512 (Gemma 4 31B full-attention)
+            cls(n_heads=4, seq_len=4, head_dim=512),
+            cls(n_heads=4, seq_len=64, head_dim=512),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return TQDequantModel()
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        # Random packed bytes exercise every codebook entry.
+        packed = torch.randint(
+            0,
+            256,
+            (self.batch_size, self.n_heads, self.seq_len, self.half_dim),
+            dtype=torch.uint8,
+        )
+        norms = (
+            torch.randn(
+                self.batch_size,
+                self.n_heads,
+                self.seq_len,
+                1,
+                dtype=torch.bfloat16,
+            ).abs()
+            + 0.1
+        )
+        # Deterministic codebook covering [-1, 1].
+        centroids = torch.linspace(-1.0, 1.0, 16, dtype=torch.bfloat16)
+        return (packed, norms, centroids)
+
+
+if __name__ == "__main__":  # noqa: C901
+    import argparse
+    import sys
+
+    from executorch.backends.mlx.test.test_utils import rebuild_op_test_runner
+
+    parser = argparse.ArgumentParser(description="Test mlx::tq_dequant op")
+    parser.add_argument("action", choices=["generate", "compare", "run", "list"])
+    parser.add_argument("--verbose", "-v", action="store_true")
+    parser.add_argument("--rebuild", action="store_true")
+    parser.add_argument("--config", type=str, default=None)
+    args = parser.parse_args()
+
+    if args.rebuild and not rebuild_op_test_runner(verbose=args.verbose):
+        sys.exit(1)
+
+    configs = TQDequantTest.get_test_configs()
+
+    if args.action == "list":
+        for cfg in configs:
+            print(f"  {cfg.name}")
+        sys.exit(0)
+
+    if args.config:
+        configs = [c for c in configs if c.name == args.config]
+        if not configs:
+            print(f"No config matching '{args.config}'")
+            sys.exit(1)
+
+    passed = 0
+    failed = 0
+    failed_names: List[str] = []
+
+    for test in configs:
+        if args.action == "generate":
+            pte_path, _, _ = test.generate_test_files(verbose=args.verbose)
+            print(f"Generated: {pte_path}")
+        elif args.action == "compare":
+            actual_path = test.get_test_dir() / "actual_output.bin"
+            ok, msg = test.compare_with_actual(actual_path)
+            print(f"{'✓' if ok else '✗'} {test.name}: {msg}")
+            if ok:
+                passed += 1
+            else:
+                failed += 1
+                failed_names.append(test.name)
+        elif args.action == "run":
+            ok = test.run_test(verbose=args.verbose)
+            if ok:
+                passed += 1
+            else:
+                failed += 1
+                failed_names.append(test.name)
+
+    if args.action in ("run", "compare"):
+        print(f"\nPassed: {passed}, Failed: {failed}")
+        if failed_names:
+            print(f"Failed: {', '.join(failed_names)}")
+        sys.exit(0 if failed == 0 else 1)
diff --git a/backends/mlx/model_ops/test_tq_norm.py b/backends/mlx/model_ops/test_tq_norm.py
new file mode 100644
index 00000000000..35c4491d8ae
--- /dev/null
+++ b/backends/mlx/model_ops/test_tq_norm.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Tests for ``mlx::tq_norm``.
+
+Verifies the fused L2-norm Metal kernel matches eager ``vector_norm``
+at head_dim values used by TurboQuant (D ∈ {128, 256, 512}).
+
+Usage::
+
+    python -m executorch.backends.mlx.model_ops.test_tq_norm run
+    python -m executorch.backends.mlx.model_ops.test_tq_norm run -v
+    python -m executorch.backends.mlx.model_ops.test_tq_norm run --rebuild
+"""
+
+from typing import List, Tuple
+
+import executorch.backends.mlx.model_ops.tq_norm  # noqa: F401
+
+import torch
+import torch.nn as nn
+
+from executorch.backends.mlx.test.test_utils import OpTestCase
+
+
+class TQNormModel(nn.Module):
+    """``x → ||x||₂`` over the last dim."""
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.ops.mlx.tq_norm(x)
+
+
+class TQNormTest(OpTestCase):
+    """Compare ``mlx::tq_norm`` to eager ``vector_norm`` within bf16 ULPs."""
+
+    name = "tq_norm"
+    rtol = 1e-2
+    atol = 1e-2
+
+    def __init__(
+        self,
+        batch_size: int = 1,
+        n_heads: int = 8,
+        seq_len: int = 4,
+        head_dim: int = 128,
+    ):
+        self.batch_size = batch_size
+        self.n_heads = n_heads
+        self.seq_len = seq_len
+        self.head_dim = head_dim
+        self.name = f"tq_norm_b{batch_size}_h{n_heads}_t{seq_len}_d{head_dim}"
+
+    @classmethod
+    def get_test_configs(cls) -> List["TQNormTest"]:
+        return [
+            # head_dim=128 (Qwen3.5 MoE / Gemma 4 sliding)
+            cls(seq_len=1, head_dim=128),
+            cls(seq_len=8, head_dim=128),
+            cls(seq_len=64, head_dim=128),
+            cls(n_heads=1, seq_len=1, head_dim=128),
+            # head_dim=256 (Gemma 4 sliding-attention)
+            cls(seq_len=4, head_dim=256),
+            cls(seq_len=16, head_dim=256),
+            # head_dim=512 (Gemma 4 31B full-attention)
+            cls(n_heads=4, seq_len=4, head_dim=512),
+            cls(n_heads=4, seq_len=64, head_dim=512),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return TQNormModel().to(torch.bfloat16)
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        # Activation-scale bf16 inputs.
+        x = torch.randn(
+            self.batch_size,
+            self.n_heads,
+            self.seq_len,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        ) * (1.0 / (self.head_dim**0.5))
+        return (x,)
+
+
+if __name__ == "__main__":  # noqa: C901
+    import argparse
+    import sys
+
+    from executorch.backends.mlx.test.test_utils import rebuild_op_test_runner
+
+    parser = argparse.ArgumentParser(description="Test mlx::tq_norm op")
+    parser.add_argument(
+        "action",
+        choices=["generate", "compare", "run", "list"],
+    )
+    parser.add_argument("--verbose", "-v", action="store_true")
+    parser.add_argument("--rebuild", action="store_true")
+    parser.add_argument("--config", type=str, default=None)
+    args = parser.parse_args()
+
+    if args.rebuild and not rebuild_op_test_runner(verbose=args.verbose):
+        sys.exit(1)
+
+    configs = TQNormTest.get_test_configs()
+
+    if args.action == "list":
+        for cfg in configs:
+            print(f"  {cfg.name}")
+        sys.exit(0)
+
+    if args.config:
+        configs = [c for c in configs if c.name == args.config]
+        if not configs:
+            print(f"No config matching '{args.config}'")
+            sys.exit(1)
+
+    passed = 0
+    failed = 0
+    failed_names: List[str] = []
+
+    for test in configs:
+        if args.action == "generate":
+            pte_path, _, _ = test.generate_test_files(verbose=args.verbose)
+            print(f"Generated: {pte_path}")
+        elif args.action == "compare":
+            actual_path = test.get_test_dir() / "actual_output.bin"
+            ok, msg = test.compare_with_actual(actual_path)
+            print(f"{'✓' if ok else '✗'} {test.name}: {msg}")
+            if ok:
+                passed += 1
+            else:
+                failed += 1
+                failed_names.append(test.name)
+        elif args.action == "run":
+            ok = test.run_test(verbose=args.verbose)
+            if ok:
+                passed += 1
+            else:
+                failed += 1
+                failed_names.append(test.name)
+
+    if args.action in ("run", "compare"):
+        print(f"\nPassed: {passed}, Failed: {failed}")
+        if failed_names:
+            print(f"Failed: {', '.join(failed_names)}")
+        sys.exit(0 if failed == 0 else 1)
diff --git a/backends/mlx/model_ops/tq4_compress.py b/backends/mlx/model_ops/tq4_compress.py
new file mode 100644
index 00000000000..f08d47b9a11
--- /dev/null
+++ b/backends/mlx/model_ops/tq4_compress.py
@@ -0,0 +1,189 @@
+#
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+
+"""
+``mlx::tq4_compress``: TurboQuant TQ4 quantize + nibble-pack.
+
+Maps ``(..., D)`` floats to ``(..., D/2)`` uint8 by:
+    1. Bucketizing each value against ``boundaries`` (15 sorted thresholds).
+    2. Packing pairs of 4-bit indices into one byte: high nibble holds
+       the even-position index, low nibble holds the odd-position index.
+
+Constraints:
+    * ``boundaries`` must be 1-D length 15 (4-bit codebook).
+    * Last dim of ``values`` must be even and statically known.
+
+Usage::
+
+    import executorch.backends.mlx.model_ops.tq4_compress  # noqa: F401
+
+    packed = torch.ops.mlx.tq4_compress(rotated, boundaries)
+    # rotated:    (..., D)   float
+    # boundaries: (15,)      same dtype as rotated
+    # packed:     (..., D/2) uint8
+"""
+
+from __future__ import annotations
+
+import torch
+from torch import Tensor
+from torch.fx.node import Node
+
+
+@torch.library.custom_op("mlx::tq4_compress", mutates_args=())
+def tq4_compress(values: Tensor, boundaries: Tensor) -> Tensor:
+    """TurboQuant TQ4 quantize + nibble-pack.
+
+    Args:
+        values: ``(..., D)`` float, last dim must be even.
+        boundaries: ``(15,)`` 1-D sorted, same dtype as ``values``.
+
+    Returns:
+        ``(..., D/2)`` uint8. Each byte holds two 4-bit indices: high
+        nibble is the even-position index, low nibble is the odd.
+    """
+    if boundaries.dim() != 1 or boundaries.shape[0] != 15:
+        raise ValueError(
+            f"mlx::tq4_compress: boundaries must be 1-D length 15; "
+            f"got shape {tuple(boundaries.shape)}"
+        )
+    if values.shape[-1] % 2 != 0:
+        raise ValueError(
+            f"mlx::tq4_compress: input last dim must be even; got "
+            f"{values.shape[-1]}"
+        )
+
+    indices = torch.bucketize(values, boundaries).to(torch.uint8)
+    packed = (indices[..., 0::2] << 4) | indices[..., 1::2]
+    return packed
+
+
+@torch.library.register_fake("mlx::tq4_compress")
+def tq4_compress_fake(values: Tensor, boundaries: Tensor) -> Tensor:
+    out_shape = list(values.shape)
+    out_shape[-1] = out_shape[-1] // 2
+    return values.new_empty(out_shape, dtype=torch.uint8)
+
+
+# ---------------------------------------------------------------------------
+# MLX handler
+# ---------------------------------------------------------------------------
+
+from executorch.backends.mlx.builder.op_helpers import (
+    emit_product,
+    emit_shape,
+    torch_dtype_to_scalar_type,
+)
+from executorch.backends.mlx.builder.op_registry import REGISTRY
+from executorch.backends.mlx.builder.program_builder import MLXProgramBuilder
+from executorch.backends.mlx.builder.slot_manager import Slot
+from executorch.backends.mlx.serialization.mlx_graph_schema import (
+    IntOrVid,
+    MetalKernelNode,
+)
+
+
+# One thread per output byte: reads ``values[2*gid]``, ``values[2*gid+1]``,
+# bucketizes against the 15 boundaries (loop unrolled, ``B`` is a template
+# constant), and packs the two 4-bit indices into one byte.
+_TQ4_COMPRESS_SOURCE = """
+    uint gid = thread_position_in_grid.x;
+    float v_hi = float(values[2 * gid]);
+    float v_lo = float(values[2 * gid + 1]);
+    uchar idx_hi = 0;
+    uchar idx_lo = 0;
+    #pragma unroll
+    for (uint i = 0; i < B; ++i) {
+        float bnd = float(boundaries[i]);
+        idx_hi += (uchar)(v_hi > bnd);
+        idx_lo += (uchar)(v_lo > bnd);
+    }
+    out[gid] = (idx_hi << 4) | idx_lo;
+"""
+
+
+@REGISTRY.register(target=[torch.ops.mlx.tq4_compress.default])
+def _tq4_compress_handler(P: MLXProgramBuilder, n: Node) -> Slot:
+    """Lower ``mlx::tq4_compress`` to a fused Metal kernel."""
+    args = P.args(n)
+    if len(args) != 2:
+        raise ValueError(
+            f"mlx::tq4_compress: expected 2 args (values, boundaries), "
+            f"got {len(args)}"
+        )
+
+    values_slot, boundaries_slot = args
+    values_node = n.args[0]
+    boundaries_node = n.args[1]
+
+    values_meta = values_node.meta["val"]
+    boundaries_meta = boundaries_node.meta["val"]
+
+    # Validate boundaries length: must be 15 for 4-bit nibble pack.
+    bnd_shape = boundaries_meta.shape
+    if (
+        len(bnd_shape) != 1
+        or not isinstance(bnd_shape[0], int)
+        or int(bnd_shape[0]) != 15
+    ):
+        raise ValueError(
+            f"mlx::tq4_compress: boundaries must be 1-D length 15; "
+            f"got shape {tuple(bnd_shape)}"
+        )
+
+    last_dim = values_meta.shape[-1]
+    if not isinstance(last_dim, int):
+        raise NotImplementedError(
+            "mlx::tq4_compress: last dim must be statically known"
+        )
+    if int(last_dim) % 2 != 0:
+        raise ValueError(f"mlx::tq4_compress: last dim must be even; got {last_dim}")
+    half_last = int(last_dim) // 2
+
+    in_dtype_int = torch_dtype_to_scalar_type(values_meta.dtype)
+
+    out = P.make_or_get_slot(n)
+    leading = emit_shape(P, values_node, values_slot, end_dim=-1)
+    half_last_iov = IntOrVid.from_literal(half_last)
+    out_shape_flat = leading + [half_last_iov]
+
+    # One thread per output byte, so the grid size is the output numel
+    # (product of leading dims times the halved last dim).
+    n_out_iov = emit_product(P, leading + [half_last_iov])
+
+    P.emit(
+        MetalKernelNode(
+            name="tq4_compress",
+            source=_TQ4_COMPRESS_SOURCE,
+            inputs=[
+                P.slot_to_tid(values_slot),
+                P.slot_to_tid(boundaries_slot),
+            ],
+            outputs=[P.slot_to_tid(out)],
+            grid=[n_out_iov, IntOrVid.from_literal(1), IntOrVid.from_literal(1)],
+            # 32 threads per threadgroup so each TG fills one Apple-GPU SIMD group
+            threadgroup=[
+                IntOrVid.from_literal(32),
+                IntOrVid.from_literal(1),
+                IntOrVid.from_literal(1),
+            ],
+            input_names=["values", "boundaries"],
+            output_names=["out"],
+            output_shapes_flat=out_shape_flat,
+            output_shape_lengths=[len(out_shape_flat)],
+            output_dtypes=[torch_dtype_to_scalar_type(torch.uint8)],
+            template_arg_names=["InT", "B"],
+            template_arg_kinds=[2, 0],  # 2=dtype, 0=int
+            template_arg_values=[
+                in_dtype_int,
+                15,
+            ],
+        )
+    )
+
+    return out
diff --git a/backends/mlx/model_ops/tq_dequant.py b/backends/mlx/model_ops/tq_dequant.py
new file mode 100644
index 00000000000..28a168e9be0
--- /dev/null
+++ b/backends/mlx/model_ops/tq_dequant.py
@@ -0,0 +1,216 @@
+#
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+
+"""
+``mlx::tq_dequant``: TurboQuant TQ4 unpack + centroid gather + multiply-by-norm.
+
+    indices    = unpack 4-bit nibbles from packed bytes  (..., D)
+    centvals   = centroids[indices]                       (..., D)
+    out        = centvals * norms                         (..., D)
+
+Output is in **rotated space** — the inverse rotation, if needed, is
+left to the caller (typically MLX's tuned bf16 GEMM).
+
+Constraints:
+    * ``D`` (= ``packed.shape[-1] * 2``) must be a multiple of 64.
+    * ``centroids`` must be a 1-D tensor of length 16.
+    * Output dtype matches ``norms.dtype``.
+
+Usage::
+
+    import executorch.backends.mlx.model_ops.tq_dequant  # noqa: F401
+
+    out = torch.ops.mlx.tq_dequant(packed, norms, centroids)
+    # packed:    (..., D/2) uint8
+    # norms:     (..., 1)   bf16
+    # centroids: (16,)      bf16
+    # out:       (..., D)   bf16  (in rotated space)
+"""
+
+from __future__ import annotations
+
+import torch
+from torch import Tensor
+from torch.fx.node import Node
+
+
+# ---------------------------------------------------------------------------
+# Custom op + eager fallback
+# ---------------------------------------------------------------------------
+
+
+@torch.library.custom_op("mlx::tq_dequant", mutates_args=())
+def tq_dequant(
+    packed: Tensor,
+    norms: Tensor,
+    centroids: Tensor,
+) -> Tensor:
+    """Fused unpack + centroid gather + multiply-by-norm.
+
+    Args:
+        packed: ``(..., D/2)`` uint8. High nibble = even-position index,
+            low nibble = odd-position index.
+        norms: ``(..., 1)`` of compute dtype, broadcasts over D.
+        centroids: ``(16,)`` of compute dtype.
+
+    Returns:
+        ``(..., D)`` of compute dtype, in rotated space.
+    """
+    if centroids.dim() != 1 or centroids.shape[0] != 16:
+        raise ValueError(
+            f"mlx::tq_dequant: centroids must be 1-D length 16; got "
+            f"shape {tuple(centroids.shape)}"
+        )
+    high = (packed >> 4).long()
+    low = (packed & 0x0F).long()
+    indices = torch.stack([high, low], dim=-1).reshape(
+        *packed.shape[:-1], packed.shape[-1] * 2
+    )
+    return centroids[indices] * norms
+
+
+@torch.library.register_fake("mlx::tq_dequant")
+def tq_dequant_fake(packed: Tensor, norms: Tensor, centroids: Tensor) -> Tensor:
+    out_shape = list(packed.shape)
+    out_shape[-1] = out_shape[-1] * 2
+    return packed.new_empty(out_shape, dtype=norms.dtype)
+
+
+# ---------------------------------------------------------------------------
+# MLX handler
+# ---------------------------------------------------------------------------
+
+from executorch.backends.mlx.builder.op_helpers import (
+    emit_product,
+    emit_shape,
+    torch_dtype_to_scalar_type,
+)
+from executorch.backends.mlx.builder.op_registry import REGISTRY
+from executorch.backends.mlx.builder.program_builder import MLXProgramBuilder
+from executorch.backends.mlx.builder.slot_manager import Slot
+from executorch.backends.mlx.serialization.mlx_graph_schema import (
+    IntOrVid,
+    MetalKernelNode,
+)
+
+
+_TQ_DEQUANT_HEADER = """
+#include <metal_simdgroup>
+using namespace metal;
+"""
+
+
+# Per-vector decompress:
+#   * Grid (32, 1, M), threadgroup (32, 1, 1): one simdgroup per vector.
+#   * Each lane handles DIMS_PER_LANE = D/32 output values, sourced
+#     from BYTES_PER_LANE = DIMS_PER_LANE/2 packed bytes.
+#   * The 16-entry codebook is preloaded into per-lane registers.
+_TQ_DEQUANT_SOURCE = """
+    constexpr uint DIMS_PER_LANE  = D / 32;
+    constexpr uint BYTES_PER_LANE = DIMS_PER_LANE / 2;
+
+    uint vec_id  = thread_position_in_grid.z;
+    uint lane_id = thread_position_in_threadgroup.x;
+
+    InT cent[16];
+    for (uint c = 0; c < 16; ++c) {
+        cent[c] = centroids[c];
+    }
+
+    InT norm = norms[vec_id];
+
+    uint packed_base = vec_id * (D / 2) + lane_id * BYTES_PER_LANE;
+    uint out_base    = vec_id * D       + lane_id * DIMS_PER_LANE;
+
+    for (uint i = 0; i < BYTES_PER_LANE; ++i) {
+        uchar byte = packed[packed_base + i];
+        uchar idx_hi = (byte >> 4) & 0x0F;
+        uchar idx_lo = byte & 0x0F;
+        out[out_base + 2 * i + 0] = cent[idx_hi] * norm;
+        out[out_base + 2 * i + 1] = cent[idx_lo] * norm;
+    }
+"""
+
+
+@REGISTRY.register(target=[torch.ops.mlx.tq_dequant.default])
+def _tq_dequant_handler(P: MLXProgramBuilder, n: Node) -> Slot:
+    """Lower ``mlx::tq_dequant`` to a single fused Metal kernel."""
+    args = P.args(n)
+    if len(args) != 3:
+        raise ValueError(
+            f"mlx::tq_dequant: expected 3 args (packed, norms, centroids); "
+            f"got {len(args)}"
+        )
+    packed_slot, norms_slot, centroids_slot = args
+    packed_node = n.args[0]
+    norms_node = n.args[1]
+    centroids_node = n.args[2]
+
+    packed_meta = packed_node.meta["val"]
+    norms_meta = norms_node.meta["val"]
+    centroids_meta = centroids_node.meta["val"]
+
+    if centroids_meta.dim() != 1 or int(centroids_meta.shape[0]) != 16:
+        raise ValueError(
+            f"mlx::tq_dequant: centroids must be 1-D length 16; got "
+            f"shape {tuple(centroids_meta.shape)}"
+        )
+
+    last_dim_packed = packed_meta.shape[-1]
+    if not isinstance(last_dim_packed, int):
+        raise NotImplementedError(
+            "mlx::tq_dequant: packed last dim must be statically known"
+        )
+    half_D = int(last_dim_packed)
+    D = half_D * 2
+    if D % 64 != 0:
+        raise NotImplementedError(
+            f"mlx::tq_dequant: unpacked dim must be a multiple of 64 "
+            f"(2 dims per packed byte, 32 SIMD lanes); got D={D}"
+        )
+
+    out_dtype_int = torch_dtype_to_scalar_type(norms_meta.dtype)
+
+    out = P.make_or_get_slot(n)
+    leading = emit_shape(P, packed_node, packed_slot, end_dim=-1)
+    out_shape_flat = leading + [IntOrVid.from_literal(D)]
+    M_iov = emit_product(P, leading)
+
+    P.emit(
+        MetalKernelNode(
+            name="tq_dequant",
+            source=_TQ_DEQUANT_SOURCE,
+            header=_TQ_DEQUANT_HEADER,
+            inputs=[
+                P.slot_to_tid(packed_slot),
+                P.slot_to_tid(norms_slot),
+                P.slot_to_tid(centroids_slot),
+            ],
+            outputs=[P.slot_to_tid(out)],
+            grid=[
+                IntOrVid.from_literal(32),
+                IntOrVid.from_literal(1),
+                M_iov,
+            ],
+            threadgroup=[
+                IntOrVid.from_literal(32),
+                IntOrVid.from_literal(1),
+                IntOrVid.from_literal(1),
+            ],
+            input_names=["packed", "norms", "centroids"],
+            output_names=["out"],
+            output_shapes_flat=out_shape_flat,
+            output_shape_lengths=[len(out_shape_flat)],
+            output_dtypes=[out_dtype_int],
+            template_arg_names=["InT", "D"],
+            template_arg_kinds=[2, 0],  # 2=dtype, 0=int
+            template_arg_values=[out_dtype_int, D],
+        )
+    )
+
+    return out
diff --git a/backends/mlx/model_ops/tq_norm.py b/backends/mlx/model_ops/tq_norm.py
new file mode 100644
index 00000000000..7e6a4d657f3
--- /dev/null
+++ b/backends/mlx/model_ops/tq_norm.py
@@ -0,0 +1,170 @@
+#
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+
+"""
+``mlx::tq_norm``: L2 norm along the last dim, lowered to a single Metal kernel.
+
+    norms[..., 0] = sqrt(sum_i x[..., i]^2)
+
+Reads / writes ``x.dtype`` directly (no graph-level dtype casts).
+Reduces in fp32 inside Metal registers via ``simd_sum`` for precision
+on large ``D`` (bf16 sum-of-squares loses too much for D>=128).
+
+Constraints:
+    * Last dim ``D`` must be statically known and a multiple of 32.
+
+Usage::
+
+    import executorch.backends.mlx.model_ops.tq_norm  # noqa: F401
+
+    norms = torch.ops.mlx.tq_norm(x)
+    # x:     (..., D) bf16
+    # norms: (..., 1) bf16, equal to vector_norm(x, dim=-1, keepdim=True)
+"""
+
+from __future__ import annotations
+
+import torch
+from torch import Tensor
+from torch.fx.node import Node
+
+
+# ---------------------------------------------------------------------------
+# Custom op + eager fallback
+# ---------------------------------------------------------------------------
+
+
+@torch.library.custom_op("mlx::tq_norm", mutates_args=())
+def tq_norm(x: Tensor) -> Tensor:
+    """L2 norm along last dim.
+
+    Args:
+        x: ``(..., D)``. For MLX lowering, ``D`` must be a multiple of 32.
+
+    Returns:
+        ``(..., 1)`` of the same dtype as ``x``.
+    """
+    return torch.linalg.vector_norm(x, dim=-1, keepdim=True).to(x.dtype)
+
+
+@torch.library.register_fake("mlx::tq_norm")
+def tq_norm_fake(x: Tensor) -> Tensor:
+    out_shape = list(x.shape)
+    out_shape[-1] = 1
+    return x.new_empty(out_shape, dtype=x.dtype)
+
+
+# ---------------------------------------------------------------------------
+# MLX handler
+# ---------------------------------------------------------------------------
+
+from executorch.backends.mlx.builder.op_helpers import (
+    emit_product,
+    emit_shape,
+    torch_dtype_to_scalar_type,
+)
+from executorch.backends.mlx.builder.op_registry import REGISTRY
+from executorch.backends.mlx.builder.program_builder import MLXProgramBuilder
+from executorch.backends.mlx.builder.slot_manager import Slot
+from executorch.backends.mlx.serialization.mlx_graph_schema import (
+    IntOrVid,
+    MetalKernelNode,
+)
+
+
+_TQ_NORM_HEADER = """
+#include <metal_simdgroup>
+using namespace metal;
+"""
+
+
+# Per-vector reduction:
+#   * Grid (32, 1, M), threadgroup (32, 1, 1): one simdgroup per vector.
+#   * Each lane covers DIMS_PER_LANE = D/32 elements; partial sums are
+#     accumulated in an fp32 register.
+#   * ``simd_sum`` reduces across the 32 lanes; lane 0 sqrts and writes.
+_TQ_NORM_SOURCE = """
+    constexpr uint DIMS_PER_LANE = D / 32;
+
+    uint vec_id = thread_position_in_grid.z;
+    uint lane_id = thread_position_in_threadgroup.x;
+
+    uint base = vec_id * D + lane_id * DIMS_PER_LANE;
+
+    float local_sum_sq = 0.0f;
+    for (uint i = 0; i < DIMS_PER_LANE; ++i) {
+        float v = float(x[base + i]);
+        local_sum_sq += v * v;
+    }
+
+    float total_sum_sq = simd_sum(local_sum_sq);
+
+    if (lane_id == 0) {
+        norms[vec_id] = (InT)sqrt(total_sum_sq);
+    }
+"""
+
+
+@REGISTRY.register(target=[torch.ops.mlx.tq_norm.default])
+def _tq_norm_handler(P: MLXProgramBuilder, n: Node) -> Slot:
+    """Lower ``mlx::tq_norm`` to a single fused Metal kernel."""
+    args = P.args(n)
+    if len(args) != 1:
+        raise ValueError(f"mlx::tq_norm: expected 1 arg (x), got {len(args)}")
+
+    (x_slot,) = args
+    x_node = n.args[0]
+
+    x_meta = x_node.meta["val"]
+
+    last_dim = x_meta.shape[-1]
+    if not isinstance(last_dim, int):
+        raise NotImplementedError("mlx::tq_norm: last dim must be statically known")
+    D = int(last_dim)
+    if D % 32 != 0:
+        raise NotImplementedError(
+            f"mlx::tq_norm: last dim must be a multiple of 32 (one per "
+            f"SIMD lane); got D={D}"
+        )
+
+    in_dtype_int = torch_dtype_to_scalar_type(x_meta.dtype)
+
+    out = P.make_or_get_slot(n)
+    leading = emit_shape(P, x_node, x_slot, end_dim=-1)
+    out_shape_flat = leading + [IntOrVid.from_literal(1)]
+    M_iov = emit_product(P, leading)
+
+    P.emit(
+        MetalKernelNode(
+            name="tq_norm",
+            source=_TQ_NORM_SOURCE,
+            header=_TQ_NORM_HEADER,
+            inputs=[P.slot_to_tid(x_slot)],
+            outputs=[P.slot_to_tid(out)],
+            grid=[
+                IntOrVid.from_literal(32),
+                IntOrVid.from_literal(1),
+                M_iov,
+            ],
+            threadgroup=[
+                IntOrVid.from_literal(32),
+                IntOrVid.from_literal(1),
+                IntOrVid.from_literal(1),
+            ],
+            input_names=["x"],
+            output_names=["norms"],
+            output_shapes_flat=out_shape_flat,
+            output_shape_lengths=[len(out_shape_flat)],
+            output_dtypes=[in_dtype_int],
+            template_arg_names=["InT", "D"],
+            template_arg_kinds=[2, 0],  # 2=dtype, 0=int
+            template_arg_values=[in_dtype_int, D],
+        )
+    )
+
+    return out
diff --git a/backends/mlx/test/op_test_runner.cpp b/backends/mlx/test/op_test_runner.cpp
index 6bed13d7a56..925ff410f42 100644
--- a/backends/mlx/test/op_test_runner.cpp
+++ b/backends/mlx/test/op_test_runner.cpp
@@ -58,6 +58,7 @@ enum class DType : uint32_t {
   Int64 = 3,
   BFloat16 = 4,
   Bool = 5,
+  UInt8 = 6,
 };
 
 size_t dtype_size(DType dtype) {
@@ -74,6 +75,8 @@ size_t dtype_size(DType dtype) {
       return 2;
     case DType::Bool:
       return 1;
+    case DType::UInt8:
+      return 1;
     default:
       return 4;
   }
@@ -93,6 +96,8 @@ exec_aten::ScalarType dtype_to_scalar_type(DType dtype) {
       return exec_aten::ScalarType::BFloat16;
     case DType::Bool:
       return exec_aten::ScalarType::Bool;
+    case DType::UInt8:
+      return exec_aten::ScalarType::Byte;
     default:
       return exec_aten::ScalarType::Float;
   }
@@ -112,6 +117,8 @@ DType scalar_type_to_dtype(exec_aten::ScalarType stype) {
       return DType::BFloat16;
     case exec_aten::ScalarType::Bool:
       return DType::Bool;
+    case exec_aten::ScalarType::Byte:
+      return DType::UInt8;
     default:
       return DType::Float32;
   }
@@ -316,6 +323,11 @@ int main(int argc, char* argv[]) {
         std::memcpy(data.data(), t.data.data(), t.data.size());
         tensor_ptr = make_tensor_ptr(
             sizes, std::move(data), {}, {}, exec_aten::ScalarType::Bool);
+      } else if (t.dtype == DType::UInt8) {
+        std::vector<uint8_t> data(t.data.size());
+        std::memcpy(data.data(), t.data.data(), t.data.size());
+        tensor_ptr = make_tensor_ptr(
+            sizes, std::move(data), {}, {}, exec_aten::ScalarType::Byte);
       } else {
         std::cerr << "Unsupported dtype: " << static_cast<int>(t.dtype)
                   << std::endl;
diff --git a/backends/mlx/test/test_ops.py b/backends/mlx/test/test_ops.py
index 45ea024f0e8..ec80b1d3911 100644
--- a/backends/mlx/test/test_ops.py
+++ b/backends/mlx/test/test_ops.py
@@ -2236,6 +2236,402 @@ def get_dynamic_shapes(self) -> Optional[Dict[str, any]]:
         }
 
 
+from executorch.backends.mlx.llm.turboquant_cache import TurboQuantKVCache
+
+
+class TurboQuantKVCacheModel(nn.Module):
+    """
+    Test model wrapping TurboQuantKVCache.update().
+
+    TurboQuantKVCache stores K/V in rotated 4-bit packed form. ``update``
+    returns the four cache buffers (k_packed, k_norms, v_packed, v_norms)
+    rather than uncompressed K/V.
+    """
+
+    def __init__(
+        self,
+        max_batch_size: int,
+        max_context_length: int,
+        n_heads: int,
+        head_dim: int,
+        enable_dynamic_shape: bool = True,
+    ):
+        super().__init__()
+        self.cache = TurboQuantKVCache(
+            max_batch_size=max_batch_size,
+            max_context_length=max_context_length,
+            n_heads=n_heads,
+            head_dim=head_dim,
+            enable_dynamic_shape=enable_dynamic_shape,
+        )
+
+    def forward(
+        self,
+        input_pos: torch.Tensor,
+        k_val: torch.Tensor,
+        v_val: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        return self.cache.update(input_pos, k_val, v_val)
+
+
+@register_test
+class TurboQuantKVCacheTest(OpTestCase):
+    """
+    Test case for TurboQuantKVCache with tensor input_pos.
+
+    Verifies eager-vs-MLX consistency for the compress + write path
+    (``mlx::tq_norm``, ``mlx::tq4_compress``, ``mlx::kv_cache_update``).
+    The packed cache is uint8 (byte-exact), norms are bf16 (loose tol).
+    """
+
+    name = "turboquant_kv_cache"
+    # uint8 packed cache stays effectively exact under atol<1; bf16
+    # norms need ~1e-1 absolute slack for the eager-vs-MLX bf16 path.
+    rtol = 1e-5
+    atol = 1e-1
+
+    def __init__(
+        self,
+        n_heads: int = 4,
+        head_dim: int = 64,
+        max_context_length: int = 128,
+        seq_step: int = 8,
+        enable_dynamic_shape: bool = True,
+    ):
+        # TurboQuantKVCache requires batch=1.
+        self.max_batch_size = 1
+        self.n_heads = n_heads
+        self.head_dim = head_dim
+        self.max_context_length = max_context_length
+        self.seq_step = seq_step
+        self.enable_dynamic_shape = enable_dynamic_shape
+
+    @classmethod
+    def get_test_configs(cls) -> List["TurboQuantKVCacheTest"]:
+        return [
+            cls(),  # default: head_dim=64 (smallest valid)
+            cls(head_dim=128),
+            cls(enable_dynamic_shape=False),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return TurboQuantKVCacheModel(
+            max_batch_size=self.max_batch_size,
+            max_context_length=self.max_context_length,
+            n_heads=self.n_heads,
+            head_dim=self.head_dim,
+            enable_dynamic_shape=self.enable_dynamic_shape,
+        )
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        input_pos = torch.tensor([0], dtype=torch.int64)
+        k_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            self.seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        v_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            self.seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        return (input_pos, k_val, v_val)
+
+    def create_test_inputs(self) -> Tuple[torch.Tensor, ...]:
+        # With static shape, test inputs must match the exported seq length.
+        test_seq_step = (
+            self.seq_step if not self.enable_dynamic_shape else self.seq_step + 4
+        )
+        input_pos = torch.tensor([16], dtype=torch.int64)
+        k_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            test_seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        v_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            test_seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        return (input_pos, k_val, v_val)
+
+    def get_dynamic_shapes(self) -> Optional[Dict[str, any]]:
+        if not self.enable_dynamic_shape:
+            return None
+        seq_dim = Dim("seq_step", min=1, max=self.max_context_length)
+        return {
+            "input_pos": None,
+            "k_val": {2: seq_dim},
+            "v_val": {2: seq_dim},
+        }
+
+
+class TurboQuantKVCacheIntModel(nn.Module):
+    """
+    Test model that passes int/SymInt (not tensor) to
+    ``TurboQuantKVCache.update`` — the multi-layer pattern.
+    """
+
+    def __init__(
+        self,
+        max_batch_size: int,
+        max_context_length: int,
+        n_heads: int,
+        head_dim: int,
+        enable_dynamic_shape: bool = True,
+    ):
+        super().__init__()
+        self.cache = TurboQuantKVCache(
+            max_batch_size=max_batch_size,
+            max_context_length=max_context_length,
+            n_heads=n_heads,
+            head_dim=head_dim,
+            enable_dynamic_shape=enable_dynamic_shape,
+        )
+
+    def forward(
+        self,
+        input_pos: torch.Tensor,
+        k_val: torch.Tensor,
+        v_val: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        start_pos = input_pos[0].item()
+        return self.cache.update(start_pos, k_val, v_val)
+
+
+@register_test
+class TurboQuantKVCacheIntTest(OpTestCase):
+    """Test case for TurboQuantKVCache with int/SymInt input_pos."""
+
+    name = "turboquant_kv_cache_int"
+    rtol = 1e-5
+    atol = 1e-1
+
+    def __init__(
+        self,
+        n_heads: int = 4,
+        head_dim: int = 64,
+        max_context_length: int = 128,
+        seq_step: int = 8,
+        enable_dynamic_shape: bool = True,
+    ):
+        self.max_batch_size = 1
+        self.n_heads = n_heads
+        self.head_dim = head_dim
+        self.max_context_length = max_context_length
+        self.seq_step = seq_step
+        self.enable_dynamic_shape = enable_dynamic_shape
+
+    @classmethod
+    def get_test_configs(cls) -> List["TurboQuantKVCacheIntTest"]:
+        return [
+            cls(),
+            cls(head_dim=128),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return TurboQuantKVCacheIntModel(
+            max_batch_size=self.max_batch_size,
+            max_context_length=self.max_context_length,
+            n_heads=self.n_heads,
+            head_dim=self.head_dim,
+            enable_dynamic_shape=self.enable_dynamic_shape,
+        )
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        input_pos = torch.tensor([0], dtype=torch.int64)
+        k_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            self.seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        v_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            self.seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        return (input_pos, k_val, v_val)
+
+    def create_test_inputs(self) -> Tuple[torch.Tensor, ...]:
+        test_seq_step = self.seq_step + 4
+        input_pos = torch.tensor([16], dtype=torch.int64)
+        k_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            test_seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        v_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            test_seq_step,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        return (input_pos, k_val, v_val)
+
+    def get_dynamic_shapes(self) -> Optional[Dict[str, any]]:
+        if not self.enable_dynamic_shape:
+            return None
+        seq_dim = Dim("seq_step", min=1, max=self.max_context_length)
+        return {
+            "input_pos": None,
+            "k_val": {2: seq_dim},
+            "v_val": {2: seq_dim},
+        }
+
+
+class TurboQuantKVCacheSdpaModel(nn.Module):
+    """
+    Test model wrapping ``TurboQuantKVCache.update + .sdpa`` — the full
+    prefill/decode flow (compress, dequant, attention in rotated space,
+    un-rotate output).
+    """
+
+    def __init__(
+        self,
+        max_batch_size: int,
+        max_context_length: int,
+        n_heads: int,
+        head_dim: int,
+        enable_dynamic_shape: bool = True,
+    ):
+        super().__init__()
+        self.max_context_length = max_context_length
+        self.cache = TurboQuantKVCache(
+            max_batch_size=max_batch_size,
+            max_context_length=max_context_length,
+            n_heads=n_heads,
+            head_dim=head_dim,
+            enable_dynamic_shape=enable_dynamic_shape,
+        )
+
+    def forward(
+        self,
+        input_pos: torch.Tensor,
+        k_val: torch.Tensor,
+        v_val: torch.Tensor,
+        query: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        start_pos = input_pos[0].item()
+        seq_len = k_val.size(2)
+        torch._check(start_pos >= 0)
+        torch._check(start_pos + seq_len <= self.max_context_length)
+
+        k_packed, k_norms, v_packed, v_norms = self.cache.update(
+            start_pos, k_val, v_val
+        )
+        out = self.cache.sdpa(query, start_pos)
+        return out, k_packed, k_norms, v_packed, v_norms
+
+
+@register_test
+class TurboQuantKVCacheSdpaTest(OpTestCase):
+    """
+    Test case for ``TurboQuantKVCache.update`` + ``.sdpa``.
+
+    Exercises the full forward path: compress + write through
+    ``mlx::tq_norm`` / ``mlx::tq4_compress`` / ``mlx::kv_cache_update``,
+    then dequantize and attend via ``mlx::tq_dequant`` /
+    ``mlx::custom_sdpa`` with Q rotated in and output rotated back.
+    Looser tolerance is needed because attention runs in bf16.
+    """
+
+    name = "turboquant_kv_cache_sdpa"
+    rtol = 1e-5
+    atol = 5e-2  # bf16 SDPA output
+
+    def __init__(
+        self,
+        n_heads: int = 4,
+        head_dim: int = 64,
+        max_context_length: int = 128,
+        seq_step: int = 8,
+        enable_dynamic_shape: bool = True,
+    ):
+        self.max_batch_size = 1
+        self.n_heads = n_heads
+        self.head_dim = head_dim
+        self.max_context_length = max_context_length
+        self.seq_step = seq_step
+        self.enable_dynamic_shape = enable_dynamic_shape
+
+    @classmethod
+    def get_test_configs(cls) -> List["TurboQuantKVCacheSdpaTest"]:
+        return [
+            cls(),
+            cls(head_dim=128),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return TurboQuantKVCacheSdpaModel(
+            max_batch_size=self.max_batch_size,
+            max_context_length=self.max_context_length,
+            n_heads=self.n_heads,
+            head_dim=self.head_dim,
+            enable_dynamic_shape=self.enable_dynamic_shape,
+        )
+
+    def _make_inputs(
+        self, start: int, q_len: int, kv_len: int
+    ) -> Tuple[torch.Tensor, ...]:
+        input_pos = torch.tensor([start], dtype=torch.int64)
+        k_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            kv_len,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        v_val = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            kv_len,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        query = torch.randn(
+            self.max_batch_size,
+            self.n_heads,
+            q_len,
+            self.head_dim,
+            dtype=torch.bfloat16,
+        )
+        return (input_pos, k_val, v_val, query)
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        # Prefill-style: start=0, q_len == kv_len.
+        return self._make_inputs(start=0, q_len=self.seq_step, kv_len=self.seq_step)
+
+    def create_test_inputs(self) -> Tuple[torch.Tensor, ...]:
+        # Decode-style: write a single token into the existing cache.
+        return self._make_inputs(start=16, q_len=1, kv_len=1)
+
+    def get_dynamic_shapes(self) -> Optional[Dict[str, any]]:
+        if not self.enable_dynamic_shape:
+            return None
+        seq_dim = Dim("seq_step", min=1, max=self.max_context_length)
+        return {
+            "input_pos": None,
+            "k_val": {2: seq_dim},
+            "v_val": {2: seq_dim},
+            "query": {2: seq_dim},
+        }
+
+
 class RingBufferKVCacheModel(nn.Module):
     """
     Test model wrapping RingBufferKVCache from cache.py.
diff --git a/backends/mlx/test/test_utils.py b/backends/mlx/test/test_utils.py
index 660968195b7..5dbc35b824d 100644
--- a/backends/mlx/test/test_utils.py
+++ b/backends/mlx/test/test_utils.py
@@ -44,6 +44,7 @@ class TestTimeoutError(Exception):
 DTYPE_INT64 = 3
 DTYPE_BFLOAT16 = 4
 DTYPE_BOOL = 5
+DTYPE_UINT8 = 6
 
 
 # Default tolerance presets for different data types.
@@ -110,6 +111,7 @@ def torch_dtype_to_bin_dtype(dtype: torch.dtype) -> int:
         torch.int64: DTYPE_INT64,
         torch.bfloat16: DTYPE_BFLOAT16,
         torch.bool: DTYPE_BOOL,
+        torch.uint8: DTYPE_UINT8,
     }
     if dtype not in mapping:
         raise ValueError(f"Unsupported dtype: {dtype}")
@@ -125,6 +127,7 @@ def bin_dtype_to_torch_dtype(dtype_val: int) -> torch.dtype:
         DTYPE_INT64: torch.int64,
         DTYPE_BFLOAT16: torch.bfloat16,
         DTYPE_BOOL: torch.bool,
+        DTYPE_UINT8: torch.uint8,
     }
     if dtype_val not in mapping:
         raise ValueError(f"Unknown dtype value: {dtype_val}")
@@ -208,6 +211,7 @@ def load_tensors_from_bin(path: Union[str, Path]) -> List[torch.Tensor]:
         torch.int32: np.int32,
         torch.int64: np.int64,
         torch.bool: np.bool_,
+        torch.uint8: np.uint8,
         # bfloat16 needs special handling - read as uint16
     }
 
@@ -219,6 +223,7 @@ def load_tensors_from_bin(path: Union[str, Path]) -> List[torch.Tensor]:
         torch.int64: 8,
         torch.bfloat16: 2,
         torch.bool: 1,
+        torch.uint8: 1,
     }
 
     tensors = []
diff --git a/examples/models/gemma4_31b/README.md b/examples/models/gemma4_31b/README.md
index c6ac10748d8..ae3bcb24c19 100644
--- a/examples/models/gemma4_31b/README.md
+++ b/examples/models/gemma4_31b/README.md
@@ -93,6 +93,24 @@ method with dynamic sequence length and host-side sampling.
 
 Writes `model.pte` (and optionally `model.ptd`) into `--output-dir`.
 
+#### TurboQuant KV cache (long context, MLX only)
+
+For long-context inference, add `--turboquant` to swap the full-attention
+layers' KV cache for a TurboQuant TQ4 cache (4-bit codebook + nibble pack).
+This gives ~3.8× cache memory savings on the full-attention layers and lets
+you fit context lengths that wouldn't fit in bf16. Sliding-window layers are unaffected.
+
+```bash
+python examples/models/gemma4_31b/export.py \
+    --prequantized ./gemma4_31b_int4 \
+    --output-dir ./gemma4_31b_exports_mlx_tq \
+    --max-seq-len 65536 \
+    --backend mlx \
+    --turboquant
+```
+
+Use TurboQuant when you need context beyond what bf16 fits; otherwise leave it off.
+
 ## Eager inference
 
 The prompt is automatically wrapped with the Gemma 4 IT chat template.
diff --git a/examples/models/gemma4_31b/export.py b/examples/models/gemma4_31b/export.py
index bd648f534b5..ed3dcdba9c3 100644
--- a/examples/models/gemma4_31b/export.py
+++ b/examples/models/gemma4_31b/export.py
@@ -141,12 +141,19 @@ def export_and_lower(
     config: Gemma4_31BConfig,
     output_dir: str,
     backend: str = "cuda",
+    use_turboquant: bool = False,
 ) -> None:
     """Export and lower the model to ExecuTorch for the given backend."""
     if backend == "cuda":
+        if use_turboquant:
+            raise ValueError(
+                "--turboquant is only supported with --backend mlx "
+                "(the CUDA path here uses a different TurboQuant integration; "
+                "see examples/models/qwen3_5_moe/export.py)."
+            )
         _export_cuda(model, config, output_dir)
     elif backend == "mlx":
-        _export_mlx(model, config, output_dir)
+        _export_mlx(model, config, output_dir, use_turboquant=use_turboquant)
     else:
         raise ValueError(
             f"Unsupported backend: {backend!r}. Supported: {_SUPPORTED_BACKENDS}."
@@ -279,7 +286,12 @@ def _export_cuda(model: Gemma4_31B, config: Gemma4_31BConfig, output_dir: str) -
     print("Done.")
 
 
-def _export_mlx(model: Gemma4_31B, config: Gemma4_31BConfig, output_dir: str) -> None:
+def _export_mlx(
+    model: Gemma4_31B,
+    config: Gemma4_31BConfig,
+    output_dir: str,
+    use_turboquant: bool = False,
+) -> None:
     """Export to .pte via torch.export + MLX backend.
 
     Unlike CUDA (which exports separate decode/prefill methods with an
@@ -287,6 +299,10 @@ def _export_mlx(model: Gemma4_31B, config: Gemma4_31BConfig, output_dir: str) ->
     sequence length.  No int4_dispatch import — IntxUnpackedToInt8Tensor's
     default dispatch produces the ``dequantize_affine → linear`` pattern
     that MLX's QuantizedLinearHandler matches.
+
+    When ``use_turboquant=True``, full-attention layers swap to
+    ``MLXTurboQuantKVCache`` for ~3.8× KV cache memory savings. Sliding
+    layers are unaffected (already use ``RingBufferKVCache``).
     """
     import gc
 
@@ -304,10 +320,13 @@ def _export_mlx(model: Gemma4_31B, config: Gemma4_31BConfig, output_dir: str) ->
     from executorch.exir.passes import MemoryPlanningPass
     from torch.export import Dim, export
 
-    mlx_source_transformations(model, dtype=torch.bfloat16)
+    mlx_source_transformations(
+        model, dtype=torch.bfloat16, use_turboquant=use_turboquant
+    )
+
     materialize_runtime_buffers(model, dtype=torch.bfloat16)
 
-    max_prefill = min(config.max_seq_len - 1, config.sliding_window * 2)
+    max_prefill = 256
     seq_dim = Dim("seq_len", min=1, max=max_prefill)
 
     print(f"Exporting (T in [1, {max_prefill}])...")
@@ -418,8 +437,17 @@ def main() -> None:
         choices=list(_SUPPORTED_BACKENDS),
         help="Target backend for export.",
     )
+    parser.add_argument(
+        "--turboquant",
+        action="store_true",
+        help="Use TurboQuant TQ4 KV cache compression (MLX backend only). "
+        "~3.8× cache memory savings; applies only to full-attention "
+        "(non-sliding) layers — sliding layers keep RingBufferKVCache.",
+    )
     args = parser.parse_args()
 
+    if args.turboquant and args.backend != "mlx":
+        parser.error("--turboquant requires --backend mlx.")
     if args.backend == "cuda" and not torch.cuda.is_available():
         parser.error("CUDA is required for the cuda backend.")
 
@@ -446,7 +474,13 @@ def main() -> None:
     if args.gguf and args.backend == "mlx":
         os.environ["ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS"] = "1"
     try:
-        export_and_lower(model, config, args.output_dir, backend=args.backend)
+        export_and_lower(
+            model,
+            config,
+            args.output_dir,
+            backend=args.backend,
+            use_turboquant=args.turboquant,
+        )
     finally:
         os.environ.pop("ET_MLX_ALLOW_NON_FUSED_QUANTIZED_OPS", None)
 
diff --git a/examples/models/gemma4_31b/mlx_source_transformations.py b/examples/models/gemma4_31b/mlx_source_transformations.py
index 3a8ae4420e3..0bbd4f7b250 100644
--- a/examples/models/gemma4_31b/mlx_source_transformations.py
+++ b/examples/models/gemma4_31b/mlx_source_transformations.py
@@ -24,6 +24,9 @@
     KVCache as MLXKVCache,
     RingBufferKVCache as MLXRingKVCache,
 )
+from executorch.backends.mlx.llm.turboquant_cache import (
+    TurboQuantKVCache as MLXTurboQuantKVCache,
+)
 
 
 def _replace_attention_forward(attn: nn.Module) -> None:
@@ -68,30 +71,34 @@ def _mlx_forward(self, x: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor
             q = torch.ops.mlx.rope(q, rotary_dim, start_pos, False, 0.0, 1.0, mlx_freqs)
             k = torch.ops.mlx.rope(k, rotary_dim, start_pos, False, 0.0, 1.0, mlx_freqs)
 
-        k_cache, v_cache = self.kv_cache.update(start_pos, k, v)
-
-        if self.is_sliding:
-            sdpa_mask = self.kv_cache.create_sliding_window_mask(start_pos, T)
-            y = torch.ops.mlx.custom_sdpa(
-                q,
-                k_cache,
-                v_cache,
-                start_pos=self.kv_cache.buffer_size - T,
-                attn_mask=sdpa_mask,
-                dropout_p=0.0,
-                is_causal=False,
-                scale=self.scaling,
-            )
+        if getattr(self, "is_turboquant", False):
+            self.kv_cache.update(start_pos, k, v)
+            y = self.kv_cache.sdpa(q, start_pos, scale=self.scaling)
         else:
-            y = torch.ops.mlx.custom_sdpa(
-                q,
-                k_cache,
-                v_cache,
-                start_pos=start_pos,
-                dropout_p=0.0,
-                is_causal=True,
-                scale=self.scaling,
-            )
+            k_cache, v_cache = self.kv_cache.update(start_pos, k, v)
+
+            if self.is_sliding:
+                sdpa_mask = self.kv_cache.create_sliding_window_mask(start_pos, T)
+                y = torch.ops.mlx.custom_sdpa(
+                    q,
+                    k_cache,
+                    v_cache,
+                    start_pos=self.kv_cache.buffer_size - T,
+                    attn_mask=sdpa_mask,
+                    dropout_p=0.0,
+                    is_causal=False,
+                    scale=self.scaling,
+                )
+            else:
+                y = torch.ops.mlx.custom_sdpa(
+                    q,
+                    k_cache,
+                    v_cache,
+                    start_pos=start_pos,
+                    dropout_p=0.0,
+                    is_causal=True,
+                    scale=self.scaling,
+                )
 
         y = y.transpose(1, 2).contiguous().view(B, T, self.n_heads * self.head_dim)
         return self.o_proj(y)
@@ -150,6 +157,7 @@ def _mlx_model_forward(
 def mlx_source_transformations(
     model: nn.Module,
     dtype: torch.dtype = torch.bfloat16,
+    use_turboquant: bool = False,
 ) -> None:
     """Apply MLX source transformations to a Gemma 4 31B model in-place.
 
@@ -162,6 +170,13 @@ def mlx_source_transformations(
     - Rewrites layer forward to drop mask parameters (each attention builds
       its own mask via ``custom_sdpa``)
     - Rewrites model forward to drop the sampler and ``_build_masks``
+
+    Args:
+        model: Gemma4_31B model to transform in place.
+        dtype: dtype for KV cache buffers (bf16 by default).
+        use_turboquant: If True, swap full-attention layers' KV caches
+            for ``MLXTurboQuantKVCache`` (~3.8× cache memory savings).
+            Sliding-window layers are unaffected.
     """
     config = model.config
 
@@ -176,6 +191,17 @@ def mlx_source_transformations(
                 head_dim=attn.head_dim,
                 dtype=dtype,
             )
+            attn.is_turboquant = False
+        elif use_turboquant:
+            attn.kv_cache = MLXTurboQuantKVCache(
+                max_batch_size=1,
+                max_context_length=config.max_seq_len,
+                n_heads=attn.n_kv_heads,
+                head_dim=attn.head_dim,
+                enable_dynamic_shape=True,
+                dtype=dtype,
+            )
+            attn.is_turboquant = True
         else:
             attn.kv_cache = MLXKVCache(
                 max_batch_size=1,
@@ -185,6 +211,7 @@ def mlx_source_transformations(
                 enable_dynamic_shape=True,
                 dtype=dtype,
             )
+            attn.is_turboquant = False
 
         _replace_attention_forward(attn)
         _replace_layer_forward(layer)

From bd24e79e87e9093a70cc7f1d8e63366ac457bfd4 Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Fri, 29 May 2026 22:25:49 -0700
Subject: [PATCH 088/103] Add fuse() to remaining QuantizationPatterns (#19727)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:

Add `fuse()` implementations to the remaining Cadence
`QuantizationPattern` subclasses:

- `MaxPool2dPattern`, `MaxPool2dWithoutIndicesPattern` —
order-preserving pool on quantized values
- `ReluBasePattern` (inherited by `ReluPattern0`/`1`) — relu with
requantization
- `ConvReluBasePattern` (inherited by `Conv1d`/`2dReluPattern0`/`1`) —
conv+relu fusion with `anchor_ops()` override to match only the conv op
- `SoftmaxPattern` — softmax with dummy mask/pos tensors and fake_mode
metadata
- `MixedW8A32LinearPattern` — weight-only quantized linear (no
input/output quant)
- `MixedW8A32ConvPattern` — weight-only quantized conv1d with NCL→NLC
permutation
- `MixedW8A32GruPattern` — weight-only quantized GRU with 4 dequantized
params

Reviewed By: DrJessop

Differential Revision: D105728177
---
 backends/cadence/aot/quantizer/patterns.py | 262 ++++++++++++++++++++-
 1 file changed, 260 insertions(+), 2 deletions(-)

diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index bf7ca3ef567..a7026cbf26c 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -12,6 +12,7 @@
 from typing import List, Optional, Tuple, Union
 
 import torch
+from executorch.backends.cadence.aot.compiler_utils import get_shape
 from executorch.backends.cadence.aot.pass_utils import get_arg, replace_with_op
 from executorch.backends.cadence.aot.quantizer.pattern_utils import (
     DQ_PER_TENSOR,
@@ -24,6 +25,7 @@
 from executorch.backends.cadence.aot.quantizer.utils import (
     check_out_zero_point_is_min_range,
     get_bias_qparams,
+    quantize_tensor_multiplier,
 )
 from torch import fx
 from torch._ops import OpOverload
@@ -806,6 +808,40 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_max_pool2d_nchw.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        return _fuse_max_pool2d(gm, anchor_node)
+
+
+def _fuse_max_pool2d(gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+    """Shared fuse logic for both MaxPool2d variants."""
+    dq_input = anchor_node.args[0]
+    if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+        return None
+    quant_node = find_quant_user(anchor_node)
+    if quant_node is None:
+        return None
+    kernel_size = get_arg(anchor_node, "kernel_size", list[int])
+    stride = get_arg(anchor_node, "stride", list[int])
+    padding = get_arg(anchor_node, "padding", list[int])
+    dilation = get_arg(anchor_node, "dilation", list[int])
+    ceil_mode = get_arg(anchor_node, "ceil_mode", bool)
+    args = (get_arg(dq_input, "input", fx.Node),)
+    kwargs = {
+        "kernel_size": kernel_size,
+        "stride": stride,
+        "padding": padding,
+        "dilation": dilation,
+        "ceil_mode": ceil_mode,
+    }
+    return replace_with_op(
+        gm,
+        anchor_node,
+        torch.ops.cadence.quantized_max_pool2d_nchw.default,
+        args,
+        kwargs,
+        quant_node,
+    )
+
 
 class MaxPool2dWithoutIndicesPattern(QuantizationPattern):
     """
@@ -845,8 +881,8 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_max_pool2d_nchw.default
 
-
-# This is a base class for ReLU
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        return _fuse_max_pool2d(gm, anchor_node)
 
 
 # This is a base class for ReLU, since it can be used with two different aten ops
@@ -874,6 +910,28 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_relu.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq_input = anchor_node.args[0]
+        if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        input_scale = get_arg(dq_input, "scale", float)
+        requantize_scale = input_scale / get_arg(quant_node, "scale", float)
+        requantize_scale_t = torch.tensor([requantize_scale])
+        out_multiplier, out_shift = quantize_tensor_multiplier(requantize_scale_t)
+        args = (get_arg(dq_input, "input", fx.Node),)
+        kwargs = {
+            "X_zero_point": get_arg(dq_input, "zero_point", int),
+            "out_zero_point": get_arg(quant_node, "zero_point", int),
+            "out_multiplier": out_multiplier[0].item(),
+            "out_shift": out_shift[0].item(),
+        }
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, kwargs, quant_node
+        )
+
 
 # Regular relu op
 class ReluPattern0(ReluBasePattern):
@@ -933,6 +991,39 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_conv2d_nchw.per_tensor
 
+    def anchor_ops(self) -> tuple[OpOverload, ...]:
+        return (self.partition_types()[0],)
+
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        conv_users = list(anchor_node.users)
+        if len(conv_users) != 1:
+            return None
+        relu_node = conv_users[0]
+        if relu_node.target != self.partition_types()[1]:
+            return None
+        _arg0 = anchor_node.args[0]
+        dq_input = (
+            _arg0
+            if isinstance(_arg0, fx.Node) and _arg0.target == DQ_PER_TENSOR
+            else None
+        )
+        _arg1 = anchor_node.args[1]
+        dq_weight = (
+            _arg1
+            if isinstance(_arg1, fx.Node) and _arg1.target == DQ_PER_TENSOR
+            else None
+        )
+        if dq_input is None or dq_weight is None:
+            return None
+        quant_node = find_quant_user(relu_node)
+        if quant_node is None:
+            return None
+        check_out_zero_point_is_min_range(
+            get_arg(quant_node, "zero_point", int),
+            get_arg(quant_node, "dtype", torch.dtype),
+        )
+        return fuse_conv(self, gm, anchor_node, dq_input, dq_weight, quant_node)
+
 
 # Conv1d + regular relu op fusion
 class Conv1dReluPattern0(ConvReluBasePattern):
@@ -987,6 +1078,56 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_softmax.per_tensor
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        dq_input = anchor_node.args[0]
+        if not isinstance(dq_input, fx.Node) or dq_input.target != DQ_PER_TENSOR:
+            return None
+        quant_node = find_quant_user(anchor_node)
+        if quant_node is None:
+            return None
+        input_q = get_arg(dq_input, "input", fx.Node)
+        quant_input = get_arg(quant_node, "input", fx.Node)
+        mask_shape = get_shape(gm, quant_input)
+        if not mask_shape:
+            return None
+        mask_shape = list(mask_shape)
+        # Softmax mask is packed 16 elements per int32 word.
+        assert (
+            mask_shape[-1] % 16 == 0
+        ), f"Softmax mask dimension must be divisible by 16, got {mask_shape[-1]}"
+        mask_shape[-1] = mask_shape[-1] // 16
+        mask_tensor = insert_node_with_meta(
+            gm,
+            torch.ops.aten.full.default,
+            (mask_shape, 0.0),
+            {"dtype": torch.int32},
+            anchor_node,
+            input_q,
+        )
+        # Initial position for streaming softmax (unused, set to 0).
+        pos_tensor = insert_node_with_meta(
+            gm,
+            torch.ops.aten.full.default,
+            ([1], 0),
+            {"dtype": torch.int64},
+            anchor_node,
+            input_q,
+        )
+        args = (
+            input_q,
+            mask_tensor,
+            get_arg(anchor_node, "dim", int),
+            0,
+            pos_tensor,
+            get_arg(dq_input, "scale", float),
+            get_arg(dq_input, "zero_point", int),
+            get_arg(quant_node, "scale", float),
+            get_arg(quant_node, "zero_point", int),
+        )
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, {}, quant_node
+        )
+
 
 class MixedW8A32LinearPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -1041,6 +1182,36 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_w8a32_linear.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        if len(anchor_node.args) != 3 or len(anchor_node.kwargs) > 0:
+            return None
+        _arg1 = anchor_node.args[1]
+        dq_weight = (
+            _arg1
+            if isinstance(_arg1, fx.Node) and _arg1.target == DQ_PER_TENSOR
+            else None
+        )
+        _arg2 = anchor_node.args[2]
+        dq_bias = (
+            _arg2
+            if isinstance(_arg2, fx.Node) and _arg2.target == DQ_PER_TENSOR
+            else None
+        )
+        if dq_weight is None or dq_bias is None:
+            return None
+        input_node = anchor_node.args[0]
+        assert isinstance(input_node, fx.Node)
+        args = (
+            input_node,
+            get_arg(dq_weight, "input", fx.Node),
+            get_arg(dq_weight, "scale", float),
+            get_arg(dq_bias, "input", fx.Node),
+            get_arg(dq_bias, "scale", float),
+        )
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, {}, anchor_node
+        )
+
 
 class MixedW8A32ConvPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -1115,6 +1286,57 @@ def get_anchors(
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_w8a32_conv.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        if len(anchor_node.args) != 3 or len(anchor_node.kwargs) > 0:
+            return None
+        _arg1 = anchor_node.args[1]
+        dq_weight = (
+            _arg1
+            if isinstance(_arg1, fx.Node) and _arg1.target == DQ_PER_TENSOR
+            else None
+        )
+        _arg2 = anchor_node.args[2]
+        dq_bias = (
+            _arg2
+            if isinstance(_arg2, fx.Node) and _arg2.target == DQ_PER_TENSOR
+            else None
+        )
+        if dq_weight is None or dq_bias is None:
+            return None
+        input_node = anchor_node.args[0]
+        assert isinstance(input_node, fx.Node)
+        assert get_arg(anchor_node, "stride", list[int]) == [1]
+        assert get_arg(anchor_node, "padding", list[int]) == [0]
+        assert get_arg(anchor_node, "dilation", list[int]) == [1]
+        assert get_arg(anchor_node, "groups", int) == 1
+        weight_q = get_arg(dq_weight, "input", fx.Node)
+        transposed_inputs = insert_node_with_meta(
+            gm,
+            torch.ops.aten.permute.default,
+            (input_node, [0, 2, 1]),
+            None,
+            anchor_node,
+            input_node,
+        )
+        transposed_weights = insert_node_with_meta(
+            gm,
+            torch.ops.aten.permute.default,
+            (weight_q, [2, 0, 1]),
+            None,
+            anchor_node,
+            weight_q,
+        )
+        args = (
+            transposed_inputs,
+            transposed_weights,
+            get_arg(dq_weight, "scale", float),
+            get_arg(dq_bias, "input", fx.Node),
+            get_arg(dq_bias, "scale", float),
+        )
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, {}, anchor_node
+        )
+
 
 class MixedW8A32GruPattern(QuantizationPattern):
     def partition_types(self) -> List[OpOverload]:
@@ -1187,6 +1409,42 @@ def __init__(self, args, meta):
     def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_w8a32_gru.default
 
+    def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
+        if len(anchor_node.kwargs) > 0:
+            return None
+        params = anchor_node.args[2]
+        # GRU requires 4 weight/bias params: w_ih, w_hh, b_ih, b_hh
+        if not isinstance(params, (list, tuple)) or len(params) < 4:
+            return None
+        dq_w_ih = params[0]
+        if not isinstance(dq_w_ih, fx.Node) or dq_w_ih.target != DQ_PER_TENSOR:
+            return None
+        dq_w_hh = params[1]
+        if not isinstance(dq_w_hh, fx.Node) or dq_w_hh.target != DQ_PER_TENSOR:
+            return None
+        dq_b_ih = params[2]
+        if not isinstance(dq_b_ih, fx.Node) or dq_b_ih.target != DQ_PER_TENSOR:
+            return None
+        dq_b_hh = params[3]
+        if not isinstance(dq_b_hh, fx.Node) or dq_b_hh.target != DQ_PER_TENSOR:
+            return None
+        input_node = anchor_node.args[0]
+        hidden_node = anchor_node.args[1]
+        args = (
+            input_node,
+            hidden_node,
+            get_arg(dq_w_ih, "input", fx.Node),
+            get_arg(dq_w_ih, "scale", float),
+            get_arg(dq_w_hh, "input", fx.Node),
+            get_arg(dq_w_hh, "scale", float),
+            get_arg(dq_b_ih, "input", fx.Node),
+            get_arg(dq_b_ih, "scale", float),
+            get_arg(dq_b_hh, "input", fx.Node),
+        )
+        return replace_with_op(
+            gm, anchor_node, self.replacement_op(), args, {}, anchor_node
+        )
+
 
 class RmsNormPattern(QuantizationPattern):
     """Pattern that preserves rms_norm from decomposition without matching anything."""

From ec317357dce55a7bda318966bf44eb2abe3f3cec Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Fri, 29 May 2026 22:32:23 -0700
Subject: [PATCH 089/103] Enable QuantFusionPass in compiler pipeline (#19728)
 (#19728)

Summary:

Both and Cadence now use the shared `QuantFusionPass` from
`compiler_funcs.py`.

- `QuantFusionPass` in `compiler_funcs.py` iterates patterns, matches
`anchor_ops()`, calls `fuse()` on each match, with debug logging and
dead code elimination
- Cadence: `compiler.py` now uses `QuantFusionPass` instead of the old
`QuantFusion` isinstance switch
- Removed Cadence `compiler` target's dep on `:fusion_pass` (no longer
imported)

Reviewed By: DrJessop

Differential Revision: D105728219
---
 backends/cadence/aot/BUCK        | 2 --
 backends/cadence/aot/compiler.py | 8 ++++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/backends/cadence/aot/BUCK b/backends/cadence/aot/BUCK
index 7d8ff3cffd2..57b8194c7f8 100644
--- a/backends/cadence/aot/BUCK
+++ b/backends/cadence/aot/BUCK
@@ -44,7 +44,6 @@ fbcode_target(_kind = runtime.python_library,
         ":compiler_funcs",
         ":utils",
         "//caffe2:torch",
-        "//executorch/backends/cadence/aot/quantizer:fusion_pass",
         "//executorch/backends/cadence/aot/quantizer/passes:fuse_ops",
         "//executorch/backends/cadence/aot/quantizer:quantizer",
         "//executorch/backends/transforms:decompose_sdpa",
@@ -65,7 +64,6 @@ fbcode_target(_kind = runtime.python_library,
         ":replace_ops",
         ":utils",
         "//caffe2:torch",
-        "//executorch/backends/cadence/aot/quantizer:fusion_pass",
         "//executorch/backends/cadence/aot/quantizer:quantizer",
         "//executorch/backends/cadence/runtime:runtime",
         "//executorch/backends/transforms:decompose_sdpa",
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index 5c66c9eb62b..0b1b8dac361 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -14,6 +14,7 @@
 import torch
 from executorch.backends.cadence.aot.compiler_funcs import (
     prepare as prepare_fn,
+    QuantFusionPass,
     QuantizedInputWrapper,
     trace as trace_fn,
 )
@@ -21,7 +22,6 @@
     CadenceMemoryPlanning,
     print_memory_planning_info,
 )
-from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
 from executorch.backends.cadence.aot.quantizer.passes.fuse_ops import FuseQATConvBN
 from executorch.backends.cadence.aot.quantizer.quantizer import (
     CadenceDefaultQuantizer,
@@ -154,9 +154,9 @@ def apply_pre_edge_transform_passes(
     quantizer: CadenceQuantizer,
 ) -> ExportedProgram:
     """
-    Apply pre-edge transform passes including QuantFusion and torch ops passes.
+    Apply pre-edge transform passes including QuantFusionPass and torch ops passes.
     This mirrors the Cadence AOT compiler flow:
-    1. QuantFusion - fuses dq->op->q patterns
+    1. QuantFusionPass - fuses dq->op->q patterns
     2. apply_torch_ops_passes - applied just before to_edge()
 
     The quantizer must be the same as the one used to convert the model.
@@ -169,7 +169,7 @@ def apply_pre_edge_transform_passes(
     PassManager(
         [
             FuseQATConvBN(converted_program),
-            QuantFusion(patterns),
+            QuantFusionPass(patterns),
         ]
     )(converted_program.graph_module)
 

From 2af5a13d1eab5414cedc364726ce3b32bc7bec3e Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Mon, 1 Jun 2026 00:17:32 -0700
Subject: [PATCH 090/103] Remove over-strict softmax mask divisibility assert

Differential Revision: D106957459

Pull Request resolved: https://github.com/pytorch/executorch/pull/19903
---
 backends/cadence/aot/quantizer/patterns.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index a7026cbf26c..9897d443725 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -1092,9 +1092,6 @@ def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
             return None
         mask_shape = list(mask_shape)
         # Softmax mask is packed 16 elements per int32 word.
-        assert (
-            mask_shape[-1] % 16 == 0
-        ), f"Softmax mask dimension must be divisible by 16, got {mask_shape[-1]}"
         mask_shape[-1] = mask_shape[-1] // 16
         mask_tensor = insert_node_with_meta(
             gm,

From f244a9f62fd463036470cc2761052e90f0ab5db9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?=
 <33344797+martinlsm@users.noreply.github.com>
Date: Mon, 1 Jun 2026 12:33:27 +0200
Subject: [PATCH 091/103] Arm backend: Add MXFP Linear source transform
 (#19800)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the possibility to convert torch.nn.Linear modules to MXFP format.
The feature works by replacing all torch.nn.Linear submodules inside a
graph by a custom implemented MXFP counterpart: `MXFPLinearOp`.

A new user API called `to_mxfp` has been added to enable this feature
(located in backends/arm/ao_ext/mxfp.py). The API is tagged as
experimental for now.

An eager CPU and fake implementation is added to the new custom op, but
lowering it TOSA is handled in a later patch. To summarize, this patch
enables the following flow:

```python
m = MyModule()

to_mxfp(m, MXFPOpConfig())

m.forward(x)
```

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
Co-authored-by: Sebastian Larsson <sebastian.larsson@arm.com>
---
 backends/arm/TARGETS                          |  27 ++-
 backends/arm/__init__.py                      |   6 +
 backends/arm/ao_ext/__init__.py               |  12 +
 backends/arm/ao_ext/mxfp.py                   |  64 +++++
 backends/arm/ao_ext/mxfp_tosa_lib.py          |  11 +
 backends/arm/ao_ext/mxfp_transform.py         |  24 ++
 backends/arm/ao_ext/ops/__init__.py           |  10 +
 backends/arm/ao_ext/ops/mxfp_linear_op.py     | 179 ++++++++++++++
 backends/arm/operators/op_view.py             |  16 +-
 backends/arm/test/misc/test_mxfp_linear_ao.py |  46 ++++
 backends/arm/test/ops/test_mxfp_linear.py     | 226 ++++++++++++++++++
 backends/arm/test/targets.bzl                 |   3 +
 .../arm/test/tester/analyze_output_utils.py   |  32 ++-
 13 files changed, 639 insertions(+), 17 deletions(-)
 create mode 100644 backends/arm/ao_ext/__init__.py
 create mode 100644 backends/arm/ao_ext/mxfp.py
 create mode 100644 backends/arm/ao_ext/mxfp_tosa_lib.py
 create mode 100644 backends/arm/ao_ext/mxfp_transform.py
 create mode 100644 backends/arm/ao_ext/ops/__init__.py
 create mode 100644 backends/arm/ao_ext/ops/mxfp_linear_op.py
 create mode 100644 backends/arm/test/misc/test_mxfp_linear_ao.py
 create mode 100644 backends/arm/test/ops/test_mxfp_linear.py

diff --git a/backends/arm/TARGETS b/backends/arm/TARGETS
index c3e2251bb11..a63237fe2c9 100644
--- a/backends/arm/TARGETS
+++ b/backends/arm/TARGETS
@@ -1,4 +1,4 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -15,6 +15,31 @@ runtime.python_library(
         "//executorch/exir/dialects:lib",
     ],
 )
+runtime.python_library(
+    name = "ao_ext",
+    srcs = glob([
+        "ao_ext/*.py",
+        "ao_ext/ops/*.py",
+    ]),
+    deps = [
+        "//caffe2:torch",
+        "//executorch/exir:_warnings",
+        "//pytorch/ao:torchao",
+    ],
+)
+
+runtime.python_library(
+    name = "lib",
+    srcs = [
+        "__init__.py",
+    ],
+    deps = [
+        ":ao_ext",
+        ":ethosu",
+        ":vgf",
+        "//executorch/backends/arm/quantizer:lib",
+    ],
+)
 runtime.python_library(
     name = "common",
     srcs = glob(["common/*.py"]),
diff --git a/backends/arm/__init__.py b/backends/arm/__init__.py
index fcbafa717ce..7c0b61457d0 100644
--- a/backends/arm/__init__.py
+++ b/backends/arm/__init__.py
@@ -14,6 +14,10 @@
 import importlib
 from typing import Any
 
+# Register Arm-specific torch.library ops and MXFP transforms at package
+# import time.
+import executorch.backends.arm.ao_ext  # noqa: F401
+
 # Public for tooling (manifest generation and API validation).
 LAZY_IMPORTS = {
     "EthosUBackend": ("executorch.backends.arm.ethosu", "EthosUBackend"),
@@ -32,6 +36,8 @@
         "executorch.backends.arm.quantizer",
         "get_symmetric_a16w8_quantization_config",
     ),
+    "MXFPOpConfig": ("executorch.backends.arm.ao_ext.mxfp", "MXFPOpConfig"),
+    "to_mxfp": ("executorch.backends.arm.ao_ext.mxfp", "to_mxfp"),
 }
 
 
diff --git a/backends/arm/ao_ext/__init__.py b/backends/arm/ao_ext/__init__.py
new file mode 100644
index 00000000000..fef05a9f6ae
--- /dev/null
+++ b/backends/arm/ao_ext/__init__.py
@@ -0,0 +1,12 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Import mxfp_transform to trigger registration of the MXFP transforms.
+from . import mxfp_transform  # noqa: F401
+
+from .mxfp import MXFPOpConfig, to_mxfp
+
+
+__all__ = ["MXFPOpConfig", "to_mxfp"]
diff --git a/backends/arm/ao_ext/mxfp.py b/backends/arm/ao_ext/mxfp.py
new file mode 100644
index 00000000000..783da92590e
--- /dev/null
+++ b/backends/arm/ao_ext/mxfp.py
@@ -0,0 +1,64 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass
+from typing import Callable, Optional
+
+import torch
+from executorch.exir._warnings import experimental
+from torchao.core.config import AOBaseConfig
+from torchao.prototype.mx_formats.config import ScaleCalculationMode
+from torchao.quantization import quantize_
+
+
+def _match_supported_modules(module: torch.nn.Module, _name: str) -> bool:
+    """Default filter function that matches supported modules."""
+    return isinstance(module, torch.nn.Linear)
+
+
+@experimental("This API is experimental and may change without notice.")
+@dataclass
+class MXFPOpConfig(AOBaseConfig):
+    """Configuration for Arm MXFP source transforms."""
+
+    weight_dtype: torch.dtype = torch.float8_e4m3fn
+    weight_scaling_mode: ScaleCalculationMode = ScaleCalculationMode.RCEIL
+
+    # Only block size of 32 is currently supported for now, so we hardcode it here.
+    @property
+    def block_size(self) -> int:
+        return 32
+
+    def __post_init__(self) -> None:
+        if self.weight_dtype not in (torch.float8_e4m3fn, torch.float8_e5m2):
+            raise ValueError(f"Unsupported weight_dtype: {self.weight_dtype}")
+        if not isinstance(self.weight_scaling_mode, ScaleCalculationMode):
+            raise ValueError(
+                f"Unsupported weight_scaling_mode: {self.weight_scaling_mode}"
+            )
+
+
+@experimental("This API is experimental and may change without notice.")
+def to_mxfp(
+    model: torch.nn.Module,
+    config: MXFPOpConfig,
+    filter_fn: Optional[Callable[[torch.nn.Module, str], bool]] = None,
+) -> None:
+    """Convert matching modules in ``model`` to Arm MXFP modules in-place.
+
+    Args:
+        model (torch.nn.Module): Module to transform. Matching submodules are
+            replaced in-place.
+        config (MXFPOpConfig): Configuration controlling the MXFP conversion
+            behavior.
+        filter_fn (Optional[Callable[[torch.nn.Module, str], bool]]): Optional
+            predicate that receives a module and its fully qualified name. When
+            omitted, all modules supported by the MXFP transform are matched.
+
+    """
+    if filter_fn is None:
+        filter_fn = _match_supported_modules
+
+    quantize_(model, config, filter_fn)
diff --git a/backends/arm/ao_ext/mxfp_tosa_lib.py b/backends/arm/ao_ext/mxfp_tosa_lib.py
new file mode 100644
index 00000000000..4459ec59126
--- /dev/null
+++ b/backends/arm/ao_ext/mxfp_tosa_lib.py
@@ -0,0 +1,11 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from torch.library import Library
+
+# MXFP TOSA library definition for the Arm backend containing.
+# This library will generate custom ops like the following example:
+#   torch.ops.tosa_mxfp.linear.default
+MXFP_TOSA_LIB = Library("tosa_mxfp", "DEF")
diff --git a/backends/arm/ao_ext/mxfp_transform.py b/backends/arm/ao_ext/mxfp_transform.py
new file mode 100644
index 00000000000..b7823524475
--- /dev/null
+++ b/backends/arm/ao_ext/mxfp_transform.py
@@ -0,0 +1,24 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from executorch.backends.arm.ao_ext.mxfp import MXFPOpConfig
+from executorch.backends.arm.ao_ext.ops.mxfp_linear_op import transform_linear_to_mxfp
+from torchao.quantization.transform_module import register_quantize_module_handler
+
+
+@register_quantize_module_handler(MXFPOpConfig)  # type: ignore[misc]
+def _transform_to_mxfp(
+    module: torch.nn.Module,
+    config: MXFPOpConfig,
+) -> torch.nn.Module:
+    """Transforms a given module to use MXFP operations based on the provided
+    MXFPOpConfig configuration.
+    """
+    if isinstance(module, torch.nn.Linear):
+        return transform_linear_to_mxfp(module, config)
+    else:
+        return module
diff --git a/backends/arm/ao_ext/ops/__init__.py b/backends/arm/ao_ext/ops/__init__.py
new file mode 100644
index 00000000000..a690c4b7b02
--- /dev/null
+++ b/backends/arm/ao_ext/ops/__init__.py
@@ -0,0 +1,10 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .mxfp_linear_op import MXFPLinearOp
+
+__all__ = [
+    "MXFPLinearOp",
+]
diff --git a/backends/arm/ao_ext/ops/mxfp_linear_op.py b/backends/arm/ao_ext/ops/mxfp_linear_op.py
new file mode 100644
index 00000000000..5238f85a847
--- /dev/null
+++ b/backends/arm/ao_ext/ops/mxfp_linear_op.py
@@ -0,0 +1,179 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""MXFP Linear transform for the Arm backend.
+
+TorchAO extension for MXFP linear. It replaces ``nn.Linear`` with a wrapper
+module that stores precomputed MXFP weights and emits a backend-internal custom
+op during export.
+
+"""
+
+import torch
+import torch.nn.functional as F
+from executorch.backends.arm.ao_ext.mxfp import MXFPOpConfig
+from executorch.backends.arm.ao_ext.mxfp_tosa_lib import MXFP_TOSA_LIB
+from torchao.prototype.mx_formats.config import ScaleCalculationMode
+from torchao.prototype.mx_formats.mx_tensor import to_dtype, to_mx
+
+MXFP_TOSA_LIB.define(
+    "linear(Tensor input, Tensor weight_qdata, Tensor weight_scale, "
+    "Tensor? bias=None, SymInt block_size=32) -> Tensor"
+)
+
+
+@torch.library.register_fake("tosa_mxfp::linear", lib=MXFP_TOSA_LIB)  # type: ignore[misc]
+def _mxfp_linear_fake(
+    input: torch.Tensor,
+    weight_qdata: torch.Tensor,
+    weight_scale: torch.Tensor,
+    bias: torch.Tensor | None = None,
+    block_size: int = 32,
+) -> torch.Tensor:
+    if weight_qdata.ndim != 3:
+        raise ValueError(
+            f"Expected weight_qdata to be rank 3 for linear, got {weight_qdata.ndim}"
+        )
+    if weight_qdata.shape[0] != 1:
+        raise ValueError(
+            f"Expected weight_qdata batch dim to be 1, got {weight_qdata.shape[0]}"
+        )
+    if input.shape[-1] != weight_qdata.shape[-1]:
+        raise ValueError(
+            f"Input last dim {input.shape[-1]} must match linear in_features "
+            f"{weight_qdata.shape[-1]}"
+        )
+    expected_scale_shape = (
+        1,
+        weight_qdata.shape[1],
+        weight_qdata.shape[-1] // block_size,
+    )
+    if tuple(weight_scale.shape) != expected_scale_shape:
+        raise ValueError(
+            f"Expected weight_scale shape {expected_scale_shape}, got "
+            f"{tuple(weight_scale.shape)}"
+        )
+    output_shape = (*input.shape[:-1], weight_qdata.shape[1])
+    return input.new_empty(output_shape, dtype=torch.float32)
+
+
+def _cast_to_block_scaled_cpu_ref(
+    input: torch.Tensor,
+    output_dtype: torch.dtype,
+    block_size: int,
+) -> torch.Tensor:
+    """Emulate the current TOSA activation cast in eager mode."""
+    input_scale, input_qdata = to_mx(
+        input.to(torch.float32).contiguous(),
+        elem_dtype=output_dtype,
+        block_size=block_size,
+        scaling_mode=ScaleCalculationMode.RCEIL,
+    )
+    return to_dtype(
+        input_qdata,
+        input_scale,
+        output_dtype,
+        block_size,
+        torch.float32,
+    )
+
+
+@torch.library.impl("tosa_mxfp::linear", "cpu", lib=MXFP_TOSA_LIB)
+def _mxfp_linear_cpu(
+    input: torch.Tensor,
+    weight_qdata: torch.Tensor,
+    weight_scale: torch.Tensor,
+    bias: torch.Tensor | None = None,
+    block_size: int = 32,
+) -> torch.Tensor:
+    """CPU reference implementation of the MXFP linear op."""
+
+    if weight_qdata.ndim != 3 or weight_scale.ndim != 3:
+        raise ValueError("Expected rank-3 weight tensors for MXFP linear")
+
+    # Cast the input to block-scaled format and back again to match the
+    # expected input format of the TOSA
+    dequantized_input = _cast_to_block_scaled_cpu_ref(
+        input,
+        weight_qdata.dtype,
+        block_size,
+    )
+    dequantized_weight = to_dtype(
+        weight_qdata,
+        weight_scale,
+        weight_qdata.dtype,
+        block_size,
+        torch.float32,
+    )
+    dequantized_weight = dequantized_weight.squeeze(0)
+    if bias is not None:
+        bias = bias.to(torch.float32)
+    return F.linear(dequantized_input, dequantized_weight, bias)
+
+
+class MXFPLinearOp(torch.nn.Module):
+    """Linear wrapper that stores MXFP weights and emits a custom op."""
+
+    def __init__(
+        self,
+        weight_qdata: torch.Tensor,
+        weight_scale: torch.Tensor,
+        bias: torch.Tensor | None,
+        config: MXFPOpConfig,
+    ) -> None:
+        super().__init__()
+        self.config = config
+
+        self.register_buffer("weight_qdata", weight_qdata, persistent=True)
+        self.register_buffer("weight_scale", weight_scale, persistent=True)
+
+        self.bias: torch.nn.Parameter | None
+        bias_param = (
+            torch.nn.Parameter(bias.detach(), requires_grad=False)
+            if bias is not None
+            else None
+        )
+        self.register_parameter(
+            "bias",
+            bias_param,
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.ops.tosa_mxfp.linear.default(
+            x,
+            self.weight_qdata,
+            self.weight_scale,
+            self.bias,
+            self.config.block_size,
+        )
+
+
+def transform_linear_to_mxfp(
+    module: torch.nn.Module,
+    config: MXFPOpConfig,
+) -> torch.nn.Module:
+    assert isinstance(module, torch.nn.Linear)
+
+    weight = module.weight.detach().contiguous()
+    if weight.shape[-1] % config.block_size != 0:
+        raise ValueError(
+            f"Linear in_features={weight.shape[-1]} must be divisible by "
+            f"block_size={config.block_size}"
+        )
+
+    weight_scale, weight_qdata = to_mx(
+        weight,
+        elem_dtype=config.weight_dtype,
+        block_size=config.block_size,
+        scaling_mode=config.weight_scaling_mode,
+    )
+
+    # The resulting TOSA op MATMUL_T_BLOCK_SCALED only works with tensors of
+    # rank 3, therefore we prepend a batch dimension of 1 to the weight tensors
+    # here.
+    weight_qdata = weight_qdata.unsqueeze(0)
+    weight_scale = weight_scale.unsqueeze(0)
+
+    bias = module.bias.detach().to(torch.float32) if module.bias is not None else None
+    return MXFPLinearOp(weight_qdata, weight_scale, bias, config)
diff --git a/backends/arm/operators/op_view.py b/backends/arm/operators/op_view.py
index ba98f746476..6d399b65801 100644
--- a/backends/arm/operators/op_view.py
+++ b/backends/arm/operators/op_view.py
@@ -35,24 +35,26 @@ def define_node(
         inputs: List[TosaArg],
         output: TosaArg,
     ) -> None:
-        supported_dtypes = [ts.DType.BOOL]
+        supported_dtypes = {ts.DType.BOOL}
         if self.tosa_spec.support_integer():
-            supported_dtypes.extend([ts.DType.INT8, ts.DType.INT16, ts.DType.INT32])
+            supported_dtypes.update([ts.DType.INT8, ts.DType.INT16, ts.DType.INT32])
         if self.tosa_spec.support_float():
-            supported_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
+            supported_dtypes.update([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_extension("bf16"):
-            supported_dtypes.append(ts.DType.BF16)
+            supported_dtypes.add(ts.DType.BF16)
         if self.tosa_spec.support_extension("fp8e4m3"):
-            supported_dtypes.append(ts.DType.FP8E4M3)
+            supported_dtypes.add(ts.DType.FP8E4M3)
         if self.tosa_spec.support_extension("fp8e5m2"):
-            supported_dtypes.append(ts.DType.FP8E5M2)
+            supported_dtypes.add(ts.DType.FP8E5M2)
+        if self.tosa_spec.support_extension("mxfp"):
+            supported_dtypes.update([ts.DType.FP8E4M3, ts.DType.FP8E5M2])
 
         validate_num_inputs(self.target, inputs, 2)
         validate_same_dtype(self.target, [inputs[0], output], ts)
         validate_valid_dtype(
             self.target,
             [inputs[0], output],
-            supported_dtypes,
+            list(supported_dtypes),
             self.tosa_spec,
         )
 
diff --git a/backends/arm/test/misc/test_mxfp_linear_ao.py b/backends/arm/test/misc/test_mxfp_linear_ao.py
new file mode 100644
index 00000000000..0f2b6b9198c
--- /dev/null
+++ b/backends/arm/test/misc/test_mxfp_linear_ao.py
@@ -0,0 +1,46 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm.ao_ext import MXFPOpConfig, to_mxfp
+from executorch.backends.arm.ao_ext.ops import MXFPLinearOp
+
+from torch.export import export
+
+
+class LinearModule(torch.nn.Module):
+    def __init__(self) -> None:
+        super().__init__()
+        self.linear = torch.nn.Linear(32, 8, bias=True)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.linear(x)
+
+
+def test_mxfp_linear_quantize_swaps_module() -> None:
+    model = LinearModule().eval()
+
+    to_mxfp(model, MXFPOpConfig())
+
+    assert isinstance(model.linear, MXFPLinearOp)
+    assert model.linear.weight_qdata.dtype == torch.float8_e4m3fn
+    assert model.linear.weight_scale.dtype == torch.float8_e8m0fnu
+    assert tuple(model.linear.weight_qdata.shape) == (1, 8, 32)
+    assert tuple(model.linear.weight_scale.shape) == (1, 8, 1)
+
+
+def test_mxfp_linear_export_preserves_custom_op() -> None:
+    model = LinearModule().eval()
+    to_mxfp(model, MXFPOpConfig())
+
+    exported = export(model, (torch.randn(4, 32),), strict=False)
+
+    targets = [
+        node.target
+        for node in exported.graph_module.graph.nodes
+        if node.op == "call_function"
+    ]
+
+    assert torch.ops.tosa_mxfp.linear.default in targets
diff --git a/backends/arm/test/ops/test_mxfp_linear.py b/backends/arm/test/ops/test_mxfp_linear.py
new file mode 100644
index 00000000000..da1bbec3b83
--- /dev/null
+++ b/backends/arm/test/ops/test_mxfp_linear.py
@@ -0,0 +1,226 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+
+import torch
+from executorch.backends.arm.ao_ext import MXFPOpConfig, to_mxfp
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.analyze_output_utils import (
+    compare_rel_frobenius_and_cosine_similarity,
+)
+
+
+def _block_input_rank1() -> torch.Tensor:
+    """Create a rank-1 input with distinct MXFP activation block scales."""
+
+    return torch.cat(
+        (
+            1e-3 * torch.randn(32),
+            100.0 * torch.randn(32),
+        )
+    )
+
+
+def _block_input_rank2() -> torch.Tensor:
+    """Create a rank-2 input with per-row activation block scale changes."""
+
+    return torch.stack(
+        (
+            _block_input_rank1(),
+            torch.cat(
+                (
+                    100.0 * torch.randn(32),
+                    1e-3 * torch.randn(32),
+                )
+            ),
+        )
+    )
+
+
+_test_data_rank1_fp = {
+    "mxfp_linear_rank1_zeros": lambda: (
+        torch.zeros(32 * 8),
+        5,
+        True,
+        False,
+    ),
+    "mxfp_linear_rank1_rand": lambda: (
+        torch.rand(32),
+        16,
+        False,
+        False,
+    ),
+}
+
+_test_data_rank2_fp = {
+    "mxfp_linear_rank2_zeros": lambda: (
+        torch.zeros(4, 32),
+        16,
+        True,
+        False,
+    ),
+    "mxfp_linear_rank2_rand": lambda: (
+        torch.rand(4, 32 * 6),
+        13,
+        True,
+        False,
+    ),
+}
+
+_test_data_rank3_fp = {
+    "mxfp_linear_rank3_zeros": lambda: (
+        torch.zeros(2, 4, 32 * 3),
+        1,
+        True,
+        False,
+    ),
+    "mxfp_linear_rank3_rand": lambda: (
+        torch.rand(2, 4, 32),
+        20,
+        True,
+        False,
+    ),
+}
+
+_test_data_rank4_fp = {
+    "mxfp_linear_rank4_zeros": lambda: (
+        torch.zeros(2, 3, 4, 32 * 24),
+        8,
+        True,
+        False,
+    ),
+    "mxfp_linear_rank4_rand": lambda: (
+        torch.rand(2, 3, 4, 32 * 32),
+        64,
+        False,
+        False,
+    ),
+}
+
+_test_data_block_fp = {
+    "mxfp_linear_rank1_block_weights": lambda: (
+        torch.ones(64),
+        4,
+        False,
+        True,
+    ),
+    "mxfp_linear_rank1_block_weights_block_activations": lambda: (
+        _block_input_rank1(),
+        4,
+        False,
+        True,
+    ),
+    "mxfp_linear_rank2_block_weights_block_activations": lambda: (
+        _block_input_rank2(),
+        4,
+        False,
+        True,
+    ),
+}
+
+test_data_fp = (
+    _test_data_rank1_fp
+    | _test_data_rank2_fp
+    | _test_data_rank3_fp
+    | _test_data_rank4_fp
+    | _test_data_block_fp
+)
+
+
+class Linear(torch.nn.Module):
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int = 8,
+        bias: bool = True,
+    ) -> None:
+        super().__init__()
+        self.fc = torch.nn.Linear(
+            in_features=in_features,
+            out_features=out_features,
+            bias=bias,
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.fc(x)
+
+    def set_block_test_weights(self) -> None:
+        """Set weights to exercise separate MXFP weight block scales.
+
+        The first two logical 32-wide input blocks use different magnitudes so
+        tests can verify block scaling does not share one scale across blocks.
+
+        """
+        if self.fc.weight.shape[1] < 64:
+            raise ValueError(
+                "Block test weights require at least 64 input features (2 blocks), got "
+                f"{tuple(self.fc.weight.shape)}"
+            )
+
+        with torch.no_grad():
+            self.fc.weight.zero_()
+            for row in range(self.fc.weight.shape[0]):
+                # Small values in the first block.
+                self.fc.weight[row, 0:32] = 1e-3
+                # Large values in the next block to require a different scale.
+                self.fc.weight[row, 32:64] = 100.0
+            if self.fc.bias is not None:
+                self.fc.bias.zero_()
+
+
+def _is_linear(module: torch.nn.Module, _fqn: str) -> bool:
+    return isinstance(module, torch.nn.Linear)
+
+
+def _test_mxfp_linear_eager_cpu(
+    test_data: torch.Tensor,
+    config: MXFPOpConfig,
+    frobenius_threshold: float,
+    cosine_threshold: float,
+) -> None:
+    test_input, out_features, has_bias, set_block_weights = test_data()
+    in_features = test_input.shape[-1]
+    ref_model = Linear(
+        in_features=in_features,
+        out_features=out_features,
+        bias=has_bias,
+    ).eval()
+    if set_block_weights:
+        ref_model.set_block_test_weights()
+    test_model = copy.deepcopy(ref_model).eval()
+
+    to_mxfp(test_model, config, filter_fn=_is_linear)
+
+    test_output = test_model(test_input)
+    ref_output = ref_model(test_input)
+
+    compare_rel_frobenius_and_cosine_similarity(
+        ref_output,
+        test_output,
+        quantization_parameters=None,
+        frobenius_threshold=frobenius_threshold,
+        cosine_threshold=cosine_threshold,
+        clean_reference=False,
+    )
+
+
+@common.parametrize("test_data", test_data_fp)
+def test_mxfp_linear_eager_cpu(test_data: torch.Tensor) -> None:
+    """Check eager MXFP implementation.
+
+    The Arm lowering tests compare lowered output against the eager CPU
+    implementation, so the eager implementation must be accurate for it to be
+    used as a reference in other tests.
+
+    """
+    _test_mxfp_linear_eager_cpu(
+        test_data,
+        MXFPOpConfig(),
+        frobenius_threshold=0.06,
+        cosine_threshold=0.995,
+    )
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 0a3faa6a074..78b0c6a8533 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -25,6 +25,7 @@ def define_arm_tests():
         "ops/test_log10.py",
         "ops/test_max_pool1d.py",
         "ops/test_mul.py",
+        "ops/test_mxfp_linear.py",
         "ops/test_permute.py",
         "ops/test_rsqrt.py",
         "ops/test_slice.py",
@@ -62,6 +63,7 @@ def define_arm_tests():
         "misc/test_bn_relu_folding_qat.py",
         "misc/test_custom_partition.py",
         "misc/test_debug_hook.py",
+        "misc/test_mxfp_linear_ao.py",
         "misc/test_post_quant_device_switch.py",
         # "misc/test_dim_order.py", (TODO - T238390249)
     ]
@@ -104,6 +106,7 @@ def define_arm_tests():
                 "//executorch/backends/arm/test:arm_tester" if runtime.is_oss else "//executorch/backends/arm/test/tester/fb:arm_tester_fb",
                 "//executorch/backends/arm/test:conftest",
                 "//executorch/backends/arm/test/misc:dw_convs_shared_weights_module",
+                "//executorch/backends/arm:ao_ext",
                 "//executorch/backends/arm:ethosu",
                 "//executorch/backends/arm/tosa:compile_spec",
                 "//executorch/backends/arm/tosa:partitioner",
diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py
index 6a3bbd4d686..c68811eedad 100644
--- a/backends/arm/test/tester/analyze_output_utils.py
+++ b/backends/arm/test/tester/analyze_output_utils.py
@@ -337,6 +337,24 @@ def dump_error_output(
     logger.error(f"{atol=}, {rtol=}, {qtol=}")
 
 
+def calculate_rel_frobenius_and_cosine_similarity(
+    reference_output: torch.Tensor,
+    test_output: torch.Tensor,
+) -> tuple[float, float]:
+    reference_output = reference_output.to(torch.float32)
+    test_output = test_output.to(torch.float32)
+
+    reference_frobenius_norm = torch.linalg.norm(reference_output).item()
+    error_frobenius_norm = torch.linalg.norm(test_output - reference_output).item()
+
+    relative_frobenius_error = error_frobenius_norm / (reference_frobenius_norm + 1e-8)
+    cosine_similarity = torch.nn.functional.cosine_similarity(
+        test_output.flatten(), reference_output.flatten(), dim=0
+    ).item()
+
+    return relative_frobenius_error, cosine_similarity
+
+
 def compare_rel_frobenius_and_cosine_similarity(
     reference_output: torch.Tensor,
     test_output: torch.Tensor,
@@ -394,15 +412,11 @@ def compare_rel_frobenius_and_cosine_similarity(
     if reference_all_zeros:
         return
 
-    reference_output = reference_output.to(torch.float32)
-    test_output = test_output.to(torch.float32)
-
-    reference_frobenius_norm = torch.linalg.norm(reference_output).item()
-    error_frobenius_norm = torch.linalg.norm(test_output - reference_output).item()
-
-    relative_frobenius_error = error_frobenius_norm / (reference_frobenius_norm + 1e-8)
-    cosine_similarity = torch.nn.functional.cosine_similarity(
-        test_output.flatten(), reference_output.flatten(), dim=0
+    relative_frobenius_error, cosine_similarity = (
+        calculate_rel_frobenius_and_cosine_similarity(reference_output, test_output)
+    )
+    reference_frobenius_norm = torch.linalg.norm(
+        reference_output.to(torch.float32)
     ).item()
 
     # Relative Frobenius is unstable when the reference norm is at quantization-noise scale.

From 0204e36aeecf8a780c601b933d88a02060496ff2 Mon Sep 17 00:00:00 2001
From: roman-janik-nxp <roman.janik@nxp.com>
Date: Mon, 1 Jun 2026 14:18:22 +0200
Subject: [PATCH 092/103] NXP backend: Enable integer inputs model testing
 (#19808)

### Summary
Enables to test Neutron delegate with int data created by quantization
of generated float data and removed input and output quantization nodes.
Turns model to int variant.

### Test plan
Tests provided.


cc @robert-kalmar
---
 backends/nxp/tests/dataset_creator.py         |  68 ++++++++
 backends/nxp/tests/executorch_pipeline.py     |   4 +
 .../test_quantized_input_data.py              | 130 ++++++++++++++
 backends/nxp/tests/nsys_testing.py            | 164 ++++++++++++------
 4 files changed, 317 insertions(+), 49 deletions(-)
 create mode 100644 backends/nxp/tests/generic_tests/test_quantized_input_data.py

diff --git a/backends/nxp/tests/dataset_creator.py b/backends/nxp/tests/dataset_creator.py
index eaf267f4fcf..fdfd363c257 100644
--- a/backends/nxp/tests/dataset_creator.py
+++ b/backends/nxp/tests/dataset_creator.py
@@ -8,6 +8,7 @@
 import shutil
 from collections import OrderedDict
 from copy import deepcopy
+from dataclasses import dataclass
 from os import mkdir
 from random import sample, seed
 
@@ -19,6 +20,7 @@
 )
 from executorch.backends.nxp.tests.calibration_dataset import CalibrationDataset
 from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec
+from executorch.exir.scalar_type import ScalarType
 from torch import Tensor
 
 
@@ -33,6 +35,72 @@ def _get_calibration_and_testing_dataset_directory_names(
     return calibration_path, test_path
 
 
+@dataclass
+class InputQuantizationSpec:
+    name: str
+    scale: float
+    zp: int
+    dtype: ScalarType
+
+
+def _replace_input_binary_tensor_with_quantized_variant(
+    input_bin_tensor_path: str,
+    input_spec: ModelInputSpec,
+    q_params: InputQuantizationSpec,
+):
+    tensor = np.fromfile(
+        input_bin_tensor_path, dtype=torch_type_to_numpy_type(input_spec.dtype)
+    )
+    if q_params.dtype == ScalarType.CHAR:
+        tensor = np.add(np.round(np.divide(tensor, [q_params.scale])), [q_params.zp])
+        tensor = np.clip(tensor, -128, 127).astype(np.int8)
+    else:
+        raise ValueError(f"Unknown quantization type: '{q_params.dtype}.")
+    tensor.tofile(input_bin_tensor_path)
+
+
+def create_quantized_variant_of_dataset(
+    dataset_dir: str,
+    dataset_dir_quant: str,
+    input_quant_spec: list[InputQuantizationSpec],
+    input_spec: list[ModelInputSpec],
+):
+    """
+    Create quantized dataset from provided quantization spec. Dataset is cloned from directory 'dataset_dir'.
+
+    :param dataset_dir: Original (float) dataset directory.
+    :param dataset_dir_quant: Quantized dataset directory.
+    :param input_quant_spec: Quantization parameters used for dataset quantization.
+    :param input_spec: Model inputs specification.
+    """
+    assert len(input_quant_spec) > 0
+
+    shutil.copytree(dataset_dir, dataset_dir_quant, dirs_exist_ok=True)
+
+    if len(input_quant_spec) == 1:
+        # Single input dataset - quantize only files in dataset's root dir with first input_quant_spec
+        input_spec = input_spec[0]
+        input_quant_spec = input_quant_spec[0]
+
+        for file in os.listdir(dataset_dir_quant):
+            input_bin_tensor_path = os.path.join(dataset_dir_quant, file)
+            _replace_input_binary_tensor_with_quantized_variant(
+                input_bin_tensor_path, input_spec, input_quant_spec
+            )
+    else:
+        # Iterate over samples (subfolders)
+        for dir_ in os.listdir(dataset_dir_quant):
+            # Iterate over each input in sample
+            sample_dir = os.path.join(dataset_dir_quant, dir_)
+
+            for idx, input_ in enumerate(sorted(os.listdir(sample_dir))):
+                _replace_input_binary_tensor_with_quantized_variant(
+                    os.path.join(sample_dir, input_),
+                    input_spec[idx],
+                    input_quant_spec[idx],
+                )
+
+
 class DatasetCreator(abc.ABC):
 
     @abc.abstractmethod
diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py
index 8f588be621d..e85a5de4d1b 100644
--- a/backends/nxp/tests/executorch_pipeline.py
+++ b/backends/nxp/tests/executorch_pipeline.py
@@ -276,6 +276,8 @@ def to_quantized_executorch_program(
     dataset_dir: str | None = None,
     delegate_to_npu=True,
     use_new_flow_neutron_c: bool = False,
+    operators_not_to_delegate: list[str] = None,
+    remove_quant_io_ops: bool = False,
 ) -> ExecutorchProgramManager:
     if dataset_dir:
         # Extract calibration data from a directory.
@@ -295,6 +297,8 @@ def to_quantized_executorch_program(
         use_neutron_for_format_conversion=use_neutron_for_format_conversion,
         delegate_to_npu=delegate_to_npu,
         use_new_flow_neutron_c=use_new_flow_neutron_c,
+        operators_not_to_delegate=operators_not_to_delegate,
+        remove_quant_io_ops=remove_quant_io_ops,
         **get_calibration_inputs_fn,
     )
 
diff --git a/backends/nxp/tests/generic_tests/test_quantized_input_data.py b/backends/nxp/tests/generic_tests/test_quantized_input_data.py
new file mode 100644
index 00000000000..4d2188816dc
--- /dev/null
+++ b/backends/nxp/tests/generic_tests/test_quantized_input_data.py
@@ -0,0 +1,130 @@
+# Copyright 2026 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import executorch.backends.nxp.tests.nsys_testing as nsys_testing
+import torch
+
+from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.models import AvgPool2dModule, MulTensorModule
+from executorch.backends.nxp.tests.nsys_testing import (
+    lower_run_compare,
+    OUTPUTS_DIR,
+    ReferenceModel,
+)
+from executorch.backends.nxp.tests.ops_aliases import AvgPool2D, MulTensor
+
+
+def test__single_quantized_inputs(mocker):
+    input_spec = ModelInputSpec((2, 4, 6, 7))
+    model = AvgPool2dModule(False, 0)
+    graph_verifier = DetailedGraphVerifier(
+        mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={}
+    )
+    output_tensor_spec_spy = mocker.spy(nsys_testing, "_get_program_output_spec")
+
+    lower_run_compare(
+        model,
+        [input_spec],
+        graph_verifier,
+        use_new_flow_neutron_c=True,
+        remove_quant_io_ops=True,
+    )
+
+    assert (
+        OUTPUTS_DIR / "test__single_quantized_inputs" / "dataset_quant" / "0000.bin"
+    ).exists()
+
+    # Check outputs are in quantized int8 format
+    output_tensor_spec = output_tensor_spec_spy.spy_return
+    assert output_tensor_spec[0].dtype == torch.int8
+
+
+def test__single_quantized_inputs_edge_python_reference(mocker):
+    input_spec = ModelInputSpec((2, 4, 6, 7))
+    model = AvgPool2dModule(False, 0)
+    graph_verifier = DetailedGraphVerifier(
+        mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={}
+    )
+    output_tensor_spec_spy = mocker.spy(nsys_testing, "_get_program_output_spec")
+
+    lower_run_compare(
+        model,
+        [input_spec],
+        graph_verifier,
+        reference_model=ReferenceModel.QUANTIZED_EDGE_PYTHON,
+        use_new_flow_neutron_c=True,
+        remove_quant_io_ops=True,
+    )
+
+    assert (
+        OUTPUTS_DIR
+        / "test__single_quantized_inputs_edge_python_reference"
+        / "dataset_quant"
+        / "0000.bin"
+    ).exists()
+
+    # Check outputs are in quantized int8 format
+    output_tensor_spec = output_tensor_spec_spy.spy_return
+    assert output_tensor_spec[0].dtype == torch.int8
+
+
+def test__multiple_quantized_inputs(mocker):
+    x_input_spec = ModelInputSpec((1, 4, 8, 8))
+    model = MulTensorModule()
+    graph_verifier = DetailedGraphVerifier(
+        mocker, expected_delegated_ops={MulTensor: 1}, expected_non_delegated_ops={}
+    )
+    output_tensor_spec_spy = mocker.spy(nsys_testing, "_get_program_output_spec")
+
+    lower_run_compare(
+        model,
+        [x_input_spec, x_input_spec],
+        graph_verifier,
+        use_new_flow_neutron_c=True,
+        remove_quant_io_ops=True,
+    )
+
+    assert (
+        OUTPUTS_DIR
+        / "test__multiple_quantized_inputs"
+        / "dataset_quant"
+        / "0000"
+        / "00.bin"
+    ).exists()
+
+    # Check outputs are in quantized int8 format
+    output_tensor_spec = output_tensor_spec_spy.spy_return
+    assert output_tensor_spec[0].dtype == torch.int8
+
+
+def test__multiple_quantized_inputs_edge_python_reference(mocker):
+    x_input_spec = ModelInputSpec((1, 4, 8, 8))
+    model = MulTensorModule()
+    graph_verifier = DetailedGraphVerifier(
+        mocker, expected_delegated_ops={MulTensor: 1}, expected_non_delegated_ops={}
+    )
+    output_tensor_spec_spy = mocker.spy(nsys_testing, "_get_program_output_spec")
+
+    lower_run_compare(
+        model,
+        [x_input_spec, x_input_spec],
+        graph_verifier,
+        reference_model=ReferenceModel.QUANTIZED_EDGE_PYTHON,
+        use_new_flow_neutron_c=True,
+        remove_quant_io_ops=True,
+    )
+
+    assert (
+        OUTPUTS_DIR
+        / "test__multiple_quantized_inputs_edge_python_reference"
+        / "dataset_quant"
+        / "0000"
+        / "00.bin"
+    ).exists()
+
+    # Check outputs are in quantized int8 format
+    output_tensor_spec = output_tensor_spec_spy.spy_return
+    assert output_tensor_spec[0].dtype == torch.int8
diff --git a/backends/nxp/tests/nsys_testing.py b/backends/nxp/tests/nsys_testing.py
index 636e1a28a44..ab5a583ede0 100644
--- a/backends/nxp/tests/nsys_testing.py
+++ b/backends/nxp/tests/nsys_testing.py
@@ -23,7 +23,11 @@
 )
 from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
 from executorch.backends.nxp.tests.config_importer import test_config
-from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
+from executorch.backends.nxp.tests.dataset_creator import (
+    create_quantized_variant_of_dataset,
+    InputQuantizationSpec,
+    RandomDatasetCreator,
+)
 from executorch.backends.nxp.tests.executorch_pipeline import (
     get_calibration_inputs_fn_from_dataset_dir,
     ModelInputSpec,
@@ -61,20 +65,7 @@ class ReferenceModel(Enum):
     FLOAT_PYTORCH_PYTHON = 4
 
 
-def _run_delegated_executorch_program(
-    model,
-    test_dir,
-    test_name,
-    calibration_dataset_dir,
-    testing_dataset_dir,
-    input_spec,
-    dlg_model_verifier,
-    npu_results_dir,
-    mocker,
-    use_qat: bool = False,
-    train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
-    use_new_flow_neutron_c: bool = False,
-) -> ExportedProgram:
+def _get_dataset_cli_args(input_spec: list[ModelInputSpec], testing_dataset_dir):
     if len(input_spec) == 1:
         # Single input, use --dataset
         dataset_cli = "--dataset"
@@ -90,14 +81,25 @@ def _run_delegated_executorch_program(
                 ]
             )
         )
+    return dataset_cli, dataset_or_inputs
 
-    # Run nxp_executor_runner with program delegated to NPU
-    delegated_model_path = os.path.abspath(
-        os.path.join(test_dir, f"{test_name}_delegated.pte")
-    )
 
-    delegated_cmd = f"{NEUTRON_TEST_PATH} --model {delegated_model_path} {dataset_cli} {dataset_or_inputs} \
-        --output {npu_results_dir} --firmware {NSYS_FIRMWARE_PATH} --nsys {NSYS_PATH} --nsys_config {NSYS_CONFIG_PATH}"
+def _run_delegated_executorch_program(
+    model,
+    test_dir,
+    test_name,
+    calibration_dataset_dir,
+    testing_dataset_dir,
+    input_spec,
+    dlg_model_verifier,
+    npu_results_dir,
+    mocker,
+    use_qat: bool = False,
+    train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
+    use_new_flow_neutron_c: bool = False,
+    operators_not_to_delegate: list[str] = None,
+    remove_quant_io_ops: bool = False,
+) -> tuple[ExportedProgram, str]:
     try:
         if mocker:
             method = getattr(NeutronPartitioner, "partition")  # noqa B009
@@ -123,6 +125,8 @@ def wrapper(*args, **kwargs):
             use_qat=use_qat,
             train_fn=train_fn,
             use_new_flow_neutron_c=use_new_flow_neutron_c,
+            operators_not_to_delegate=operators_not_to_delegate,
+            remove_quant_io_ops=remove_quant_io_ops,
         )
     except RuntimeError as e:
         if "Model converted with neutron-converter has" in str(e) and hasattr(
@@ -139,9 +143,30 @@ def wrapper(*args, **kwargs):
     dlg_model_verifier.verify_graph(exported_program.graph)
 
     save_pte_program(delegated_program, test_name + "_delegated", test_dir)
+
+    # Preparation of quantized dataset, requires quantization parameters from converted delegated model
+    if remove_quant_io_ops:
+        dataset_dir_quant = os.path.join(test_dir, "dataset_quant")
+        input_quant_spec = _parse_input_quant_params(input_spec, delegated_program)
+        create_quantized_variant_of_dataset(
+            testing_dataset_dir, dataset_dir_quant, input_quant_spec, input_spec
+        )
+        testing_dataset_dir = dataset_dir_quant
+
+    dataset_cli, dataset_or_inputs = _get_dataset_cli_args(
+        input_spec, testing_dataset_dir
+    )
+
+    # Run nxp_executor_runner with program delegated to NPU
+    delegated_model_path = os.path.abspath(
+        os.path.join(test_dir, f"{test_name}_delegated.pte")
+    )
+
+    delegated_cmd = f"{NEUTRON_TEST_PATH} --model {delegated_model_path} {dataset_cli} {dataset_or_inputs} \
+        --output {npu_results_dir} --firmware {NSYS_FIRMWARE_PATH} --nsys {NSYS_PATH} --nsys_config {NSYS_CONFIG_PATH}"
     execute_cmd(delegated_cmd)
 
-    return exported_program
+    return exported_program, testing_dataset_dir
 
 
 def _run_non_delegated_executorch_program(
@@ -154,31 +179,12 @@ def _run_non_delegated_executorch_program(
     cpu_results_dir,
     use_qat: bool = False,
     train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
+    remove_quant_io_ops: bool = False,
 ) -> ExportedProgram:
-    if len(input_spec) == 1:
-        # Single input, use --dataset
-        dataset_cli = "--dataset"
-        dataset_or_inputs = testing_dataset_dir
-    else:
-        # Multiple input, use --inputs with subdirectories
-        dataset_cli = "--inputs"
-        dataset_or_inputs = ",".join(
-            sorted(
-                [
-                    os.path.join(testing_dataset_dir, d)
-                    for d in os.listdir(testing_dataset_dir)
-                ]
-            )
-        )
-
-    # Run program via nxp_executor_runner on CPU
-    non_delegated_model_path = os.path.abspath(
-        os.path.join(test_dir, f"{test_name}_non_delegated.pte")
+    dataset_cli, dataset_or_inputs = _get_dataset_cli_args(
+        input_spec, testing_dataset_dir
     )
 
-    non_delegated_cmd = f"{NEUTRON_TEST_PATH} --model {non_delegated_model_path} {dataset_cli} {dataset_or_inputs} \
-        --output {cpu_results_dir} --firmware {NSYS_FIRMWARE_PATH} --nsys {NSYS_PATH} --nsys_config {NSYS_CONFIG_PATH}"
-
     non_delegated_program = to_quantized_executorch_program(
         model,
         input_spec,
@@ -186,6 +192,7 @@ def _run_non_delegated_executorch_program(
         delegate_to_npu=False,
         use_qat=use_qat,
         train_fn=train_fn,
+        remove_quant_io_ops=remove_quant_io_ops,
     )
 
     nodes = list(non_delegated_program.exported_program().graph.nodes)
@@ -194,6 +201,14 @@ def _run_non_delegated_executorch_program(
     ), "Delegated parts found in program executed on CPU!"
 
     save_pte_program(non_delegated_program, test_name + "_non_delegated", test_dir)
+
+    # Run program via nxp_executor_runner on CPU
+    non_delegated_model_path = os.path.abspath(
+        os.path.join(test_dir, f"{test_name}_non_delegated.pte")
+    )
+
+    non_delegated_cmd = f"{NEUTRON_TEST_PATH} --model {non_delegated_model_path} {dataset_cli} {dataset_or_inputs} \
+        --output {cpu_results_dir} --firmware {NSYS_FIRMWARE_PATH} --nsys {NSYS_PATH} --nsys_config {NSYS_CONFIG_PATH}"
     execute_cmd(non_delegated_cmd)
 
     return non_delegated_program.exported_program()
@@ -229,9 +244,9 @@ def read_prepared_samples(
                 bin_file_path = os.path.join(
                     sample_dir, f"{str(spec_idx).zfill(2)}.bin"
                 )
-                sample_vector = np.fromfile(bin_file_path, dtype=spec.type).reshape(
-                    spec.shape
-                )
+                sample_vector = np.fromfile(
+                    bin_file_path, dtype=torch_type_to_numpy_type(spec.dtype)
+                ).reshape(spec.shape)
                 current_samples.append(sample_vector)
 
             all_samples.append(tuple(current_samples))
@@ -385,6 +400,8 @@ def lower_run_compare(
     use_qat: bool = False,
     train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
     use_new_flow_neutron_c: bool = False,
+    operators_not_to_delegate: list[str] = None,
+    remove_quant_io_ops: bool = False,
 ):
     """
     Run provided program twice with neutron-test and check if results correspond. At first,
@@ -402,6 +419,10 @@ def lower_run_compare(
     :param use_qat: If True, applies quantization-aware training before conversion (without the QAT training).
     :param train_fn: Train/finetune function for QAT training. Is used only when `use_qat=True`.
     :param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
+    :param operators_not_to_delegate: list of operators not to delegate.
+    :param remove_quant_io_ops: If true, IO q-ops are removed and verification is done on quantized
+        version of dataset (quantized INT8 input samples).
+
     """
     assert_NSYS()
 
@@ -430,7 +451,7 @@ def lower_run_compare(
     cpu_results_dir = os.path.join(test_dir, "results_cpu")
     npu_results_dir = os.path.join(test_dir, "results_npu")
 
-    delegated_program = _run_delegated_executorch_program(
+    delegated_program, testing_dataset_dir = _run_delegated_executorch_program(
         model_to_delegate,
         test_dir,
         test_name,
@@ -443,6 +464,8 @@ def lower_run_compare(
         use_qat=use_qat,
         train_fn=train_fn,
         use_new_flow_neutron_c=use_new_flow_neutron_c,
+        operators_not_to_delegate=operators_not_to_delegate,
+        remove_quant_io_ops=remove_quant_io_ops,
     )
 
     output_spec = _get_program_output_spec(delegated_program)
@@ -461,6 +484,7 @@ def lower_run_compare(
                 cpu_results_dir,
                 use_qat=use_qat,
                 train_fn=train_fn,
+                remove_quant_io_ops=remove_quant_io_ops,
             )
 
         case ReferenceModel.QUANTIZED_EDGE_PYTHON:
@@ -475,10 +499,19 @@ def lower_run_compare(
                     delegate_to_npu=False,
                     use_qat=use_qat,
                     train_fn=train_fn,
+                    remove_quant_io_ops=remove_quant_io_ops,
                 )
                 .exported_program()
                 .module()
             )
+            # Switch input spec dtype to quantized int8 if run with remove_quant_io_ops flag
+            # The input spec has to still have float32 dtype during edge program lowering to correctly calibrate the
+            # model. When running in Python, the testing data are loaded from numpy tensors according to input spec.
+            # There the testing data are in quantized int8 dtype.
+            if remove_quant_io_ops:
+                for spec in input_spec:
+                    spec.dtype = torch.int8
+
             _run_python_program(
                 non_delegated_edge_program,
                 testing_dataset_dir,
@@ -489,6 +522,12 @@ def lower_run_compare(
             )
 
         case ReferenceModel.FLOAT_PYTORCH_PYTHON:
+            if remove_quant_io_ops:
+                raise ValueError(
+                    "Flag remove_quant_io_ops is not applicable to FLOAT_PYTORCH_PYTHON reference model"
+                    "as it works with float data only. Run with remove_quant_io_ops=False."
+                )
+
             # Run the PyTorch nn.Module directly in Python.
             _run_python_program(
                 model_to_not_delegate,
@@ -561,7 +600,7 @@ def lower_run_compare_ptq_qat(
     ptq_results_dir = os.path.join(test_dir, "results_ptq")
     qat_results_dir = os.path.join(test_dir, "results_qat")
 
-    delegated_program_ptq = _run_delegated_executorch_program(
+    delegated_program_ptq, _ = _run_delegated_executorch_program(
         model_ptq,
         test_dir,
         test_name,
@@ -597,12 +636,39 @@ def lower_run_compare_ptq_qat(
     )
 
 
+def _parse_input_quant_params(
+    input_spec: tuple[ModelInputSpec, ...], exported_program_manager
+) -> list[InputQuantizationSpec]:
+    """
+    Parse input quantization params from provided exported program manager.
+
+    :param input_spec: Model inputs specification.
+    :param exported_program_manager: Exported program manager of parsed model.
+    :return: List of input quantization specification.
+    """
+    if (config_methods := exported_program_manager._config_methods) is None:
+        raise ValueError("Attempt to parse q-params for not fully quantized model")
+
+    q_params = []
+
+    for idx in range(len(input_spec)):
+        input_name = f"input{idx}"
+        scale = config_methods[f"{input_name}_scale"]
+        zp = config_methods[f"{input_name}_zp"]
+        dtype = config_methods[f"{input_name}_dtype"]
+
+        q_params.append(InputQuantizationSpec(input_name, scale, zp, dtype))
+
+    return q_params
+
+
 def _get_caller_name():
     test_function_names = ["lower_run_compare", "lower_run_compare_ptq_qat"]
     for idx, frame in enumerate(inspect.stack()):
         if frame.function in test_function_names:
             # Look one index above to get caller
             return inspect.stack()[idx + 1].function
+    return None
 
 
 def execute_cmd(cmd, cwd="."):

From a072513a967ef4a373a63d1b1c2e8e96b86e0673 Mon Sep 17 00:00:00 2001
From: Vaclav Novak <vaclav.novak@nxp.com>
Date: Mon, 1 Jun 2026 14:50:25 +0200
Subject: [PATCH 093/103] NXP backend: added support for `slice` using new
 Neutron flow (#19803)

### Summary

Added support for `aten.slice` using new Neutron flow.

### Test plan

tests can be manually run using `pytest -c /dev/null
backends/nxp/tests/`

cc @robert-kalmar @JakeStevens @digantdesai @rascani @MartinPavella
@roman-janik-nxp @jirioc @irtrukhina @StrycekSimon
---
 .../ops_converters/slice_tensor_converter.py  |  31 ++
 .../test_slice_tensor_converter.py            | 370 +++++++++++++++++-
 2 files changed, 394 insertions(+), 7 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/slice_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/slice_tensor_converter.py
index f2002cc311c..f5df822b6ad 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/slice_tensor_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/slice_tensor_converter.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import torch
 from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.edge_helper import input_tensor
 from executorch.backends.nxp.backend.ir.converter.conversion import translator
@@ -31,6 +32,15 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
+        if custom_delegation_options.use_new_flow_neutron_c:
+            supported_types = [torch.int8, torch.uint8]
+            if not NodeConverter.uses_quantization_type_for_io(
+                node, supported_types, [0], [0]
+            ):
+                return False
+
+            return True
+
         input_shape = input_tensor(node, 0).shape
         dim = node.args[1]
         if node.args[0].meta[NXP_NODE_FORMAT].is_channels_first():
@@ -94,6 +104,23 @@ def _convert_to_slice(self, t_op, main_input, input_rank, dim, start, end) -> No
         size[dim] = max(end - start, 0)
         begin[dim] = start
 
+        # In the new Neutron flow, slicing can be done along any dim, so
+        # no additional `transpose` ops have to be added.
+        if self.context.custom_delegation_options.use_new_flow_neutron_c:
+            begin_tensor = self.builder.create_tensor_for_data(
+                np.asarray(begin, np.int32), "begin"
+            )
+            size_tensor = self.builder.create_tensor_for_data(
+                np.asarray(size, np.int32), "size"
+            )
+
+            t_op.tmp_inputs = [main_input, begin_tensor, size_tensor]
+            t_op.builtin_options = slice_options.Slice()
+            ops = OpsList(middle_op=t_op)
+
+            self.builder.append_operators(ops.flatten())
+            return None
+
         # We can slice only the channels dimension
         # So we swap the sliced dimension with the channels dimension
         begin[-1], begin[dim] = begin[dim], begin[-1]
@@ -131,6 +158,10 @@ def _get_clipped_slice_args(node: Node) -> tuple[Dim, Start, End]:
         _, dim, start, end = node.args
         sliced_tensor_rank = input_shape[dim]
 
+        # convert numbering `from the end` to `from the beginning`, ie. normalize
+        end = end + sliced_tensor_rank if end < 0 else end
+        start = start + sliced_tensor_rank if start < 0 else start
+
         end = int(np.clip(end, 0, sliced_tensor_rank))
         start = int(np.clip(start, 0, sliced_tensor_rank))
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py
index 78886558ba2..39fa900ca55 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py
@@ -8,6 +8,7 @@
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
@@ -15,12 +16,22 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.model_output_comparator import (
+    AllCloseOutputComparator,
+)
 
 from executorch.backends.nxp.tests.models import (
     SliceTensorConvModule,
     SliceTensorModule,
 )
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    Convolution,
+    ExecutorchDelegateCall,
+    Slice,
+    SliceCopy,
+)
 from torch.export import ExportedProgram
 
 
@@ -30,11 +41,6 @@ def reseed_model_per_test_run():
     np.random.seed(23)
 
 
-ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate
-Slice = exir_ops.edge.aten.slice.Tensor
-SliceCopy = exir_ops.edge.aten.slice_copy.Tensor
-
-
 passing_cases = [
     pytest.param((24, 32), (0, 1), (0, 16), (24, 32), id="2D, no transpose"),
     pytest.param(
@@ -238,7 +244,7 @@ def test_slice_tensor_w_conv_quant_conversion(
             (24, 32), (0, 1), (0, 32), (24, 32), id="2D, start is equal to size"
         ),
         pytest.param(
-            (24, 32), (0, 1), (0, 0), (24, -5), id="2D, clipped end equal to zero"
+            (24, 32), (0, 1), (0, 0), (24, -35), id="2D, clipped end equal to zero"
         ),
         pytest.param(
             (24, 32), (0, 1), (64, 0), (24, 32), id="2D, clipped start equal to size"
@@ -298,3 +304,353 @@ def test_slice_not_delegated(mocker, x_input_shape, dims, starts, ends):
     for i in range(0, num_slice_ops):
         slice_idx = (i + 1) * 3
         assert nodes[slice_idx].target in [Slice, SliceCopy]
+
+
+class TestSliceTensorConverterNewNeutronFlow:
+    @staticmethod
+    def _slice_id(prefix, input_shape, dims, starts, ends):
+        return f"{prefix}rank={len(input_shape)}_dims={str(dims)}_starts={str(starts)}_ends={str(ends)}"
+
+    @staticmethod
+    def assert_delegated_and_correct(model, input_shape, num_slices, mocker, use_qat):
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={SliceCopy: num_slices},
+            expected_non_delegated_ops={},
+        )
+        dataset = RandomDatasetCreator(low=-255.0, high=255.0)
+        comparator = AllCloseOutputComparator()
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset,
+            comparator,
+            use_new_flow_neutron_c=True,
+            use_qat=use_qat,
+        )
+
+    @staticmethod
+    def assert_model_without_slices(model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Check there are no slices and nothing is delegated
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert not graph_contains_any_of_ops(delegated_ep.graph, [Slice, SliceCopy])
+
+    @staticmethod
+    def assert_not_delegated(model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        # Make sure the `slice` was NOT delegated.
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [Slice, SliceCopy])
+
+    @pytest.mark.parametrize(
+        "input_shape, dims, starts, ends",
+        [
+            pytest.param(
+                ins := (5, 2, 3, 4),
+                d := (0,),
+                s := (1,),
+                e := (4,),
+                id=_slice_id("basic, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5, 5, 3, 4),
+                d := (0, 1),
+                s := (1, 1),
+                e := (4, 3),
+                id=_slice_id("basic, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (7, 13, 5, 15),
+                d := (0, 1, 2, 3),
+                s := (4, 3, 1, 8),
+                e := (5, 10, 4, 11),
+                id=_slice_id("basic, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5, 13, 5, 13),
+                d := (0, 1, 2, 3),
+                s := (0, 0, 0, 0),
+                e := (4, 11, 4, 11),
+                id=_slice_id("basic, right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (7, 13, 3, 15),
+                d := (0, 1, 2, 3),
+                s := (2, 5, 1, 4),
+                e := ins,
+                id=_slice_id("basic, left trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (7, 4, 7),
+                d := (0, 1, 2),
+                s := (1, 1, 3),
+                e := (6, 3, 5),
+                id=_slice_id("basic, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (4, 5, 9),
+                d := (0, 1, 2),
+                s := (0, 0, 0),
+                e := (3, 4, 7),
+                id=_slice_id("basic, right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (4, 7, 9),
+                d := (0, 1, 2),
+                s := (3, 2, 2),
+                e := ins,
+                id=_slice_id("basic, left trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (4, 5),
+                d := (0, 1),
+                s := (1, 1),
+                e := (2, 4),
+                id=_slice_id("basic, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (4, 5),
+                d := (0, 1),
+                s := (0, 0),
+                e := (2, 4),
+                id=_slice_id("basic, right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (4, 5),
+                d := (0, 1),
+                s := (1, 2),
+                e := ins,
+                id=_slice_id("basic, left trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5,),
+                d := (0,),
+                s := (1,),
+                e := (4,),
+                id=_slice_id("basic, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5,),
+                d := (0,),
+                s := (0,),
+                e := (4,),
+                id=_slice_id("basic, right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5,),
+                d := (0,),
+                s := (1,),
+                e := ins,
+                id=_slice_id("basic, left trimmed:", ins, d, s, e),
+            ),
+        ],
+    )
+    def test_nsys_inference__basic(self, input_shape, dims, starts, ends, mocker):
+        model = SliceTensorModule(dims, starts, ends)
+
+        num_slices = len(dims)
+        self.assert_delegated_and_correct(
+            model, input_shape, num_slices, mocker, use_qat=False
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape, dims, starts, ends",
+        [
+            pytest.param(
+                ins := (4, 2, 7, 4),
+                d := (2,),
+                s := (5,),
+                e := (6,),
+                id=_slice_id("edge case, dimension reduced to 1:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (11, 2, 7, 5),
+                d := (2,),
+                s := (6,),
+                e := (6,),
+                id=_slice_id("edge case, dimension reduced to 0:", ins, d, s, e),
+            ),
+        ],
+    )
+    def test_nsys_inference__reduction(self, input_shape, dims, starts, ends, mocker):
+        model = SliceTensorModule(dims, starts, ends)
+
+        slice_lengths = [e - s for s, e in zip(starts, ends)]
+        if all(sl == 0 for sl in slice_lengths):
+            # reductions to 0 are disabled in the backend
+            self.assert_not_delegated(model, input_shape)
+        else:
+            num_slices = len(dims)
+            self.assert_delegated_and_correct(
+                model, input_shape, num_slices, mocker, use_qat=False
+            )
+
+    @pytest.mark.parametrize(
+        "input_shape, dims, starts, ends",
+        [
+            pytest.param(
+                ins := (5, 2, 3, 4),
+                d := (0,),
+                s := (-12,),
+                e := (2,),
+                id=_slice_id("edge case, `start` clipped:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5, 7, 5, 7),
+                d := (0,),
+                s := (1,),
+                e := (12,),
+                id=_slice_id("edge case, `end` clipped:", ins, d, s, e),
+            ),
+        ],
+    )
+    def test_nsys_inference__clipped(self, input_shape, dims, starts, ends, mocker):
+        model = SliceTensorModule(dims, starts, ends)
+
+        num_slices = len(dims)
+        self.assert_delegated_and_correct(
+            model, input_shape, num_slices, mocker, use_qat=False
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape, dims, starts, ends",
+        [
+            pytest.param(
+                ins := (5, 11, 13, 3),
+                d := (1,),
+                s := (-5,),
+                e := (10,),
+                id=_slice_id("edge case, `start` normalized:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (7, 15, 5, 7),
+                d := (1,),
+                s := (2,),
+                e := (-2,),
+                id=_slice_id("edge case, `end` normalized:", ins, d, s, e),
+            ),
+        ],
+    )
+    def test_nsys_inference__normalization(
+        self, input_shape, dims, starts, ends, mocker
+    ):
+        model = SliceTensorModule(dims, starts, ends)
+
+        num_slices = len(dims)
+        self.assert_delegated_and_correct(
+            model, input_shape, num_slices, mocker, use_qat=False
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape, dims, starts, ends",
+        [
+            pytest.param(
+                ins := (5000, 3, 5, 3),
+                d := (0,),
+                s := (1250,),
+                e := (2500,),
+                id=_slice_id("big args, left and right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (2, 5000, 5, 3),
+                d := (1,),
+                s := (0,),
+                e := (4999,),
+                id=_slice_id("big args, right trimmed:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (2, 3, 5000, 3),
+                d := (2,),
+                s := (1,),
+                e := (5000,),
+                id=_slice_id("big args, left trimmed:", ins, d, s, e),
+            ),
+        ],
+    )
+    def test_nsys_inference__big(self, input_shape, dims, starts, ends, mocker):
+        model = SliceTensorModule(dims, starts, ends)
+
+        num_slices = len(dims)
+        self.assert_delegated_and_correct(
+            model, input_shape, num_slices, mocker, use_qat=False
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape, dims, starts, ends",
+        [
+            pytest.param(
+                ins := (5, 2, 3, 4),
+                d := (2,),
+                s := (0,),
+                e := (3,),
+                id=_slice_id("edge case, one dimension identity:", ins, d, s, e),
+            ),
+            pytest.param(
+                ins := (5, 2, 3, 4),
+                d := (0, 1, 2, 3),
+                s := (0, 0, 0, 0),
+                e := ins,
+                id=_slice_id("edge case, all dimensions identity:", ins, d, s, e),
+            ),
+        ],
+    )
+    def test_nsys_inference__identity(self, input_shape, dims, starts, ends):
+        model = SliceTensorModule(dims, starts, ends)
+
+        self.assert_model_without_slices(model, input_shape)
+
+    def test_nsys_inference__with_conv(self, mocker):
+        input_shape = (11, 13, 5, 7)
+        in_channels = input_shape[1]
+        out_channels = 19
+
+        # we test functionality on `channels` dim
+        dims = (1,)
+        starts = (2,)
+        ends = (out_channels - 2,)
+        model = SliceTensorConvModule(dims, starts, ends, in_channels, out_channels)
+
+        num_slices = len(dims)
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={SliceCopy: num_slices},
+            expected_non_delegated_ops={Convolution: 1},
+        )
+        dataset = RandomDatasetCreator(low=-255.0, high=255.0)
+        comparator = AllCloseOutputComparator()
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset,
+            comparator,
+            use_new_flow_neutron_c=True,
+            use_qat=False,
+        )
+
+    def test_nsys_inference__qat(self, mocker):
+        input_shape = (7, 13, 7, 9)
+        dims = (0, 1, 2, 3)
+        starts = (1, 2, 3, 2)
+        ends = (6, 10, 5, 8)
+
+        model = SliceTensorModule(dims, starts, ends)
+
+        num_slices = len(dims)
+        self.assert_delegated_and_correct(
+            model, input_shape, num_slices, mocker, use_qat=True
+        )

From 10431b98a14876e018812c70d59eea6403101ba0 Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Mon, 1 Jun 2026 08:24:01 -0700
Subject: [PATCH 094/103] Suppress cppcheck unusedFunction false positives in
 headers (#19890)

### Summary
cppcheck's unusedFunction is a whole-program check, but lintrunner
analyzes files individually. Functions defined in headers are used by
the .cpp files that include them, but cppcheck only sees the header in
isolation and falsely reports them as never used. Suppress the check for
.h/.hpp files while keeping it active for .cpp.

Authored with assistance from Claude.
---
 .lintrunner.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.lintrunner.toml b/.lintrunner.toml
index 02380ce1356..75608704110 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -134,6 +134,8 @@ command = [
     '--extra-arg=--inconclusive',
     '--extra-arg=--suppress=unusedStructMember',
     '--extra-arg=--suppress=toomanyconfigs',
+    '--extra-arg=--suppress=unusedFunction:*.h',
+    '--extra-arg=--suppress=unusedFunction:*.hpp',
     '--',
     '@{{PATHSFILE}}'
 ]

From 4469d84647266db3f7c6b76068d56f26020eb435 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Mon, 1 Jun 2026 17:25:52 +0200
Subject: [PATCH 095/103] Add executorch-ubuntu-26.04-gcc15 docker image
 (#19799)

### Summary

Add a docker build image based on Ubuntu 26.04 with gcc 15. It's
necessary for the the baremetal on RISC-V use case since
`libstdc++-riscv64-unknown-elf-picolibc` is only available starting
Ubuntu 26.04. It also makes sure that `gcc-riscv64-unknown-elf` is at
least gcc 14+ which has support for RVV

### Test plan

It will be used by the baremetal testing on RISC-V.

Relates to https://github.com/pytorch/executorch/issues/18991
https://github.com/pytorch/executorch/issues/19666
---
 .ci/docker/build.sh                    | 5 +++++
 .ci/docker/common/install_docs_reqs.sh | 4 ++--
 .github/workflows/docker-builds.yml    | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index 123680e5275..673b5b4fd4b 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -89,6 +89,11 @@ case "${IMAGE_NAME}" in
     OS_VERSION=24.04
     GCC_VERSION=14
     ;;
+  executorch-ubuntu-26.04-gcc15)
+    LINTRUNNER=""
+    OS_VERSION=26.04
+    GCC_VERSION=15
+    ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
     exit 1
diff --git a/.ci/docker/common/install_docs_reqs.sh b/.ci/docker/common/install_docs_reqs.sh
index 3b6d10c5c2b..ea54d90523e 100755
--- a/.ci/docker/common/install_docs_reqs.sh
+++ b/.ci/docker/common/install_docs_reqs.sh
@@ -15,8 +15,8 @@ if [ -n "$BUILD_DOCS" ]; then
   curl --retry 3 --retry-all-errors -sL https://deb.nodesource.com/setup_16.x | sudo -E bash -
   sudo apt-get install -y nodejs
 
-  curl --retry 3 --retry-all-errors -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
-  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
+  curl --retry 3 --retry-all-errors -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo gpg --dearmor -o /usr/share/keyrings/yarn-archive-keyring.gpg
+  echo "deb [signed-by=/usr/share/keyrings/yarn-archive-keyring.gpg] https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
 
   apt-get update
   apt-get install -y --no-install-recommends yarn
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index b77e5497f79..d11b2e9e6d9 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -43,6 +43,7 @@ jobs:
           executorch-ubuntu-22.04-mediatek-sdk,
           executorch-ubuntu-22.04-clang12-android,
           executorch-ubuntu-24.04-gcc14,
+          executorch-ubuntu-26.04-gcc15,
         ]
         include:
           - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64

From 00d01735f729489166236c28cf316b1f14e5183d Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Sat, 23 May 2026 15:17:26 +0200
Subject: [PATCH 096/103] Add baremetal RISC-V smoke tests (rv32, rv64)

Cross-compiles with riscv64-unknown-elf + picolibc, embeds the .bpte into
the ELF, and runs under qemu-system-riscv{32,64} -machine virt with
semihosting carrying stdout and exit status. Same bundled-IO PASS criterion
as the existing linux runs.
---
 .ci/scripts/setup-linux.sh                    |   2 +-
 .ci/scripts/test_riscv_qemu.sh                |  50 ++-
 .github/workflows/_test_riscv.yml             |  57 ++--
 .github/workflows/riscv64.yml                 |  42 ++-
 CMakePresets.json                             |  20 +-
 examples/riscv/README.md                      |  51 ++--
 examples/riscv/aot_riscv.py                   |  40 ++-
 examples/riscv/baremetal/CMakeLists.txt       | 117 +++++++
 .../baremetal/executor_runner_baremetal.cpp   | 286 ++++++++++++++++++
 examples/riscv/baremetal/riscv_virt.ld        |  85 ++++++
 examples/riscv/baremetal/semihosting.h        |  51 ++++
 examples/riscv/baremetal/start.S              |  49 +++
 .../riscv/riscv32-unknown-elf-toolchain.cmake |  74 +++++
 .../riscv/riscv64-unknown-elf-toolchain.cmake |  77 +++++
 examples/riscv/run.sh                         | 246 +++++++++++----
 examples/riscv/setup-baremetal.sh             |  49 +++
 examples/riscv/{setup.sh => setup-linux.sh}   |  11 +-
 examples/riscv/test-matrix.sh                 | 250 +++++++++++++++
 tools/cmake/preset/riscv_baremetal.cmake      |  50 +++
 ...{riscv64_linux.cmake => riscv_linux.cmake} |   0
 20 files changed, 1446 insertions(+), 161 deletions(-)
 create mode 100644 examples/riscv/baremetal/CMakeLists.txt
 create mode 100644 examples/riscv/baremetal/executor_runner_baremetal.cpp
 create mode 100644 examples/riscv/baremetal/riscv_virt.ld
 create mode 100644 examples/riscv/baremetal/semihosting.h
 create mode 100644 examples/riscv/baremetal/start.S
 create mode 100644 examples/riscv/riscv32-unknown-elf-toolchain.cmake
 create mode 100644 examples/riscv/riscv64-unknown-elf-toolchain.cmake
 create mode 100755 examples/riscv/setup-baremetal.sh
 rename examples/riscv/{setup.sh => setup-linux.sh} (90%)
 create mode 100644 examples/riscv/test-matrix.sh
 create mode 100644 tools/cmake/preset/riscv_baremetal.cmake
 rename tools/cmake/preset/{riscv64_linux.cmake => riscv_linux.cmake} (100%)

diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh
index feb8a128b17..275a93d797e 100755
--- a/.ci/scripts/setup-linux.sh
+++ b/.ci/scripts/setup-linux.sh
@@ -5,7 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-set -exu
+set -eu
 
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh
index 2842542aa3a..0e5b44d97c2 100755
--- a/.ci/scripts/test_riscv_qemu.sh
+++ b/.ci/scripts/test_riscv_qemu.sh
@@ -4,10 +4,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# CI wrapper: install RISC-V cross-compile + qemu-user tooling, then run the
-# RISC-V smoke test (export, cross-compile, qemu-user execution) via
-# examples/riscv/run.sh. The bundled-IO comparison and Test_result: PASS
-# check are done by run.sh.
+# CI wrapper: install riscv32/64 cross-compile + qemu tooling, then drive
+# examples/riscv/run.sh which does the export, cross-compile, qemu run, and
+# bundled-IO PASS check.
 
 set -eu
 
@@ -15,29 +14,41 @@ script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../..")
 
 model="add"
-xnnpack=false
+backend="portable"
 quantize=false
+os="linux"
+arch="rv64"
+qemu_cpu_ext=""
 verbose_xnnpack=false
 debug_xnnpack=false
+build_dir=
 
 usage() {
     cat <<EOF
 Usage: $(basename "$0") [options]
 Options:
-  --model=<NAME>     Which model to export and run (default: add)
-  --xnnpack          Enable the XNNPACK backend (AOT partitioner + runtime)
-  --quantize         Produce an 8-bit quantized model
-  --verbose-xnnpack  Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch
-  --debug-xnnpack    Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
-  -h, --help         Show this help
+  --model=<NAME>          Which model to export and run (default: ${model})
+  --quantize              Produce an 8-bit quantized model
+  --backend=<NAME>        AOT backend (portable|xnnpack) (default: ${backend})
+  --os=<NAME>             Target OS (linux|baremetal) (default: ${os})
+  --arch=<NAME>           Target arch (rv32|rv64) (default: ${arch})
+  --qemu-cpu-ext=<EXT>    QEMU -cpu extensions (no rv32/rv64 prefix, default: none)
+  --build-dir=<DIR>       Build/output directory for this configuration (required)
+  --verbose-xnnpack       Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch
+  --debug-xnnpack         Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
+  -h, --help              Show this help
 EOF
 }
 
 for arg in "$@"; do
     case $arg in
         --model=*) model="${arg#*=}" ;;
-        --xnnpack) xnnpack=true ;;
         --quantize) quantize=true ;;
+        --backend=*) backend="${arg#*=}" ;;
+        --os=*) os="${arg#*=}" ;;
+        --arch=*) arch="${arg#*=}" ;;
+        --qemu-cpu-ext=*) qemu_cpu_ext="${arg#*=}" ;;
+        --build-dir=*) build_dir="${arg#*=}" ;;
         --debug-xnnpack) debug_xnnpack=true ;;
         --verbose-xnnpack) verbose_xnnpack=true ;;
         -h|--help) usage; exit 0 ;;
@@ -45,9 +56,13 @@ for arg in "$@"; do
     esac
 done
 
+if [[ -z "${build_dir}" ]]; then
+    echo "[test_riscv_qemu.sh] --build-dir is required" >&2; usage; exit 1
+fi
+
 run_extra_args=()
-if ${xnnpack}; then
-    run_extra_args+=(--xnnpack)
+if [ -n "${qemu_cpu_ext}" ]; then
+    run_extra_args+=(--qemu-cpu-ext="${qemu_cpu_ext}")
 fi
 if ${quantize}; then
     run_extra_args+=(--quantize)
@@ -59,5 +74,8 @@ if ${verbose_xnnpack}; then
     run_extra_args+=(--verbose-xnnpack)
 fi
 
-bash "${et_root_dir}/examples/riscv/setup.sh"
-bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}"
+bash "${et_root_dir}/examples/riscv/setup-${os}.sh"
+bash "${et_root_dir}/examples/riscv/run.sh" \
+    --model="${model}" --backend="${backend}" --os="${os}" --arch="${arch}" \
+    --build-dir="${build_dir}" \
+    "${run_extra_args[@]}"
diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml
index 223a146e3d8..0b7d8472d8b 100644
--- a/.github/workflows/_test_riscv.yml
+++ b/.github/workflows/_test_riscv.yml
@@ -13,35 +13,44 @@ on:
         type: number
         default: 30
       model:
-        description: 'Which model to run. Possible values are: add, mv2 (mobilenetv2)'
+        description: 'Which model to run (add, mv2, mobilebert, llama2, resnet18, yolo26)'
         required: false
         type: string
         default: 'add'
-      xnnpack:
-        description: 'Whether to enable XNNPACK'
-        required: false
-        type: boolean
-        default: false
       quantize:
         description: 'Produce an 8-bit quantized model'
         required: false
         type: boolean
         default: false
-      qemu-cpu:
-        description: 'Configuration(s) for the CPU to emulate with QEMU, expecting a JSON array'
-        required: true
+      backend:
+        description: 'AOT backend to lower to (portable|xnnpack)'
+        required: false
         type: string
-      docker-image:
-        description: 'The docker image to use for this job'
+        default: 'portable'
+      os:
+        description: 'Target OS for the runner (linux|baremetal)'
         required: false
         type: string
+        default: 'linux'
+      arch:
+        description: 'Target architecture (rv32|rv64)'
+        required: false
+        type: string
+        default: 'rv64'
+      qemu-cpu-ext:
+        description: >-
+          JSON array of QEMU -cpu *extension* strings (no rv32/rv64 prefix).
+          The script splices each entry with `arch` to form the final -cpu
+          value. Use [""] for plain base-ISA runs.
+        required: true
+        type: string
 
 jobs:
   run:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-24.04-gcc14
+      docker-image: ${{ inputs.os == 'linux' && 'ci-image:executorch-ubuntu-24.04-gcc14' || 'ci-image:executorch-ubuntu-26.04-gcc15' }}
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: ${{ inputs.timeout }}
@@ -55,20 +64,26 @@ jobs:
         # Allows failure in `echo | jq | while read` pipeline to bubble up and fail the workflow
         set -o pipefail
 
-        echo '${{ inputs.qemu-cpu }}' | jq -r '.[]' | while IFS= read -r qemu_cpu; do
-          export QEMU_CPU="${qemu_cpu}"
-          export GCC_VERSION=14
+        echo '${{ inputs.qemu-cpu-ext }}' | jq -r '.[]' | while IFS= read -r qemu_cpu_ext; do
+          variant_slug="${qemu_cpu_ext//,/_}"; variant_slug="${variant_slug//=/_}"; variant_slug="${variant_slug:-base}"
+          build_dir="riscv_test/${{ inputs.model }}${{ inputs.quantize && '_q' || '' }}/${{ inputs.backend }}/${{ inputs.os }}-${{ inputs.arch }}-${variant_slug}"
+
           bash .ci/scripts/test_riscv_qemu.sh \
             --model="${{ inputs.model }}" \
-            ${{ inputs.xnnpack && '--xnnpack --verbose-xnnpack' || '' }} \
+            --backend="${{ inputs.backend }}" \
+            --os="${{ inputs.os }}" \
+            --arch="${{ inputs.arch }}" \
+            --qemu-cpu-ext="${qemu_cpu_ext}" \
+            --build-dir="${build_dir}" \
+            ${{ inputs.backend == 'xnnpack' && '--verbose-xnnpack' || '' }} \
             ${{ inputs.quantize && '--quantize' || '' }}
 
-          # We only generate riscv_test/${{ inputs.model }}_riscv.etdump.json from `--verbose-xnnpack`.
-          if ${{ inputs.xnnpack }}; then
-            # Generate markdown table from riscv_test/${{ inputs.model }}_riscv.etdump.json, sorted by sum_ms
+          # We only generate run.etdump.json from `--verbose-xnnpack`.
+          if [[ "${{ inputs.backend }}" == "xnnpack" ]]; then
+            # Generate markdown table from ${build_dir}/run.etdump.json, sorted by sum_ms
             (
-              etdump_json="riscv_test/${{ inputs.model }}_riscv.etdump.json"
-              echo "### Model=${{ inputs.model }} XNNPACK=${{ inputs.xnnpack }} Quantize=${{ inputs.quantize }} QEMU_CPU='${QEMU_CPU}'"
+              etdump_json="${build_dir}/run.etdump.json"
+              echo "### Model=${{ inputs.model }} Quantize=${{ inputs.quantize }} Backend=${{ inputs.backend }} OS=${{ inputs.os }} Arch=${{ inputs.arch }}${qemu_cpu_ext:+,${qemu_cpu_ext}}"
               jq -r '
                 def r3: (. * 1000 | round) / 1000;
                 ["Section","Op","Count","Sum (ms)","Avg (ms)","Max (ms)","Microkernels"],
diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index a7a5273e2b0..d6109a47305 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -10,8 +10,9 @@ on:
   pull_request:
     paths:
       - .github/workflows/riscv64.yml
+      - .github/workflows/_test_riscv.yml
       - .ci/scripts/test_riscv_qemu.sh
-      - tools/cmake/preset/riscv64_linux.cmake
+      - tools/cmake/preset/riscv64_*.cmake
       - examples/riscv/**
   workflow_dispatch:
   schedule:
@@ -35,33 +36,42 @@ jobs:
           - llama2
           - resnet18
           - yolo26
-        xnnpack: [true, false]
         quantize: [true, false]
+        backend: [portable, xnnpack]
+        os: [linux, baremetal]
+        arch: [rv64, rv32]
         exclude:
-          # We only enable quantization with XNNPACK
-          - xnnpack: false
-            quantize: true
-          # We don't test quantization for Yolo26
-          - model: yolo26
-            quantize: true
+          # Disable quantization testing with Portable Kernels
+          - { backend: portable, quantize: true }
+          # XNNPACK needs pthreads + dynamic loading (no baremetal)
+          - { backend: xnnpack, os: baremetal }
+          # No quantization recipe for Yolo26.
+          - { model: yolo26, quantize: true }
+          # No riscv32-linux-gnu cross is packaged on Ubuntu.
+          - { os: linux, arch: rv32 }
     permissions:
       id-token: write
       contents: read
     with:
       model: ${{ matrix.model }}
-      xnnpack: ${{ matrix.xnnpack }}
       quantize: ${{ matrix.quantize }}
-      # If XNNPACK, test with multiple RVV length, disabled otherwise
-      qemu-cpu: >-
+      backend: ${{ matrix.backend }}
+      os: ${{ matrix.os }}
+      arch: ${{ matrix.arch }}
+      # JSON array of QEMU -cpu *extension* strings (no rv32/rv64 prefix - that
+      # comes from `arch`). The script splices them as `<arch>,<ext>`. xnnpack
+      # benefits from RVV so it sweeps multiple vlen; everything else just uses
+      # the plain base ISA.
+      qemu-cpu-ext: >-
         ${{
           case(
-            matrix.xnnpack, '[
-              "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0",
-              "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0",
-              "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0"
+            matrix.backend == 'xnnpack', '[
+              "v=true,vext_spec=v1.0,vlen=128",
+              "v=true,vext_spec=v1.0,vlen=256",
+              "v=true,vext_spec=v1.0,vlen=512"
             ]',
             '[
-              "rv64,zba=true,zbb=true,zbs=true,v=false"
+              "v=false"
             ]'
           )
         }}
diff --git a/CMakePresets.json b/CMakePresets.json
index 91848565067..15d005cbede 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -318,7 +318,7 @@
       "displayName": "Build ExecuTorch for riscv64 Linux (cross-compile)",
       "inherits": ["common"],
       "cacheVariables": {
-        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv64_linux.cmake",
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv_linux.cmake",
         "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/riscv/riscv64-linux-gnu-toolchain.cmake"
       },
       "condition": {
@@ -327,6 +327,24 @@
         "rhs": "Linux"
       }
     },
+    {
+      "name": "riscv64-baremetal",
+      "displayName": "Build ExecuTorch for riscv64 baremetal (cross-compile)",
+      "inherits": ["common"],
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv_baremetal.cmake",
+        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/riscv/riscv64-unknown-elf-toolchain.cmake"
+      }
+    },
+    {
+      "name": "riscv32-baremetal",
+      "displayName": "Build ExecuTorch for riscv32 baremetal (cross-compile)",
+      "inherits": ["common"],
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv_baremetal.cmake",
+        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/riscv/riscv32-unknown-elf-toolchain.cmake"
+      }
+    },
     {
       "name": "mlx",
       "displayName": "Build MLX delegate",
diff --git a/examples/riscv/README.md b/examples/riscv/README.md
index 563ff4913fd..2c250f75cd7 100644
--- a/examples/riscv/README.md
+++ b/examples/riscv/README.md
@@ -1,41 +1,36 @@
 # RISC-V
 
-Cross-compile `executor_runner` for `riscv64-linux-gnu` and run it under
-`qemu-user-static` against a small bundled program. The end-to-end check
-mirrors the Arm Cortex-M e2e flow: a `Test_result: PASS` line in stdout from
-the bundled-IO comparison path is the pass criterion.
+End-to-end smoke tests that cross-compile ExecuTorch for RISC-V and run a bundled program under QEMU. A `Test_result: PASS` line emitted by the bundled-IO comparison path is the pass criterion.
 
-This is the Phase 1 deliverable for the RISC-V Support RFC at
-[pytorch/executorch#18991][rfc]. The cross-compile and runner artifacts
-(toolchain file, preset, AOT script) are designed to carry over unchanged
-to a hardware-runner job once one becomes available; only the invocation
-step (qemu-user vs. native) would change.
-
-[rfc]: https://github.com/pytorch/executorch/issues/18991
+Part of the RISC-V Support RFC, [pytorch/executorch#18991](https://github.com/pytorch/executorch/issues/18991).
 
 ## Quick start (Ubuntu / Debian)
 
 ```bash
-examples/riscv/setup.sh        # apt: gcc-riscv64-linux-gnu, qemu-user-static
-examples/riscv/run.sh          # export, cross-compile, run under qemu-user
+examples/riscv/setup-linux.sh       # apt: gcc cross riscv64-linux-gnu + qemu-user
+examples/riscv/setup-baremetal.sh   # apt: gcc cross riscv64-unknown-elf + qemu-system + picolibc
+examples/riscv/run.sh               # export, cross-compile, run under qemu
 ```
 
-The driver does three steps:
+`run.sh` accepts:
+
+| Flag | Values | Default | Notes |
+|---|---|---|---|
+| `--model=<N>` | `add`, `mv2`, `mobilebert`, `llama2`, `resnet18`, `yolo26` | `add` | which model to export |
+| `--quantize` | flag | off | XNNPACK quantizer (requires `--backend=xnnpack`) |
+| `--backend=<N>` | `portable`, `xnnpack` | `portable` | xnnpack is linux-only |
+| `--os=<N>` | `linux`, `baremetal` | `linux` | qemu-user vs qemu-system + semihosting |
+| `--arch=<N>` | `rv64` | `rv64` | (rv32 follow-up; no `riscv32-linux-gnu` cross is packaged on Ubuntu) |
+| `--qemu-cpu-ext=<S>` | e.g. `v=true,vlen=128` | empty | extensions appended after the arch base |
+
+## Pipelines
+
+**linux**: `aot_riscv.py` → `cmake --preset riscv64-linux` → `executor_runner` under `qemu-riscv64`. Portable kernels + (optional) XNNPACK delegate.
+
+**baremetal**: `aot_riscv.py` → `cmake -S examples/riscv/baremetal` (standalone project; pulls executorch in via `add_subdirectory`) → `executor_runner_baremetal.elf` under `qemu-system-riscv64 -machine virt -bios none -semihosting-config target=native`.
 
-1. `python examples/riscv/aot_riscv.py` exports a `torch.add` module to
-   `riscv_test/add_riscv.bpte` (a BundledProgram with reference outputs
-   embedded for two test cases).
-2. `cmake --preset riscv64-linux` configures the cross-build using
-   `examples/riscv/riscv64-linux-gnu-toolchain.cmake` and
-   `tools/cmake/preset/riscv64_linux.cmake`. `executor_runner` is built
-   against portable kernels with `ET_BUNDLE_IO_ENABLED` defined.
-3. `qemu-riscv64-static` invokes the runner with `--model_path` pointing at
-   the `.bpte`. The runner detects the bundle, runs every embedded test case,
-   and emits `Test_result: PASS` (or `FAIL`) per case.
+The baremetal runner embeds the `.bpte` directly in `.rodata` via the same `examples/arm/executor_runner/pte_to_header.py` Cortex-M uses; semihosting SYS_WRITE0 / SYS_EXIT carry log output and exit status to the host.
 
 ## CI
 
-`.github/workflows/_test_riscv_qemu.yml` is a reusable `workflow_call`
-job (mirroring `_test_cortex_m_e2e.yml`) invoked from `pull.yml` to run on
-every PR. It runs on the standard `linux.2xlarge` x86_64 runner using the
-`executorch-ubuntu-22.04-gcc11` docker image.
+`.github/workflows/riscv64.yml` is the entry point; it fans out into `_test_riscv.yml` over a `(model, backend, os, arch, quantize)` matrix and sweeps `qemu-cpu-ext` per backend. Runs on the `executorch-ubuntu-26.04-gcc15` docker image (needed for the `riscv64-unknown-elf` picolibc + libstdc++ packages - see [setup.sh](setup.sh)).
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index edc30c2653b..e01fe6f954e 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -3,11 +3,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-"""AOT export for the RISC-V smoke test.
+"""AOT export for the RISC-V smoke tests.
 
-Exports a small model to a BundledProgram (.bpte) that the portable
-executor_runner can load on a riscv64 target and verify against the embedded
-reference output, emitting ``Test_result: PASS`` on success.
+Exports the model selected by ``--model`` to a BundledProgram (.bpte) that
+either ``executor_runner`` (linux) or ``executor_runner_baremetal`` (qemu
+virt + semihosting) consumes. The bundled-IO comparison path inside the
+runner emits ``Test_result: PASS`` per testset, which is what run.sh greps.
 """
 
 import argparse
@@ -171,9 +172,19 @@ def main() -> None:
         help="Output .bpte path (default: <model>_riscv.bpte)",
     )
     parser.add_argument(
-        "--xnnpack",
-        action="store_true",
-        help="Lower through the XNNPACK partitioner",
+        "--backend",
+        choices=("portable", "xnnpack"),
+        default="portable",
+        help="AOT backend: 'portable' runs everything on the portable kernels, "
+        "'xnnpack' adds the XNNPACK partitioner (default: portable)",
+    )
+    parser.add_argument(
+        "--os",
+        choices=("linux", "baremetal"),
+        default="linux",
+        help="Target OS for the runner that will consume this .bpte. The .bpte "
+        "itself is OS-independent; the flag is logged so callers can verify "
+        "the AOT/runtime sides agree (default: linux)",
     )
     parser.add_argument(
         "--quantize",
@@ -187,6 +198,13 @@ def main() -> None:
     )
     args = parser.parse_args()
 
+    if args.debug_xnnpack and args.backend != "xnnpack":
+        parser.error("--debug-xnnpack requires --backend=xnnpack")
+
+    # xnnpack pulls in pthreads + dynamic loading; baremetal runner doesn't have those.
+    if args.os == "baremetal" and args.backend == "xnnpack":
+        parser.error("--backend=xnnpack is not supported on --os=baremetal")
+
     if args.debug_xnnpack:
         logging.basicConfig(level=logging.DEBUG)
 
@@ -209,7 +227,7 @@ def main() -> None:
 
     exported = export(model, example_inputs, strict=strict)
     partitioners = []
-    if args.xnnpack:
+    if args.backend == "xnnpack":
         from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
             XnnpackPartitioner,
         )
@@ -223,7 +241,9 @@ def main() -> None:
         compile_config = EdgeCompileConfig(_check_ir_validity=False)
 
     edge = to_edge_transform_and_lower(
-        exported, partitioner=partitioners, compile_config=compile_config
+        exported,
+        partitioner=partitioners,
+        compile_config=compile_config,
     )
     delegated = sum(
         1
@@ -231,7 +251,7 @@ def main() -> None:
         if n.op == "call_function" and "call_delegate" in str(n.target)
     )
     print(
-        f"[aot_riscv] model={args.model} xnnpack={args.xnnpack} "
+        f"[aot_riscv] model={args.model} backend={args.backend} os={args.os} "
         f"quantize={args.quantize} delegated_nodes={delegated}"
     )
 
diff --git a/examples/riscv/baremetal/CMakeLists.txt b/examples/riscv/baremetal/CMakeLists.txt
new file mode 100644
index 00000000000..b7765c4e3a1
--- /dev/null
+++ b/examples/riscv/baremetal/CMakeLists.txt
@@ -0,0 +1,117 @@
+# Copyright 2026 The ExecuTorch Authors.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Standalone runner project, invoked from examples/riscv/run.sh as:
+# ~~~
+#   cmake -S examples/riscv/baremetal -B <build> \
+#       -DEXECUTORCH_ROOT=<repo>                 \
+#       -DRISCV_BAREMETAL_PTE=<path>.bpte        \
+#       -DCMAKE_TOOLCHAIN_FILE=.../riscv{32,64}-unknown-elf-toolchain.cmake
+# ~~~
+# Mirrors examples/arm/executor_runner/standalone/CMakeLists.txt so the
+# top-level executorch CMake has no reference to examples/riscv/.
+
+cmake_minimum_required(VERSION 3.20)
+project(riscv_executor_runner_baremetal LANGUAGES C CXX ASM)
+
+get_filename_component(
+  _default_executorch_root "${CMAKE_CURRENT_LIST_DIR}/../../.." ABSOLUTE
+)
+if(NOT DEFINED EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT
+      "${_default_executorch_root}"
+      CACHE PATH "Path to the ExecuTorch checkout"
+  )
+endif()
+if(NOT EXISTS "${EXECUTORCH_ROOT}/CMakeLists.txt")
+  message(
+    FATAL_ERROR
+      "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project."
+  )
+endif()
+
+set(RISCV_BAREMETAL_PTE
+    ""
+    CACHE FILEPATH "Path to the .bpte to embed in the baremetal runner"
+)
+if(NOT RISCV_BAREMETAL_PTE)
+  message(
+    FATAL_ERROR
+      "RISCV_BAREMETAL_PTE not set; pass -DRISCV_BAREMETAL_PTE=<path> from run.sh"
+  )
+endif()
+
+include("${EXECUTORCH_ROOT}/tools/cmake/common/preset.cmake")
+if(NOT DEFINED EXECUTORCH_BUILD_PRESET_FILE)
+  set(EXECUTORCH_BUILD_PRESET_FILE
+      "${EXECUTORCH_ROOT}/tools/cmake/preset/riscv64_baremetal.cmake"
+      CACHE PATH "Preset used when configuring the standalone baremetal runner"
+  )
+endif()
+load_build_preset()
+include("${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake")
+
+add_subdirectory(
+  "${EXECUTORCH_ROOT}" "${CMAKE_BINARY_DIR}/executorch" EXCLUDE_FROM_ALL
+)
+
+find_package(Python3 REQUIRED COMPONENTS Interpreter)
+
+set(_pte_header "${CMAKE_CURRENT_BINARY_DIR}/model_pte.h")
+add_custom_command(
+  OUTPUT "${_pte_header}"
+  COMMAND
+    "${Python3_EXECUTABLE}"
+    "${EXECUTORCH_ROOT}/examples/arm/executor_runner/pte_to_header.py" --pte
+    "${RISCV_BAREMETAL_PTE}" --outdir "${CMAKE_CURRENT_BINARY_DIR}" --outfile
+    "model_pte.h" --section ".rodata.model_pte"
+  DEPENDS "${RISCV_BAREMETAL_PTE}"
+  COMMENT "Embedding ${RISCV_BAREMETAL_PTE} into model_pte.h"
+  VERBATIM
+)
+
+# pte_to_header.py emits the byte array but not its length; the glue TU
+# materialises the matching `model_pte_len` and is the only place the header is
+# included (avoids a double-definition at link time).
+file(
+  WRITE "${CMAKE_CURRENT_BINARY_DIR}/model_pte_glue.cpp"
+  "#include <stddef.h>\n#include \"model_pte.h\"\nextern \"C\" const size_t model_pte_len = sizeof(model_pte);\n"
+)
+
+add_executable(
+  executor_runner_baremetal
+  start.S executor_runner_baremetal.cpp
+  "${CMAKE_CURRENT_BINARY_DIR}/model_pte_glue.cpp" "${_pte_header}"
+)
+set_target_properties(
+  executor_runner_baremetal PROPERTIES SUFFIX ".elf" LINKER_LANGUAGE CXX
+)
+target_include_directories(
+  executor_runner_baremetal PRIVATE "${CMAKE_CURRENT_BINARY_DIR}"
+)
+target_compile_options(
+  executor_runner_baremetal PRIVATE -fno-exceptions -fno-rtti -fdata-sections
+                                    -ffunction-sections
+)
+# --specs=picolibc.specs / -nostartfiles / -march / -mabi all come from the
+# toolchain file; only the linker script (QEMU virt memory map) is target-
+# specific here.
+target_link_options(
+  executor_runner_baremetal PRIVATE
+  "-T${CMAKE_CURRENT_SOURCE_DIR}/riscv_virt.ld"
+)
+
+# gen_operators_lib / executorch_target_link_options_shared_lib attach INTERFACE
+# --whole-archive options to portable_ops_lib (so the static-init
+# kernel-registration TU survives DCE) and to executorch itself. Listing the
+# libs once each is enough; an extra --whole-archive wrapper around them would
+# include the same archive twice and double-register every op.
+target_link_libraries(executor_runner_baremetal PRIVATE bundled_program)
+if(TARGET portable_ops_lib)
+  target_link_libraries(executor_runner_baremetal PRIVATE portable_ops_lib)
+endif()
+if(TARGET portable_kernels)
+  target_link_libraries(executor_runner_baremetal PRIVATE portable_kernels)
+endif()
diff --git a/examples/riscv/baremetal/executor_runner_baremetal.cpp b/examples/riscv/baremetal/executor_runner_baremetal.cpp
new file mode 100644
index 00000000000..d0bb128bd98
--- /dev/null
+++ b/examples/riscv/baremetal/executor_runner_baremetal.cpp
@@ -0,0 +1,286 @@
+/*
+ * Copyright 2026 The ExecuTorch Authors.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Baremetal runner for qemu-system-riscv64 -machine virt + semihosting. Loads
+// a .bpte embedded into the ELF and emits "TEST: BundleIO index[N]
+// Test_result: PASS|FAIL" via ET_LOG so examples/riscv/run.sh's grep can
+// detect success without a host filesystem.
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include <executorch/devtools/bundled_program/bundled_program.h>
+#include <executorch/extension/data_loader/buffer_data_loader.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/memory_allocator.h>
+#include <executorch/runtime/executor/method.h>
+#include <executorch/runtime/executor/program.h>
+#include <executorch/runtime/platform/log.h>
+#include <executorch/runtime/platform/platform.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#include "semihosting.h"
+
+extern "C" const uint8_t model_pte[];
+extern "C" const size_t model_pte_len;
+
+using executorch::extension::BufferDataLoader;
+using executorch::runtime::Error;
+using executorch::runtime::HierarchicalAllocator;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::MemoryManager;
+using executorch::runtime::Method;
+using executorch::runtime::MethodMeta;
+using executorch::runtime::Program;
+using executorch::runtime::Result;
+using executorch::runtime::Span;
+
+namespace {
+
+// Pools are sized for the largest model we currently test (llama2 / yolo26)
+// rather than per-model; the .bss grows but freestanding picolibc never
+// allocates from it so the cost is just a bigger ELF. Bumping these requires
+// matching headroom in riscv_virt.ld's RAM region and qemu's -m flag.
+alignas(16) uint8_t method_allocator_pool[1u << 23]; //   8 MiB
+alignas(16) uint8_t temp_allocator_pool[1u << 22]; //   4 MiB
+alignas(16) uint8_t planned_memory_pool[1u << 26]; //  64 MiB
+
+constexpr size_t kMaxPlannedBuffers = 8;
+constexpr double kRtol = 0.01;
+constexpr double kAtol = 0.01;
+
+} // namespace
+
+extern "C" [[noreturn]] void baremetal_exit(int status) {
+  executorch::riscv::baremetal::semihost_exit(status);
+}
+
+// picolibc's abort()/raise() resolve _exit; with our own start.S we don't
+// link its crt0, so reroute it to the semihosting trap.
+extern "C" [[noreturn]] void _exit(int status) {
+  executorch::riscv::baremetal::semihost_exit(status);
+}
+
+// libstdc++'s <random> drags std::random_device → getentropy/read. The portable
+// rand kernels are never invoked at runtime for our bundled-IO tests, so a
+// failing stub is enough to satisfy the link.
+extern "C" int getentropy(void*, size_t) {
+  return -1;
+}
+extern "C" long read(int, void*, size_t) {
+  return -1;
+}
+
+// Virtual destructors emit deleting variants that reference operator delete
+// even when we never new/delete. Stubs satisfy the linker; never called.
+void operator delete(void*) noexcept {}
+void operator delete(void*, size_t) noexcept {}
+void operator delete[](void*) noexcept {}
+void operator delete[](void*, size_t) noexcept {}
+
+// op_rand / op_native_dropout / op_randn from portable_kernels reference
+// std::random_device::_M_{init,getval,fini}, whose only definitions live in
+// libstdc++.a's medlow-built random.o (won't relocate at 0x80000000). The
+// bundled-IO smoke tests never invoke those ops, so satisfy the linker with
+// no-op trampolines under the Itanium-mangled names.
+asm(R"(
+    .globl _ZNSt13random_device7_M_initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
+    .type  _ZNSt13random_device7_M_initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE, @function
+_ZNSt13random_device7_M_initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE:
+    ret
+
+    .globl _ZNSt13random_device9_M_getvalEv
+    .type  _ZNSt13random_device9_M_getvalEv, @function
+_ZNSt13random_device9_M_getvalEv:
+    li     a0, 0
+    ret
+
+    .globl _ZNSt13random_device7_M_finiEv
+    .type  _ZNSt13random_device7_M_finiEv, @function
+_ZNSt13random_device7_M_finiEv:
+    ret
+)");
+
+// Route ET_LOG through semihosting. Messages aren't null-terminated; copy and
+// append \n\0 before forwarding to SYS_WRITE0.
+extern "C" void et_pal_emit_log_message(
+    et_timestamp_t,
+    et_pal_log_level_t,
+    const char*,
+    const char*,
+    size_t,
+    const char* message,
+    size_t length) {
+  // The bundle doesn't expose a testset count, so we probe past the end and
+  // rely on InvalidArgument to terminate the loop. The accompanying ET_LOG
+  // ("testset_idx N is out of range ...") is benign noise — suppress it so
+  // run.sh's PASS/FAIL grep stays clean.
+  static const char kOorPrefix[] = "testset_idx ";
+  if (length >= sizeof(kOorPrefix) - 1 &&
+      std::memcmp(message, kOorPrefix, sizeof(kOorPrefix) - 1) == 0) {
+    return;
+  }
+  char buf[512];
+  size_t n = length < sizeof(buf) - 2 ? length : sizeof(buf) - 2;
+  std::memcpy(buf, message, n);
+  buf[n] = '\n';
+  buf[n + 1] = '\0';
+  executorch::riscv::baremetal::semihost_write0(buf);
+}
+
+extern "C" void et_pal_init(void) {}
+extern "C" [[noreturn]] void et_pal_abort(void) {
+  executorch::riscv::baremetal::semihost_exit(1);
+}
+extern "C" et_timestamp_t et_pal_current_ticks(void) {
+  return 0;
+}
+extern "C" et_tick_ratio_t et_pal_ticks_to_ns_multiplier(void) {
+  return {1, 1};
+}
+extern "C" void* et_pal_allocate(size_t) {
+  return nullptr;
+}
+extern "C" void et_pal_free(void*) {}
+
+int main() {
+  executorch::runtime::runtime_init();
+
+  const void* program_data = nullptr;
+  size_t program_size = 0;
+  Error status = executorch::bundled_program::get_program_data(
+      const_cast<uint8_t*>(model_pte),
+      model_pte_len,
+      &program_data,
+      &program_size);
+  if (status != Error::Ok) {
+    ET_LOG(
+        Error, "get_program_data failed: 0x%x", static_cast<unsigned>(status));
+    return 1;
+  }
+
+  BufferDataLoader loader(program_data, program_size);
+  Result<Program> program = Program::load(&loader);
+  if (!program.ok()) {
+    ET_LOG(
+        Error,
+        "Program::load failed: 0x%x",
+        static_cast<unsigned>(program.error()));
+    return 1;
+  }
+
+  // The harness always exports a single "forward" method. Skipping the
+  // Result<const char*> deref of program->get_method_name(0) sidesteps a
+  // codegen wedge we hit under -mcmodel=medany + picolibc.
+  const char* method_name = "forward";
+  ET_LOG(Info, "Using method %s", method_name);
+
+  Result<MethodMeta> method_meta = program->method_meta(method_name);
+  if (!method_meta.ok()) {
+    ET_LOG(
+        Error,
+        "method_meta failed: 0x%x",
+        static_cast<unsigned>(method_meta.error()));
+    return 1;
+  }
+
+  MemoryAllocator method_allocator(
+      sizeof(method_allocator_pool), method_allocator_pool);
+  MemoryAllocator temp_allocator(
+      sizeof(temp_allocator_pool), temp_allocator_pool);
+
+  // One span per planned buffer, bumped through a single .bss arena so we
+  // don't need a heap. kMaxPlannedBuffers / pool size both grow with bigger
+  // models; failures here are loud rather than silent.
+  Span<uint8_t> planned_spans[kMaxPlannedBuffers];
+  size_t num_planned = method_meta->num_memory_planned_buffers();
+  if (num_planned > kMaxPlannedBuffers) {
+    ET_LOG(
+        Error,
+        "num_planned=%zu exceeds kMaxPlannedBuffers=%zu",
+        num_planned,
+        kMaxPlannedBuffers);
+    return 1;
+  }
+  size_t offset = 0;
+  for (size_t id = 0; id < num_planned; ++id) {
+    size_t sz =
+        static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
+    sz = (sz + 15u) & ~15u;
+    if (offset + sz > sizeof(planned_memory_pool)) {
+      ET_LOG(
+          Error,
+          "planned buffer %zu (size %zu) overflows pool (%zu/%zu)",
+          id,
+          sz,
+          offset,
+          sizeof(planned_memory_pool));
+      return 1;
+    }
+    planned_spans[id] = Span<uint8_t>(planned_memory_pool + offset, sz);
+    offset += sz;
+  }
+  HierarchicalAllocator planned_memory(
+      Span<Span<uint8_t>>(planned_spans, num_planned));
+  MemoryManager memory_manager(
+      &method_allocator, &planned_memory, &temp_allocator);
+
+  Result<Method> method = program->load_method(method_name, &memory_manager);
+  if (!method.ok()) {
+    ET_LOG(
+        Error,
+        "load_method failed: 0x%x",
+        static_cast<unsigned>(method.error()));
+    return 1;
+  }
+
+  // load_bundled_input returns InvalidArgument past the last testset; that's
+  // how we detect the loop terminator (the bundle has no public count API).
+  int rc = 0;
+  for (size_t testset_idx = 0;; ++testset_idx) {
+    Error load = executorch::bundled_program::load_bundled_input(
+        *method, const_cast<uint8_t*>(model_pte), testset_idx);
+    if (load != Error::Ok) {
+      if (testset_idx == 0) {
+        ET_LOG(
+            Error,
+            "load_bundled_input failed for testset 0: 0x%x",
+            static_cast<unsigned>(load));
+        rc = 1;
+      }
+      break;
+    }
+    Error exec = method->execute();
+    if (exec != Error::Ok) {
+      ET_LOG(
+          Error,
+          "execute failed for testset %zu: 0x%x",
+          testset_idx,
+          static_cast<unsigned>(exec));
+      ET_LOG(Error, "TEST: BundleIO index[%zu] Test_result: FAIL", testset_idx);
+      rc = 1;
+      continue;
+    }
+    Error verify = executorch::bundled_program::verify_method_outputs(
+        *method, const_cast<uint8_t*>(model_pte), testset_idx, kRtol, kAtol);
+    if (verify == Error::Ok) {
+      ET_LOG(Info, "TEST: BundleIO index[%zu] Test_result: PASS", testset_idx);
+    } else {
+      ET_LOG(
+          Error,
+          "verify_method_outputs failed for testset %zu: 0x%x",
+          testset_idx,
+          static_cast<unsigned>(verify));
+      ET_LOG(Error, "TEST: BundleIO index[%zu] Test_result: FAIL", testset_idx);
+      rc = 1;
+    }
+  }
+
+  return rc;
+}
diff --git a/examples/riscv/baremetal/riscv_virt.ld b/examples/riscv/baremetal/riscv_virt.ld
new file mode 100644
index 00000000000..34980116b1d
--- /dev/null
+++ b/examples/riscv/baremetal/riscv_virt.ld
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2026 The ExecuTorch Authors.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/* qemu-system-riscv{32,64} -machine virt -bios none -kernel: the virt board's
+ * reset stub at 0x1000 jumps to DRAM base 0x80000000, so _start has to live
+ * there. RAM size matches the qemu `-m 512M` we pass from run.sh — the
+ * embedded .bpte in .rodata can be tens of MB for mv2 / llama2 / yolo26. */
+
+OUTPUT_ARCH(riscv)
+ENTRY(_start)
+
+MEMORY
+{
+    RAM (rwx) : ORIGIN = 0x80000000, LENGTH = 512M
+}
+
+SECTIONS
+{
+    .text 0x80000000 :
+    {
+        KEEP(*(.text.boot))
+        *(.text .text.*)
+    } > RAM
+
+    .rodata : ALIGN(8)
+    {
+        *(.rodata .rodata.*)
+        *(.srodata .srodata.*)
+    } > RAM
+
+    /* C++ global ctors. start.S calls picolibc's __libc_init_array, which
+     * walks symbols __bothinit_array_start..__bothinit_array_end (preinit +
+     * init combined). The stock newlib names (__init_array_start/end) are
+     * defined too for portability, but it's the "both" pair picolibc reads. */
+    .bothinit_array : ALIGN(8)
+    {
+        PROVIDE_HIDDEN(__bothinit_array_start = .);
+        PROVIDE_HIDDEN(__preinit_array_start = .);
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN(__preinit_array_end = .);
+        PROVIDE_HIDDEN(__init_array_start = .);
+        KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+        KEEP(*(.init_array EXCLUDE_FILE(*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o) .ctors))
+        PROVIDE_HIDDEN(__init_array_end = .);
+        PROVIDE_HIDDEN(__bothinit_array_end = .);
+    } > RAM
+    .fini_array : ALIGN(8)
+    {
+        PROVIDE_HIDDEN(__fini_array_start = .);
+        KEEP(*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+        KEEP(*(.fini_array EXCLUDE_FILE(*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o) .dtors))
+        PROVIDE_HIDDEN(__fini_array_end = .);
+    } > RAM
+
+    .data : ALIGN(8)
+    {
+        *(.data .data.*)
+        *(.sdata .sdata.*)
+    } > RAM
+
+    .bss : ALIGN(8)
+    {
+        _bss_start = .;
+        *(.bss .bss.*)
+        *(.sbss .sbss.*)
+        *(COMMON)
+        . = ALIGN(8);
+        _bss_end = .;
+    } > RAM
+
+    /* 2 MiB stack at the high end of RAM; grows downward. picolibc's sbrk
+     * looks up __heap_start / __heap_end (double-underscore). */
+    . = ALIGN(16);
+    PROVIDE(__heap_start = .);
+    . = ORIGIN(RAM) + LENGTH(RAM) - 2M;
+    PROVIDE(__heap_end = .);
+    . = . + 2M;
+    _stack_top = .;
+
+    /DISCARD/ : { *(.note.* .comment .eh_frame .riscv.attributes) }
+}
diff --git a/examples/riscv/baremetal/semihosting.h b/examples/riscv/baremetal/semihosting.h
new file mode 100644
index 00000000000..7af63048d29
--- /dev/null
+++ b/examples/riscv/baremetal/semihosting.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2026 The ExecuTorch Authors.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <stddef.h>
+
+namespace executorch {
+namespace riscv {
+namespace baremetal {
+
+// The RISC-V semihosting trigger is a fixed three-insn sequence (slli/ebreak/
+// srai of x0) so qemu can distinguish it from a normal ecall. Op number in
+// a0, arg pointer in a1, return value back in a0.
+inline long semihost_call(long op, const void* arg) {
+  register long a0 asm("a0") = op;
+  register long a1 asm("a1") = (long)arg;
+  asm volatile(
+      ".option push\n\t"
+      ".option norvc\n\t"
+      "slli x0, x0, 0x1f\n\t"
+      "ebreak\n\t"
+      "srai x0, x0, 0x7\n\t"
+      ".option pop"
+      : "+r"(a0)
+      : "r"(a1)
+      : "memory");
+  return a0;
+}
+
+constexpr long SYS_WRITE0 = 0x04;
+constexpr long SYS_EXIT_EXTENDED = 0x20;
+
+inline void semihost_write0(const char* s) {
+  semihost_call(SYS_WRITE0, s);
+}
+
+[[noreturn]] inline void semihost_exit(int status) {
+  // ADP_Stopped_ApplicationExit (0x20026) + status, per the semihosting spec.
+  long block[2] = {0x20026, (long)status};
+  semihost_call(SYS_EXIT_EXTENDED, block);
+  __builtin_trap();
+}
+
+} // namespace baremetal
+} // namespace riscv
+} // namespace executorch
diff --git a/examples/riscv/baremetal/start.S b/examples/riscv/baremetal/start.S
new file mode 100644
index 00000000000..092eeffa4a6
--- /dev/null
+++ b/examples/riscv/baremetal/start.S
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2026 The ExecuTorch Authors.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Boot stub for the qemu virt RISC-V baremetal runner: set sp, enable FPU,
+// zero .bss, run C++ static ctors via __libc_init_array, jump to main. On
+// return, call baremetal_exit so qemu terminates deterministically.
+
+#if __riscv_xlen == 64
+#define SX sd
+#define XLEN_BYTES 8
+#else
+#define SX sw
+#define XLEN_BYTES 4
+#endif
+
+    .section .text.boot, "ax"
+    .globl _start
+    .type _start, @function
+_start:
+    la      sp, _stack_top
+
+    // mstatus.FS resets to Off in M-mode, so any FP insn (libstdc++ template
+    // code emits fsd/fld) traps. We have no trap vector, so the CPU would
+    // loop on the fault. FS=Dirty (0b11 in bits 13-14) keeps the FPU live.
+    li      t0, 0x6000
+    csrs    mstatus, t0
+
+    la      a0, _bss_start
+    la      a1, _bss_end
+1:
+    bgeu    a0, a1, 2f
+    SX      zero, 0(a0)
+    addi    a0, a0, XLEN_BYTES
+    j       1b
+2:
+    call    __libc_init_array
+    li      a0, 0
+    li      a1, 0
+    call    main
+    call    baremetal_exit
+3:
+    wfi
+    j       3b
+
+    .size _start, .-_start
diff --git a/examples/riscv/riscv32-unknown-elf-toolchain.cmake b/examples/riscv/riscv32-unknown-elf-toolchain.cmake
new file mode 100644
index 00000000000..ae968ea6fe2
--- /dev/null
+++ b/examples/riscv/riscv32-unknown-elf-toolchain.cmake
@@ -0,0 +1,74 @@
+# Copyright 2026 The ExecuTorch Authors.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# rv32 baremetal cross-toolchain. Uses the multilib-aware riscv64-unknown-elf
+# gcc (one package, both XLENs); `-march=rv32...` + `-mabi=ilp32d` selects the
+# 32-bit picolibc + libstdc++ variant. ELF runs under qemu-system-riscv32
+# -machine virt with semihosting.
+
+set(CMAKE_SYSTEM_NAME Generic)
+set(CMAKE_SYSTEM_PROCESSOR riscv32)
+
+set(CMAKE_C_COMPILER
+    "riscv64-unknown-elf-gcc"
+    CACHE FILEPATH ""
+)
+set(CMAKE_CXX_COMPILER
+    "riscv64-unknown-elf-g++"
+    CACHE FILEPATH ""
+)
+set(CMAKE_ASM_COMPILER
+    "riscv64-unknown-elf-gcc"
+    CACHE FILEPATH ""
+)
+set(CMAKE_AR
+    "riscv64-unknown-elf-ar"
+    CACHE FILEPATH ""
+)
+set(CMAKE_RANLIB
+    "riscv64-unknown-elf-ranlib"
+    CACHE FILEPATH ""
+)
+set(CMAKE_STRIP
+    "riscv64-unknown-elf-strip"
+    CACHE FILEPATH ""
+)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+# try_compile() can't link without crt0/specs; archive-only sidesteps that.
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+# Baseline rv32imafdc / ilp32d — the rv32gc-equivalent multilib Ubuntu's
+# picolibc + libstdc++ ship. (Unlike rv64, the full rv32gc multilib *is*
+# packaged, so we don't have to drop M / C here.) -mcmodel=medany because medlow
+# can't reach our 0x80000000 base. picolibc.specs must be on the compile line
+# too so libstdc++ headers find picolibc's C headers via the spec's sysroot.
+add_compile_options(
+  --specs=picolibc.specs
+  -march=rv32imafdc
+  -mabi=ilp32d
+  -mcmodel=medany
+  -fdata-sections
+  -ffunction-sections
+  "$<$<COMPILE_LANGUAGE:CXX>:-fno-rtti;-fno-exceptions;-fno-unwind-tables>"
+)
+# -nostdlib++ drops g++'s implicit libstdc++.a (medlow-built, won't relocate).
+# -nostartfiles drops picolibc's crt0 in favour of our start.S.
+add_link_options(
+  --specs=picolibc.specs
+  -march=rv32imafdc
+  -mabi=ilp32d
+  -mcmodel=medany
+  -nostdlib++
+  -nostartfiles
+  "LINKER:--gc-sections"
+)
diff --git a/examples/riscv/riscv64-unknown-elf-toolchain.cmake b/examples/riscv/riscv64-unknown-elf-toolchain.cmake
new file mode 100644
index 00000000000..a4533675f89
--- /dev/null
+++ b/examples/riscv/riscv64-unknown-elf-toolchain.cmake
@@ -0,0 +1,77 @@
+# Copyright 2026 The ExecuTorch Authors.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# rv64 baremetal cross-toolchain (Ubuntu 26.04+ packages:
+# gcc-riscv64-unknown-elf, picolibc-riscv64-unknown-elf,
+# libstdc++-riscv64-unknown-elf-picolibc). The resulting ELF runs under
+# qemu-system-riscv64 -machine virt with semihosting.
+
+set(CMAKE_SYSTEM_NAME Generic)
+set(CMAKE_SYSTEM_PROCESSOR riscv64)
+
+set(CMAKE_C_COMPILER
+    "riscv64-unknown-elf-gcc"
+    CACHE FILEPATH ""
+)
+set(CMAKE_CXX_COMPILER
+    "riscv64-unknown-elf-g++"
+    CACHE FILEPATH ""
+)
+set(CMAKE_ASM_COMPILER
+    "riscv64-unknown-elf-gcc"
+    CACHE FILEPATH ""
+)
+set(CMAKE_AR
+    "riscv64-unknown-elf-ar"
+    CACHE FILEPATH ""
+)
+set(CMAKE_RANLIB
+    "riscv64-unknown-elf-ranlib"
+    CACHE FILEPATH ""
+)
+set(CMAKE_STRIP
+    "riscv64-unknown-elf-strip"
+    CACHE FILEPATH ""
+)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+# try_compile() can't link without crt0/specs; archive-only sidesteps that.
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+# Picked baseline: rv64iafd / lp64d. Ubuntu's picolibc + libstdc++ packages
+# don't ship the rv64gc (= rv64imafdc) multilib, so this drops M (integer mul)
+# and C (compressed) but keeps double-float. -mcmodel=medany because medlow's
+# signed-32-bit-around-0 reach can't address our 0x80000000 base.
+# --specs=picolibc.specs has to appear at *compile* time too: libstdc++'s
+# <cstring>/<cassert>/<sys/types.h> need picolibc's C headers via the spec's
+# sysroot.
+add_compile_options(
+  --specs=picolibc.specs
+  -march=rv64iafd
+  -mabi=lp64d
+  -mcmodel=medany
+  -fdata-sections
+  -ffunction-sections
+  "$<$<COMPILE_LANGUAGE:CXX>:-fno-rtti;-fno-exceptions;-fno-unwind-tables>"
+)
+# -nostdlib++ drops g++'s implicit libstdc++.a (medlow-built, won't relocate at
+# 0x80000000); we only use its templates, no runtime calls. -nostartfiles drops
+# picolibc's crt0 in favour of our start.S.
+add_link_options(
+  --specs=picolibc.specs
+  -march=rv64iafd
+  -mabi=lp64d
+  -mcmodel=medany
+  -nostdlib++
+  -nostartfiles
+  "LINKER:--gc-sections"
+)
diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh
index 2c207816bfc..e44f23add86 100755
--- a/examples/riscv/run.sh
+++ b/examples/riscv/run.sh
@@ -4,42 +4,52 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# RISC-V Phase 1 smoke test driver (pytorch/executorch#18991):
-#   1. Export a tiny model to a BundledProgram (.bpte) on the x86_64 host.
-#   2. Cross-compile executor_runner for riscv64 Linux glibc.
-#   3. Invoke the runner under qemu-user-static and grep its stdout for the
-#      Test_result: PASS marker emitted by the bundled-IO comparison path.
+# RISC-V smoke test driver:
+#   1. Export a small model to a BundledProgram (.bpte) on the host.
+#   2. Cross-compile a riscv32/64 runner (linux glibc or baremetal).
+#   3. Invoke under qemu and grep stdout for the Test_result: PASS marker.
 
-set -eu
+set -euo pipefail
 
 script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
 et_root_dir=$(realpath "${script_dir}/../..")
 
 build_only=false
-build_dir="${et_root_dir}/cmake-out-riscv"
-output_dir="${et_root_dir}/riscv_test"
-qemu="qemu-riscv64-static"
-qemu_timeout="600"
+build_dir=
+qemu_timeout="1800"
 model="add"
-xnnpack=false
+backend="portable"
+os="linux"
+arch="rv64"
+qemu_cpu_ext=""
 quantize=false
 debug_xnnpack=false
 verbose_xnnpack=false
+qemu_override=""
 
 usage() {
     cat <<EOF
 Usage: $(basename "$0") [options]
 Options:
   --model=<NAME>          Which model to export and run (default: ${model})
-  --xnnpack               Enable the XNNPACK backend (AOT partitioner + runtime)
   --quantize              Produce an 8-bit quantized model
-  --verbose-xnnpack       Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch at runtime
+  --backend=<NAME>        AOT backend (default: ${backend}):
+                           - 'portable': portable kernels only
+                           - 'xnnpack':  XNNPACK delegate (linux only)
+  --os=<NAME>             Target OS (default: ${os}):
+                           - 'linux':    glibc, qemu-user
+                           - 'baremetal': no OS, qemu-system + semihosting
+  --arch=<NAME>           Target arch (default: ${arch}):
+                           - 'rv64': riscv64
+                           - 'rv32': riscv32
+  --qemu-cpu-ext=<EXT>    QEMU -cpu extensions appended after the arch base
+                          (e.g. 'v=true,vlen=128'); no rv32/rv64 prefix.
+  --verbose-xnnpack       Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch
   --debug-xnnpack         Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
   --build_only            Only export and cross-compile; do not invoke QEMU
-  --build_dir=<DIR>       CMake build directory (default: ${build_dir})
-  --output_dir=<DIR>      Directory for the exported .bpte (default: ${output_dir})
-  --qemu=<BIN>            qemu-user binary (default: ${qemu})
-  --timeout=<SECONDS>     Maximum QEMU runtime; matches run_fvp.sh --timelimit (default: ${qemu_timeout})
+  --build-dir=<DIR>       Build/output directory for this configuration (required)
+  --qemu=<BIN>            Override qemu binary
+  --timeout=<SECONDS>     Maximum QEMU runtime (default: ${qemu_timeout})
   -h, --help              Show this help
 EOF
 }
@@ -47,51 +57,125 @@ EOF
 for arg in "$@"; do
     case $arg in
         --model=*) model="${arg#*=}" ;;
-        --xnnpack) xnnpack=true ;;
         --quantize) quantize=true ;;
+        --backend=*) backend="${arg#*=}" ;;
+        --os=*) os="${arg#*=}" ;;
+        --arch=*) arch="${arg#*=}" ;;
+        --qemu-cpu-ext=*) qemu_cpu_ext="${arg#*=}" ;;
         --debug-xnnpack) debug_xnnpack=true ;;
         --verbose-xnnpack) verbose_xnnpack=true ;;
         --build_only) build_only=true ;;
-        --build_dir=*) build_dir="${arg#*=}" ;;
-        --output_dir=*) output_dir="${arg#*=}" ;;
-        --qemu=*) qemu="${arg#*=}" ;;
+        --build-dir=*) build_dir="${arg#*=}" ;;
+        --qemu=*) qemu_override="${arg#*=}" ;;
         --timeout=*) qemu_timeout="${arg#*=}" ;;
         -h|--help) usage; exit 0 ;;
         *) echo "Unknown option: $arg" >&2; usage; exit 1 ;;
     esac
 done
 
-mkdir -p "${output_dir}"
-bpte_path="${output_dir}/${model}_riscv.bpte"
+case "${backend}" in
+    portable|xnnpack) ;;
+    *) echo "Unknown backend: ${backend}" >&2; usage; exit 1 ;;
+esac
+case "${os}" in
+    linux|baremetal) ;;
+    *) echo "Unknown os: ${os}" >&2; usage; exit 1 ;;
+esac
+case "${arch}" in
+    rv32|rv64) ;;
+    *) echo "Unknown arch: ${arch}" >&2; usage; exit 1 ;;
+esac
 
-echo "[run.sh] Step 1/3: AOT export on host"
-aot_extra_args=()
-if ${xnnpack}; then
-    aot_extra_args+=(--xnnpack)
+# xnnpack needs pthreads + dynamic loading: baremetal has neither, and the
+# Ubuntu xnnpack microkernels don't ship an rv32 build.
+if [[ "${backend}" == "xnnpack" && "${os}" == "baremetal" ]]; then
+    echo "[run.sh] --backend=xnnpack requires --os=linux" >&2
+    exit 1
+fi
+if [[ "${backend}" == "xnnpack" && "${arch}" == "rv32" ]]; then
+    echo "[run.sh] --backend=xnnpack requires --arch=rv64" >&2
+    exit 1
+fi
+# Ubuntu doesn't package a riscv32-linux-gnu cross (riscv64-linux-gnu has no
+# rv32 multilib either), so rv32 linux is blocked on a custom toolchain build.
+if [[ "${arch}" == "rv32" && "${os}" == "linux" ]]; then
+    echo "[run.sh] --arch=rv32 --os=linux not supported: no riscv32-linux-gnu toolchain on Ubuntu" >&2
+    exit 1
+fi
+
+if ${debug_xnnpack} && [[ "${backend}" != "xnnpack" ]]; then
+    echo "[run.sh] --debug-xnnpack requires --backend=xnnpack" >&2
+    exit 1
 fi
+if ${verbose_xnnpack} && [[ "${backend}" != "xnnpack" ]]; then
+    echo "[run.sh] --verbose-xnnpack requires --backend=xnnpack" >&2
+    exit 1
+fi
+
+if [[ -z "${build_dir}" ]]; then
+    echo "[run.sh] --build-dir is required" >&2; usage; exit 1
+fi
+mkdir -p "${build_dir}"
+
+bpte_path="${build_dir}/model.bpte"
+
+echo "[run.sh] Step 1/3: AOT export on host (backend=${backend} os=${os} arch=${arch})"
+aot_extra_args=()
 if ${quantize}; then
     aot_extra_args+=(--quantize)
 fi
 if ${debug_xnnpack}; then
     aot_extra_args+=(--debug-xnnpack)
 fi
-python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}"
+python "${script_dir}/aot_riscv.py" --model "${model}" --backend "${backend}" --os "${os}" "${aot_extra_args[@]}" --output "${bpte_path}"
 
-echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux"
+echo "[run.sh] Step 2/3: cross-compile executor_runner for ${arch}-${os}"
 cmake_extra_args=()
-if ${xnnpack}; then
+if [[ "${backend}" == "xnnpack" ]]; then
     cmake_extra_args+=(-DEXECUTORCH_BUILD_XNNPACK=ON)
 fi
 if ${verbose_xnnpack}; then
     cmake_extra_args+=(-DEXECUTORCH_XNNPACK_LOG_LEVEL=4 -DEXECUTORCH_BUILD_RISCV_ETDUMP=ON)
 fi
-cmake -S "${et_root_dir}" -B "${build_dir}" \
-    --preset riscv64-linux \
-    "${cmake_extra_args[@]}" \
-    -DCMAKE_BUILD_TYPE=Release
-cmake --build "${build_dir}" -j"$(nproc)" --target executor_runner
 
-runner="${build_dir}/executor_runner"
+# Map our short arch (rv32/rv64) to the canonical riscv32/riscv64 prefix used
+# by the cross toolchain and qemu binary names.
+case "${arch}" in
+    rv32) arch_long="riscv32" ;;
+    rv64) arch_long="riscv64" ;;
+esac
+
+if [[ "${os}" == "linux" ]]; then
+    build_target="executor_runner"
+    qemu_default="qemu-${arch_long}-static"
+    cmake -S "${et_root_dir}" -B "${build_dir}" --fresh \
+        --preset "${arch_long}-linux" \
+        "${cmake_extra_args[@]}" \
+        -DCMAKE_BUILD_TYPE=Release
+    cmake --build "${build_dir}" -j"$(nproc)" --target "${build_target}"
+    runner="${build_dir}/${build_target}"
+
+elif [[ "${os}" == "baremetal" ]]; then
+    build_target="executor_runner_baremetal"
+    qemu_default="qemu-system-${arch_long}"
+    # Standalone build (mirrors examples/arm/executor_runner/standalone)
+    cmake -S "${et_root_dir}/examples/riscv/baremetal" -B "${build_dir}" --fresh \
+        -DCMAKE_TOOLCHAIN_FILE=${et_root_dir}/examples/riscv/${arch_long}-unknown-elf-toolchain.cmake \
+        -DEXECUTORCH_BUILD_PRESET_FILE=${et_root_dir}/tools/cmake/preset/riscv_baremetal.cmake \
+        -DEXECUTORCH_ROOT="${et_root_dir}" \
+        -DRISCV_BAREMETAL_PTE="${bpte_path}" \
+        "${cmake_extra_args[@]}" \
+        -DCMAKE_BUILD_TYPE=Release
+    cmake --build "${build_dir}" -j"$(nproc)" --target "${build_target}"
+    runner="${build_dir}/${build_target}.elf"
+
+else
+    echo "Unknown os: ${os}" >&2
+    usage
+    exit 1
+fi
+
+qemu="${qemu_override:-${qemu_default}}"
 [[ -x "${runner}" ]] || { echo "[run.sh] runner not found at ${runner}" >&2; exit 1; }
 
 if file "${runner}" | grep -q "RISC-V"; then
@@ -113,45 +197,75 @@ hash "${qemu}" 2>/dev/null || {
     exit 1
 }
 
-# QEMU_LD_PREFIX points qemu-user at the riscv64 sysroot so the dynamic
-# linker (ld-linux-riscv64-lp64d.so.1) referenced in the ELF resolves.
-export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv64-linux-gnu}"
+log_file="${build_dir}/run.log"
+rm -f "${log_file}"
 
-if [[ -n "${QEMU_CPU+x}" ]]; then
-    echo "[run.sh] QEMU_CPU=${QEMU_CPU}"
+# Compose the QEMU -cpu value once: ${arch} alone, or ${arch},${ext} when an
+# extension list was supplied. qemu-user reads $QEMU_CPU; qemu-system takes
+# -cpu on the command line.
+qemu_cpu="${arch}"
+if [[ -n "${qemu_cpu_ext}" ]]; then
+    qemu_cpu="${arch},${qemu_cpu_ext}"
 fi
+echo "[run.sh] qemu -cpu = ${qemu_cpu}"
 
-runner_extra_args=()
-if ${quantize}; then
-    runner_extra_args+=(--bundleio_rtol=0.1 --bundleio_atol=0.25)
-fi
-etdump_path=""
-if ${verbose_xnnpack}; then
-    etdump_path="${output_dir}/${model}_riscv.etdump"
-    rm -f "${etdump_path}"
-    runner_extra_args+=(--etdump_path="${etdump_path}")
-fi
+if [[ "${os}" == "linux" ]]; then
+    # QEMU_LD_PREFIX points qemu-user at the cross sysroot so the dynamic
+    # linker (ld-linux-riscv*) referenced in the ELF resolves.
+    if [[ "${arch}" == "rv64" ]]; then
+        export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv64-linux-gnu}"
+    else
+        export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv32-linux-gnu}"
+    fi
+    export QEMU_CPU="${qemu_cpu}"
 
-# etdump_summary.py reads the XNN_LOG_LEVEL=4 registrations.
-log_file="${output_dir}/${model}_riscv.run.log"
-rm -f "${log_file}"
+    runner_extra_args=()
+    if ${quantize}; then
+        runner_extra_args+=(--bundleio_rtol=0.1 --bundleio_atol=0.25)
+    fi
+    etdump_path=""
+    if ${verbose_xnnpack}; then
+        etdump_path="${build_dir}/run.etdump"
+        rm -f "${etdump_path}"
+        runner_extra_args+=(--etdump_path="${etdump_path}")
+    fi
 
-set +e
-timeout --signal=KILL "${qemu_timeout}" "${qemu}" "${runner}" \
-    --model_path="${bpte_path}" \
-    "${runner_extra_args[@]}" \
-    2>&1 | tee "${log_file}"
-qemu_status=${PIPESTATUS[0]}
-set -e
+    set +e
+    timeout --signal=KILL "${qemu_timeout}" "${qemu}" "${runner}" \
+        --model_path="${bpte_path}" \
+        "${runner_extra_args[@]}" \
+      |& tee "${log_file}"
+    qemu_status=${PIPESTATUS[0]}
+    set -e
 
-echo "[run.sh] qemu exit status: ${qemu_status}"
+    if [[ -n "${etdump_path}" && -f "${etdump_path}" ]]; then
+        python "${script_dir}/etdump_summary.py" "${etdump_path}" \
+            --run-log "${log_file}" \
+            --json "${etdump_path}.json" || true
+    fi
+
+elif [[ "${os}" == "baremetal" ]]; then
+    # qemu-system -machine virt boots at 0x80000000; -bios none skips OpenSBI;
+    # semihosting target=native routes SYS_WRITE0/SYS_EXIT to host stdio.
+    # For deeper debugging, add: -accel tcg,one-insn-per-tb=on -d in_asm,nochain
+    #                            -D <trace.log>
+    set +e
+    timeout --signal=KILL "${qemu_timeout}" "${qemu}" \
+        -machine virt -cpu "${qemu_cpu}" -m 512M -nographic -bios none \
+        -semihosting-config enable=on,target=native \
+        -kernel "${runner}" \
+      |& tee "${log_file}"
+    qemu_status=${PIPESTATUS[0]}
+    set -e
 
-if [[ -n "${etdump_path}" && -f "${etdump_path}" ]]; then
-    python "${script_dir}/etdump_summary.py" "${etdump_path}" \
-        --run-log "${log_file}" \
-        --json "${etdump_path}.json" || true
+else
+    echo "Unknown os: ${os}" >&2
+    usage
+    exit 1
 fi
 
+echo "[run.sh] qemu exit status: ${qemu_status}"
+
 if grep -q "Test_result: PASS" "${log_file}"; then
     echo "[run.sh] Bundled I/O check PASSED"
     exit 0
diff --git a/examples/riscv/setup-baremetal.sh b/examples/riscv/setup-baremetal.sh
new file mode 100755
index 00000000000..f94a11388a8
--- /dev/null
+++ b/examples/riscv/setup-baremetal.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Copyright 2026 The ExecuTorch Authors.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Host tooling for the RISC-V smoke tests. Targets Ubuntu 26.04: that's where
+# libstdc++-riscv64-unknown-elf-picolibc was first packaged, and the baremetal
+# build chain needs C++ stdlib headers paired with picolibc.
+
+set -euo pipefail
+
+script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
+
+if ! command -v apt-get >/dev/null 2>&1; then
+    echo "[$(basename "$0")] this setup script targets Debian/Ubuntu (apt-get not found)" >&2
+    exit 1
+fi
+
+SUDO=""
+if [[ $EUID -ne 0 ]]; then
+    SUDO="sudo"
+fi
+
+${SUDO} apt-get update
+${SUDO} apt-get install -y --no-install-recommends \
+    build-essential \
+    gcc-riscv64-linux-gnu \
+    g++-riscv64-linux-gnu \
+    binutils-riscv64-linux-gnu \
+    libc6-riscv64-cross \
+    libc6-dev-riscv64-cross \
+    gcc-riscv64-unknown-elf \
+    picolibc-riscv64-unknown-elf \
+    libstdc++-riscv64-unknown-elf-picolibc \
+    cmake \
+    file \
+    ca-certificates \
+    qemu-user \
+    qemu-system-riscv \
+    libglib2.0-0t64 \
+    libxcb1 \
+    libgl1
+
+riscv64-linux-gnu-gcc --version | head -n1
+qemu-riscv64 --version | head -n1
+
+# Some python packages also need to be installed
+pip install -r "${script_dir}/requirements.txt"
diff --git a/examples/riscv/setup.sh b/examples/riscv/setup-linux.sh
similarity index 90%
rename from examples/riscv/setup.sh
rename to examples/riscv/setup-linux.sh
index 48d5ed27642..03206d9305c 100755
--- a/examples/riscv/setup.sh
+++ b/examples/riscv/setup-linux.sh
@@ -8,7 +8,7 @@
 # - gcc/g++/binutils for riscv64-linux-gnu (cross-compiler + sysroot)
 # - qemu-user-static (qemu-riscv64 user-mode emulator)
 
-set -eu
+set -euo pipefail
 
 script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
 
@@ -22,6 +22,13 @@ if [[ $EUID -ne 0 ]]; then
     SUDO="sudo"
 fi
 
+source /etc/os-release
+
+GCC_VERSION=""
+if [[ "${VERSION_ID:-}" == "24.04" ]]; then
+    GCC_VERSION="14"
+fi
+
 ${SUDO} apt-get update
 ${SUDO} apt-get install -y --no-install-recommends \
     build-essential \
@@ -44,7 +51,7 @@ if [[ -n "${GCC_VERSION+x}" ]]; then
 fi
 
 riscv64-linux-gnu-gcc --version | head -n1
-qemu-riscv64-static --version | head -n1
+qemu-riscv64 --version | head -n1
 
 # Some python packages also need to be installed
 pip install -r "${script_dir}/requirements.txt"
diff --git a/examples/riscv/test-matrix.sh b/examples/riscv/test-matrix.sh
new file mode 100644
index 00000000000..93c09d1976d
--- /dev/null
+++ b/examples/riscv/test-matrix.sh
@@ -0,0 +1,250 @@
+#!/usr/bin/env bash
+# Copyright 2026 The ExecuTorch Authors.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Local mirror of riscv64.yml's matrix using two docker containers:
+#
+#   - executorch-riscv-linux (ubuntu:24.04 + gcc-14).
+#   - executorch-riscv-baremetal (ubuntu:26.04 + gcc-15).
+#     26.04 is the only release shipping libstdc++-riscv64-unknown-elf-picolibc.
+#
+# Usage:
+#   examples/riscv/test-matrix.sh                    # full sweep
+#   examples/riscv/test-matrix.sh --model=mv2        # one model, all configs
+#   examples/riscv/test-matrix.sh --os=baremetal     # one OS
+#   examples/riscv/test-matrix.sh --quantize-only    # skip the no-q half
+#   examples/riscv/test-matrix.sh --setup-only       # bootstrap containers, don't run
+#
+# Re-runs are cheap when the per-cell build dirs survive (set --keep-build).
+
+set -euo pipefail
+
+script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
+et_root_dir=$(realpath "${script_dir}/../..")
+
+model_filter=""
+os_filter=""
+arch_filter=""
+variant_filter=""
+backend_filter=""
+quantize_mode="both"   # both | only | none
+setup_only=false
+keep_build=false
+
+usage() {
+    cat <<EOF
+Usage: $(basename "$0") [options]
+Options:
+  --model=<NAME>     Only run cells for this model
+  --os=<linux|baremetal>
+  --arch=<rv64|rv32>
+  --backend=<portable|xnnpack>
+  --variant=<scalar|rvv>
+  --quantize-only    Skip the non-quantized cells
+  --no-quantize      Skip the quantized cells
+  --setup-only       Make sure both containers are ready, then exit
+  --keep-build       Reuse riscv_test/<cell> dirs instead of starting fresh
+  -h, --help
+EOF
+}
+
+for arg in "$@"; do
+    case $arg in
+        --model=*)     model_filter="${arg#*=}"   ;;
+        --os=*)        os_filter="${arg#*=}"      ;;
+        --arch=*)      arch_filter="${arg#*=}"    ;;
+        --backend=*)   backend_filter="${arg#*=}" ;;
+        --variant=*)   variant_filter="${arg#*=}" ;;
+        --quantize-only) quantize_mode="only"     ;;
+        --no-quantize)   quantize_mode="none"     ;;
+        --setup-only)  setup_only=true            ;;
+        --keep-build)  keep_build=true            ;;
+        -h|--help)     usage; exit 0              ;;
+        *)             echo "Unknown: $arg" >&2; usage; exit 1 ;;
+    esac
+done
+
+# Container names + image tags match what the CI workflow consumes.
+LINUX_CTR=executorch-riscv-linux
+BAREMETAL_CTR=executorch-riscv-baremetal
+
+# `add`/`mv2`/`resnet18` are the only models with XNNPACK quantization recipes
+# in MODEL_NAME_TO_OPTIONS — others raise at AOT time when --quantize is set.
+QUANTIZED_MODELS="mv2 resnet18"
+ALL_MODELS="add mv2 resnet18 mobilebert llama2 yolo26"
+ALL_BACKENDS="portable xnnpack"
+
+# qemu-cpu-ext sweeps; keep parity with the JSON arrays in riscv64.yml.
+SCALAR_EXT="zba=true,zbb=true,zbs=true,v=false"
+RVV_EXT="zba=true,zbb=true,zbs=true,v=true,vlen=128,vext_spec=v1.0"
+
+# Check if a cell combination should be excluded (matching riscv64.yml excludes)
+should_exclude() {
+    local os=$1 arch=$2 backend=$3 variant=$4 model=$5 quantize=$6
+
+    # Disable quantization testing with Portable Kernels
+    if [[ "${backend}" == "portable" && "${quantize}" == "true" ]]; then
+        return 0
+    fi
+    # XNNPACK needs pthreads + dynamic loading (no baremetal)
+    if [[ "${backend}" == "xnnpack" && "${os}" == "baremetal" ]]; then
+        return 0
+    fi
+    # XNNPACK needs RVV
+    if [[ "${backend}" == "xnnpack" && "${variant}" == "scalar" ]]; then
+        return 0
+    fi
+    # No quantization recipe for Yolo26
+    if [[ "${model}" == "yolo26" && "${quantize}" == "true" ]]; then
+        return 0
+    fi
+    # No riscv32-linux-gnu cross is packaged on Ubuntu
+    if [[ "${os}" == "linux" && "${arch}" == "rv32" ]]; then
+        return 0
+    fi
+
+    return 1
+}
+
+# ---- container bootstrap (idempotent) -------------------------------------
+
+ensure_linux() {
+    if ! docker ps -a --format '{{.Names}}' | grep -qx "${LINUX_CTR}"; then
+        echo "[matrix] starting ${LINUX_CTR} (ubuntu:24.04)"
+        docker run -d --name "${LINUX_CTR}" \
+            -e DEBIAN_FRONTEND=noninteractive \
+            -v "${et_root_dir}":/executorch -w /executorch \
+            ubuntu:24.04 sleep infinity >/dev/null
+    fi
+    docker start "${LINUX_CTR}" >/dev/null
+    if ! docker exec "${LINUX_CTR}" test -d /executorch/.venv-docker-linux; then
+        echo "[matrix] bootstrapping ${LINUX_CTR} (this takes a few minutes)"
+        docker exec "${LINUX_CTR}" bash -eu -c '
+            set -e
+            apt-get update -qq && apt-get install -y -qq --no-install-recommends \
+                python3 python3-pip ca-certificates sudo
+            python3 -m pip install --break-system-packages --quiet uv
+            uv python install 3.10
+            cd /executorch
+            uv venv --python 3.10 --seed .venv-docker-linux
+        '
+    fi
+    docker exec "${LINUX_CTR}" bash -eu -c '
+        set -e
+        cd /executorch
+        source .venv-docker-linux/bin/activate
+        pip install --upgrade pip
+        pip install executorch
+        bash examples/riscv/setup-linux.sh
+    '
+}
+
+ensure_baremetal() {
+    if ! docker ps -a --format '{{.Names}}' | grep -qx "${BAREMETAL_CTR}"; then
+        echo "[matrix] starting ${BAREMETAL_CTR} (ubuntu:26.04)"
+        docker run -d --name "${BAREMETAL_CTR}" \
+            -e DEBIAN_FRONTEND=noninteractive \
+            -v "${et_root_dir}":/executorch -w /executorch \
+            ubuntu:26.04 sleep infinity >/dev/null
+    fi
+    docker start "${BAREMETAL_CTR}" >/dev/null
+    if ! docker exec "${BAREMETAL_CTR}" test -d /executorch/.venv-docker-baremetal; then
+        echo "[matrix] bootstrapping ${BAREMETAL_CTR} (this takes a few minutes)"
+        docker exec "${BAREMETAL_CTR}" bash -eu -c '
+            set -e
+            apt-get update -qq && apt-get install -y -qq --no-install-recommends \
+                python3 python3-pip ca-certificates sudo
+            python3 -m pip install --break-system-packages --quiet uv
+            uv python install 3.10
+            cd /executorch
+            uv venv --python 3.10 --seed .venv-docker-baremetal
+        '
+    fi
+    docker exec "${BAREMETAL_CTR}" bash -eu -c '
+        set -e
+        cd /executorch
+        source .venv-docker-baremetal/bin/activate
+        pip install --upgrade pip
+        pip install executorch
+        bash examples/riscv/setup-baremetal.sh
+    '
+}
+
+ensure_linux
+ensure_baremetal
+if ${setup_only}; then exit 0; fi
+
+# ---- one cell --------------------------------------------------------------
+
+# Args: ctr venv os arch backend variant ext model quantize_flag
+run_cell() {
+    local ctr=$1 venv=$2 os=$3 arch=$4 backend=$5 variant=$6 ext=$7 model=$8 q=$9
+    local cell="${model}${q:++q}-${backend}/${os}-${arch}"
+    local model_q="${model}${q:+-q}"
+    local variant_slug="${ext//,/_}"; variant_slug="${variant_slug//=/_}"; variant_slug="${variant_slug:-base}"
+    local build_dir="/executorch/riscv_test/${model_q}/${backend}/${os}-${arch}-${variant_slug}"
+    if ! ${keep_build}; then
+        docker exec "${ctr}" rm -rf "${build_dir}"
+    fi
+    if docker exec "${ctr}" bash -lc "
+            cd /executorch && source ${venv}/bin/activate &&
+            timeout 1800 bash -eu examples/riscv/run.sh \
+              --model=${model} ${q} --backend=${backend} \
+              --os=${os} --arch=${arch} \
+              --qemu-cpu-ext='${ext}' \
+              --build-dir=${build_dir} --timeout=900
+        "; then
+        echo "  PASS  ${cell}"
+        return 0
+    else
+        echo "  FAIL  ${cell}"
+        return 1
+    fi
+}
+
+# ---- iterate ---------------------------------------------------------------
+
+passed=0; total=0
+for os_arch in "linux:rv64" "baremetal:rv64" "baremetal:rv32"; do
+    os="${os_arch%%:*}"; arch="${os_arch##*:}"
+    if [[ -n "${os_filter}" && "${os}" != "${os_filter}" ]]; then continue; fi
+    if [[ -n "${arch_filter}" && "${arch}" != "${arch_filter}" ]]; then continue; fi
+    if [[ "${os}" == "linux" ]]; then ctr="${LINUX_CTR}"; venv=/executorch/.venv-docker-linux;
+    else                              ctr="${BAREMETAL_CTR}"; venv=/executorch/.venv-docker-baremetal; fi
+
+    for variant_lbl in "scalar:${SCALAR_EXT}" "rvv:${RVV_EXT}"; do
+        variant="${variant_lbl%%:*}"; ext="${variant_lbl#*:}"
+        if [[ -n "${variant_filter}" && "${variant}" != "${variant_filter}" ]]; then continue; fi
+
+        for backend in ${ALL_BACKENDS}; do
+            if [[ -n "${backend_filter}" && "${backend}" != "${backend_filter}" ]]; then continue; fi
+
+            # non-quantized models
+            if [[ "${quantize_mode}" != "only" ]]; then
+                for m in ${ALL_MODELS}; do
+                    if [[ -n "${model_filter}" && "${m}" != "${model_filter}" ]]; then continue; fi
+                    if should_exclude "${os}" "${arch}" "${backend}" "${variant}" "${m}" "false"; then continue; fi
+                    total=$((total+1))
+                    run_cell "${ctr}" "${venv}" "${os}" "${arch}" "${backend}" "${variant}" "${ext}" "${m}" "" \
+                        && passed=$((passed+1)) || exit 1
+                done
+            fi
+            # quantized — only the 3 models with XNNPACK recipes
+            if [[ "${quantize_mode}" != "none" ]]; then
+                for m in ${QUANTIZED_MODELS}; do
+                    if [[ -n "${model_filter}" && "${m}" != "${model_filter}" ]]; then continue; fi
+                    if should_exclude "${os}" "${arch}" "${backend}" "${variant}" "${m}" "true"; then continue; fi
+                    total=$((total+1))
+                    run_cell "${ctr}" "${venv}" "${os}" "${arch}" "${backend}" "${variant}" "${ext}" "${m}" "--quantize" \
+                        && passed=$((passed+1)) || exit 1
+                done
+            fi
+        done
+    done
+done
+
+echo ""
+echo "===== ${passed}/${total} cells passed ====="
+test "${passed}" -eq "${total}"
diff --git a/tools/cmake/preset/riscv_baremetal.cmake b/tools/cmake/preset/riscv_baremetal.cmake
new file mode 100644
index 00000000000..e70fc57ba57
--- /dev/null
+++ b/tools/cmake/preset/riscv_baremetal.cmake
@@ -0,0 +1,50 @@
+# Copyright 2026 The ExecuTorch Authors.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Baremetal builds consume the build tree directly; mirror arm_baremetal so
+# install rules stay invokable but write back into the build dir.
+define_overridable_option(
+  EXECUTORCH_BAREMETAL_SKIP_INSTALL
+  "Skip emitting install/export rules when building bare-metal artifacts" BOOL
+  ON
+)
+
+if(EXECUTORCH_BAREMETAL_SKIP_INSTALL)
+  set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}")
+  unset(CMAKE_SKIP_INSTALL_RULES CACHE)
+  set(CMAKE_SKIP_INSTALL_RULES
+      OFF
+      CACHE
+        BOOL
+        "Retain install() rules so docs/scripts can keep calling --target install"
+        FORCE
+  )
+endif()
+
+set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER OFF)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER OFF)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR OFF)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL ON)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
+set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
+# BUNDLE_IO requires DEVTOOLS to provide the bundled_program lib.
+set_overridable_option(EXECUTORCH_BUILD_DEVTOOLS ON)
+set_overridable_option(EXECUTORCH_ENABLE_BUNDLE_IO ON)
+set_overridable_option(EXECUTORCH_ENABLE_LOGGING ON)
+# Freestanding target: no pthreadpool, no cpuinfo, no shared lib.
+set_overridable_option(EXECUTORCH_BUILD_PTHREADPOOL OFF)
+set_overridable_option(EXECUTORCH_BUILD_CPUINFO OFF)
+
+define_overridable_option(
+  EXECUTORCH_BUILD_RISCV_ETDUMP "Build etdump support for RISC-V" BOOL OFF
+)
+
+if("${EXECUTORCH_BUILD_RISCV_ETDUMP}")
+  set(EXECUTORCH_BUILD_DEVTOOLS ON)
+  set(EXECUTORCH_ENABLE_EVENT_TRACER ON)
+  set(FLATCC_ALLOW_WERROR OFF)
+else()
+  set(EXECUTORCH_ENABLE_EVENT_TRACER OFF)
+endif()
diff --git a/tools/cmake/preset/riscv64_linux.cmake b/tools/cmake/preset/riscv_linux.cmake
similarity index 100%
rename from tools/cmake/preset/riscv64_linux.cmake
rename to tools/cmake/preset/riscv_linux.cmake

From 0df077d96ae296e5e83c1a1fda82915bd639d15d Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Mon, 1 Jun 2026 21:39:05 +0200
Subject: [PATCH 097/103] Fix based on Claude's review

---
 .github/workflows/riscv64.yml           | 2 +-
 examples/riscv/README.md                | 4 ++--
 examples/riscv/baremetal/CMakeLists.txt | 2 +-
 examples/riscv/run.sh                   | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index d6109a47305..9331fc35508 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -12,7 +12,7 @@ on:
       - .github/workflows/riscv64.yml
       - .github/workflows/_test_riscv.yml
       - .ci/scripts/test_riscv_qemu.sh
-      - tools/cmake/preset/riscv64_*.cmake
+      - tools/cmake/preset/riscv_*.cmake
       - examples/riscv/**
   workflow_dispatch:
   schedule:
diff --git a/examples/riscv/README.md b/examples/riscv/README.md
index 2c250f75cd7..3ae8a151f24 100644
--- a/examples/riscv/README.md
+++ b/examples/riscv/README.md
@@ -20,7 +20,7 @@ examples/riscv/run.sh               # export, cross-compile, run under qemu
 | `--quantize` | flag | off | XNNPACK quantizer (requires `--backend=xnnpack`) |
 | `--backend=<N>` | `portable`, `xnnpack` | `portable` | xnnpack is linux-only |
 | `--os=<N>` | `linux`, `baremetal` | `linux` | qemu-user vs qemu-system + semihosting |
-| `--arch=<N>` | `rv64` | `rv64` | (rv32 follow-up; no `riscv32-linux-gnu` cross is packaged on Ubuntu) |
+| `--arch=<N>` | `rv32`, `rv64` | `rv64` | valid <os>-<arch> pairs are `linux-rv64`, `baremetal-rv32`, `baremetal-rv64` |
 | `--qemu-cpu-ext=<S>` | e.g. `v=true,vlen=128` | empty | extensions appended after the arch base |
 
 ## Pipelines
@@ -33,4 +33,4 @@ The baremetal runner embeds the `.bpte` directly in `.rodata` via the same `exam
 
 ## CI
 
-`.github/workflows/riscv64.yml` is the entry point; it fans out into `_test_riscv.yml` over a `(model, backend, os, arch, quantize)` matrix and sweeps `qemu-cpu-ext` per backend. Runs on the `executorch-ubuntu-26.04-gcc15` docker image (needed for the `riscv64-unknown-elf` picolibc + libstdc++ packages - see [setup.sh](setup.sh)).
+`.github/workflows/riscv64.yml` is the entry point; it fans out into `_test_riscv.yml` over a `(model, backend, os, arch, quantize)` matrix and sweeps `qemu-cpu-ext` per backend. Runs on the `executorch-ubuntu-26.04-gcc15` docker image (needed for the `riscv64-unknown-elf` picolibc + libstdc++ packages - see [setup-linux.sh](setup-linux.sh) or [setup-baremetal.sh](setup-baremetal.sh)).
diff --git a/examples/riscv/baremetal/CMakeLists.txt b/examples/riscv/baremetal/CMakeLists.txt
index b7765c4e3a1..b0208e41d2b 100644
--- a/examples/riscv/baremetal/CMakeLists.txt
+++ b/examples/riscv/baremetal/CMakeLists.txt
@@ -46,7 +46,7 @@ endif()
 include("${EXECUTORCH_ROOT}/tools/cmake/common/preset.cmake")
 if(NOT DEFINED EXECUTORCH_BUILD_PRESET_FILE)
   set(EXECUTORCH_BUILD_PRESET_FILE
-      "${EXECUTORCH_ROOT}/tools/cmake/preset/riscv64_baremetal.cmake"
+      "${EXECUTORCH_ROOT}/tools/cmake/preset/riscv_baremetal.cmake"
       CACHE PATH "Preset used when configuring the standalone baremetal runner"
   )
 endif()
diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh
index e44f23add86..0635bfedb4e 100755
--- a/examples/riscv/run.sh
+++ b/examples/riscv/run.sh
@@ -193,7 +193,7 @@ fi
 
 echo "[run.sh] Step 3/3: run under ${qemu}"
 hash "${qemu}" 2>/dev/null || {
-    echo "[run.sh] ERROR: ${qemu} not found on PATH; install with examples/riscv/setup.sh" >&2
+    echo "[run.sh] ERROR: ${qemu} not found on PATH; install with examples/riscv/setup-${os}.sh" >&2
     exit 1
 }
 

From cfd9b52cb319334b4dfb26f76bdbd463a50af0d5 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Mon, 1 Jun 2026 21:41:07 +0200
Subject: [PATCH 098/103] Fix qemu-riscv64-static live check

---
 examples/riscv/setup-linux.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/riscv/setup-linux.sh b/examples/riscv/setup-linux.sh
index 03206d9305c..bef4408ad56 100755
--- a/examples/riscv/setup-linux.sh
+++ b/examples/riscv/setup-linux.sh
@@ -51,7 +51,7 @@ if [[ -n "${GCC_VERSION+x}" ]]; then
 fi
 
 riscv64-linux-gnu-gcc --version | head -n1
-qemu-riscv64 --version | head -n1
+qemu-riscv64-static --version | head -n1
 
 # Some python packages also need to be installed
 pip install -r "${script_dir}/requirements.txt"

From 66edf4edf7134ac39ec0449662cb84e84551f24b Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 2 Jun 2026 01:10:23 +0200
Subject: [PATCH 099/103] Use GCC 14 for host compiler as well

sentencepiece fails to compile on GCC 15 due to missing #include <cstdint>
---
 examples/riscv/setup-baremetal.sh | 20 ++++++++++++++++++--
 examples/riscv/setup-linux.sh     |  6 +++++-
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/examples/riscv/setup-baremetal.sh b/examples/riscv/setup-baremetal.sh
index f94a11388a8..f96e8c75032 100755
--- a/examples/riscv/setup-baremetal.sh
+++ b/examples/riscv/setup-baremetal.sh
@@ -22,11 +22,20 @@ if [[ $EUID -ne 0 ]]; then
     SUDO="sudo"
 fi
 
+source /etc/os-release
+
+GCC_VERSION=""
+if [[ "${VERSION_ID:-}" == "24.04" || "${VERSION_ID:-}" == "26.04" ]]; then
+    GCC_VERSION="14"
+fi
+
 ${SUDO} apt-get update
 ${SUDO} apt-get install -y --no-install-recommends \
     build-essential \
-    gcc-riscv64-linux-gnu \
-    g++-riscv64-linux-gnu \
+    gcc${GCC_VERSION:+-${GCC_VERSION}} \
+    g++${GCC_VERSION:+-${GCC_VERSION}} \
+    gcc${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
+    g++${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
     binutils-riscv64-linux-gnu \
     libc6-riscv64-cross \
     libc6-dev-riscv64-cross \
@@ -42,6 +51,13 @@ ${SUDO} apt-get install -y --no-install-recommends \
     libxcb1 \
     libgl1
 
+if [[ -n "${GCC_VERSION+x}" ]]; then
+    ${SUDO} update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc${GCC_VERSION:+-${GCC_VERSION}} 100
+    ${SUDO} update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++${GCC_VERSION:+-${GCC_VERSION}} 100
+    ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-gcc riscv64-linux-gnu-gcc /usr/bin/riscv64-linux-gnu-gcc${GCC_VERSION:+-${GCC_VERSION}} 100
+    ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-g++ riscv64-linux-gnu-g++ /usr/bin/riscv64-linux-gnu-g++${GCC_VERSION:+-${GCC_VERSION}} 100
+fi
+
 riscv64-linux-gnu-gcc --version | head -n1
 qemu-riscv64 --version | head -n1
 
diff --git a/examples/riscv/setup-linux.sh b/examples/riscv/setup-linux.sh
index bef4408ad56..912557e3bfb 100755
--- a/examples/riscv/setup-linux.sh
+++ b/examples/riscv/setup-linux.sh
@@ -25,13 +25,15 @@ fi
 source /etc/os-release
 
 GCC_VERSION=""
-if [[ "${VERSION_ID:-}" == "24.04" ]]; then
+if [[ "${VERSION_ID:-}" == "24.04" || "${VERSION_ID:-}" == "26.04" ]]; then
     GCC_VERSION="14"
 fi
 
 ${SUDO} apt-get update
 ${SUDO} apt-get install -y --no-install-recommends \
     build-essential \
+    gcc${GCC_VERSION:+-${GCC_VERSION}} \
+    g++${GCC_VERSION:+-${GCC_VERSION}} \
     gcc${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
     g++${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
     binutils-riscv64-linux-gnu \
@@ -46,6 +48,8 @@ ${SUDO} apt-get install -y --no-install-recommends \
     libgl1
 
 if [[ -n "${GCC_VERSION+x}" ]]; then
+    ${SUDO} update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc${GCC_VERSION:+-${GCC_VERSION}} 100
+    ${SUDO} update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++${GCC_VERSION:+-${GCC_VERSION}} 100
     ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-gcc riscv64-linux-gnu-gcc /usr/bin/riscv64-linux-gnu-gcc${GCC_VERSION:+-${GCC_VERSION}} 100
     ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-g++ riscv64-linux-gnu-g++ /usr/bin/riscv64-linux-gnu-g++${GCC_VERSION:+-${GCC_VERSION}} 100
 fi

From ba2281ec6c65da12361a4ac8fa80a5bef091c8a5 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 2 Jun 2026 11:21:28 +0200
Subject: [PATCH 100/103] Fix unecessary change

---
 .ci/scripts/setup-linux.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh
index 275a93d797e..feb8a128b17 100755
--- a/.ci/scripts/setup-linux.sh
+++ b/.ci/scripts/setup-linux.sh
@@ -5,7 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-set -eu
+set -exu
 
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

From 89fdf663e10e3cc3b0051e4e78617712e9175139 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 2 Jun 2026 11:22:59 +0200
Subject: [PATCH 101/103] Add testing on RVV on Portable Backend

---
 .github/workflows/riscv64.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index 9331fc35508..f2010b86fe5 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -71,7 +71,10 @@ jobs:
               "v=true,vext_spec=v1.0,vlen=512"
             ]',
             '[
-              "v=false"
+              "v=false",
+              "v=true,vext_spec=v1.0,vlen=128",
+              "v=true,vext_spec=v1.0,vlen=256",
+              "v=true,vext_spec=v1.0,vlen=512"
             ]'
           )
         }}

From 7dc53a1bf03d2c273db8948eb693e26fcfde1549 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 2 Jun 2026 11:39:29 +0200
Subject: [PATCH 102/103] Add rvv128, rvv256, and rvv512 testing in
 test-matrix.sh

---
 examples/riscv/test-matrix.sh | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/riscv/test-matrix.sh b/examples/riscv/test-matrix.sh
index 93c09d1976d..084b2eea308 100644
--- a/examples/riscv/test-matrix.sh
+++ b/examples/riscv/test-matrix.sh
@@ -41,7 +41,7 @@ Options:
   --os=<linux|baremetal>
   --arch=<rv64|rv32>
   --backend=<portable|xnnpack>
-  --variant=<scalar|rvv>
+  --variant=<scalar|rvv128|rvv256|rvv512>
   --quantize-only    Skip the non-quantized cells
   --no-quantize      Skip the quantized cells
   --setup-only       Make sure both containers are ready, then exit
@@ -77,8 +77,10 @@ ALL_MODELS="add mv2 resnet18 mobilebert llama2 yolo26"
 ALL_BACKENDS="portable xnnpack"
 
 # qemu-cpu-ext sweeps; keep parity with the JSON arrays in riscv64.yml.
-SCALAR_EXT="zba=true,zbb=true,zbs=true,v=false"
-RVV_EXT="zba=true,zbb=true,zbs=true,v=true,vlen=128,vext_spec=v1.0"
+SCALAR_EXT="v=false"
+RVV128_EXT="v=true,vext_spec=v1.0,vlen=128"
+RVV256_EXT="v=true,vext_spec=v1.0,vlen=256"
+RVV512_EXT="v=true,vext_spec=v1.0,vlen=512"
 
 # Check if a cell combination should be excluded (matching riscv64.yml excludes)
 should_exclude() {
@@ -214,7 +216,7 @@ for os_arch in "linux:rv64" "baremetal:rv64" "baremetal:rv32"; do
     if [[ "${os}" == "linux" ]]; then ctr="${LINUX_CTR}"; venv=/executorch/.venv-docker-linux;
     else                              ctr="${BAREMETAL_CTR}"; venv=/executorch/.venv-docker-baremetal; fi
 
-    for variant_lbl in "scalar:${SCALAR_EXT}" "rvv:${RVV_EXT}"; do
+    for variant_lbl in "scalar:${SCALAR_EXT}" "rvv128:${RVV128_EXT}" "rvv256:${RVV256_EXT}" "rvv512:${RVV512_EXT}"; do
         variant="${variant_lbl%%:*}"; ext="${variant_lbl#*:}"
         if [[ -n "${variant_filter}" && "${variant}" != "${variant_filter}" ]]; then continue; fi
 

From 4b616c0395be8583a3e681051bc4a61a55ddc043 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 2 Jun 2026 13:20:16 +0200
Subject: [PATCH 103/103] Run all models with quantization (except excluded)

---
 examples/riscv/test-matrix.sh | 85 +++++++++++++++--------------------
 1 file changed, 37 insertions(+), 48 deletions(-)

diff --git a/examples/riscv/test-matrix.sh b/examples/riscv/test-matrix.sh
index 084b2eea308..9ed8115de44 100644
--- a/examples/riscv/test-matrix.sh
+++ b/examples/riscv/test-matrix.sh
@@ -29,7 +29,7 @@ os_filter=""
 arch_filter=""
 variant_filter=""
 backend_filter=""
-quantize_mode="both"   # both | only | none
+quantize_filter=""
 setup_only=false
 keep_build=false
 
@@ -42,8 +42,7 @@ Options:
   --arch=<rv64|rv32>
   --backend=<portable|xnnpack>
   --variant=<scalar|rvv128|rvv256|rvv512>
-  --quantize-only    Skip the non-quantized cells
-  --no-quantize      Skip the quantized cells
+  --quantize=<yes,no>
   --setup-only       Make sure both containers are ready, then exit
   --keep-build       Reuse riscv_test/<cell> dirs instead of starting fresh
   -h, --help
@@ -52,16 +51,15 @@ EOF
 
 for arg in "$@"; do
     case $arg in
-        --model=*)     model_filter="${arg#*=}"   ;;
-        --os=*)        os_filter="${arg#*=}"      ;;
-        --arch=*)      arch_filter="${arg#*=}"    ;;
-        --backend=*)   backend_filter="${arg#*=}" ;;
-        --variant=*)   variant_filter="${arg#*=}" ;;
-        --quantize-only) quantize_mode="only"     ;;
-        --no-quantize)   quantize_mode="none"     ;;
-        --setup-only)  setup_only=true            ;;
-        --keep-build)  keep_build=true            ;;
-        -h|--help)     usage; exit 0              ;;
+        --model=*)     model_filter="${arg#*=}"    ;;
+        --os=*)        os_filter="${arg#*=}"       ;;
+        --arch=*)      arch_filter="${arg#*=}"     ;;
+        --backend=*)   backend_filter="${arg#*=}"  ;;
+        --variant=*)   variant_filter="${arg#*=}"  ;;
+        --quantize=*)  quantize_filter="${arg#*=}" ;;
+        --setup-only)  setup_only=true             ;;
+        --keep-build)  keep_build=true             ;;
+        -h|--help)     usage; exit 0               ;;
         *)             echo "Unknown: $arg" >&2; usage; exit 1 ;;
     esac
 done
@@ -70,11 +68,8 @@ done
 LINUX_CTR=executorch-riscv-linux
 BAREMETAL_CTR=executorch-riscv-baremetal
 
-# `add`/`mv2`/`resnet18` are the only models with XNNPACK quantization recipes
-# in MODEL_NAME_TO_OPTIONS — others raise at AOT time when --quantize is set.
-QUANTIZED_MODELS="mv2 resnet18"
-ALL_MODELS="add mv2 resnet18 mobilebert llama2 yolo26"
-ALL_BACKENDS="portable xnnpack"
+MODELS="add mv2 resnet18 mobilebert llama2 yolo26"
+BACKENDS="portable xnnpack"
 
 # qemu-cpu-ext sweeps; keep parity with the JSON arrays in riscv64.yml.
 SCALAR_EXT="v=false"
@@ -209,42 +204,36 @@ run_cell() {
 # ---- iterate ---------------------------------------------------------------
 
 passed=0; total=0
+for m in ${MODELS}; do
+for backend in ${BACKENDS}; do
 for os_arch in "linux:rv64" "baremetal:rv64" "baremetal:rv32"; do
-    os="${os_arch%%:*}"; arch="${os_arch##*:}"
+for variant_lbl in "scalar:${SCALAR_EXT}" "rvv128:${RVV128_EXT}" "rvv256:${RVV256_EXT}" "rvv512:${RVV512_EXT}"; do
+    os="${os_arch%%:*}"; arch="${os_arch##*:}"; variant="${variant_lbl%%:*}"; ext="${variant_lbl#*:}"
+
+    if [[ -n "${model_filter}" && "${m}" != "${model_filter}" ]]; then continue; fi
+    if [[ -n "${backend_filter}" && "${backend}" != "${backend_filter}" ]]; then continue; fi
     if [[ -n "${os_filter}" && "${os}" != "${os_filter}" ]]; then continue; fi
     if [[ -n "${arch_filter}" && "${arch}" != "${arch_filter}" ]]; then continue; fi
+    if [[ -n "${variant_filter}" && "${variant}" != "${variant_filter}" ]]; then continue; fi
+
     if [[ "${os}" == "linux" ]]; then ctr="${LINUX_CTR}"; venv=/executorch/.venv-docker-linux;
     else                              ctr="${BAREMETAL_CTR}"; venv=/executorch/.venv-docker-baremetal; fi
 
-    for variant_lbl in "scalar:${SCALAR_EXT}" "rvv128:${RVV128_EXT}" "rvv256:${RVV256_EXT}" "rvv512:${RVV512_EXT}"; do
-        variant="${variant_lbl%%:*}"; ext="${variant_lbl#*:}"
-        if [[ -n "${variant_filter}" && "${variant}" != "${variant_filter}" ]]; then continue; fi
-
-        for backend in ${ALL_BACKENDS}; do
-            if [[ -n "${backend_filter}" && "${backend}" != "${backend_filter}" ]]; then continue; fi
-
-            # non-quantized models
-            if [[ "${quantize_mode}" != "only" ]]; then
-                for m in ${ALL_MODELS}; do
-                    if [[ -n "${model_filter}" && "${m}" != "${model_filter}" ]]; then continue; fi
-                    if should_exclude "${os}" "${arch}" "${backend}" "${variant}" "${m}" "false"; then continue; fi
-                    total=$((total+1))
-                    run_cell "${ctr}" "${venv}" "${os}" "${arch}" "${backend}" "${variant}" "${ext}" "${m}" "" \
-                        && passed=$((passed+1)) || exit 1
-                done
-            fi
-            # quantized — only the 3 models with XNNPACK recipes
-            if [[ "${quantize_mode}" != "none" ]]; then
-                for m in ${QUANTIZED_MODELS}; do
-                    if [[ -n "${model_filter}" && "${m}" != "${model_filter}" ]]; then continue; fi
-                    if should_exclude "${os}" "${arch}" "${backend}" "${variant}" "${m}" "true"; then continue; fi
-                    total=$((total+1))
-                    run_cell "${ctr}" "${venv}" "${os}" "${arch}" "${backend}" "${variant}" "${ext}" "${m}" "--quantize" \
-                        && passed=$((passed+1)) || exit 1
-                done
-            fi
-        done
-    done
+    if [[ -z "${quantize_filter}" || "${quantize_filter}" = "no" ]]; then
+        if should_exclude "${os}" "${arch}" "${backend}" "${variant}" "${m}" "false"; then continue; fi
+        total=$((total+1))
+        run_cell "${ctr}" "${venv}" "${os}" "${arch}" "${backend}" "${variant}" "${ext}" "${m}" "" \
+            && passed=$((passed+1)) || exit 1
+    fi
+    if [[ -z "${quantize_filter}" || "${quantize_filter}" = "yes" ]]; then
+        if should_exclude "${os}" "${arch}" "${backend}" "${variant}" "${m}" "true"; then continue; fi
+        total=$((total+1))
+        run_cell "${ctr}" "${venv}" "${os}" "${arch}" "${backend}" "${variant}" "${ext}" "${m}" "--quantize" \
+            && passed=$((passed+1)) || exit 1
+    fi
+done
+done
+done
 done
 
 echo ""