From a2b556e92d9b6ab2efdb5800651b91ae9af9fd8c Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Mon, 11 May 2026 14:37:25 -0700
Subject: [PATCH 1/5] Cortex-M backend: thread target CPU/ISA through the AOT
 pass manager
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a CortexMCompileConfig dataclass (cpu + isa) that carries
Cortex-M target information from the --target=cortex-m<variant>+int8
CLI string into CortexMPassManager. The full standard Cortex-M lineup
is registered (M0, M0+, M3, M4, M7, M23, M33, M35P, M52, M55, M85),
each with a sensible default ISA; the optional-DSP M33/M35P and
optional-MVE M52/M55/M85 cases can be expressed via the isa= kwarg.

No pass reads the config yet, so this change is purely plumbing — but
it positions both the upcoming AOT scratch-buffer sizing work (#16580)
and the M0+ (#17646) / M33 (#17644) backend support to plug in without
re-plumbing the call site. Actually building for the new variants
still requires Phase 2's MPS2 platform glue.

CortexMTester gains an optional config kwarg, and the Pico 2 MLP
example now passes config=CortexMCompileConfig(cpu='cortex-m33') to
CortexMPassManager to match the RP2350 hardware it targets.

Authored with Claude.
---
 backends/arm/scripts/aot_arm_compiler.py      |  31 +++++-
 backends/cortex_m/compile_config.py           |  98 +++++++++++++++++
 .../cortex_m/passes/cortex_m_pass_manager.py  |   7 +-
 .../cortex_m/test/misc/test_compile_config.py | 103 ++++++++++++++++++
 backends/cortex_m/test/tester.py              |  23 +++-
 .../pico2/export_mlp_mnist_cmsis.py           |   6 +-
 6 files changed, 255 insertions(+), 13 deletions(-)
 create mode 100644 backends/cortex_m/compile_config.py
 create mode 100644 backends/cortex_m/test/misc/test_compile_config.py
diff --git a/backends/arm/scripts/aot_arm_compiler.py b/backends/arm/scripts/aot_arm_compiler.py
index 19b056787d6..4b2065adc25 100644
--- a/backends/arm/scripts/aot_arm_compiler.py
+++ b/backends/arm/scripts/aot_arm_compiler.py
@@ -33,6 +33,7 @@
 from executorch.backends.arm.util._factory import create_partitioner, create_quantizer
 
 from executorch.backends.arm.vgf import VgfCompileSpec
+from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
 
 from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
@@ -465,7 +466,17 @@ def forward(self, x):
     "TOSA-1.0+INT",
     "TOSA-1.0+FP",
     "TOSA-1.0+INT+int16",
+    "cortex-m0+int8",
+    "cortex-m0plus+int8",
+    "cortex-m3+int8",
+    "cortex-m4+int8",
+    "cortex-m7+int8",
+    "cortex-m23+int8",
+    "cortex-m33+int8",
+    "cortex-m35p+int8",
+    "cortex-m52+int8",
     "cortex-m55+int8",
+    "cortex-m85+int8",
 ]
 
 
@@ -566,7 +577,7 @@ def _get_args():
         required=False,
         default="ethos-u55-128",
         choices=TARGETS,
-        help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m55+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
+        help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m<variant>+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
     )
     # TODO: Remove --evaluate and --evaluate_config completely after a suitable time.
     # They are deprecated and no longer functional in this script.
@@ -860,9 +871,14 @@ def _to_edge_cortex_m(
     model: GraphModule,
     example_inputs: Tuple[torch.Tensor],
     calibration_samples: Optional[List[Tuple[torch.Tensor, ...]]],
+    config: CortexMCompileConfig,
 ):
     """Cortex-M/CMSIS-NN compilation path with no delegation."""
-    logging.info("Using Cortex-M/CMSIS-NN compilation path (no delegation)")
+    logging.info(
+        "Using Cortex-M/CMSIS-NN compilation path for cpu=%s isa=%s",
+        config.cpu,
+        config.isa,
+    )
 
     def _to_channels_last(x):
         if isinstance(x, torch.Tensor):
@@ -915,7 +931,7 @@ def _to_channels_last(x):
         ),
     )
 
-    pass_manager = CortexMPassManager(edge.exported_program())
+    pass_manager = CortexMPassManager(edge.exported_program(), config=config)
     edge._edge_programs["forward"] = pass_manager.transform()
 
     return model_quant, edge
@@ -1007,12 +1023,14 @@ def main() -> None:  # noqa: C901
     else:
         quant_mode = None
 
-    if args.target == "cortex-m55+int8":
+    if args.target.startswith("cortex-m"):
         # Cortex-M path: CMSIS-NN portable kernels, no delegation
+        cortex_m_config = CortexMCompileConfig.from_target_string(args.target)
         if args.delegate:
             logging.warning(
-                "--delegate is ignored for target 'cortex-m55+int8' "
-                "(this target does not use delegated ops)."
+                "--delegate is ignored for target %r "
+                "(this target does not use delegated ops).",
+                args.target,
             )
             args.delegate = False
         model_quant, edge = _to_edge_cortex_m(
@@ -1021,6 +1039,7 @@ def main() -> None:  # noqa: C901
             model,
             example_inputs,
             calibration_samples,
+            cortex_m_config,
         )
     elif args.delegate:
         # As we can target multiple output encodings, one must
diff --git a/backends/cortex_m/compile_config.py b/backends/cortex_m/compile_config.py
new file mode 100644
index 00000000000..93795386ec7
--- /dev/null
+++ b/backends/cortex_m/compile_config.py
@@ -0,0 +1,98 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+Cpu = Literal[
+    "cortex-m0",
+    "cortex-m0plus",
+    "cortex-m3",
+    "cortex-m4",
+    "cortex-m7",
+    "cortex-m23",
+    "cortex-m33",
+    "cortex-m35p",
+    "cortex-m52",
+    "cortex-m55",
+    "cortex-m85",
+]
+Isa = Literal["scalar", "dsp", "mve"]
+
+# Default ISA per CPU follows the most common configuration each core is
+# shipped with. M33/M35P optionally lack DSP, and M52/M55/M85 optionally
+# lack MVE; callers can pass `isa=` explicitly to override.
+_CPU_DEFAULT_ISA: dict[str, str] = {
+    "cortex-m0": "scalar",
+    "cortex-m0plus": "scalar",
+    "cortex-m3": "scalar",
+    "cortex-m4": "dsp",
+    "cortex-m7": "dsp",
+    "cortex-m23": "scalar",
+    "cortex-m33": "dsp",
+    "cortex-m35p": "dsp",
+    "cortex-m52": "mve",
+    "cortex-m55": "mve",
+    "cortex-m85": "mve",
+}
+
+_SUPPORTED_FEATURES: frozenset[str] = frozenset({"int8"})
+
+
+@dataclass(frozen=True)
+class CortexMCompileConfig:
+    """AOT compile configuration for the Cortex-M backend.
+
+    `cpu` and `isa` are consumed by passes that need to differ by target — most
+    notably any future AOT scratch-buffer sizing — and threaded through the
+    build system as the `-mcpu=` value.
+
+    The current default matches pre-config behavior (M55 + MVE) so callers that
+    don't opt in see no change.
+    """
+
+    cpu: Cpu = "cortex-m55"
+    isa: Isa | None = None
+
+    def __post_init__(self) -> None:
+        if self.cpu not in _CPU_DEFAULT_ISA:
+            raise ValueError(
+                f"Unsupported Cortex-M CPU: {self.cpu!r}. "
+                f"Supported: {sorted(_CPU_DEFAULT_ISA)}"
+            )
+        if self.isa is None:
+            # frozen dataclass: use object.__setattr__ to fill default ISA.
+            object.__setattr__(self, "isa", _CPU_DEFAULT_ISA[self.cpu])
+
+    @classmethod
+    def from_target_string(cls, target: str) -> CortexMCompileConfig:
+        """Parse `cortex-m<variant>+int8` strings used by `aot_arm_compiler.py`.
+
+        Today only `+int8` is supported. The suffix is required so the target
+        string remains explicit about the data type contract.
+        """
+        cpu, sep, features = target.partition("+")
+        if not sep:
+            raise ValueError(
+                f"Cortex-M target string must include a feature suffix "
+                f"(e.g. '+int8'), got: {target!r}"
+            )
+        feature_set = set(features.split("+"))
+        unknown = feature_set - _SUPPORTED_FEATURES
+        if unknown or "int8" not in feature_set:
+            raise ValueError(
+                f"Cortex-M target string must be '<cpu>+int8' "
+                f"(supported features: {sorted(_SUPPORTED_FEATURES)}), "
+                f"got: {target!r}"
+            )
+        if cpu not in _CPU_DEFAULT_ISA:
+            raise ValueError(
+                f"Unsupported Cortex-M CPU in target string: {cpu!r}. "
+                f"Supported: {sorted(_CPU_DEFAULT_ISA)}"
+            )
+        return cls(cpu=cpu)  # type: ignore[arg-type]
diff --git a/backends/cortex_m/passes/cortex_m_pass_manager.py b/backends/cortex_m/passes/cortex_m_pass_manager.py
index 074eb6118d0..0355783179e 100644
--- a/backends/cortex_m/passes/cortex_m_pass_manager.py
+++ b/backends/cortex_m/passes/cortex_m_pass_manager.py
@@ -11,6 +11,7 @@
     FoldAndAnnotateQParamsPass,
     ScalarsToAttributePass,
 )
+from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
 from executorch.backends.transforms.replace_scalar_with_tensor import (
     ReplaceScalarWithTensorArgPass,
@@ -57,7 +58,10 @@ class CortexMPassManager(PassManager):
     ]
 
     def __init__(
-        self, exported_program, passes: Optional[list[PassClass]] = None
+        self,
+        exported_program,
+        passes: Optional[list[PassClass]] = None,
+        config: Optional[CortexMCompileConfig] = None,
     ) -> None:
         super().__init__(passes=[])
         self.exported_program = exported_program
@@ -65,6 +69,7 @@ def __init__(
         self.passes: list[PassClass] = (  # type: ignore[assignment]
             passes if passes is not None else self.pass_list  # type: ignore[assignment]
         )
+        self.config: CortexMCompileConfig = config or CortexMCompileConfig()
 
     def transform_for_annotation(self, model):
         passes = self.pass_list_transform_for_annotation
diff --git a/backends/cortex_m/test/misc/test_compile_config.py b/backends/cortex_m/test/misc/test_compile_config.py
new file mode 100644
index 00000000000..a2f5805ef13
--- /dev/null
+++ b/backends/cortex_m/test/misc/test_compile_config.py
@@ -0,0 +1,103 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from importlib.util import find_spec
+
+import pytest
+
+from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
+
+_HAS_CMSIS_NN = find_spec("cmsis_nn") is not None
+
+
+class TestCortexMCompileConfig:
+    def test_default_is_m55_mve(self):
+        config = CortexMCompileConfig()
+        assert config.cpu == "cortex-m55"
+        assert config.isa == "mve"
+
+    @pytest.mark.parametrize(
+        "target_string,expected_cpu,expected_isa",
+        [
+            ("cortex-m0+int8", "cortex-m0", "scalar"),
+            ("cortex-m0plus+int8", "cortex-m0plus", "scalar"),
+            ("cortex-m3+int8", "cortex-m3", "scalar"),
+            ("cortex-m4+int8", "cortex-m4", "dsp"),
+            ("cortex-m7+int8", "cortex-m7", "dsp"),
+            ("cortex-m23+int8", "cortex-m23", "scalar"),
+            ("cortex-m33+int8", "cortex-m33", "dsp"),
+            ("cortex-m35p+int8", "cortex-m35p", "dsp"),
+            ("cortex-m52+int8", "cortex-m52", "mve"),
+            ("cortex-m55+int8", "cortex-m55", "mve"),
+            ("cortex-m85+int8", "cortex-m85", "mve"),
+        ],
+    )
+    def test_from_target_string(self, target_string, expected_cpu, expected_isa):
+        config = CortexMCompileConfig.from_target_string(target_string)
+        assert config.cpu == expected_cpu
+        assert config.isa == expected_isa
+
+    def test_from_target_string_rejects_unknown_cpu(self):
+        with pytest.raises(ValueError, match="cortex-m999"):
+            CortexMCompileConfig.from_target_string("cortex-m999+int8")
+
+    @pytest.mark.parametrize(
+        "target_string",
+        [
+            "cortex-m55",  # missing feature suffix
+            "cortex-m55+int8+int16",  # unsupported extra feature
+            "cortex-m55+",  # trailing plus
+            "cortex-m55+fp16",  # unknown feature
+        ],
+    )
+    def test_from_target_string_rejects_invalid_features(self, target_string):
+        with pytest.raises(ValueError):
+            CortexMCompileConfig.from_target_string(target_string)
+
+    def test_default_matches_m55_target_string(self):
+        # Regression guard: pre-Phase-1 behavior was M55+MVE; the default
+        # constructor must remain equivalent to parsing the existing target.
+        assert CortexMCompileConfig() == CortexMCompileConfig.from_target_string(
+            "cortex-m55+int8"
+        )
+
+    def test_is_hashable_and_frozen(self):
+        from dataclasses import FrozenInstanceError
+
+        config = CortexMCompileConfig(cpu="cortex-m33")
+        assert hash(config) == hash(CortexMCompileConfig(cpu="cortex-m33"))
+        assert {config, CortexMCompileConfig(cpu="cortex-m33")} == {config}
+        with pytest.raises(FrozenInstanceError):
+            config.cpu = "cortex-m55"  # type: ignore[misc]
+
+    def test_explicit_isa_override(self):
+        config = CortexMCompileConfig(cpu="cortex-m33", isa="scalar")
+        assert config.cpu == "cortex-m33"
+        assert config.isa == "scalar"
+
+
+@pytest.mark.skipif(
+    not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn"
+)
+class TestPassManagerConfigWiring:
+    def test_default_config_is_m55(self):
+        from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
+            CortexMPassManager,
+        )
+
+        pm = CortexMPassManager(exported_program=None)
+        assert pm.config.cpu == "cortex-m55"
+        assert pm.config.isa == "mve"
+
+    def test_explicit_config_threaded(self):
+        from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
+            CortexMPassManager,
+        )
+
+        config = CortexMCompileConfig(cpu="cortex-m33")
+        pm = CortexMPassManager(exported_program=None, config=config)
+        assert pm.config.cpu == "cortex-m33"
+        assert pm.config.isa == "dsp"
diff --git a/backends/cortex_m/test/tester.py b/backends/cortex_m/test/tester.py
index 75575c80b4e..e888e2e6056 100644
--- a/backends/cortex_m/test/tester.py
+++ b/backends/cortex_m/test/tester.py
@@ -6,11 +6,13 @@
 
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any
+from functools import partial
+from typing import Any, Optional
 
 import torch
 from executorch.backends.arm.test.common import get_u55_compile_spec
 from executorch.backends.arm.test.tester.arm_tester import Serialize
+from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
 from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
 from executorch.backends.test.harness import Tester as TesterBase
@@ -48,9 +50,12 @@ def __init__(self):
 
 
 class CortexMRunPasses(RunPasses):
-    def __init__(self):
+    def __init__(self, config: Optional[CortexMCompileConfig] = None):
+        config = config or CortexMCompileConfig()
+        # The base RunPasses constructs the pass manager as `cls(ep, pass_list)`.
+        # Pre-bind the config so it flows through that 2-arg call.
         super().__init__(
-            CortexMPassManager,
+            partial(CortexMPassManager, config=config),  # type: ignore[arg-type]
             CortexMPassManager.pass_list,
         )
 
@@ -73,12 +78,20 @@ def __init__(self):
 
 
 class CortexMTester(TesterBase):
-    def __init__(self, module, example_inputs):
+    def __init__(
+        self,
+        module,
+        example_inputs,
+        config: Optional[CortexMCompileConfig] = None,
+    ):
         if callable(example_inputs):
             resolved_example_inputs = example_inputs()
         else:
             resolved_example_inputs = example_inputs
-        super().__init__(module, resolved_example_inputs, cortex_m_stage_classes)
+        config = config or CortexMCompileConfig()
+        stage_classes = dict(cortex_m_stage_classes)
+        stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(config=config)
+        super().__init__(module, resolved_example_inputs, stage_classes)
 
     def test_dialect(
         self,
diff --git a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
index 43ff4a41229..c775ef0576d 100644
--- a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
+++ b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
@@ -24,6 +24,7 @@
 
 import torch
 
+from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
 from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
 from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge
@@ -94,7 +95,10 @@ def export_to_pte(quantized_model, example_input, output_path: str):
     logger.info("Edge program created")
 
     logger.info("Applying Cortex-M optimization passes...")
-    pass_manager = CortexMPassManager(edge_program.exported_program())
+    pass_manager = CortexMPassManager(
+        edge_program.exported_program(),
+        config=CortexMCompileConfig(cpu="cortex-m33"),
+    )
     transformed_ep = pass_manager.transform()
 
     edge_program = to_edge(transformed_ep, compile_config=edge_config)

From 3000f9c2cd80d714379931dc2d7b7b9b08f3ba09 Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Mon, 11 May 2026 15:30:28 -0700
Subject: [PATCH 2/5] Fix lints

---
 backends/cortex_m/test/misc/test_compile_config.py | 4 +---
 backends/cortex_m/test/tester.py                   | 6 ++++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/backends/cortex_m/test/misc/test_compile_config.py b/backends/cortex_m/test/misc/test_compile_config.py
index a2f5805ef13..86ed90bfe43 100644
--- a/backends/cortex_m/test/misc/test_compile_config.py
+++ b/backends/cortex_m/test/misc/test_compile_config.py
@@ -79,9 +79,7 @@ def test_explicit_isa_override(self):
         assert config.isa == "scalar"
 
 
-@pytest.mark.skipif(
-    not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn"
-)
+@pytest.mark.skipif(not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn")
 class TestPassManagerConfigWiring:
     def test_default_config_is_m55(self):
         from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
diff --git a/backends/cortex_m/test/tester.py b/backends/cortex_m/test/tester.py
index e888e2e6056..1e97899245d 100644
--- a/backends/cortex_m/test/tester.py
+++ b/backends/cortex_m/test/tester.py
@@ -56,7 +56,7 @@ def __init__(self, config: Optional[CortexMCompileConfig] = None):
         # Pre-bind the config so it flows through that 2-arg call.
         super().__init__(
             partial(CortexMPassManager, config=config),  # type: ignore[arg-type]
-            CortexMPassManager.pass_list,
+            CortexMPassManager.pass_list,  # type: ignore[arg-type]
         )
 
 
@@ -89,7 +89,9 @@ def __init__(
         else:
             resolved_example_inputs = example_inputs
         config = config or CortexMCompileConfig()
-        stage_classes = dict(cortex_m_stage_classes)
+        stage_classes: dict[StageType, Callable[..., Any]] = dict(
+            cortex_m_stage_classes
+        )
         stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(config=config)
         super().__init__(module, resolved_example_inputs, stage_classes)
 

From efead9ddfabb03553e44df8f28c435332ba87d57 Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Tue, 12 May 2026 17:20:49 -0700
Subject: [PATCH 3/5] Cortex-M backend: address #19470 review feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aligns CortexMTargetConfig with the design Erik proposes in #19505
while keeping the wider plumbing in place. The earlier
CortexMCompileConfig is renamed to CortexMTargetConfig (and its file
moved to target_config.py) to disambiguate from EdgeCompileConfig —
this dataclass models a compilation *target*, not a step in the
compile pipeline.

Adopted from Erik's feedback:

* CortexM enum replaces the Cpu/Isa Literals — typo-safe and
  IDE-friendly.
* `.backend` property returns `cmsis_nn.Backend` directly, resolved
  via `cmsis_nn.resolve_backend(cmsis_nn.CortexM.<X>)`. The
  hand-rolled `_CPU_DEFAULT_ISA` dict is gone — cmsis_nn is the
  single source of truth for the CPU → backend mapping.
* CortexMPass base class added; CortexMPassManager.transform()
  uses signature inspection to inject both `exported_program` and
  `target_config` into passes that declare them (mirroring Erik's
  proposal). The pass manager also gains stricter validation — the
  exported_program must be a real ExportedProgram and the pass list
  must contain classes, not instances — failing fast instead of
  producing opaque errors deep in _transform.
* cmsis_nn is now a hard dependency for the cortex_m tests: the
  top-level `import cmsis_nn` in test_target_config.py replaces the
  previous skipif-on-find_spec dance, addressing Erik's concern that
  skipping tests on missing deps can mask regressions.
* `+int8` dropped from cortex-m target strings — quantization is a
  result of the export flow, not a CPU attribute. TARGETS, help text,
  from_target_string, CI script and README aligned. Note that
  `cortex-m52` is also no longer listed in TARGETS after this change.
* Logging in `_to_edge_cortex_m` and the --delegate-ignored warning
  switched to f-strings.
* `__init__` docstring on CortexMPassManager documents the
  exported_program / passes / target_config defaults (including the
  M55+MVE fallback that matches pre-config behaviour).
* `import-not-found` removed from the cmsis_nn type-ignore — only
  `import-untyped` actually fires, and if cmsis_nn ever ships stubs
  the unused ignore will become a tripwire.

Kept the optional `isa` override field for the optional-extension
cases (M55 without MVE, M33 without DSP, etc.) — different from
Erik's enum-only design, but the override remains useful for cores
where ISA extensions are optional. A `_SUPPORTED_BACKENDS` table
encodes the per-CPU architectural capability set so overrides
validate at construction; forcing MVE on an M0 raises ValueError
with the actual supported list. The SCALAR ⊂ DSP ⊂ MVE ordering
reflects that an MVE-capable core also runs DSP and scalar code.

Defers Erik's `ANY` proposal. In #19505 ANY falls back to MVE, but an
honest "any cortex-m" choice would have to do worst-case scratch
buffer planning across the ISA classes, and the worst case may not be
MVE. Deferring until the scratch-buffer side lands and we can
implement the worst-case analysis properly.

Authored with Claude.
---
 .ci/scripts/test_cortex_m_e2e.sh              |   4 +-
 backends/arm/scripts/aot_arm_compiler.py      |  45 ++++---
 backends/cortex_m/compile_config.py           |  98 ---------------
 backends/cortex_m/passes/__init__.py          |   1 +
 backends/cortex_m/passes/cortex_m_pass.py     |  35 ++++++
 .../cortex_m/passes/cortex_m_pass_manager.py  |  61 ++++++---
 backends/cortex_m/target_config.py            | 110 +++++++++++++++++
 .../cortex_m/test/misc/test_compile_config.py | 101 ---------------
 .../cortex_m/test/misc/test_target_config.py  | 116 ++++++++++++++++++
 backends/cortex_m/test/tester.py              |  18 +--
 .../pico2/export_mlp_mnist_cmsis.py           |   6 +-
 11 files changed, 342 insertions(+), 253 deletions(-)
 delete mode 100644 backends/cortex_m/compile_config.py
 create mode 100644 backends/cortex_m/passes/cortex_m_pass.py
 create mode 100644 backends/cortex_m/target_config.py
 delete mode 100644 backends/cortex_m/test/misc/test_compile_config.py
 create mode 100644 backends/cortex_m/test/misc/test_target_config.py

diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh
index c6e643f118c..de47a45ea0d 100755
--- a/.ci/scripts/test_cortex_m_e2e.sh
+++ b/.ci/scripts/test_cortex_m_e2e.sh
@@ -17,9 +17,9 @@ MODEL=$1
 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../..")
 
-# Quantization is the default for the cortex-m55+int8 target; run.sh's
+# Quantization is the default for the cortex-m55 target; run.sh's
 # arg parser only recognizes --no_quantize, so we omit any explicit flag.
 bash "${et_root_dir}/examples/arm/run.sh" \
     --model_name="${MODEL}" \
-    --target=cortex-m55+int8 \
+    --target=cortex-m55 \
     --bundleio
diff --git a/backends/arm/scripts/aot_arm_compiler.py b/backends/arm/scripts/aot_arm_compiler.py
index 4b2065adc25..8d841ef61ff 100644
--- a/backends/arm/scripts/aot_arm_compiler.py
+++ b/backends/arm/scripts/aot_arm_compiler.py
@@ -33,13 +33,13 @@
 from executorch.backends.arm.util._factory import create_partitioner, create_quantizer
 
 from executorch.backends.arm.vgf import VgfCompileSpec
-from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
 
 from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
     ReplaceQuantNodesPass,
 )
 from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
+from executorch.backends.cortex_m.target_config import CortexMTargetConfig
 from executorch.devtools import BundledProgram, generate_etrecord
 from executorch.devtools.backend_debug import get_delegation_info
 from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
@@ -466,17 +466,16 @@ def forward(self, x):
     "TOSA-1.0+INT",
     "TOSA-1.0+FP",
     "TOSA-1.0+INT+int16",
-    "cortex-m0+int8",
-    "cortex-m0plus+int8",
-    "cortex-m3+int8",
-    "cortex-m4+int8",
-    "cortex-m7+int8",
-    "cortex-m23+int8",
-    "cortex-m33+int8",
-    "cortex-m35p+int8",
-    "cortex-m52+int8",
-    "cortex-m55+int8",
-    "cortex-m85+int8",
+    "cortex-m0",
+    "cortex-m0plus",
+    "cortex-m3",
+    "cortex-m4",
+    "cortex-m7",
+    "cortex-m23",
+    "cortex-m33",
+    "cortex-m35p",
+    "cortex-m55",
+    "cortex-m85",
 ]
 
 
@@ -577,7 +576,7 @@ def _get_args():
         required=False,
         default="ethos-u55-128",
         choices=TARGETS,
-        help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m<variant>+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
+        help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m<variant> (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
     )
     # TODO: Remove --evaluate and --evaluate_config completely after a suitable time.
     # They are deprecated and no longer functional in this script.
@@ -871,13 +870,12 @@ def _to_edge_cortex_m(
     model: GraphModule,
     example_inputs: Tuple[torch.Tensor],
     calibration_samples: Optional[List[Tuple[torch.Tensor, ...]]],
-    config: CortexMCompileConfig,
+    target_config: CortexMTargetConfig,
 ):
     """Cortex-M/CMSIS-NN compilation path with no delegation."""
     logging.info(
-        "Using Cortex-M/CMSIS-NN compilation path for cpu=%s isa=%s",
-        config.cpu,
-        config.isa,
+        f"Using Cortex-M/CMSIS-NN compilation path for cpu={target_config.cpu.name} "
+        f"backend={target_config.backend.name}"
     )
 
     def _to_channels_last(x):
@@ -931,7 +929,9 @@ def _to_channels_last(x):
         ),
     )
 
-    pass_manager = CortexMPassManager(edge.exported_program(), config=config)
+    pass_manager = CortexMPassManager(
+        edge.exported_program(), target_config=target_config
+    )
     edge._edge_programs["forward"] = pass_manager.transform()
 
     return model_quant, edge
@@ -1025,12 +1025,11 @@ def main() -> None:  # noqa: C901
 
     if args.target.startswith("cortex-m"):
         # Cortex-M path: CMSIS-NN portable kernels, no delegation
-        cortex_m_config = CortexMCompileConfig.from_target_string(args.target)
+        target_config = CortexMTargetConfig.from_target_string(args.target)
         if args.delegate:
             logging.warning(
-                "--delegate is ignored for target %r "
-                "(this target does not use delegated ops).",
-                args.target,
+                f"--delegate is ignored for target {args.target!r} "
+                "(this target does not use delegated ops)."
             )
             args.delegate = False
         model_quant, edge = _to_edge_cortex_m(
@@ -1039,7 +1038,7 @@ def main() -> None:  # noqa: C901
             model,
             example_inputs,
             calibration_samples,
-            cortex_m_config,
+            target_config,
         )
     elif args.delegate:
         # As we can target multiple output encodings, one must
diff --git a/backends/cortex_m/compile_config.py b/backends/cortex_m/compile_config.py
deleted file mode 100644
index 93795386ec7..00000000000
--- a/backends/cortex_m/compile_config.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Literal
-
-Cpu = Literal[
-    "cortex-m0",
-    "cortex-m0plus",
-    "cortex-m3",
-    "cortex-m4",
-    "cortex-m7",
-    "cortex-m23",
-    "cortex-m33",
-    "cortex-m35p",
-    "cortex-m52",
-    "cortex-m55",
-    "cortex-m85",
-]
-Isa = Literal["scalar", "dsp", "mve"]
-
-# Default ISA per CPU follows the most common configuration each core is
-# shipped with. M33/M35P optionally lack DSP, and M52/M55/M85 optionally
-# lack MVE; callers can pass `isa=` explicitly to override.
-_CPU_DEFAULT_ISA: dict[str, str] = {
-    "cortex-m0": "scalar",
-    "cortex-m0plus": "scalar",
-    "cortex-m3": "scalar",
-    "cortex-m4": "dsp",
-    "cortex-m7": "dsp",
-    "cortex-m23": "scalar",
-    "cortex-m33": "dsp",
-    "cortex-m35p": "dsp",
-    "cortex-m52": "mve",
-    "cortex-m55": "mve",
-    "cortex-m85": "mve",
-}
-
-_SUPPORTED_FEATURES: frozenset[str] = frozenset({"int8"})
-
-
-@dataclass(frozen=True)
-class CortexMCompileConfig:
-    """AOT compile configuration for the Cortex-M backend.
-
-    `cpu` and `isa` are consumed by passes that need to differ by target — most
-    notably any future AOT scratch-buffer sizing — and threaded through the
-    build system as the `-mcpu=` value.
-
-    The current default matches pre-config behavior (M55 + MVE) so callers that
-    don't opt in see no change.
-    """
-
-    cpu: Cpu = "cortex-m55"
-    isa: Isa | None = None
-
-    def __post_init__(self) -> None:
-        if self.cpu not in _CPU_DEFAULT_ISA:
-            raise ValueError(
-                f"Unsupported Cortex-M CPU: {self.cpu!r}. "
-                f"Supported: {sorted(_CPU_DEFAULT_ISA)}"
-            )
-        if self.isa is None:
-            # frozen dataclass: use object.__setattr__ to fill default ISA.
-            object.__setattr__(self, "isa", _CPU_DEFAULT_ISA[self.cpu])
-
-    @classmethod
-    def from_target_string(cls, target: str) -> CortexMCompileConfig:
-        """Parse `cortex-m<variant>+int8` strings used by `aot_arm_compiler.py`.
-
-        Today only `+int8` is supported. The suffix is required so the target
-        string remains explicit about the data type contract.
-        """
-        cpu, sep, features = target.partition("+")
-        if not sep:
-            raise ValueError(
-                f"Cortex-M target string must include a feature suffix "
-                f"(e.g. '+int8'), got: {target!r}"
-            )
-        feature_set = set(features.split("+"))
-        unknown = feature_set - _SUPPORTED_FEATURES
-        if unknown or "int8" not in feature_set:
-            raise ValueError(
-                f"Cortex-M target string must be '<cpu>+int8' "
-                f"(supported features: {sorted(_SUPPORTED_FEATURES)}), "
-                f"got: {target!r}"
-            )
-        if cpu not in _CPU_DEFAULT_ISA:
-            raise ValueError(
-                f"Unsupported Cortex-M CPU in target string: {cpu!r}. "
-                f"Supported: {sorted(_CPU_DEFAULT_ISA)}"
-            )
-        return cls(cpu=cpu)  # type: ignore[arg-type]
diff --git a/backends/cortex_m/passes/__init__.py b/backends/cortex_m/passes/__init__.py
index b1b67add413..92179ec6654 100644
--- a/backends/cortex_m/passes/__init__.py
+++ b/backends/cortex_m/passes/__init__.py
@@ -36,6 +36,7 @@ def _ensure_cortex_m_dependencies() -> None:
 from .activation_fusion_pass import ActivationFusionPass  # noqa
 from .clamp_hardswish_pass import ClampHardswishPass  # noqa
 from .convert_to_cortex_m_pass import ConvertToCortexMPass  # noqa
+from .cortex_m_pass import CortexMPass  # noqa
 from .decompose_hardswish_pass import DecomposeHardswishPass  # noqa
 from .decompose_mean_pass import DecomposeMeanPass  # noqa
 from .quantized_clamp_activation_pass import QuantizedClampActivationPass  # noqa
diff --git a/backends/cortex_m/passes/cortex_m_pass.py b/backends/cortex_m/passes/cortex_m_pass.py
new file mode 100644
index 00000000000..5b3e98e9a96
--- /dev/null
+++ b/backends/cortex_m/passes/cortex_m_pass.py
@@ -0,0 +1,35 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.cortex_m.target_config import CortexMTargetConfig
+from executorch.exir.pass_base import ExportPass
+from torch.export import ExportedProgram
+
+
+class CortexMPass(ExportPass):
+    """Base class for passes that need the Cortex-M target config.
+
+    Passes that subclass this declare `exported_program` and `target_config`
+    in their `__init__`; `CortexMPassManager.transform()` injects both
+    automatically when running the pass list.
+    """
+
+    def __init__(
+        self,
+        exported_program: ExportedProgram,
+        target_config: CortexMTargetConfig,
+    ) -> None:
+        super().__init__()
+        self._exported_program = exported_program
+        self._target_config = target_config
+
+    @property
+    def exported_program(self) -> ExportedProgram:
+        return self._exported_program
+
+    @property
+    def target_config(self) -> CortexMTargetConfig:
+        return self._target_config
diff --git a/backends/cortex_m/passes/cortex_m_pass_manager.py b/backends/cortex_m/passes/cortex_m_pass_manager.py
index 0355783179e..f95587a00d3 100644
--- a/backends/cortex_m/passes/cortex_m_pass_manager.py
+++ b/backends/cortex_m/passes/cortex_m_pass_manager.py
@@ -5,13 +5,13 @@
 
 
 import inspect
-from typing import Callable, cast, Optional, Type
+from typing import Any, Optional, Type
 
 from executorch.backends.arm._passes import (
     FoldAndAnnotateQParamsPass,
     ScalarsToAttributePass,
 )
-from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
+from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig
 from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
 from executorch.backends.transforms.replace_scalar_with_tensor import (
     ReplaceScalarWithTensorArgPass,
@@ -20,9 +20,6 @@
 from executorch.exir.pass_manager import PassManager
 from executorch.exir.program._program import _transform, lift_constant_tensor_pass
 from torch.export import ExportedProgram
-from torch.fx.passes.infra.pass_base import PassResult
-
-from torch.nn import Module
 
 from .activation_fusion_pass import ActivationFusionPass
 from .clamp_hardswish_pass import ClampHardswishPass
@@ -59,17 +56,32 @@ class CortexMPassManager(PassManager):
 
     def __init__(
         self,
-        exported_program,
+        exported_program: ExportedProgram | None,
         passes: Optional[list[PassClass]] = None,
-        config: Optional[CortexMCompileConfig] = None,
+        target_config: Optional[CortexMTargetConfig] = None,
     ) -> None:
+        """Initialize the Cortex-M pass manager.
+
+        Args:
+            exported_program: The exported program to transform. Required
+                before calling ``transform()``; may be ``None`` for callers
+                that only use ``transform_for_annotation()``.
+            passes: Optional override of the pass list. Defaults to
+                ``CortexMPassManager.pass_list``.
+            target_config: Compilation target for passes that need it.
+                Defaults to ``CortexMTargetConfig(cpu=CortexM.M55)``, which
+                resolves through cmsis_nn to the MVE backend — matching the
+                pre-config historical behaviour.
+        """
         super().__init__(passes=[])
         self.exported_program = exported_program
         # PassManager.passes is typed as callables; this manager stores pass classes which are initialized at transform time with the exported_program.
         self.passes: list[PassClass] = (  # type: ignore[assignment]
             passes if passes is not None else self.pass_list  # type: ignore[assignment]
         )
-        self.config: CortexMCompileConfig = config or CortexMCompileConfig()
+        self.target_config: CortexMTargetConfig = target_config or CortexMTargetConfig(
+            cpu=CortexM.M55
+        )
 
     def transform_for_annotation(self, model):
         passes = self.pass_list_transform_for_annotation
@@ -78,18 +90,31 @@ def transform_for_annotation(self, model):
         return model
 
     def transform(self) -> ExportedProgram:
-        ep = self.exported_program
+        exported_program = self.exported_program
+        if not isinstance(exported_program, ExportedProgram):
+            raise ValueError(
+                f"{type(self).__name__}.transform() needs a real ExportedProgram, "
+                f"got {exported_program!r}"
+            )
+
         for pass_cls in self.passes:
+            if not isinstance(pass_cls, type):
+                raise ValueError(
+                    f"{type(self).__name__} expects pass classes, not instances; "
+                    f"got {pass_cls!r}"
+                )
+
             signature = inspect.signature(pass_cls)
+            kwargs: dict[str, Any] = {}
             if "exported_program" in signature.parameters:
-                ep_pass_ctor = cast(Callable[[ExportedProgram], ExportPass], pass_cls)
-                transform_pass = ep_pass_ctor(ep)
-            else:
-                transform_pass = pass_cls()
-            pass_callable = cast(Callable[[Module], PassResult], transform_pass)
-            ep = _transform(ep, pass_callable)
+                kwargs["exported_program"] = exported_program
+            if "target_config" in signature.parameters:
+                kwargs["target_config"] = self.target_config
+
+            transform_pass = pass_cls(**kwargs)
+            exported_program = _transform(exported_program, transform_pass)
 
         # All constant tensors should be lifted to buffers at this point, re-run
-        # lift_constant_tensor_pass in case new ones have been introduced by the passes above.
-        ep = lift_constant_tensor_pass(ep)
-        return ep
+        # lift_constant_tensor_pass in case new ones have been introduced.
+        exported_program = lift_constant_tensor_pass(exported_program)
+        return exported_program
diff --git a/backends/cortex_m/target_config.py b/backends/cortex_m/target_config.py
new file mode 100644
index 00000000000..2a53869976b
--- /dev/null
+++ b/backends/cortex_m/target_config.py
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import auto, Enum
+from typing import Optional
+
+import cmsis_nn  # type: ignore[import-untyped]
+
+
+class CortexM(Enum):
+    """Cortex-M CPU variant. Names mirror cmsis_nn.CortexM so the cmsis_nn
+    enum can be looked up by name."""
+
+    M0 = auto()
+    M0PLUS = auto()
+    M3 = auto()
+    M4 = auto()
+    M7 = auto()
+    M23 = auto()
+    M33 = auto()
+    M35P = auto()
+    M55 = auto()
+    M85 = auto()
+
+
+# Per-CPU set of cmsis_nn backends each core can execute. SCALAR runs on
+# every core; DSP requires the Armv7E-M or Armv8-M Mainline DSP option;
+# MVE requires Armv8.1-M Mainline with the MVE extension. The sets are
+# nested (SCALAR < DSP < MVE): an MVE-capable core also runs DSP and
+# scalar code, which is why overriding, e.g., an M55 built without MVE
+# down to the DSP backend is legitimate.
+_SUPPORTED_BACKENDS: dict[CortexM, frozenset[cmsis_nn.Backend]] = {
+    CortexM.M0: frozenset({cmsis_nn.Backend.SCALAR}),
+    CortexM.M0PLUS: frozenset({cmsis_nn.Backend.SCALAR}),
+    CortexM.M3: frozenset({cmsis_nn.Backend.SCALAR}),
+    CortexM.M23: frozenset({cmsis_nn.Backend.SCALAR}),
+    CortexM.M4: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}),
+    CortexM.M7: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}),
+    CortexM.M33: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}),
+    CortexM.M35P: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}),
+    CortexM.M55: frozenset(
+        {cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP, cmsis_nn.Backend.MVE}
+    ),
+    CortexM.M85: frozenset(
+        {cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP, cmsis_nn.Backend.MVE}
+    ),
+}
+
+
+@dataclass(frozen=True)
+class CortexMTargetConfig:
+    """AOT compile target configuration for the Cortex-M backend.
+
+    `cpu` selects the CPU variant. `isa` optionally overrides the cmsis_nn
+    backend that would normally be derived from `cpu` — useful for cores
+    with optional ISA extensions (M55 without MVE, M33 without DSP, etc.).
+    Overrides are validated against the CPU's architectural capability set
+    on construction; e.g. forcing MVE on an M0 raises ValueError.
+    """
+
+    cpu: CortexM
+    isa: Optional[cmsis_nn.Backend] = None
+
+    def __post_init__(self) -> None:
+        if self.isa is None:
+            return
+        supported = _SUPPORTED_BACKENDS.get(self.cpu)
+        if supported is None or self.isa not in supported:
+            allowed = sorted(b.name for b in supported) if supported else []
+            raise ValueError(
+                f"Backend {self.isa.name} is not supported on "
+                f"{self.cpu.name}; supported: {allowed}"
+            )
+
+    @property
+    def backend(self) -> cmsis_nn.Backend:
+        if self.isa is not None:
+            return self.isa
+        try:
+            cmsis_member = getattr(cmsis_nn.CortexM, self.cpu.name)
+        except AttributeError as e:
+            raise ValueError(
+                f"cmsis_nn does not yet support {self.cpu.name}; pass an "
+                f"explicit `isa=` override or wait for upstream support."
+            ) from e
+        return cmsis_nn.resolve_backend(cmsis_member)
+
+    @classmethod
+    def from_target_string(cls, target: str) -> CortexMTargetConfig:
+        """Parse a `cortex-m<variant>` target string."""
+        if not target.startswith("cortex-m"):
+            raise ValueError(
+                f"Cortex-M target string must start with 'cortex-m', "
+                f"got: {target!r}"
+            )
+        enum_name = "M" + target[len("cortex-m") :].upper()
+        try:
+            cpu = CortexM[enum_name]
+        except KeyError as e:
+            raise ValueError(
+                f"Unsupported Cortex-M target string: {target!r}. "
+                f"Supported: {sorted('cortex-m' + m.name[1:].lower() for m in CortexM)}"
+            ) from e
+        return cls(cpu=cpu)
diff --git a/backends/cortex_m/test/misc/test_compile_config.py b/backends/cortex_m/test/misc/test_compile_config.py
deleted file mode 100644
index 86ed90bfe43..00000000000
--- a/backends/cortex_m/test/misc/test_compile_config.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-from importlib.util import find_spec
-
-import pytest
-
-from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
-
-_HAS_CMSIS_NN = find_spec("cmsis_nn") is not None
-
-
-class TestCortexMCompileConfig:
-    def test_default_is_m55_mve(self):
-        config = CortexMCompileConfig()
-        assert config.cpu == "cortex-m55"
-        assert config.isa == "mve"
-
-    @pytest.mark.parametrize(
-        "target_string,expected_cpu,expected_isa",
-        [
-            ("cortex-m0+int8", "cortex-m0", "scalar"),
-            ("cortex-m0plus+int8", "cortex-m0plus", "scalar"),
-            ("cortex-m3+int8", "cortex-m3", "scalar"),
-            ("cortex-m4+int8", "cortex-m4", "dsp"),
-            ("cortex-m7+int8", "cortex-m7", "dsp"),
-            ("cortex-m23+int8", "cortex-m23", "scalar"),
-            ("cortex-m33+int8", "cortex-m33", "dsp"),
-            ("cortex-m35p+int8", "cortex-m35p", "dsp"),
-            ("cortex-m52+int8", "cortex-m52", "mve"),
-            ("cortex-m55+int8", "cortex-m55", "mve"),
-            ("cortex-m85+int8", "cortex-m85", "mve"),
-        ],
-    )
-    def test_from_target_string(self, target_string, expected_cpu, expected_isa):
-        config = CortexMCompileConfig.from_target_string(target_string)
-        assert config.cpu == expected_cpu
-        assert config.isa == expected_isa
-
-    def test_from_target_string_rejects_unknown_cpu(self):
-        with pytest.raises(ValueError, match="cortex-m999"):
-            CortexMCompileConfig.from_target_string("cortex-m999+int8")
-
-    @pytest.mark.parametrize(
-        "target_string",
-        [
-            "cortex-m55",  # missing feature suffix
-            "cortex-m55+int8+int16",  # unsupported extra feature
-            "cortex-m55+",  # trailing plus
-            "cortex-m55+fp16",  # unknown feature
-        ],
-    )
-    def test_from_target_string_rejects_invalid_features(self, target_string):
-        with pytest.raises(ValueError):
-            CortexMCompileConfig.from_target_string(target_string)
-
-    def test_default_matches_m55_target_string(self):
-        # Regression guard: pre-Phase-1 behavior was M55+MVE; the default
-        # constructor must remain equivalent to parsing the existing target.
-        assert CortexMCompileConfig() == CortexMCompileConfig.from_target_string(
-            "cortex-m55+int8"
-        )
-
-    def test_is_hashable_and_frozen(self):
-        from dataclasses import FrozenInstanceError
-
-        config = CortexMCompileConfig(cpu="cortex-m33")
-        assert hash(config) == hash(CortexMCompileConfig(cpu="cortex-m33"))
-        assert {config, CortexMCompileConfig(cpu="cortex-m33")} == {config}
-        with pytest.raises(FrozenInstanceError):
-            config.cpu = "cortex-m55"  # type: ignore[misc]
-
-    def test_explicit_isa_override(self):
-        config = CortexMCompileConfig(cpu="cortex-m33", isa="scalar")
-        assert config.cpu == "cortex-m33"
-        assert config.isa == "scalar"
-
-
-@pytest.mark.skipif(not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn")
-class TestPassManagerConfigWiring:
-    def test_default_config_is_m55(self):
-        from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
-            CortexMPassManager,
-        )
-
-        pm = CortexMPassManager(exported_program=None)
-        assert pm.config.cpu == "cortex-m55"
-        assert pm.config.isa == "mve"
-
-    def test_explicit_config_threaded(self):
-        from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
-            CortexMPassManager,
-        )
-
-        config = CortexMCompileConfig(cpu="cortex-m33")
-        pm = CortexMPassManager(exported_program=None, config=config)
-        assert pm.config.cpu == "cortex-m33"
-        assert pm.config.isa == "dsp"
diff --git a/backends/cortex_m/test/misc/test_target_config.py b/backends/cortex_m/test/misc/test_target_config.py
new file mode 100644
index 00000000000..5547bfbe125
--- /dev/null
+++ b/backends/cortex_m/test/misc/test_target_config.py
@@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import cmsis_nn  # type: ignore[import-untyped]
+import pytest
+
+from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig
+
+
+class TestCortexMTargetConfig:
+    @pytest.mark.parametrize(
+        "target_string,expected_cpu",
+        [
+            ("cortex-m0", CortexM.M0),
+            ("cortex-m0plus", CortexM.M0PLUS),
+            ("cortex-m3", CortexM.M3),
+            ("cortex-m4", CortexM.M4),
+            ("cortex-m7", CortexM.M7),
+            ("cortex-m23", CortexM.M23),
+            ("cortex-m33", CortexM.M33),
+            ("cortex-m35p", CortexM.M35P),
+            ("cortex-m55", CortexM.M55),
+            ("cortex-m85", CortexM.M85),
+        ],
+    )
+    def test_from_target_string(self, target_string, expected_cpu):
+        config = CortexMTargetConfig.from_target_string(target_string)
+        assert config.cpu == expected_cpu
+
+    @pytest.mark.parametrize(
+        "cpu,expected_backend",
+        [
+            (CortexM.M0, cmsis_nn.Backend.SCALAR),
+            (CortexM.M4, cmsis_nn.Backend.DSP),
+            (CortexM.M33, cmsis_nn.Backend.DSP),
+            (CortexM.M55, cmsis_nn.Backend.MVE),
+            (CortexM.M85, cmsis_nn.Backend.MVE),
+        ],
+    )
+    def test_backend_resolved_via_cmsis_nn(self, cpu, expected_backend):
+        assert CortexMTargetConfig(cpu=cpu).backend == expected_backend
+
+    @pytest.mark.parametrize(
+        "cpu,override",
+        [
+            (CortexM.M55, cmsis_nn.Backend.DSP),  # M55 with MVE disabled
+            (CortexM.M55, cmsis_nn.Backend.SCALAR),  # M55 without DSP or MVE
+            (CortexM.M85, cmsis_nn.Backend.DSP),
+            (CortexM.M33, cmsis_nn.Backend.SCALAR),  # M33 without DSP option
+            (CortexM.M4, cmsis_nn.Backend.SCALAR),  # M4 without DSP intrinsics
+        ],
+    )
+    def test_isa_override_compatible(self, cpu, override):
+        config = CortexMTargetConfig(cpu=cpu, isa=override)
+        assert config.backend == override
+
+    @pytest.mark.parametrize(
+        "cpu,override",
+        [
+            (CortexM.M0, cmsis_nn.Backend.DSP),  # Armv6-M has no DSP
+            (CortexM.M0, cmsis_nn.Backend.MVE),
+            (CortexM.M3, cmsis_nn.Backend.DSP),  # Armv7-M has no DSP
+            (CortexM.M4, cmsis_nn.Backend.MVE),  # Armv7E-M has no MVE
+            (CortexM.M33, cmsis_nn.Backend.MVE),  # Armv8-M Mainline has no MVE
+            (CortexM.M35P, cmsis_nn.Backend.MVE),
+        ],
+    )
+    def test_isa_override_rejects_incompatible(self, cpu, override):
+        with pytest.raises(ValueError, match="not supported"):
+            CortexMTargetConfig(cpu=cpu, isa=override)
+
+    @pytest.mark.parametrize(
+        "target_string",
+        [
+            "cortex-m999",
+            "cortex-m52",  # M52 is absent from CortexM (not yet in cmsis_nn.CortexM)
+            "cortex-m55+int8",  # legacy +int8 form no longer accepted
+            "arm-m4",
+        ],
+    )
+    def test_from_target_string_rejects_invalid(self, target_string):
+        with pytest.raises(ValueError):
+            CortexMTargetConfig.from_target_string(target_string)
+
+    def test_is_hashable_and_frozen(self):
+        from dataclasses import FrozenInstanceError
+
+        config = CortexMTargetConfig(cpu=CortexM.M33)
+        assert hash(config) == hash(CortexMTargetConfig(cpu=CortexM.M33))
+        assert {config, CortexMTargetConfig(cpu=CortexM.M33)} == {config}
+        with pytest.raises(FrozenInstanceError):
+            config.cpu = CortexM.M55  # type: ignore[misc]
+
+
+class TestPassManagerTargetConfigWiring:
+    def test_default_target_config_is_m55(self):
+        from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
+            CortexMPassManager,
+        )
+
+        pm = CortexMPassManager(exported_program=None)
+        assert pm.target_config.cpu == CortexM.M55
+        assert pm.target_config.backend == cmsis_nn.Backend.MVE
+
+    def test_explicit_target_config_threaded(self):
+        from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
+            CortexMPassManager,
+        )
+
+        target_config = CortexMTargetConfig(cpu=CortexM.M33)
+        pm = CortexMPassManager(exported_program=None, target_config=target_config)
+        assert pm.target_config.cpu == CortexM.M33
+        assert pm.target_config.backend == cmsis_nn.Backend.DSP
diff --git a/backends/cortex_m/test/tester.py b/backends/cortex_m/test/tester.py
index 1e97899245d..e9912d03cad 100644
--- a/backends/cortex_m/test/tester.py
+++ b/backends/cortex_m/test/tester.py
@@ -12,9 +12,9 @@
 import torch
 from executorch.backends.arm.test.common import get_u55_compile_spec
 from executorch.backends.arm.test.tester.arm_tester import Serialize
-from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
 from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
+from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig
 from executorch.backends.test.harness import Tester as TesterBase
 from executorch.backends.test.harness.stages import (
     Export,
@@ -50,12 +50,12 @@ def __init__(self):
 
 
 class CortexMRunPasses(RunPasses):
-    def __init__(self, config: Optional[CortexMCompileConfig] = None):
-        config = config or CortexMCompileConfig()
+    def __init__(self, target_config: Optional[CortexMTargetConfig] = None):
+        target_config = target_config or CortexMTargetConfig(cpu=CortexM.M55)
         # The base RunPasses constructs the pass manager as `cls(ep, pass_list)`.
-        # Pre-bind the config so it flows through that 2-arg call.
+        # Pre-bind the target_config so it flows through that 2-arg call.
         super().__init__(
-            partial(CortexMPassManager, config=config),  # type: ignore[arg-type]
+            partial(CortexMPassManager, target_config=target_config),  # type: ignore[arg-type]
             CortexMPassManager.pass_list,  # type: ignore[arg-type]
         )
 
@@ -82,17 +82,19 @@ def __init__(
         self,
         module,
         example_inputs,
-        config: Optional[CortexMCompileConfig] = None,
+        target_config: Optional[CortexMTargetConfig] = None,
     ):
         if callable(example_inputs):
             resolved_example_inputs = example_inputs()
         else:
             resolved_example_inputs = example_inputs
-        config = config or CortexMCompileConfig()
+        target_config = target_config or CortexMTargetConfig(cpu=CortexM.M55)
         stage_classes: dict[StageType, Callable[..., Any]] = dict(
             cortex_m_stage_classes
         )
-        stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(config=config)
+        stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(
+            target_config=target_config
+        )
         super().__init__(module, resolved_example_inputs, stage_classes)
 
     def test_dialect(
diff --git a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
index c775ef0576d..4785598c876 100644
--- a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
+++ b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
@@ -23,10 +23,10 @@
 import os
 
 import torch
-
-from executorch.backends.cortex_m.compile_config import CortexMCompileConfig
 from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
 from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
+
+from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig
 from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge
 from executorch.extension.export_util.utils import save_pte_program
 
@@ -97,7 +97,7 @@ def export_to_pte(quantized_model, example_input, output_path: str):
     logger.info("Applying Cortex-M optimization passes...")
     pass_manager = CortexMPassManager(
         edge_program.exported_program(),
-        config=CortexMCompileConfig(cpu="cortex-m33"),
+        target_config=CortexMTargetConfig(cpu=CortexM.M33),
     )
     transformed_ep = pass_manager.transform()
 

From c7a1278691ba3e3b197f54195706a1e0449b1737 Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Tue, 12 May 2026 17:34:08 -0700
Subject: [PATCH 4/5] Cortex-M backend: restore import-not-found on the
 cmsis_nn type ignore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI's mypy env doesn't pip-install cmsis_nn (it's a native pybind11
module — type-checking-only envs typically skip such deps), so
`import-not-found` fires there even though `import-untyped` is the
one that fires locally. The combo `[import-not-found, import-untyped]`
is honest about both states; dropping either half breaks one or the
other environment.

Authored with Claude.
---
 backends/cortex_m/target_config.py                | 2 +-
 backends/cortex_m/test/misc/test_target_config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backends/cortex_m/target_config.py b/backends/cortex_m/target_config.py
index 2a53869976b..e18e5d00a41 100644
--- a/backends/cortex_m/target_config.py
+++ b/backends/cortex_m/target_config.py
@@ -10,7 +10,7 @@
 from enum import auto, Enum
 from typing import Optional
 
-import cmsis_nn  # type: ignore[import-untyped]
+import cmsis_nn  # type: ignore[import-not-found, import-untyped]
 
 
 class CortexM(Enum):
diff --git a/backends/cortex_m/test/misc/test_target_config.py b/backends/cortex_m/test/misc/test_target_config.py
index 5547bfbe125..3e648b0a81c 100644
--- a/backends/cortex_m/test/misc/test_target_config.py
+++ b/backends/cortex_m/test/misc/test_target_config.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import cmsis_nn  # type: ignore[import-untyped]
+import cmsis_nn  # type: ignore[import-not-found, import-untyped]
 import pytest
 
 from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig

From 2ec0840f4260a4a689c8fa82b3eb028635fe78bb Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Wed, 13 May 2026 08:57:22 -0700
Subject: [PATCH 5/5] Cortex-M backend: fix Zephyr hello-executorch CI command

The Zephyr `hello-executorch` README has `<!-- RUN -->` directives that
the test-arm-backend-zephyr CI extracts and executes verbatim. The
prior wording used `--target=cortex-m55+int8`, which the AOT compiler
no longer accepts after the +int8 drop. Update both code blocks (and
the surrounding prose) to use the new bare `--target=cortex-m55`
spelling.

Authored with Claude.
---
 zephyr/samples/hello-executorch/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/zephyr/samples/hello-executorch/README.md b/zephyr/samples/hello-executorch/README.md
index 16303303031..ab8022d5d62 100644
--- a/zephyr/samples/hello-executorch/README.md
+++ b/zephyr/samples/hello-executorch/README.md
@@ -51,10 +51,10 @@ west build -b mps3/corstone300/fvp modules/lib/executorch/zephyr/samples/hello-e
 Prepare the Cortex-M55 PTE model
 <!-- RUN test_cortex-m55_generate_pte -->
 ```
-python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55+int8 --output=add_m55.pte
+python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55 --output=add_m55.pte
 ```
 
-`--target=cortex-m55+int8` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization.
+`--target=cortex-m55` plus `--quantize` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization.
 
 
 #### Build and run
@@ -129,10 +129,10 @@ export PATH=$PATH:~/STMicroelectronics/STM32Cube/STM32CubeProgrammer/bin
 
 Prepare the Cortex-M55 PTE model
 ```
-python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55+int8 --output=add_m55.pte
+python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55 --output=add_m55.pte
 ```
 
-`--target=cortex-m55+int8` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization.
+`--target=cortex-m55` plus `--quantize` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization.
 
 #### Build and run