From a2b556e92d9b6ab2efdb5800651b91ae9af9fd8c Mon Sep 17 00:00:00 2001 From: RJ Ascani Date: Mon, 11 May 2026 14:37:25 -0700 Subject: [PATCH 1/5] Cortex-M backend: thread target CPU/ISA through the AOT pass manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a CortexMCompileConfig dataclass (cpu + isa) that carries Cortex-M target information from the --target=cortex-m<N>+int8 CLI string into CortexMPassManager. The full standard Cortex-M lineup is registered (M0, M0+, M3, M4, M7, M23, M33, M35P, M52, M55, M85), each with a sensible default ISA; the optional-DSP M33/M35P and optional-MVE M52/M55/M85 cases can be expressed via the isa= kwarg. No pass reads the config yet, so this change is purely plumbing — but it positions both the upcoming AOT scratch-buffer sizing work (#16580) and the M0+ (#17646) / M33 (#17644) backend support to plug in without re-plumbing the call site. Actually building for the new variants still requires Phase 2's MPS2 platform glue. CortexMTester gains an optional config kwarg, and the Pico 2 MLP example now constructs CortexMPassManager with cpu='cortex-m33' to match the RP2350 hardware it targets. Authored with Claude. 
--- backends/arm/scripts/aot_arm_compiler.py | 31 +++++- backends/cortex_m/compile_config.py | 98 +++++++++++++++++ .../cortex_m/passes/cortex_m_pass_manager.py | 7 +- .../cortex_m/test/misc/test_compile_config.py | 103 ++++++++++++++++++ backends/cortex_m/test/tester.py | 23 +++- .../pico2/export_mlp_mnist_cmsis.py | 6 +- 6 files changed, 255 insertions(+), 13 deletions(-) create mode 100644 backends/cortex_m/compile_config.py create mode 100644 backends/cortex_m/test/misc/test_compile_config.py diff --git a/backends/arm/scripts/aot_arm_compiler.py b/backends/arm/scripts/aot_arm_compiler.py index 19b056787d6..4b2065adc25 100644 --- a/backends/arm/scripts/aot_arm_compiler.py +++ b/backends/arm/scripts/aot_arm_compiler.py @@ -33,6 +33,7 @@ from executorch.backends.arm.util._factory import create_partitioner, create_quantizer from executorch.backends.arm.vgf import VgfCompileSpec +from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import ( @@ -465,7 +466,17 @@ def forward(self, x): "TOSA-1.0+INT", "TOSA-1.0+FP", "TOSA-1.0+INT+int16", + "cortex-m0+int8", + "cortex-m0plus+int8", + "cortex-m3+int8", + "cortex-m4+int8", + "cortex-m7+int8", + "cortex-m23+int8", + "cortex-m33+int8", + "cortex-m35p+int8", + "cortex-m52+int8", "cortex-m55+int8", + "cortex-m85+int8", ] @@ -566,7 +577,7 @@ def _get_args(): required=False, default="ethos-u55-128", choices=TARGETS, - help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m55+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}", + help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}", ) # TODO: Remove --evaluate and --evaluate_config completely after a suitable time. 
# They are deprecated and no longer functional in this script. @@ -860,9 +871,14 @@ def _to_edge_cortex_m( model: GraphModule, example_inputs: Tuple[torch.Tensor], calibration_samples: Optional[List[Tuple[torch.Tensor, ...]]], + config: CortexMCompileConfig, ): """Cortex-M/CMSIS-NN compilation path with no delegation.""" - logging.info("Using Cortex-M/CMSIS-NN compilation path (no delegation)") + logging.info( + "Using Cortex-M/CMSIS-NN compilation path for cpu=%s isa=%s", + config.cpu, + config.isa, + ) def _to_channels_last(x): if isinstance(x, torch.Tensor): @@ -915,7 +931,7 @@ def _to_channels_last(x): ), ) - pass_manager = CortexMPassManager(edge.exported_program()) + pass_manager = CortexMPassManager(edge.exported_program(), config=config) edge._edge_programs["forward"] = pass_manager.transform() return model_quant, edge @@ -1007,12 +1023,14 @@ def main() -> None: # noqa: C901 else: quant_mode = None - if args.target == "cortex-m55+int8": + if args.target.startswith("cortex-m"): # Cortex-M path: CMSIS-NN portable kernels, no delegation + cortex_m_config = CortexMCompileConfig.from_target_string(args.target) if args.delegate: logging.warning( - "--delegate is ignored for target 'cortex-m55+int8' " - "(this target does not use delegated ops)." + "--delegate is ignored for target %r " + "(this target does not use delegated ops).", + args.target, ) args.delegate = False model_quant, edge = _to_edge_cortex_m( @@ -1021,6 +1039,7 @@ def main() -> None: # noqa: C901 model, example_inputs, calibration_samples, + cortex_m_config, ) elif args.delegate: # As we can target multiple output encodings, one must diff --git a/backends/cortex_m/compile_config.py b/backends/cortex_m/compile_config.py new file mode 100644 index 00000000000..93795386ec7 --- /dev/null +++ b/backends/cortex_m/compile_config.py @@ -0,0 +1,98 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +Cpu = Literal[ + "cortex-m0", + "cortex-m0plus", + "cortex-m3", + "cortex-m4", + "cortex-m7", + "cortex-m23", + "cortex-m33", + "cortex-m35p", + "cortex-m52", + "cortex-m55", + "cortex-m85", +] +Isa = Literal["scalar", "dsp", "mve"] + +# Default ISA per CPU follows the most common configuration each core is +# shipped with. M33/M35P optionally lack DSP, and M52/M55/M85 optionally +# lack MVE; callers can pass `isa=` explicitly to override. +_CPU_DEFAULT_ISA: dict[str, str] = { + "cortex-m0": "scalar", + "cortex-m0plus": "scalar", + "cortex-m3": "scalar", + "cortex-m4": "dsp", + "cortex-m7": "dsp", + "cortex-m23": "scalar", + "cortex-m33": "dsp", + "cortex-m35p": "dsp", + "cortex-m52": "mve", + "cortex-m55": "mve", + "cortex-m85": "mve", +} + +_SUPPORTED_FEATURES: frozenset[str] = frozenset({"int8"}) + + +@dataclass(frozen=True) +class CortexMCompileConfig: + """AOT compile configuration for the Cortex-M backend. + + `cpu` and `isa` are consumed by passes that need to differ by target — most + notably any future AOT scratch-buffer sizing — and threaded through the + build system as the `-mcpu=` value. + + The current default matches pre-config behavior (M55 + MVE) so callers that + don't opt in see no change. + """ + + cpu: Cpu = "cortex-m55" + isa: Isa | None = None + + def __post_init__(self) -> None: + if self.cpu not in _CPU_DEFAULT_ISA: + raise ValueError( + f"Unsupported Cortex-M CPU: {self.cpu!r}. " + f"Supported: {sorted(_CPU_DEFAULT_ISA)}" + ) + if self.isa is None: + # frozen dataclass: use object.__setattr__ to fill default ISA. 
+ object.__setattr__(self, "isa", _CPU_DEFAULT_ISA[self.cpu]) + + @classmethod + def from_target_string(cls, target: str) -> CortexMCompileConfig: + """Parse `cortex-m+int8` strings used by `aot_arm_compiler.py`. + + Today only `+int8` is supported. The suffix is required so the target + string remains explicit about the data type contract. + """ + cpu, sep, features = target.partition("+") + if not sep: + raise ValueError( + f"Cortex-M target string must include a feature suffix " + f"(e.g. '+int8'), got: {target!r}" + ) + feature_set = set(features.split("+")) + unknown = feature_set - _SUPPORTED_FEATURES + if unknown or "int8" not in feature_set: + raise ValueError( + f"Cortex-M target string must be '+int8' " + f"(supported features: {sorted(_SUPPORTED_FEATURES)}), " + f"got: {target!r}" + ) + if cpu not in _CPU_DEFAULT_ISA: + raise ValueError( + f"Unsupported Cortex-M CPU in target string: {cpu!r}. " + f"Supported: {sorted(_CPU_DEFAULT_ISA)}" + ) + return cls(cpu=cpu) # type: ignore[arg-type] diff --git a/backends/cortex_m/passes/cortex_m_pass_manager.py b/backends/cortex_m/passes/cortex_m_pass_manager.py index 074eb6118d0..0355783179e 100644 --- a/backends/cortex_m/passes/cortex_m_pass_manager.py +++ b/backends/cortex_m/passes/cortex_m_pass_manager.py @@ -11,6 +11,7 @@ FoldAndAnnotateQParamsPass, ScalarsToAttributePass, ) +from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass from executorch.backends.transforms.replace_scalar_with_tensor import ( ReplaceScalarWithTensorArgPass, @@ -57,7 +58,10 @@ class CortexMPassManager(PassManager): ] def __init__( - self, exported_program, passes: Optional[list[PassClass]] = None + self, + exported_program, + passes: Optional[list[PassClass]] = None, + config: Optional[CortexMCompileConfig] = None, ) -> None: super().__init__(passes=[]) self.exported_program = exported_program @@ -65,6 +69,7 @@ def __init__( self.passes: 
list[PassClass] = ( # type: ignore[assignment] passes if passes is not None else self.pass_list # type: ignore[assignment] ) + self.config: CortexMCompileConfig = config or CortexMCompileConfig() def transform_for_annotation(self, model): passes = self.pass_list_transform_for_annotation diff --git a/backends/cortex_m/test/misc/test_compile_config.py b/backends/cortex_m/test/misc/test_compile_config.py new file mode 100644 index 00000000000..a2f5805ef13 --- /dev/null +++ b/backends/cortex_m/test/misc/test_compile_config.py @@ -0,0 +1,103 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from importlib.util import find_spec + +import pytest + +from executorch.backends.cortex_m.compile_config import CortexMCompileConfig + +_HAS_CMSIS_NN = find_spec("cmsis_nn") is not None + + +class TestCortexMCompileConfig: + def test_default_is_m55_mve(self): + config = CortexMCompileConfig() + assert config.cpu == "cortex-m55" + assert config.isa == "mve" + + @pytest.mark.parametrize( + "target_string,expected_cpu,expected_isa", + [ + ("cortex-m0+int8", "cortex-m0", "scalar"), + ("cortex-m0plus+int8", "cortex-m0plus", "scalar"), + ("cortex-m3+int8", "cortex-m3", "scalar"), + ("cortex-m4+int8", "cortex-m4", "dsp"), + ("cortex-m7+int8", "cortex-m7", "dsp"), + ("cortex-m23+int8", "cortex-m23", "scalar"), + ("cortex-m33+int8", "cortex-m33", "dsp"), + ("cortex-m35p+int8", "cortex-m35p", "dsp"), + ("cortex-m52+int8", "cortex-m52", "mve"), + ("cortex-m55+int8", "cortex-m55", "mve"), + ("cortex-m85+int8", "cortex-m85", "mve"), + ], + ) + def test_from_target_string(self, target_string, expected_cpu, expected_isa): + config = CortexMCompileConfig.from_target_string(target_string) + assert config.cpu == expected_cpu + assert config.isa == expected_isa + + def test_from_target_string_rejects_unknown_cpu(self): + with 
pytest.raises(ValueError, match="cortex-m999"): + CortexMCompileConfig.from_target_string("cortex-m999+int8") + + @pytest.mark.parametrize( + "target_string", + [ + "cortex-m55", # missing feature suffix + "cortex-m55+int8+int16", # unsupported extra feature + "cortex-m55+", # trailing plus + "cortex-m55+fp16", # unknown feature + ], + ) + def test_from_target_string_rejects_invalid_features(self, target_string): + with pytest.raises(ValueError): + CortexMCompileConfig.from_target_string(target_string) + + def test_default_matches_m55_target_string(self): + # Regression guard: pre-Phase-1 behavior was M55+MVE; the default + # constructor must remain equivalent to parsing the existing target. + assert CortexMCompileConfig() == CortexMCompileConfig.from_target_string( + "cortex-m55+int8" + ) + + def test_is_hashable_and_frozen(self): + from dataclasses import FrozenInstanceError + + config = CortexMCompileConfig(cpu="cortex-m33") + assert hash(config) == hash(CortexMCompileConfig(cpu="cortex-m33")) + assert {config, CortexMCompileConfig(cpu="cortex-m33")} == {config} + with pytest.raises(FrozenInstanceError): + config.cpu = "cortex-m55" # type: ignore[misc] + + def test_explicit_isa_override(self): + config = CortexMCompileConfig(cpu="cortex-m33", isa="scalar") + assert config.cpu == "cortex-m33" + assert config.isa == "scalar" + + +@pytest.mark.skipif( + not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn" +) +class TestPassManagerConfigWiring: + def test_default_config_is_m55(self): + from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( + CortexMPassManager, + ) + + pm = CortexMPassManager(exported_program=None) + assert pm.config.cpu == "cortex-m55" + assert pm.config.isa == "mve" + + def test_explicit_config_threaded(self): + from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( + CortexMPassManager, + ) + + config = CortexMCompileConfig(cpu="cortex-m33") + pm = CortexMPassManager(exported_program=None, config=config) 
+ assert pm.config.cpu == "cortex-m33" + assert pm.config.isa == "dsp" diff --git a/backends/cortex_m/test/tester.py b/backends/cortex_m/test/tester.py index 75575c80b4e..e888e2e6056 100644 --- a/backends/cortex_m/test/tester.py +++ b/backends/cortex_m/test/tester.py @@ -6,11 +6,13 @@ from collections.abc import Callable from dataclasses import dataclass -from typing import Any +from functools import partial +from typing import Any, Optional import torch from executorch.backends.arm.test.common import get_u55_compile_spec from executorch.backends.arm.test.tester.arm_tester import Serialize +from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer from executorch.backends.test.harness import Tester as TesterBase @@ -48,9 +50,12 @@ def __init__(self): class CortexMRunPasses(RunPasses): - def __init__(self): + def __init__(self, config: Optional[CortexMCompileConfig] = None): + config = config or CortexMCompileConfig() + # The base RunPasses constructs the pass manager as `cls(ep, pass_list)`. + # Pre-bind the config so it flows through that 2-arg call. 
super().__init__( - CortexMPassManager, + partial(CortexMPassManager, config=config), # type: ignore[arg-type] CortexMPassManager.pass_list, ) @@ -73,12 +78,20 @@ def __init__(self): class CortexMTester(TesterBase): - def __init__(self, module, example_inputs): + def __init__( + self, + module, + example_inputs, + config: Optional[CortexMCompileConfig] = None, + ): if callable(example_inputs): resolved_example_inputs = example_inputs() else: resolved_example_inputs = example_inputs - super().__init__(module, resolved_example_inputs, cortex_m_stage_classes) + config = config or CortexMCompileConfig() + stage_classes = dict(cortex_m_stage_classes) + stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(config=config) + super().__init__(module, resolved_example_inputs, stage_classes) def test_dialect( self, diff --git a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py index 43ff4a41229..c775ef0576d 100644 --- a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py +++ b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py @@ -24,6 +24,7 @@ import torch +from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge @@ -94,7 +95,10 @@ def export_to_pte(quantized_model, example_input, output_path: str): logger.info("Edge program created") logger.info("Applying Cortex-M optimization passes...") - pass_manager = CortexMPassManager(edge_program.exported_program()) + pass_manager = CortexMPassManager( + edge_program.exported_program(), + config=CortexMCompileConfig(cpu="cortex-m33"), + ) transformed_ep = pass_manager.transform() edge_program = to_edge(transformed_ep, compile_config=edge_config) From 3000f9c2cd80d714379931dc2d7b7b9b08f3ba09 Mon Sep 
17 00:00:00 2001 From: RJ Ascani Date: Mon, 11 May 2026 15:30:28 -0700 Subject: [PATCH 2/5] Fix lints --- backends/cortex_m/test/misc/test_compile_config.py | 4 +--- backends/cortex_m/test/tester.py | 6 ++++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backends/cortex_m/test/misc/test_compile_config.py b/backends/cortex_m/test/misc/test_compile_config.py index a2f5805ef13..86ed90bfe43 100644 --- a/backends/cortex_m/test/misc/test_compile_config.py +++ b/backends/cortex_m/test/misc/test_compile_config.py @@ -79,9 +79,7 @@ def test_explicit_isa_override(self): assert config.isa == "scalar" -@pytest.mark.skipif( - not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn" -) +@pytest.mark.skipif(not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn") class TestPassManagerConfigWiring: def test_default_config_is_m55(self): from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( diff --git a/backends/cortex_m/test/tester.py b/backends/cortex_m/test/tester.py index e888e2e6056..1e97899245d 100644 --- a/backends/cortex_m/test/tester.py +++ b/backends/cortex_m/test/tester.py @@ -56,7 +56,7 @@ def __init__(self, config: Optional[CortexMCompileConfig] = None): # Pre-bind the config so it flows through that 2-arg call. 
super().__init__( partial(CortexMPassManager, config=config), # type: ignore[arg-type] - CortexMPassManager.pass_list, + CortexMPassManager.pass_list, # type: ignore[arg-type] ) @@ -89,7 +89,9 @@ def __init__( else: resolved_example_inputs = example_inputs config = config or CortexMCompileConfig() - stage_classes = dict(cortex_m_stage_classes) + stage_classes: dict[StageType, Callable[..., Any]] = dict( + cortex_m_stage_classes + ) stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(config=config) super().__init__(module, resolved_example_inputs, stage_classes) From efead9ddfabb03553e44df8f28c435332ba87d57 Mon Sep 17 00:00:00 2001 From: RJ Ascani Date: Tue, 12 May 2026 17:20:49 -0700 Subject: [PATCH 3/5] Cortex-M backend: address #19470 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns CortexMTargetConfig with the design Erik proposes in #19505 while keeping the wider plumbing in place. The earlier CortexMCompileConfig is renamed to CortexMTargetConfig (and its file moved to target_config.py) to disambiguate from EdgeCompileConfig — this dataclass models a compilation *target*, not a step in the compile pipeline. Adopted from Erik's feedback: * CortexM enum replaces the Cpu/Isa Literals — typo-safe and IDE-friendly. * `.backend` property returns `cmsis_nn.Backend` directly, resolved via `cmsis_nn.resolve_backend(cmsis_nn.CortexM.<NAME>)`. The hand-rolled `_CPU_DEFAULT_ISA` dict is gone — cmsis_nn is the single source of truth for the CPU → backend mapping. * CortexMPass abstract base class added; CortexMPassManager.transform() uses signature inspection to inject both `exported_program` and `target_config` into passes that declare them (mirroring Erik's proposal). The pass manager also gains stricter validation — the exported_program must be a real ExportedProgram and the pass list must contain classes, not instances — failing fast instead of producing opaque errors deep in _transform. 
* cmsis_nn is now a hard dependency for the cortex_m tests: the top-level `import cmsis_nn` in test_target_config.py replaces the previous skipif-on-find_spec dance, addressing Erik's concern that skipping tests on missing deps can mask regressions. * `+int8` dropped from cortex-m target strings — quantization is a result of the export flow, not a CPU attribute. TARGETS, help text, from_target_string, CI script and README aligned. * Logging in `_to_edge_cortex_m` and the --delegate-ignored warning switched to f-strings. * `__init__` docstring on CortexMPassManager documents the exported_program / passes / target_config defaults (including the M55+MVE fallback that matches pre-config behaviour). * `import-not-found` removed from the cmsis_nn type-ignore — only `import-untyped` actually fires, and if cmsis_nn ever ships stubs the unused ignore will become a tripwire. Kept the optional `isa` override field for the optional-extension cases (M55 without MVE, M33 without DSP, etc.) — different from Erik's enum-only design, but the override remains useful for cores where ISA extensions are optional. A `_SUPPORTED_BACKENDS` table encodes the per-CPU architectural capability set so overrides validate at construction; forcing MVE on an M0 raises ValueError with the actual supported list. The SCALAR ⊂ DSP ⊂ MVE supersession reflects that an MVE-capable core also runs DSP and scalar code. Defers Erik's `ANY` proposal. In #19505 ANY falls back to MVE, but an honest "any cortex-m" choice would have to do worst-case scratch buffer planning across the ISA classes (which may not be MVE). Deferring until the scratch-buffer side lands and we can implement the worst-case analysis properly. Authored with Claude. 
--- .ci/scripts/test_cortex_m_e2e.sh | 4 +- backends/arm/scripts/aot_arm_compiler.py | 45 ++++--- backends/cortex_m/compile_config.py | 98 --------------- backends/cortex_m/passes/__init__.py | 1 + backends/cortex_m/passes/cortex_m_pass.py | 35 ++++++ .../cortex_m/passes/cortex_m_pass_manager.py | 61 ++++++--- backends/cortex_m/target_config.py | 110 +++++++++++++++++ .../cortex_m/test/misc/test_compile_config.py | 101 --------------- .../cortex_m/test/misc/test_target_config.py | 116 ++++++++++++++++++ backends/cortex_m/test/tester.py | 18 +-- .../pico2/export_mlp_mnist_cmsis.py | 6 +- 11 files changed, 342 insertions(+), 253 deletions(-) delete mode 100644 backends/cortex_m/compile_config.py create mode 100644 backends/cortex_m/passes/cortex_m_pass.py create mode 100644 backends/cortex_m/target_config.py delete mode 100644 backends/cortex_m/test/misc/test_compile_config.py create mode 100644 backends/cortex_m/test/misc/test_target_config.py diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh index c6e643f118c..de47a45ea0d 100755 --- a/.ci/scripts/test_cortex_m_e2e.sh +++ b/.ci/scripts/test_cortex_m_e2e.sh @@ -17,9 +17,9 @@ MODEL=$1 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")") et_root_dir=$(realpath "${script_dir}/../..") -# Quantization is the default for the cortex-m55+int8 target; run.sh's +# Quantization is the default for the cortex-m55 target; run.sh's # arg parser only recognizes --no_quantize, so we omit any explicit flag. 
bash "${et_root_dir}/examples/arm/run.sh" \ --model_name="${MODEL}" \ - --target=cortex-m55+int8 \ + --target=cortex-m55 \ --bundleio diff --git a/backends/arm/scripts/aot_arm_compiler.py b/backends/arm/scripts/aot_arm_compiler.py index 4b2065adc25..8d841ef61ff 100644 --- a/backends/arm/scripts/aot_arm_compiler.py +++ b/backends/arm/scripts/aot_arm_compiler.py @@ -33,13 +33,13 @@ from executorch.backends.arm.util._factory import create_partitioner, create_quantizer from executorch.backends.arm.vgf import VgfCompileSpec -from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import ( ReplaceQuantNodesPass, ) from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer +from executorch.backends.cortex_m.target_config import CortexMTargetConfig from executorch.devtools import BundledProgram, generate_etrecord from executorch.devtools.backend_debug import get_delegation_info from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite @@ -466,17 +466,16 @@ def forward(self, x): "TOSA-1.0+INT", "TOSA-1.0+FP", "TOSA-1.0+INT+int16", - "cortex-m0+int8", - "cortex-m0plus+int8", - "cortex-m3+int8", - "cortex-m4+int8", - "cortex-m7+int8", - "cortex-m23+int8", - "cortex-m33+int8", - "cortex-m35p+int8", - "cortex-m52+int8", - "cortex-m55+int8", - "cortex-m85+int8", + "cortex-m0", + "cortex-m0plus", + "cortex-m3", + "cortex-m4", + "cortex-m7", + "cortex-m23", + "cortex-m33", + "cortex-m35p", + "cortex-m55", + "cortex-m85", ] @@ -577,7 +576,7 @@ def _get_args(): required=False, default="ethos-u55-128", choices=TARGETS, - help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}", + help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. 
For non-delegated: cortex-m (CMSIS-NN portable kernels). Valid targets: {TARGETS}", ) # TODO: Remove --evaluate and --evaluate_config completely after a suitable time. # They are deprecated and no longer functional in this script. @@ -871,13 +870,12 @@ def _to_edge_cortex_m( model: GraphModule, example_inputs: Tuple[torch.Tensor], calibration_samples: Optional[List[Tuple[torch.Tensor, ...]]], - config: CortexMCompileConfig, + target_config: CortexMTargetConfig, ): """Cortex-M/CMSIS-NN compilation path with no delegation.""" logging.info( - "Using Cortex-M/CMSIS-NN compilation path for cpu=%s isa=%s", - config.cpu, - config.isa, + f"Using Cortex-M/CMSIS-NN compilation path for cpu={target_config.cpu.name} " + f"backend={target_config.backend.name}" ) def _to_channels_last(x): @@ -931,7 +929,9 @@ def _to_channels_last(x): ), ) - pass_manager = CortexMPassManager(edge.exported_program(), config=config) + pass_manager = CortexMPassManager( + edge.exported_program(), target_config=target_config + ) edge._edge_programs["forward"] = pass_manager.transform() return model_quant, edge @@ -1025,12 +1025,11 @@ def main() -> None: # noqa: C901 if args.target.startswith("cortex-m"): # Cortex-M path: CMSIS-NN portable kernels, no delegation - cortex_m_config = CortexMCompileConfig.from_target_string(args.target) + target_config = CortexMTargetConfig.from_target_string(args.target) if args.delegate: logging.warning( - "--delegate is ignored for target %r " - "(this target does not use delegated ops).", - args.target, + f"--delegate is ignored for target {args.target!r} " + "(this target does not use delegated ops)." 
) args.delegate = False model_quant, edge = _to_edge_cortex_m( @@ -1039,7 +1038,7 @@ def main() -> None: # noqa: C901 model, example_inputs, calibration_samples, - cortex_m_config, + target_config, ) elif args.delegate: # As we can target multiple output encodings, one must diff --git a/backends/cortex_m/compile_config.py b/backends/cortex_m/compile_config.py deleted file mode 100644 index 93795386ec7..00000000000 --- a/backends/cortex_m/compile_config.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Literal - -Cpu = Literal[ - "cortex-m0", - "cortex-m0plus", - "cortex-m3", - "cortex-m4", - "cortex-m7", - "cortex-m23", - "cortex-m33", - "cortex-m35p", - "cortex-m52", - "cortex-m55", - "cortex-m85", -] -Isa = Literal["scalar", "dsp", "mve"] - -# Default ISA per CPU follows the most common configuration each core is -# shipped with. M33/M35P optionally lack DSP, and M52/M55/M85 optionally -# lack MVE; callers can pass `isa=` explicitly to override. -_CPU_DEFAULT_ISA: dict[str, str] = { - "cortex-m0": "scalar", - "cortex-m0plus": "scalar", - "cortex-m3": "scalar", - "cortex-m4": "dsp", - "cortex-m7": "dsp", - "cortex-m23": "scalar", - "cortex-m33": "dsp", - "cortex-m35p": "dsp", - "cortex-m52": "mve", - "cortex-m55": "mve", - "cortex-m85": "mve", -} - -_SUPPORTED_FEATURES: frozenset[str] = frozenset({"int8"}) - - -@dataclass(frozen=True) -class CortexMCompileConfig: - """AOT compile configuration for the Cortex-M backend. - - `cpu` and `isa` are consumed by passes that need to differ by target — most - notably any future AOT scratch-buffer sizing — and threaded through the - build system as the `-mcpu=` value. 
- - The current default matches pre-config behavior (M55 + MVE) so callers that - don't opt in see no change. - """ - - cpu: Cpu = "cortex-m55" - isa: Isa | None = None - - def __post_init__(self) -> None: - if self.cpu not in _CPU_DEFAULT_ISA: - raise ValueError( - f"Unsupported Cortex-M CPU: {self.cpu!r}. " - f"Supported: {sorted(_CPU_DEFAULT_ISA)}" - ) - if self.isa is None: - # frozen dataclass: use object.__setattr__ to fill default ISA. - object.__setattr__(self, "isa", _CPU_DEFAULT_ISA[self.cpu]) - - @classmethod - def from_target_string(cls, target: str) -> CortexMCompileConfig: - """Parse `cortex-m+int8` strings used by `aot_arm_compiler.py`. - - Today only `+int8` is supported. The suffix is required so the target - string remains explicit about the data type contract. - """ - cpu, sep, features = target.partition("+") - if not sep: - raise ValueError( - f"Cortex-M target string must include a feature suffix " - f"(e.g. '+int8'), got: {target!r}" - ) - feature_set = set(features.split("+")) - unknown = feature_set - _SUPPORTED_FEATURES - if unknown or "int8" not in feature_set: - raise ValueError( - f"Cortex-M target string must be '+int8' " - f"(supported features: {sorted(_SUPPORTED_FEATURES)}), " - f"got: {target!r}" - ) - if cpu not in _CPU_DEFAULT_ISA: - raise ValueError( - f"Unsupported Cortex-M CPU in target string: {cpu!r}. 
" - f"Supported: {sorted(_CPU_DEFAULT_ISA)}" - ) - return cls(cpu=cpu) # type: ignore[arg-type] diff --git a/backends/cortex_m/passes/__init__.py b/backends/cortex_m/passes/__init__.py index b1b67add413..92179ec6654 100644 --- a/backends/cortex_m/passes/__init__.py +++ b/backends/cortex_m/passes/__init__.py @@ -36,6 +36,7 @@ def _ensure_cortex_m_dependencies() -> None: from .activation_fusion_pass import ActivationFusionPass # noqa from .clamp_hardswish_pass import ClampHardswishPass # noqa from .convert_to_cortex_m_pass import ConvertToCortexMPass # noqa +from .cortex_m_pass import CortexMPass # noqa from .decompose_hardswish_pass import DecomposeHardswishPass # noqa from .decompose_mean_pass import DecomposeMeanPass # noqa from .quantized_clamp_activation_pass import QuantizedClampActivationPass # noqa diff --git a/backends/cortex_m/passes/cortex_m_pass.py b/backends/cortex_m/passes/cortex_m_pass.py new file mode 100644 index 00000000000..5b3e98e9a96 --- /dev/null +++ b/backends/cortex_m/passes/cortex_m_pass.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from executorch.backends.cortex_m.target_config import CortexMTargetConfig +from executorch.exir.pass_base import ExportPass +from torch.export import ExportedProgram + + +class CortexMPass(ExportPass): + """Base class for passes that need the Cortex-M target config. + + Passes that subclass this declare `exported_program` and `target_config` + in their `__init__`; `CortexMPassManager.transform()` injects both + automatically when running the pass list. 
+ """ + + def __init__( + self, + exported_program: ExportedProgram, + target_config: CortexMTargetConfig, + ) -> None: + super().__init__() + self._exported_program = exported_program + self._target_config = target_config + + @property + def exported_program(self) -> ExportedProgram: + return self._exported_program + + @property + def target_config(self) -> CortexMTargetConfig: + return self._target_config diff --git a/backends/cortex_m/passes/cortex_m_pass_manager.py b/backends/cortex_m/passes/cortex_m_pass_manager.py index 0355783179e..f95587a00d3 100644 --- a/backends/cortex_m/passes/cortex_m_pass_manager.py +++ b/backends/cortex_m/passes/cortex_m_pass_manager.py @@ -5,13 +5,13 @@ import inspect -from typing import Callable, cast, Optional, Type +from typing import Any, Optional, Type from executorch.backends.arm._passes import ( FoldAndAnnotateQParamsPass, ScalarsToAttributePass, ) -from executorch.backends.cortex_m.compile_config import CortexMCompileConfig +from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass from executorch.backends.transforms.replace_scalar_with_tensor import ( ReplaceScalarWithTensorArgPass, @@ -20,9 +20,6 @@ from executorch.exir.pass_manager import PassManager from executorch.exir.program._program import _transform, lift_constant_tensor_pass from torch.export import ExportedProgram -from torch.fx.passes.infra.pass_base import PassResult - -from torch.nn import Module from .activation_fusion_pass import ActivationFusionPass from .clamp_hardswish_pass import ClampHardswishPass @@ -59,17 +56,32 @@ class CortexMPassManager(PassManager): def __init__( self, - exported_program, + exported_program: ExportedProgram | None, passes: Optional[list[PassClass]] = None, - config: Optional[CortexMCompileConfig] = None, + target_config: Optional[CortexMTargetConfig] = None, ) -> None: + """Initialize the Cortex-M pass manager. 
+ + Args: + exported_program: The exported program to transform. Required + before calling ``transform()``; may be ``None`` for callers + that only use ``transform_for_annotation()``. + passes: Optional override of the pass list. Defaults to + ``CortexMPassManager.pass_list``. + target_config: Compilation target for passes that need it. + Defaults to ``CortexMTargetConfig(cpu=CortexM.M55)``, which + resolves through cmsis_nn to the MVE backend — matching the + pre-config historical behaviour. + """ super().__init__(passes=[]) self.exported_program = exported_program # PassManager.passes is typed as callables; this manager stores pass classes which are initialized at transform time with the exported_program. self.passes: list[PassClass] = ( # type: ignore[assignment] passes if passes is not None else self.pass_list # type: ignore[assignment] ) - self.config: CortexMCompileConfig = config or CortexMCompileConfig() + self.target_config: CortexMTargetConfig = target_config or CortexMTargetConfig( + cpu=CortexM.M55 + ) def transform_for_annotation(self, model): passes = self.pass_list_transform_for_annotation @@ -78,18 +90,31 @@ def transform_for_annotation(self, model): return model def transform(self) -> ExportedProgram: - ep = self.exported_program + exported_program = self.exported_program + if not isinstance(exported_program, ExportedProgram): + raise ValueError( + f"{type(self).__name__}.transform() needs a real ExportedProgram, " + f"got {exported_program!r}" + ) + for pass_cls in self.passes: + if not isinstance(pass_cls, type): + raise ValueError( + f"{type(self).__name__} expects pass classes, not instances; " + f"got {pass_cls!r}" + ) + signature = inspect.signature(pass_cls) + kwargs: dict[str, Any] = {} if "exported_program" in signature.parameters: - ep_pass_ctor = cast(Callable[[ExportedProgram], ExportPass], pass_cls) - transform_pass = ep_pass_ctor(ep) - else: - transform_pass = pass_cls() - pass_callable = cast(Callable[[Module], PassResult], 
transform_pass) - ep = _transform(ep, pass_callable) + kwargs["exported_program"] = exported_program + if "target_config" in signature.parameters: + kwargs["target_config"] = self.target_config + + transform_pass = pass_cls(**kwargs) + exported_program = _transform(exported_program, transform_pass) # All constant tensors should be lifted to buffers at this point, re-run - # lift_constant_tensor_pass in case new ones have been introduced by the passes above. - ep = lift_constant_tensor_pass(ep) - return ep + # lift_constant_tensor_pass in case new ones have been introduced. + exported_program = lift_constant_tensor_pass(exported_program) + return exported_program diff --git a/backends/cortex_m/target_config.py b/backends/cortex_m/target_config.py new file mode 100644 index 00000000000..2a53869976b --- /dev/null +++ b/backends/cortex_m/target_config.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass +from enum import auto, Enum +from typing import Optional + +import cmsis_nn # type: ignore[import-untyped] + + +class CortexM(Enum): + """Cortex-M CPU variant. Names mirror cmsis_nn.CortexM so the cmsis_nn + enum can be looked up by name.""" + + M0 = auto() + M0PLUS = auto() + M3 = auto() + M4 = auto() + M7 = auto() + M23 = auto() + M33 = auto() + M35P = auto() + M55 = auto() + M85 = auto() + + +# Per-CPU set of cmsis_nn backends the core can execute. SCALAR is +# universal; DSP requires the Armv7E-M or Armv8-M-Mainline DSP option; +# MVE requires Armv8.1-M Mainline with the MVE extension. The supersession +# (SCALAR < DSP < MVE) reflects that an MVE-capable core also runs DSP +# and scalar code, which is what makes "M55 without MVE" → DSP override +# legitimate. 
+_SUPPORTED_BACKENDS: dict[CortexM, frozenset[cmsis_nn.Backend]] = { + CortexM.M0: frozenset({cmsis_nn.Backend.SCALAR}), + CortexM.M0PLUS: frozenset({cmsis_nn.Backend.SCALAR}), + CortexM.M3: frozenset({cmsis_nn.Backend.SCALAR}), + CortexM.M23: frozenset({cmsis_nn.Backend.SCALAR}), + CortexM.M4: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}), + CortexM.M7: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}), + CortexM.M33: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}), + CortexM.M35P: frozenset({cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP}), + CortexM.M55: frozenset( + {cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP, cmsis_nn.Backend.MVE} + ), + CortexM.M85: frozenset( + {cmsis_nn.Backend.SCALAR, cmsis_nn.Backend.DSP, cmsis_nn.Backend.MVE} + ), +} + + +@dataclass(frozen=True) +class CortexMTargetConfig: + """AOT compile target configuration for the Cortex-M backend. + + `cpu` selects the CPU variant. `isa` optionally overrides the cmsis_nn + backend that would normally be derived from `cpu` — useful for cores + with optional ISA extensions (M55 without MVE, M33 without DSP, etc.). + Overrides are validated against the CPU's architectural capability set + on construction; e.g. forcing MVE on an M0 raises ValueError. 
+ """ + + cpu: CortexM + isa: Optional[cmsis_nn.Backend] = None + + def __post_init__(self) -> None: + if self.isa is None: + return + supported = _SUPPORTED_BACKENDS.get(self.cpu) + if supported is None or self.isa not in supported: + allowed = sorted(b.name for b in supported) if supported else [] + raise ValueError( + f"Backend {self.isa.name} is not supported on " + f"{self.cpu.name}; supported: {allowed}" + ) + + @property + def backend(self) -> cmsis_nn.Backend: + if self.isa is not None: + return self.isa + try: + cmsis_member = getattr(cmsis_nn.CortexM, self.cpu.name) + except AttributeError as e: + raise ValueError( + f"cmsis_nn does not yet support {self.cpu.name}; pass an " + f"explicit `isa=` override or wait for upstream support." + ) from e + return cmsis_nn.resolve_backend(cmsis_member) + + @classmethod + def from_target_string(cls, target: str) -> CortexMTargetConfig: + """Parse a `cortex-m` target string.""" + if not target.startswith("cortex-m"): + raise ValueError( + f"Cortex-M target string must start with 'cortex-m', " + f"got: {target!r}" + ) + enum_name = "M" + target[len("cortex-m") :].upper() + try: + cpu = CortexM[enum_name] + except KeyError as e: + raise ValueError( + f"Unsupported Cortex-M target string: {target!r}. " + f"Supported: {sorted('cortex-m' + m.name[1:].lower() for m in CortexM)}" + ) from e + return cls(cpu=cpu) diff --git a/backends/cortex_m/test/misc/test_compile_config.py b/backends/cortex_m/test/misc/test_compile_config.py deleted file mode 100644 index 86ed90bfe43..00000000000 --- a/backends/cortex_m/test/misc/test_compile_config.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from importlib.util import find_spec - -import pytest - -from executorch.backends.cortex_m.compile_config import CortexMCompileConfig - -_HAS_CMSIS_NN = find_spec("cmsis_nn") is not None - - -class TestCortexMCompileConfig: - def test_default_is_m55_mve(self): - config = CortexMCompileConfig() - assert config.cpu == "cortex-m55" - assert config.isa == "mve" - - @pytest.mark.parametrize( - "target_string,expected_cpu,expected_isa", - [ - ("cortex-m0+int8", "cortex-m0", "scalar"), - ("cortex-m0plus+int8", "cortex-m0plus", "scalar"), - ("cortex-m3+int8", "cortex-m3", "scalar"), - ("cortex-m4+int8", "cortex-m4", "dsp"), - ("cortex-m7+int8", "cortex-m7", "dsp"), - ("cortex-m23+int8", "cortex-m23", "scalar"), - ("cortex-m33+int8", "cortex-m33", "dsp"), - ("cortex-m35p+int8", "cortex-m35p", "dsp"), - ("cortex-m52+int8", "cortex-m52", "mve"), - ("cortex-m55+int8", "cortex-m55", "mve"), - ("cortex-m85+int8", "cortex-m85", "mve"), - ], - ) - def test_from_target_string(self, target_string, expected_cpu, expected_isa): - config = CortexMCompileConfig.from_target_string(target_string) - assert config.cpu == expected_cpu - assert config.isa == expected_isa - - def test_from_target_string_rejects_unknown_cpu(self): - with pytest.raises(ValueError, match="cortex-m999"): - CortexMCompileConfig.from_target_string("cortex-m999+int8") - - @pytest.mark.parametrize( - "target_string", - [ - "cortex-m55", # missing feature suffix - "cortex-m55+int8+int16", # unsupported extra feature - "cortex-m55+", # trailing plus - "cortex-m55+fp16", # unknown feature - ], - ) - def test_from_target_string_rejects_invalid_features(self, target_string): - with pytest.raises(ValueError): - CortexMCompileConfig.from_target_string(target_string) - - def test_default_matches_m55_target_string(self): - # Regression guard: pre-Phase-1 behavior was M55+MVE; the default - # constructor must remain equivalent to parsing the existing target. 
- assert CortexMCompileConfig() == CortexMCompileConfig.from_target_string( - "cortex-m55+int8" - ) - - def test_is_hashable_and_frozen(self): - from dataclasses import FrozenInstanceError - - config = CortexMCompileConfig(cpu="cortex-m33") - assert hash(config) == hash(CortexMCompileConfig(cpu="cortex-m33")) - assert {config, CortexMCompileConfig(cpu="cortex-m33")} == {config} - with pytest.raises(FrozenInstanceError): - config.cpu = "cortex-m55" # type: ignore[misc] - - def test_explicit_isa_override(self): - config = CortexMCompileConfig(cpu="cortex-m33", isa="scalar") - assert config.cpu == "cortex-m33" - assert config.isa == "scalar" - - -@pytest.mark.skipif(not _HAS_CMSIS_NN, reason="cortex_m passes require cmsis_nn") -class TestPassManagerConfigWiring: - def test_default_config_is_m55(self): - from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( - CortexMPassManager, - ) - - pm = CortexMPassManager(exported_program=None) - assert pm.config.cpu == "cortex-m55" - assert pm.config.isa == "mve" - - def test_explicit_config_threaded(self): - from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( - CortexMPassManager, - ) - - config = CortexMCompileConfig(cpu="cortex-m33") - pm = CortexMPassManager(exported_program=None, config=config) - assert pm.config.cpu == "cortex-m33" - assert pm.config.isa == "dsp" diff --git a/backends/cortex_m/test/misc/test_target_config.py b/backends/cortex_m/test/misc/test_target_config.py new file mode 100644 index 00000000000..5547bfbe125 --- /dev/null +++ b/backends/cortex_m/test/misc/test_target_config.py @@ -0,0 +1,116 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import cmsis_nn # type: ignore[import-untyped] +import pytest + +from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig + + +class TestCortexMTargetConfig: + @pytest.mark.parametrize( + "target_string,expected_cpu", + [ + ("cortex-m0", CortexM.M0), + ("cortex-m0plus", CortexM.M0PLUS), + ("cortex-m3", CortexM.M3), + ("cortex-m4", CortexM.M4), + ("cortex-m7", CortexM.M7), + ("cortex-m23", CortexM.M23), + ("cortex-m33", CortexM.M33), + ("cortex-m35p", CortexM.M35P), + ("cortex-m55", CortexM.M55), + ("cortex-m85", CortexM.M85), + ], + ) + def test_from_target_string(self, target_string, expected_cpu): + config = CortexMTargetConfig.from_target_string(target_string) + assert config.cpu == expected_cpu + + @pytest.mark.parametrize( + "cpu,expected_backend", + [ + (CortexM.M0, cmsis_nn.Backend.SCALAR), + (CortexM.M4, cmsis_nn.Backend.DSP), + (CortexM.M33, cmsis_nn.Backend.DSP), + (CortexM.M55, cmsis_nn.Backend.MVE), + (CortexM.M85, cmsis_nn.Backend.MVE), + ], + ) + def test_backend_resolved_via_cmsis_nn(self, cpu, expected_backend): + assert CortexMTargetConfig(cpu=cpu).backend == expected_backend + + @pytest.mark.parametrize( + "cpu,override", + [ + (CortexM.M55, cmsis_nn.Backend.DSP), # M55 with MVE disabled + (CortexM.M55, cmsis_nn.Backend.SCALAR), # M55 without DSP or MVE + (CortexM.M85, cmsis_nn.Backend.DSP), + (CortexM.M33, cmsis_nn.Backend.SCALAR), # M33 without DSP option + (CortexM.M4, cmsis_nn.Backend.SCALAR), # M4 without DSP intrinsics + ], + ) + def test_isa_override_compatible(self, cpu, override): + config = CortexMTargetConfig(cpu=cpu, isa=override) + assert config.backend == override + + @pytest.mark.parametrize( + "cpu,override", + [ + (CortexM.M0, cmsis_nn.Backend.DSP), # Armv6-M has no DSP + (CortexM.M0, cmsis_nn.Backend.MVE), + (CortexM.M3, cmsis_nn.Backend.DSP), # Armv7-M has no DSP + (CortexM.M4, cmsis_nn.Backend.MVE), # Armv7E-M has no MVE + (CortexM.M33, cmsis_nn.Backend.MVE), # Armv8-M Mainline has no MVE + 
(CortexM.M35P, cmsis_nn.Backend.MVE), + ], + ) + def test_isa_override_rejects_incompatible(self, cpu, override): + with pytest.raises(ValueError, match="not supported"): + CortexMTargetConfig(cpu=cpu, isa=override) + + @pytest.mark.parametrize( + "target_string", + [ + "cortex-m999", + "cortex-m52", # not yet in cmsis_nn.CortexM + "cortex-m55+int8", # legacy +int8 form no longer accepted + "arm-m4", + ], + ) + def test_from_target_string_rejects_invalid(self, target_string): + with pytest.raises(ValueError): + CortexMTargetConfig.from_target_string(target_string) + + def test_is_hashable_and_frozen(self): + from dataclasses import FrozenInstanceError + + config = CortexMTargetConfig(cpu=CortexM.M33) + assert hash(config) == hash(CortexMTargetConfig(cpu=CortexM.M33)) + assert {config, CortexMTargetConfig(cpu=CortexM.M33)} == {config} + with pytest.raises(FrozenInstanceError): + config.cpu = CortexM.M55 # type: ignore[misc] + + +class TestPassManagerTargetConfigWiring: + def test_default_target_config_is_m55(self): + from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( + CortexMPassManager, + ) + + pm = CortexMPassManager(exported_program=None) + assert pm.target_config.cpu == CortexM.M55 + assert pm.target_config.backend == cmsis_nn.Backend.MVE + + def test_explicit_target_config_threaded(self): + from executorch.backends.cortex_m.passes.cortex_m_pass_manager import ( + CortexMPassManager, + ) + + target_config = CortexMTargetConfig(cpu=CortexM.M33) + pm = CortexMPassManager(exported_program=None, target_config=target_config) + assert pm.target_config.cpu == CortexM.M33 + assert pm.target_config.backend == cmsis_nn.Backend.DSP diff --git a/backends/cortex_m/test/tester.py b/backends/cortex_m/test/tester.py index 1e97899245d..e9912d03cad 100644 --- a/backends/cortex_m/test/tester.py +++ b/backends/cortex_m/test/tester.py @@ -12,9 +12,9 @@ import torch from executorch.backends.arm.test.common import get_u55_compile_spec from 
executorch.backends.arm.test.tester.arm_tester import Serialize -from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer +from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig from executorch.backends.test.harness import Tester as TesterBase from executorch.backends.test.harness.stages import ( Export, @@ -50,12 +50,12 @@ def __init__(self): class CortexMRunPasses(RunPasses): - def __init__(self, config: Optional[CortexMCompileConfig] = None): - config = config or CortexMCompileConfig() + def __init__(self, target_config: Optional[CortexMTargetConfig] = None): + target_config = target_config or CortexMTargetConfig(cpu=CortexM.M55) # The base RunPasses constructs the pass manager as `cls(ep, pass_list)`. - # Pre-bind the config so it flows through that 2-arg call. + # Pre-bind the target_config so it flows through that 2-arg call. 
super().__init__( - partial(CortexMPassManager, config=config), # type: ignore[arg-type] + partial(CortexMPassManager, target_config=target_config), # type: ignore[arg-type] CortexMPassManager.pass_list, # type: ignore[arg-type] ) @@ -82,17 +82,19 @@ def __init__( self, module, example_inputs, - config: Optional[CortexMCompileConfig] = None, + target_config: Optional[CortexMTargetConfig] = None, ): if callable(example_inputs): resolved_example_inputs = example_inputs() else: resolved_example_inputs = example_inputs - config = config or CortexMCompileConfig() + target_config = target_config or CortexMTargetConfig(cpu=CortexM.M55) stage_classes: dict[StageType, Callable[..., Any]] = dict( cortex_m_stage_classes ) - stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses(config=config) + stage_classes[StageType.RUN_PASSES] = lambda: CortexMRunPasses( + target_config=target_config + ) super().__init__(module, resolved_example_inputs, stage_classes) def test_dialect( diff --git a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py index c775ef0576d..4785598c876 100644 --- a/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py +++ b/examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py @@ -23,10 +23,10 @@ import os import torch - -from executorch.backends.cortex_m.compile_config import CortexMCompileConfig from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer + +from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge from executorch.extension.export_util.utils import save_pte_program @@ -97,7 +97,7 @@ def export_to_pte(quantized_model, example_input, output_path: str): logger.info("Applying Cortex-M optimization passes...") pass_manager = CortexMPassManager( 
edge_program.exported_program(), - config=CortexMCompileConfig(cpu="cortex-m33"), + target_config=CortexMTargetConfig(cpu=CortexM.M33), ) transformed_ep = pass_manager.transform() From c7a1278691ba3e3b197f54195706a1e0449b1737 Mon Sep 17 00:00:00 2001 From: RJ Ascani Date: Tue, 12 May 2026 17:34:08 -0700 Subject: [PATCH 4/5] Cortex-M backend: restore import-not-found on the cmsis_nn type ignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI's mypy env doesn't pip-install cmsis_nn (it's a native pybind11 module — type-checking-only envs typically skip such deps), so `import-not-found` fires there even though `import-untyped` is the one that fires locally. The combo `[import-not-found, import-untyped]` is honest about both states; dropping either half breaks one or the other environment. Authored with Claude. --- backends/cortex_m/target_config.py | 2 +- backends/cortex_m/test/misc/test_target_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/cortex_m/target_config.py b/backends/cortex_m/target_config.py index 2a53869976b..e18e5d00a41 100644 --- a/backends/cortex_m/target_config.py +++ b/backends/cortex_m/target_config.py @@ -10,7 +10,7 @@ from enum import auto, Enum from typing import Optional -import cmsis_nn # type: ignore[import-untyped] +import cmsis_nn # type: ignore[import-not-found, import-untyped] class CortexM(Enum): diff --git a/backends/cortex_m/test/misc/test_target_config.py b/backends/cortex_m/test/misc/test_target_config.py index 5547bfbe125..3e648b0a81c 100644 --- a/backends/cortex_m/test/misc/test_target_config.py +++ b/backends/cortex_m/test/misc/test_target_config.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import cmsis_nn # type: ignore[import-untyped] +import cmsis_nn # type: ignore[import-not-found, import-untyped] import pytest from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig From 2ec0840f4260a4a689c8fa82b3eb028635fe78bb Mon Sep 17 00:00:00 2001 From: RJ Ascani Date: Wed, 13 May 2026 08:57:22 -0700 Subject: [PATCH 5/5] Cortex-M backend: fix Zephyr hello-executorch CI command The Zephyr `hello-executorch` README has embedded directives that the test-arm-backend-zephyr CI extracts and executes verbatim. The prior wording used `--target=cortex-m55+int8`, which the AOT compiler no longer accepts after the +int8 drop. Update both code blocks (and the surrounding prose) to use the new bare `--target=cortex-m55` spelling. Authored with Claude. --- zephyr/samples/hello-executorch/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zephyr/samples/hello-executorch/README.md b/zephyr/samples/hello-executorch/README.md index 16303303031..ab8022d5d62 100644 --- a/zephyr/samples/hello-executorch/README.md +++ b/zephyr/samples/hello-executorch/README.md @@ -51,10 +51,10 @@ west build -b mps3/corstone300/fvp modules/lib/executorch/zephyr/samples/hello-e Prepare the Cortex-M55 PTE model ``` -python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55+int8 --output=add_m55.pte +python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55 --output=add_m55.pte ``` -`--target=cortex-m55+int8` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization. +`--target=cortex-m55` plus `--quantize` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation).
This produces a `.pte` optimized for Cortex-M55 with INT8 quantization. #### Build and run @@ -129,10 +129,10 @@ export PATH=$PATH:~/STMicroelectronics/STM32Cube/STM32CubeProgrammer/bin Prepare the Cortex-M55 PTE model ``` -python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55+int8 --output=add_m55.pte +python -m modules.lib.executorch.backends.arm.scripts.aot_arm_compiler --model_name=modules/lib/executorch/zephyr/samples/hello-executorch/models/add.py --quantize --target=cortex-m55 --output=add_m55.pte ``` -`--target=cortex-m55+int8` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization. +`--target=cortex-m55` plus `--quantize` selects the Cortex-M/CMSIS-NN portable kernel path (no NPU delegation). This produces a `.pte` optimized for Cortex-M55 with INT8 quantization. #### Build and run