Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions backends/arm/scripts/build_executor_runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,12 @@ ethos_u_root_dir="$ethosu_tools_dir/ethos-u"
mkdir -p "${ethos_u_root_dir}"
ethosu_tools_dir=$(realpath ${ethos_u_root_dir})

et_build_dir=${et_build_root}/cmake-out
if [[ ${target} =~ ^cortex-m([0-9]+(plus|p)?)\+ ]]; then
# Match build_executorch.sh's per-CPU staging.
et_build_dir=${et_build_root}/cmake-out-cortex-m${BASH_REMATCH[1]}
else
et_build_dir=${et_build_root}/cmake-out
fi
mkdir -p ${et_build_dir}
et_build_dir=$(realpath ${et_build_dir})

Expand All @@ -151,10 +156,17 @@ fi
mkdir -p "${output_folder}"
output_folder=$(realpath ${output_folder})

if [[ ${target} == *"ethos-u55"* ]]; then
if [[ ${target} =~ ^cortex-m([0-9]+(plus|p)?)\+ ]]; then
# NPU isn't used at runtime, but core_platform's ethosu_get_architecture()
# parser rejects non-ethos-u strings — pass a dummy.
target_cpu="cortex-m${BASH_REMATCH[1]}"
npu_target_config="ethos-u55-128"
elif [[ ${target} == *"ethos-u55"* ]]; then
target_cpu=cortex-m55
npu_target_config="${target}"
else
target_cpu=cortex-m85
npu_target_config="${target}"
fi
echo "--------------------------------------------------------------------------------"
echo "Build Arm ${toolchain/-gcc/} executor_runner for ${target} PTE: ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}'"
Expand All @@ -177,8 +189,9 @@ cmake \
-DTARGET_CPU=${target_cpu} \
-DET_DIR_PATH:PATH=${et_root_dir} \
-DET_BUILD_DIR_PATH:PATH=${et_build_dir} \
-Dexecutorch_DIR:PATH=${et_build_dir}/lib/cmake/ExecuTorch \
-DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \
-DETHOSU_TARGET_NPU_CONFIG=${target} \
-DETHOSU_TARGET_NPU_CONFIG=${npu_target_config} \
${pte_data} \
${build_bundleio_flags} \
${build_with_etdump_flags} \
Expand Down
13 changes: 12 additions & 1 deletion backends/arm/scripts/build_executorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ build_type="Release"
build_devtools=OFF
build_with_etdump=OFF
is_linux_musl=0
target_cpu=""

help() {
echo "Usage: $(basename $0) [options]"
Expand All @@ -33,6 +34,7 @@ help() {
echo " --devtools Build Devtools libs"
echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
echo " --toolchain=<TOOLCHAIN> Toolchain can be specified (arm-none-eabi-gcc, arm-zephyr-eabi-gcc, aarch64-linux-musl-gcc). Default: ${toolchain}"
echo " --target_cpu=<CPU> Override the toolchain's default TARGET_CPU (e.g. cortex-m4). When set, the build is staged in cmake-out-<cpu> to keep per-CPU artifacts isolated. Default: unset (toolchain default)."
exit 0
}

Expand All @@ -44,6 +46,7 @@ for arg in "$@"; do
--devtools) build_devtools=ON ;;
--etdump) build_with_etdump=ON ;;
--toolchain=*) toolchain="${arg#*=}";;
--target_cpu=*) target_cpu="${arg#*=}";;
*)
;;
esac
Expand All @@ -70,7 +73,11 @@ toolchain_cmake=$(realpath ${toolchain_cmake})

source ${setup_path_script}

et_build_dir="${et_build_root}/cmake-out"
if [[ -n "${target_cpu}" ]]; then
et_build_dir="${et_build_root}/cmake-out-${target_cpu}"
else
et_build_dir="${et_build_root}/cmake-out"
fi

set -x
cd "${et_root_dir}"
Expand All @@ -87,6 +94,10 @@ cmake_args=(
-DEXECUTORCH_BUILD_ARM_ETDUMP=${build_with_etdump}
)

if [[ -n "${target_cpu}" ]]; then
cmake_args+=(-DTARGET_CPU=${target_cpu})
fi

if [[ ${is_linux_musl} -eq 1 ]]; then
if [[ -z "${MUSL_TOOLCHAIN_ROOT:-}" ]]; then
echo "Error: MUSL_TOOLCHAIN_ROOT is required for aarch64-linux-musl-gcc."
Expand Down
7 changes: 6 additions & 1 deletion backends/arm/scripts/corstone_utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
"source backends/arm/scripts/utils.sh && patch_repo ${ETHOS_SDK_PATH}/core_software ${core_software_base_rev} ${patch_dir}"
WORKING_DIRECTORY ${ET_DIR_PATH}
)
# Always patch the core_platform repo since this is fast enough.
# Always patch the core_platform repo since this is fast enough. TODO:
# examples/arm/ethos-u-setup/core_platform/0002-*.patch is a transient bridge
# that guards Armv8-M-only MPU init so the source compiles for non-Armv8-M
# Cortex-M cores. Once the same guard lands upstream in ethos-u/core_platform
# and ${core_platform_base_rev} is bumped past that commit, delete the 0002
# patch.
set(core_platform_base_rev "26.02")
execute_process(
COMMAND
Expand Down
6 changes: 3 additions & 3 deletions backends/arm/scripts/run_fvp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ done

elf_file=$(realpath ${elf_file})

# cortex-m55 is the only Cortex-M CPU on the Corstone-300 board today;
# cortex-m85 lives on Corstone-320, so it falls through to the SSE-320 FVP.
if [[ ${target} == *"ethos-u55"* || ${target} == cortex-m55* ]]; then
# cortex-m85 lives on Corstone-320; all other Cortex-M variants run on
# the Corstone-300 M55 (ISA superset).
if [[ ${target} == *"ethos-u55"* || ${target} == cortex-m* && ${target} != cortex-m85* ]]; then
fvp_model=FVP_Corstone_SSE-300_Ethos-U55
else
fvp_model=FVP_Corstone_SSE-320
Expand Down
22 changes: 19 additions & 3 deletions backends/cortex_m/test/build_test_runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,32 @@

set -eu

target="cortex-m55+int8"
for arg in "$@"; do
case $arg in
--target=*) target="${arg#*=}";;
*) ;;
esac
done

# Forward to build_executorch.sh so the core libs share the runner's -mcpu.
if [[ ${target} =~ ^cortex-m([0-9]+(plus|p)?)\+ ]]; then
target_cpu="cortex-m${BASH_REMATCH[1]}"
else
echo "Error: build_test_runner.sh only supports cortex-m<X>+int8 targets, got: ${target}"
exit 1
fi

# Always rebuild executorch in case the cortex-m kernels have been updated.
script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
et_root_dir=$(realpath "${script_dir}/../../..")
build_executorch="${et_root_dir}/backends/arm/scripts/build_executorch.sh"
${build_executorch} --devtools
${build_executorch} --devtools --target_cpu="${target_cpu}"

# Build executor runner with selected aten ops and semi hosting
build_dir="${et_root_dir}/arm_test"
build_executor_runner="${et_root_dir}/backends/arm/scripts/build_executor_runner.sh"
build_root_test_dir="${et_root_dir}/arm_test/arm_semihosting_executor_runner_corstone-300"
build_root_test_dir="${et_root_dir}/arm_test/arm_semihosting_executor_runner_${target}"

select_ops_list="\
aten::add.out,\
Expand All @@ -32,4 +48,4 @@ aten::unsqueeze_copy.out,\
aten::select_copy.int_out,\
aten::amax.out"

${build_executor_runner} --pte=semihosting --bundleio --target=ethos-u55-128 --output="${build_root_test_dir}" --select_ops_list="${select_ops_list}" --extra_build_flags="-DET_ATOL=5.0 -DET_RTOL=1.0"
${build_executor_runner} --pte=semihosting --bundleio --target="${target}" --output="${build_root_test_dir}" --select_ops_list="${select_ops_list}" --extra_build_flags="-DET_ATOL=5.0 -DET_RTOL=1.0"
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
From 7a00a3cdf2f47424fdf29718e582ad7ae9af9cb5 Mon Sep 17 00:00:00 2001
From: RJ Ascani <rja@meta.com>
Date: Tue, 12 May 2026 09:25:48 -0700
Subject: [PATCH] Guard Armv8-M MPU init for cross-CPU builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Armv8-M MPU API (ARM_MPU_RBAR, ARM_MPU_RLAR, ARM_MPU_Region_t with
its v8-M layout) is only defined for Armv8-M Mainline targets. When
building the Corstone-300 platform for an older Cortex-M variant — to
exercise the scalar / DSP CMSIS-NN code paths on the Corstone-300 M55
simulator, which is an ISA superset — the v8-M MPU symbols are not in
scope and the build fails.

Guard the MPU configuration block with __ARM_ARCH_8M_MAIN__ /
__ARM_ARCH_8_1M_MAIN__ so the same source compiles for Cortex-M0/M0+/
M3/M4/M7 against the Corstone-300 platform layer. The FVP doesn't
enforce MPU permissions, so skipping the configuration has no
runtime effect on simulation correctness.
---
targets/corstone-300/target.cpp | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/targets/corstone-300/target.cpp b/targets/corstone-300/target.cpp
index 45eb98e..bda2248 100644
--- a/targets/corstone-300/target.cpp
+++ b/targets/corstone-300/target.cpp
@@ -314,6 +314,13 @@ void targetSetup() {
#endif

// MPU setup
+ //
+ // The Armv8-M `ARM_MPU_RBAR`/`ARM_MPU_RLAR` API is only available on
+ // Armv8-M Mainline cores (M33/M55/M85/M35P). On pre-Armv8-M targets
+ // (M0/M0+/M3/M4/M7), CMSIS doesn't define these macros — the binary
+ // is still runnable on the Corstone-300 M55 simulator (ISA superset)
+ // without MPU configuration, since the FVP doesn't enforce protection.
+#if defined(__ARM_ARCH_8M_MAIN__) || defined(__ARM_ARCH_8_1M_MAIN__)
const std::vector<ARM_MPU_Region_t> mpuConfig = {
{
// ITCM (NS)
@@ -418,6 +425,7 @@ void targetSetup() {

// Setup MPU configuration
Mpu::loadAndEnableConfig(&mpuConfig[0], mpuConfig.size());
+#endif // __ARM_ARCH_8M_MAIN__ || __ARM_ARCH_8_1M_MAIN__

#if defined(CPU_CACHE_ENABLE) && defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
SCB_EnableICache();
--
2.53.0-Meta

9 changes: 8 additions & 1 deletion examples/arm/executor_runner/arm_executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,11 +272,14 @@ unsigned char* ethosu_fast_scratch = dedicated_sram;
#endif

/**
 * Platform-abstraction-layer init hook: starts the CPU cycle counter that
 * et_pal_current_ticks() reads.
 *
 * The whole body is compiled only when __ARM_ARCH_8_1M_MAIN__ is defined;
 * on any other architecture this function is an empty no-op.
 */
[[maybe_unused]] void et_pal_init(void) {
#if defined(__ARM_ARCH_8_1M_MAIN__)
// Armv8.1-M Mainline cores (M55, M85) have the optional PMU extension.
// Pre-Armv8.1-M cores lack ARM_PMU_*; et_pal_current_ticks() returns 0.
// Sequence: enable the PMU, set the trace-enable bit, zero the cycle
// counter, then start the cycle counter. Keep this order.
ARM_PMU_Enable();
DCB->DEMCR |= DCB_DEMCR_TRCENA_Msk; // Trace enable
ARM_PMU_CYCCNT_Reset();
ARM_PMU_CNTR_Enable(PMU_CNTENSET_CCNTR_ENABLE_Msk);
#endif
}

/**
Expand All @@ -296,7 +299,11 @@ unsigned char* ethosu_fast_scratch = dedicated_sram;
}

/**
 * Report the current platform tick count.
 *
 * Builds with __ARM_ARCH_8_1M_MAIN__ defined return the PMU cycle counter.
 * Every other build has no counter to read and always reports 0.
 */
[[maybe_unused]] et_timestamp_t et_pal_current_ticks(void) {
#if !defined(__ARM_ARCH_8_1M_MAIN__)
  // No Armv8.1-M PMU on this core: ticks are not available.
  return 0;
#else
  return ARM_PMU_Get_CCNTR();
#endif
}

[[maybe_unused]] et_tick_ratio_t et_pal_ticks_to_ns_multiplier(void) {
Expand Down
25 changes: 18 additions & 7 deletions examples/arm/executor_runner/arm_perf_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@

namespace {

// Reads the CPU cycle counter used by the measurement hooks in this file.
// With __ARM_ARCH_8_1M_MAIN__ defined the value comes from the PMU cycle
// counter; without it no counter is read and the result is always 0.
static inline uint64_t arm_pmu_cycles() {
  uint64_t cycles = 0;
#if defined(__ARM_ARCH_8_1M_MAIN__)
  cycles = ARM_PMU_Get_CCNTR();
#endif
  return cycles;
}

#if defined(ETHOSU55) || defined(ETHOSU65)
const uint32_t ethosu_pmuCountersUsed = 4;
#elif defined(ETHOSU85)
Expand Down Expand Up @@ -85,7 +94,7 @@ void ethosu_inference_begin(struct ethosu_driver* drv, void*) {

// Save Cortex-M cycle clock to calculate total CPU cycles used in
// ethosu_inference_end()
ethosu_ArmWhenNPURunCycleCountStart = ARM_PMU_Get_CCNTR();
ethosu_ArmWhenNPURunCycleCountStart = arm_pmu_cycles();
}

// Callback invoked at end of NPU execution
Expand All @@ -99,21 +108,21 @@ void ethosu_inference_end(struct ethosu_driver* drv, void*) {
ETHOSU_PMU_Disable(drv);
// Add Cortex-M cycle clock used during this NPU execution
ethosu_ArmWhenNPURunCycleCount +=
(ARM_PMU_Get_CCNTR() - ethosu_ArmWhenNPURunCycleCountStart);
(arm_pmu_cycles() - ethosu_ArmWhenNPURunCycleCountStart);
}

// Callback invoked at start of ArmBackend::execute()
void EthosUBackend_execute_begin() {
// Save Cortex-M cycle clock to calculate total CPU cycles used in
// ArmBackend_execute_end()
ethosu_ArmBackendExecuteCycleCountStart = ARM_PMU_Get_CCNTR();
ethosu_ArmBackendExecuteCycleCountStart = arm_pmu_cycles();
}

// Callback invoked at end of ArmBackend::execute()
void EthosUBackend_execute_end() {
// Add Cortex-M cycle clock used during this ArmBackend::execute()
ethosu_ArmBackendExecuteCycleCount +=
(ARM_PMU_Get_CCNTR() - ethosu_ArmBackendExecuteCycleCountStart);
(arm_pmu_cycles() - ethosu_ArmBackendExecuteCycleCountStart);
}
}

Expand All @@ -126,14 +135,16 @@ void StartMeasurements() {
for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
ethosu_pmuEventCounts[i] = 0;
}
ethosu_ArmCycleCountStart = ARM_PMU_Get_CCNTR();
ethosu_ArmCycleCountStart = arm_pmu_cycles();
}

void StopMeasurements(int num_inferences) {
#if defined(__ARM_ARCH_8_1M_MAIN__)
ARM_PMU_CNTR_Disable(
PMU_CNTENCLR_CCNTR_ENABLE_Msk | PMU_CNTENCLR_CNT0_ENABLE_Msk |
PMU_CNTENCLR_CNT1_ENABLE_Msk);
uint32_t cycle_count = ARM_PMU_Get_CCNTR() - ethosu_ArmCycleCountStart;
#endif
uint32_t cycle_count = arm_pmu_cycles() - ethosu_ArmCycleCountStart;

// Number of command streams handled by the NPU
ET_LOG(Info, "NPU Inferences : %d", num_inferences);
Expand Down Expand Up @@ -171,7 +182,7 @@ void StopMeasurements(int num_inferences) {
Info,
"NOTE: CPU cycle values and ratio calculations require FPGA and identical CPU/NPU frequency");

// Avoid division with zero if ARM_PMU_Get_CCNTR() is not enabled properly.
// Avoid division by zero when no CPU cycles were counted (arm_pmu_cycles()
// returns 0 on cores without the PMU, or the counter was not enabled).
if (cycle_count == 0) {
ET_LOG(Info, "Inference CPU ratio: ?.?? %%");
ET_LOG(Info, "Inference NPU ratio: ?.?? %%");
Expand Down
4 changes: 2 additions & 2 deletions examples/arm/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,8 @@ for i in "${!test_model[@]}"; do
exit 1
fi
set -x
backends/cortex_m/test/build_test_runner.sh
cortex_m_elf="${et_root_dir}/arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
backends/cortex_m/test/build_test_runner.sh --target="${target}"
cortex_m_elf="${et_root_dir}/arm_test/arm_semihosting_executor_runner_${target}/arm_executor_runner"
if [ "$build_only" = false ] ; then
backends/arm/scripts/run_fvp.sh --elf="${cortex_m_elf}" --target="${target}" --bundle="${pte_file}"
fi
Expand Down
Loading