Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,204 changes: 203 additions & 2,001 deletions crates/core_arch/src/loongarch64/lasx/generated.rs

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions crates/core_arch/src/loongarch64/lasx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ mod generated;
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::generated::*;

// Declares the `portable` submodule; rustfmt is disabled for it.
#[rustfmt::skip]
mod portable;

// Re-exports every public item of `portable` from this module, behind the
// `stdarch_loongarch` unstable feature.
#[rustfmt::skip]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::portable::*;

#[rustfmt::skip]
#[cfg(test)]
mod tests;
202 changes: 202 additions & 0 deletions crates/core_arch/src/loongarch64/lasx/portable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
//! LoongArch64 LASX intrinsics - intrinsics::simd implementation

use super::super::{simd::*, *};
use crate::core_arch::simd::*;
use crate::intrinsics::simd::*;
use crate::mem::transmute;

// Unary vector operations. Each row names, in order: the target-feature string,
// the intrinsic, the `simd_*` operation it is built on, the vector type
// (`m256i` / `m256` / `m256d`) and the lane layout (e.g. `i8x32`).
// NOTE(review): `impl_vv!` is defined outside this file; the argument roles
// above are read off the rows themselves — confirm against the macro.

// xvpcnt.{b,h,w,d}: `simd_ctpop` over 8/16/32/64-bit lanes.
impl_vv!("lasx", lasx_xvpcnt_b, simd_ctpop, m256i, i8x32);
impl_vv!("lasx", lasx_xvpcnt_h, simd_ctpop, m256i, i16x16);
impl_vv!("lasx", lasx_xvpcnt_w, simd_ctpop, m256i, i32x8);
impl_vv!("lasx", lasx_xvpcnt_d, simd_ctpop, m256i, i64x4);
// xvclz.{b,h,w,d}: `simd_ctlz` over 8/16/32/64-bit lanes.
impl_vv!("lasx", lasx_xvclz_b, simd_ctlz, m256i, i8x32);
impl_vv!("lasx", lasx_xvclz_h, simd_ctlz, m256i, i16x16);
impl_vv!("lasx", lasx_xvclz_w, simd_ctlz, m256i, i32x8);
impl_vv!("lasx", lasx_xvclz_d, simd_ctlz, m256i, i64x4);
// xvneg.{b,h,w,d}: `simd_neg` over signed integer lanes.
impl_vv!("lasx", lasx_xvneg_b, simd_neg, m256i, i8x32);
impl_vv!("lasx", lasx_xvneg_h, simd_neg, m256i, i16x16);
impl_vv!("lasx", lasx_xvneg_w, simd_neg, m256i, i32x8);
impl_vv!("lasx", lasx_xvneg_d, simd_neg, m256i, i64x4);
// xvfsqrt.{s,d}: `simd_fsqrt` over f32 / f64 lanes.
impl_vv!("lasx", lasx_xvfsqrt_s, simd_fsqrt, m256, f32x8);
impl_vv!("lasx", lasx_xvfsqrt_d, simd_fsqrt, m256d, f64x4);

// xvreplgr2vr.{b,h,w,d}: build a vector with `simd_splat`.
// The two trailing types are the lane element type and a second scalar type,
// which is `i32` for the 8/16/32-bit rows and `i64` for the 64-bit row.
// NOTE(review): the second type is presumably the intrinsic's scalar parameter
// type (narrowed to the lane type before the splat) — confirm in `impl_gv!`,
// which is defined outside this file.
impl_gv!("lasx", lasx_xvreplgr2vr_b, simd_splat, m256i, i8x32, i8, i32);
impl_gv!("lasx", lasx_xvreplgr2vr_h, simd_splat, m256i, i16x16, i16, i32);
impl_gv!("lasx", lasx_xvreplgr2vr_w, simd_splat, m256i, i32x8, i32, i32);
impl_gv!("lasx", lasx_xvreplgr2vr_d, simd_splat, m256i, i64x4, i64, i64);

// xvrepli.{b,h,w,d}: build a vector with `simd_splat`; unlike the
// `impl_gv!` rows, these take a lane type plus a trailing literal `10`.
// NOTE(review): the `10` is presumably the bit width of a signed immediate
// operand — confirm in `impl_sv!`, which is defined outside this file.
impl_sv!("lasx", lasx_xvrepli_b, simd_splat, m256i, i8x32, i8, 10);
impl_sv!("lasx", lasx_xvrepli_h, simd_splat, m256i, i16x16, i16, 10);
impl_sv!("lasx", lasx_xvrepli_w, simd_splat, m256i, i32x8, i32, 10);
impl_sv!("lasx", lasx_xvrepli_d, simd_splat, m256i, i64x4, i64, 10);

// Binary vector operations. Each row names, in order: the target-feature
// string, the intrinsic, the `simd_*` operation, the vector type and the lane
// layout. Signed and unsigned variants of an intrinsic share one operation and
// differ only in the lane layout (`iNxM` vs `uNxM`).
// NOTE(review): `impl_vvv!` is defined outside this file — confirm the
// argument roles against the macro.

// Integer addition and subtraction.
impl_vvv!("lasx", lasx_xvadd_b, simd_add, m256i, i8x32);
impl_vvv!("lasx", lasx_xvadd_h, simd_add, m256i, i16x16);
impl_vvv!("lasx", lasx_xvadd_w, simd_add, m256i, i32x8);
impl_vvv!("lasx", lasx_xvadd_d, simd_add, m256i, i64x4);
impl_vvv!("lasx", lasx_xvsub_b, simd_sub, m256i, i8x32);
impl_vvv!("lasx", lasx_xvsub_h, simd_sub, m256i, i16x16);
impl_vvv!("lasx", lasx_xvsub_w, simd_sub, m256i, i32x8);
impl_vvv!("lasx", lasx_xvsub_d, simd_sub, m256i, i64x4);
// Integer maximum: signed lanes first, then unsigned (`*u` suffix).
impl_vvv!("lasx", lasx_xvmax_b, simd_imax, m256i, i8x32);
impl_vvv!("lasx", lasx_xvmax_h, simd_imax, m256i, i16x16);
impl_vvv!("lasx", lasx_xvmax_w, simd_imax, m256i, i32x8);
impl_vvv!("lasx", lasx_xvmax_d, simd_imax, m256i, i64x4);
impl_vvv!("lasx", lasx_xvmax_bu, simd_imax, m256i, u8x32);
impl_vvv!("lasx", lasx_xvmax_hu, simd_imax, m256i, u16x16);
impl_vvv!("lasx", lasx_xvmax_wu, simd_imax, m256i, u32x8);
impl_vvv!("lasx", lasx_xvmax_du, simd_imax, m256i, u64x4);
// Integer minimum: signed lanes first, then unsigned.
impl_vvv!("lasx", lasx_xvmin_b, simd_imin, m256i, i8x32);
impl_vvv!("lasx", lasx_xvmin_h, simd_imin, m256i, i16x16);
impl_vvv!("lasx", lasx_xvmin_w, simd_imin, m256i, i32x8);
impl_vvv!("lasx", lasx_xvmin_d, simd_imin, m256i, i64x4);
impl_vvv!("lasx", lasx_xvmin_bu, simd_imin, m256i, u8x32);
impl_vvv!("lasx", lasx_xvmin_hu, simd_imin, m256i, u16x16);
impl_vvv!("lasx", lasx_xvmin_wu, simd_imin, m256i, u32x8);
impl_vvv!("lasx", lasx_xvmin_du, simd_imin, m256i, u64x4);
// Lane-wise comparisons: equality (`simd_eq`), less-than (`simd_lt`) and
// less-or-equal (`simd_le`); the ordered comparisons have unsigned variants.
impl_vvv!("lasx", lasx_xvseq_b, simd_eq, m256i, i8x32);
impl_vvv!("lasx", lasx_xvseq_h, simd_eq, m256i, i16x16);
impl_vvv!("lasx", lasx_xvseq_w, simd_eq, m256i, i32x8);
impl_vvv!("lasx", lasx_xvseq_d, simd_eq, m256i, i64x4);
impl_vvv!("lasx", lasx_xvslt_b, simd_lt, m256i, i8x32);
impl_vvv!("lasx", lasx_xvslt_h, simd_lt, m256i, i16x16);
impl_vvv!("lasx", lasx_xvslt_w, simd_lt, m256i, i32x8);
impl_vvv!("lasx", lasx_xvslt_d, simd_lt, m256i, i64x4);
impl_vvv!("lasx", lasx_xvslt_bu, simd_lt, m256i, u8x32);
impl_vvv!("lasx", lasx_xvslt_hu, simd_lt, m256i, u16x16);
impl_vvv!("lasx", lasx_xvslt_wu, simd_lt, m256i, u32x8);
impl_vvv!("lasx", lasx_xvslt_du, simd_lt, m256i, u64x4);
impl_vvv!("lasx", lasx_xvsle_b, simd_le, m256i, i8x32);
impl_vvv!("lasx", lasx_xvsle_h, simd_le, m256i, i16x16);
impl_vvv!("lasx", lasx_xvsle_w, simd_le, m256i, i32x8);
impl_vvv!("lasx", lasx_xvsle_d, simd_le, m256i, i64x4);
impl_vvv!("lasx", lasx_xvsle_bu, simd_le, m256i, u8x32);
impl_vvv!("lasx", lasx_xvsle_hu, simd_le, m256i, u16x16);
impl_vvv!("lasx", lasx_xvsle_wu, simd_le, m256i, u32x8);
impl_vvv!("lasx", lasx_xvsle_du, simd_le, m256i, u64x4);
// Integer multiplication.
impl_vvv!("lasx", lasx_xvmul_b, simd_mul, m256i, i8x32);
impl_vvv!("lasx", lasx_xvmul_h, simd_mul, m256i, i16x16);
impl_vvv!("lasx", lasx_xvmul_w, simd_mul, m256i, i32x8);
impl_vvv!("lasx", lasx_xvmul_d, simd_mul, m256i, i64x4);
// Integer division: signed lanes first, then unsigned.
// NOTE(review): behaviour for a zero divisor (and signed MIN / -1) depends on
// `simd_div` and on how `impl_vvv!` wraps it — confirm it matches the hardware
// instruction.
impl_vvv!("lasx", lasx_xvdiv_b, simd_div, m256i, i8x32);
impl_vvv!("lasx", lasx_xvdiv_h, simd_div, m256i, i16x16);
impl_vvv!("lasx", lasx_xvdiv_w, simd_div, m256i, i32x8);
impl_vvv!("lasx", lasx_xvdiv_d, simd_div, m256i, i64x4);
impl_vvv!("lasx", lasx_xvdiv_bu, simd_div, m256i, u8x32);
impl_vvv!("lasx", lasx_xvdiv_hu, simd_div, m256i, u16x16);
impl_vvv!("lasx", lasx_xvdiv_wu, simd_div, m256i, u32x8);
impl_vvv!("lasx", lasx_xvdiv_du, simd_div, m256i, u64x4);
// Integer remainder (`xvmod` is expressed with `simd_rem`): signed, then
// unsigned. NOTE(review): the zero-divisor caveat on the division rows
// applies here as well.
impl_vvv!("lasx", lasx_xvmod_b, simd_rem, m256i, i8x32);
impl_vvv!("lasx", lasx_xvmod_h, simd_rem, m256i, i16x16);
impl_vvv!("lasx", lasx_xvmod_w, simd_rem, m256i, i32x8);
impl_vvv!("lasx", lasx_xvmod_d, simd_rem, m256i, i64x4);
impl_vvv!("lasx", lasx_xvmod_bu, simd_rem, m256i, u8x32);
impl_vvv!("lasx", lasx_xvmod_hu, simd_rem, m256i, u16x16);
impl_vvv!("lasx", lasx_xvmod_wu, simd_rem, m256i, u32x8);
impl_vvv!("lasx", lasx_xvmod_du, simd_rem, m256i, u64x4);
// Whole-vector bitwise operations, all carried out on `u8x32` lanes.
// NOTE(review): `simd_andn`, `simd_orn` and `simd_nor` are helpers defined
// outside this file; which operand they complement is not visible here.
impl_vvv!("lasx", lasx_xvand_v, simd_and, m256i, u8x32);
impl_vvv!("lasx", lasx_xvandn_v, simd_andn, m256i, u8x32);
impl_vvv!("lasx", lasx_xvor_v, simd_or, m256i, u8x32);
impl_vvv!("lasx", lasx_xvorn_v, simd_orn, m256i, u8x32);
impl_vvv!("lasx", lasx_xvnor_v, simd_nor, m256i, u8x32);
impl_vvv!("lasx", lasx_xvxor_v, simd_xor, m256i, u8x32);
// Floating-point add / sub / mul / div on f32 (`m256`) and f64 (`m256d`) lanes.
impl_vvv!("lasx", lasx_xvfadd_s, simd_add, m256, f32x8);
impl_vvv!("lasx", lasx_xvfadd_d, simd_add, m256d, f64x4);
impl_vvv!("lasx", lasx_xvfsub_s, simd_sub, m256, f32x8);
impl_vvv!("lasx", lasx_xvfsub_d, simd_sub, m256d, f64x4);
impl_vvv!("lasx", lasx_xvfmul_s, simd_mul, m256, f32x8);
impl_vvv!("lasx", lasx_xvfmul_d, simd_mul, m256d, f64x4);
impl_vvv!("lasx", lasx_xvfdiv_s, simd_div, m256, f32x8);
impl_vvv!("lasx", lasx_xvfdiv_d, simd_div, m256d, f64x4);

// Shifts where the shift amounts come from a second vector. Compared with the
// `impl_vvv!` rows, each row carries one extra argument: the lane element type.
// NOTE(review): the extra type is presumably used to reduce each shift amount
// modulo the lane width before shifting — confirm in `impl_vvv_s!`, which is
// defined outside this file.

// xvsll: left shift (`simd_shl`) on signed lanes.
impl_vvv_s!("lasx", lasx_xvsll_b, simd_shl, m256i, i8x32, i8);
impl_vvv_s!("lasx", lasx_xvsll_h, simd_shl, m256i, i16x16, i16);
impl_vvv_s!("lasx", lasx_xvsll_w, simd_shl, m256i, i32x8, i32);
impl_vvv_s!("lasx", lasx_xvsll_d, simd_shl, m256i, i64x4, i64);
// xvsra: right shift (`simd_shr`) on signed lanes.
impl_vvv_s!("lasx", lasx_xvsra_b, simd_shr, m256i, i8x32, i8);
impl_vvv_s!("lasx", lasx_xvsra_h, simd_shr, m256i, i16x16, i16);
impl_vvv_s!("lasx", lasx_xvsra_w, simd_shr, m256i, i32x8, i32);
impl_vvv_s!("lasx", lasx_xvsra_d, simd_shr, m256i, i64x4, i64);
// xvsrl: the same `simd_shr` operation, but on unsigned lanes; the lane
// signedness is the only difference from the xvsra rows.
impl_vvv_s!("lasx", lasx_xvsrl_b, simd_shr, m256i, u8x32, u8);
impl_vvv_s!("lasx", lasx_xvsrl_h, simd_shr, m256i, u16x16, u16);
impl_vvv_s!("lasx", lasx_xvsrl_w, simd_shr, m256i, u32x8, u32);
impl_vvv_s!("lasx", lasx_xvsrl_d, simd_shr, m256i, u64x4, u64);

// Vector-with-immediate operations (intrinsic names ending in `i`). Two row
// shapes are used with `impl_vuv!`:
//   * shift rows: (feature, name, op, vector type, lane layout, element type)
//   * other rows: the same, followed by a literal `5`
// NOTE(review): the `5` is presumably the bit width of an unsigned immediate,
// and the shift rows presumably derive their immediate range from the element
// type — confirm both in `impl_vuv!`, which is defined outside this file.

// Immediate shifts: xvslli (`simd_shl`), xvsrai (`simd_shr`, signed lanes),
// xvsrli (`simd_shr`, unsigned lanes).
impl_vuv!("lasx", lasx_xvslli_b, simd_shl, m256i, i8x32, i8);
impl_vuv!("lasx", lasx_xvslli_h, simd_shl, m256i, i16x16, i16);
impl_vuv!("lasx", lasx_xvslli_w, simd_shl, m256i, i32x8, i32);
impl_vuv!("lasx", lasx_xvslli_d, simd_shl, m256i, i64x4, i64);
impl_vuv!("lasx", lasx_xvsrai_b, simd_shr, m256i, i8x32, i8);
impl_vuv!("lasx", lasx_xvsrai_h, simd_shr, m256i, i16x16, i16);
impl_vuv!("lasx", lasx_xvsrai_w, simd_shr, m256i, i32x8, i32);
impl_vuv!("lasx", lasx_xvsrai_d, simd_shr, m256i, i64x4, i64);
impl_vuv!("lasx", lasx_xvsrli_b, simd_shr, m256i, u8x32, u8);
impl_vuv!("lasx", lasx_xvsrli_h, simd_shr, m256i, u16x16, u16);
impl_vuv!("lasx", lasx_xvsrli_w, simd_shr, m256i, u32x8, u32);
impl_vuv!("lasx", lasx_xvsrli_d, simd_shr, m256i, u64x4, u64);
// Unsigned-lane add with immediate.
impl_vuv!("lasx", lasx_xvaddi_bu, simd_add, m256i, u8x32, u8, 5);
impl_vuv!("lasx", lasx_xvaddi_hu, simd_add, m256i, u16x16, u16, 5);
impl_vuv!("lasx", lasx_xvaddi_wu, simd_add, m256i, u32x8, u32, 5);
impl_vuv!("lasx", lasx_xvaddi_du, simd_add, m256i, u64x4, u64, 5);
// Unsigned-lane comparisons against an immediate: less-than, less-or-equal.
impl_vuv!("lasx", lasx_xvslti_bu, simd_lt, m256i, u8x32, u8, 5);
impl_vuv!("lasx", lasx_xvslti_hu, simd_lt, m256i, u16x16, u16, 5);
impl_vuv!("lasx", lasx_xvslti_wu, simd_lt, m256i, u32x8, u32, 5);
impl_vuv!("lasx", lasx_xvslti_du, simd_lt, m256i, u64x4, u64, 5);
impl_vuv!("lasx", lasx_xvslei_bu, simd_le, m256i, u8x32, u8, 5);
impl_vuv!("lasx", lasx_xvslei_hu, simd_le, m256i, u16x16, u16, 5);
impl_vuv!("lasx", lasx_xvslei_wu, simd_le, m256i, u32x8, u32, 5);
impl_vuv!("lasx", lasx_xvslei_du, simd_le, m256i, u64x4, u64, 5);
// Unsigned-lane maximum / minimum against an immediate.
impl_vuv!("lasx", lasx_xvmaxi_bu, simd_imax, m256i, u8x32, u8, 5);
impl_vuv!("lasx", lasx_xvmaxi_hu, simd_imax, m256i, u16x16, u16, 5);
impl_vuv!("lasx", lasx_xvmaxi_wu, simd_imax, m256i, u32x8, u32, 5);
impl_vuv!("lasx", lasx_xvmaxi_du, simd_imax, m256i, u64x4, u64, 5);
impl_vuv!("lasx", lasx_xvmini_bu, simd_imin, m256i, u8x32, u8, 5);
impl_vuv!("lasx", lasx_xvmini_hu, simd_imin, m256i, u16x16, u16, 5);
impl_vuv!("lasx", lasx_xvmini_wu, simd_imin, m256i, u32x8, u32, 5);
impl_vuv!("lasx", lasx_xvmini_du, simd_imin, m256i, u64x4, u64, 5);

// xvpickve2gr.{w,d,wu,du}: read one lane out of a vector with `simd_extract`.
// After the lane layout come two scalar types (identical in every row here)
// and a literal that equals log2 of the lane count: 3 for the 8-lane layouts,
// 2 for the 4-lane layouts.
// NOTE(review): the literal is presumably the bit width of the lane-index
// immediate, and the two types the lane element type and the return type —
// confirm in `impl_vug!`, which is defined outside this file.
impl_vug!("lasx", lasx_xvpickve2gr_w, simd_extract, m256i, i32x8, i32, i32, 3);
impl_vug!("lasx", lasx_xvpickve2gr_d, simd_extract, m256i, i64x4, i64, i64, 2);
impl_vug!("lasx", lasx_xvpickve2gr_wu, simd_extract, m256i, u32x8, u32, u32, 3);
impl_vug!("lasx", lasx_xvpickve2gr_du, simd_extract, m256i, u64x4, u64, u64, 2);

// Signed-lane counterparts of the immediate comparisons and max/min rows.
// Row shape: (feature, name, op, vector type, lane layout, element type, 5).
// NOTE(review): the `5` is presumably the bit width of a signed immediate —
// confirm in `impl_vsv!`, which is defined outside this file.

// Comparisons against an immediate: equal, less-than, less-or-equal.
impl_vsv!("lasx", lasx_xvseqi_b, simd_eq, m256i, i8x32, i8, 5);
impl_vsv!("lasx", lasx_xvseqi_h, simd_eq, m256i, i16x16, i16, 5);
impl_vsv!("lasx", lasx_xvseqi_w, simd_eq, m256i, i32x8, i32, 5);
impl_vsv!("lasx", lasx_xvseqi_d, simd_eq, m256i, i64x4, i64, 5);
impl_vsv!("lasx", lasx_xvslti_b, simd_lt, m256i, i8x32, i8, 5);
impl_vsv!("lasx", lasx_xvslti_h, simd_lt, m256i, i16x16, i16, 5);
impl_vsv!("lasx", lasx_xvslti_w, simd_lt, m256i, i32x8, i32, 5);
impl_vsv!("lasx", lasx_xvslti_d, simd_lt, m256i, i64x4, i64, 5);
impl_vsv!("lasx", lasx_xvslei_b, simd_le, m256i, i8x32, i8, 5);
impl_vsv!("lasx", lasx_xvslei_h, simd_le, m256i, i16x16, i16, 5);
impl_vsv!("lasx", lasx_xvslei_w, simd_le, m256i, i32x8, i32, 5);
impl_vsv!("lasx", lasx_xvslei_d, simd_le, m256i, i64x4, i64, 5);
// Maximum / minimum against an immediate.
impl_vsv!("lasx", lasx_xvmaxi_b, simd_imax, m256i, i8x32, i8, 5);
impl_vsv!("lasx", lasx_xvmaxi_h, simd_imax, m256i, i16x16, i16, 5);
impl_vsv!("lasx", lasx_xvmaxi_w, simd_imax, m256i, i32x8, i32, 5);
impl_vsv!("lasx", lasx_xvmaxi_d, simd_imax, m256i, i64x4, i64, 5);
impl_vsv!("lasx", lasx_xvmini_b, simd_imin, m256i, i8x32, i8, 5);
impl_vsv!("lasx", lasx_xvmini_h, simd_imin, m256i, i16x16, i16, 5);
impl_vsv!("lasx", lasx_xvmini_w, simd_imin, m256i, i32x8, i32, 5);
impl_vsv!("lasx", lasx_xvmini_d, simd_imin, m256i, i64x4, i64, 5);

// Three-operand multiply-accumulate style operations. Row shape matches the
// binary rows: (feature, name, op, vector type, lane layout).
// NOTE(review): `impl_vvvv!` and the helpers `simd_madd`, `simd_msub`,
// `simd_fms`, `simd_nfma` and `simd_nfms` are defined outside this file; the
// operand order and sign conventions of each must be checked there.

// Integer xvmadd / xvmsub on signed lanes.
impl_vvvv!("lasx", lasx_xvmadd_b, simd_madd, m256i, i8x32);
impl_vvvv!("lasx", lasx_xvmadd_h, simd_madd, m256i, i16x16);
impl_vvvv!("lasx", lasx_xvmadd_w, simd_madd, m256i, i32x8);
impl_vvvv!("lasx", lasx_xvmadd_d, simd_madd, m256i, i64x4);
impl_vvvv!("lasx", lasx_xvmsub_b, simd_msub, m256i, i8x32);
impl_vvvv!("lasx", lasx_xvmsub_h, simd_msub, m256i, i16x16);
impl_vvvv!("lasx", lasx_xvmsub_w, simd_msub, m256i, i32x8);
impl_vvvv!("lasx", lasx_xvmsub_d, simd_msub, m256i, i64x4);
// Floating-point fused forms on f32 / f64 lanes: xvfmadd, xvfmsub and their
// negated variants xvfnmadd, xvfnmsub.
impl_vvvv!("lasx", lasx_xvfmadd_s, simd_fma, m256, f32x8);
impl_vvvv!("lasx", lasx_xvfmadd_d, simd_fma, m256d, f64x4);
impl_vvvv!("lasx", lasx_xvfmsub_s, simd_fms, m256, f32x8);
impl_vvvv!("lasx", lasx_xvfmsub_d, simd_fms, m256d, f64x4);
impl_vvvv!("lasx", lasx_xvfnmadd_s, simd_nfma, m256, f32x8);
impl_vvvv!("lasx", lasx_xvfnmadd_d, simd_nfma, m256d, f64x4);
impl_vvvv!("lasx", lasx_xvfnmsub_s, simd_nfms, m256, f32x8);
impl_vvvv!("lasx", lasx_xvfnmsub_d, simd_nfms, m256d, f64x4);

// xvinsgr2vr.{w,d}: write a scalar into one lane of a vector with
// `simd_insert`. The trailing literal equals log2 of the lane count (3 for
// `i32x8`, 2 for `i64x4`), mirroring the `xvpickve2gr` rows.
// NOTE(review): the literal is presumably the bit width of the lane-index
// immediate, and the two scalar types the lane element type and the scalar
// parameter type — confirm in `impl_vugv!`, which is defined outside this file.
impl_vugv!("lasx", lasx_xvinsgr2vr_w, simd_insert, m256i, i32x8, i32, i32, 3);
impl_vugv!("lasx", lasx_xvinsgr2vr_d, simd_insert, m256i, i64x4, i64, i64, 2);
Loading