Merge pull request #1879 from heiher/loong-simd-unified-types

loongarch: Use unified data types for SIMD intrinsics
This commit is contained in:
Folkert de Vries
2025-07-25 13:46:59 +00:00
committed by GitHub
5 changed files with 4782 additions and 4524 deletions

View File

@@ -1,33 +1,140 @@
// 256-bit LoongArch SIMD vector types, declared through the `types!` macro.
// NOTE(review): the per-lane structs in this invocation (`v32i8`, `v16i16`,
// ..., `v4f64`) share their names with the transitional `pub type` aliases
// declared further down in this file. This looks like removed and added diff
// lines shown together -- confirm which declarations are live.
types! {
#![unstable(feature = "stdarch_loongarch", issue = "117427")]
/// LOONGARCH-specific 256-bit wide vector of 32 packed `i8`.
pub struct v32i8(32 x pub(crate) i8);
/// 256-bit wide integer vector type, LoongArch-specific
///
/// This type is the same as the `__m256i` type defined in `lasxintrin.h`,
/// representing a 256-bit SIMD register. Usage of this type typically
/// occurs in conjunction with the `lasx` target features for LoongArch.
///
/// Internally this type may be viewed as:
///
/// * `i8x32` - thirty two `i8` values packed together
/// * `i16x16` - sixteen `i16` values packed together
/// * `i32x8` - eight `i32` values packed together
/// * `i64x4` - four `i64` values packed together
///
/// (as well as unsigned versions). Each intrinsic may interpret the
/// internal bits differently, check the documentation of the intrinsic
/// to see how it's being used.
///
/// The in-memory representation of this type is the same as the one of an
/// equivalent array (i.e. the in-memory order of elements is the same, and
/// there is no padding); however, the alignment is different and equal to
/// the size of the type. Note that the ABI for function calls may *not* be
/// the same.
///
/// Note that this means that an instance of `m256i` typically just means
/// a "bag of bits" which is left up to interpretation at the point of use.
///
/// Most intrinsics using `m256i` are prefixed with `lasx_` and the integer
/// types tend to correspond to suffixes like "b", "h", "w" or "d".
pub struct m256i(4 x i64);
/// LOONGARCH-specific 256-bit wide vector of 16 packed `i16`.
pub struct v16i16(16 x pub(crate) i16);
/// 256-bit wide set of eight `f32` values, LoongArch-specific
///
/// This type is the same as the `__m256` type defined in `lasxintrin.h`,
/// representing a 256-bit SIMD register which internally consists of
/// eight packed `f32` instances. Usage of this type typically occurs in
/// conjunction with the `lasx` target features for LoongArch.
///
/// Note that unlike `m256i`, the integer version of the 256-bit registers,
/// this `m256` type has *one* interpretation. Each instance of `m256`
/// always corresponds to `f32x8`, or eight `f32` values packed together.
///
/// The in-memory representation of this type is the same as the one of an
/// equivalent array (i.e. the in-memory order of elements is the same, and
/// there is no padding between two consecutive elements); however, the
/// alignment is different and equal to the size of the type. Note that the
/// ABI for function calls may *not* be the same.
///
/// Most intrinsics using `m256` are prefixed with `lasx_` and are
/// suffixed with "s".
pub struct m256(8 x f32);
/// LOONGARCH-specific 256-bit wide vector of 8 packed `i32`.
pub struct v8i32(8 x pub(crate) i32);
/// 256-bit wide set of four `f64` values, LoongArch-specific
///
/// This type is the same as the `__m256d` type defined in `lasxintrin.h`,
/// representing a 256-bit SIMD register which internally consists of
/// four packed `f64` instances. Usage of this type typically occurs in
/// conjunction with the `lasx` target features for LoongArch.
///
/// Note that unlike `m256i`, the integer version of the 256-bit registers,
/// this `m256d` type has *one* interpretation. Each instance of `m256d`
/// always corresponds to `f64x4`, or four `f64` values packed together.
///
/// The in-memory representation of this type is the same as the one of an
/// equivalent array (i.e. the in-memory order of elements is the same, and
/// there is no padding); however, the alignment is different and equal to
/// the size of the type. Note that the ABI for function calls may *not* be
/// the same.
///
/// Most intrinsics using `m256d` are prefixed with `lasx_` and are suffixed
/// with "d". Not to be confused with "d" which is used for `m256i`.
pub struct m256d(4 x f64);
/// LOONGARCH-specific 256-bit wide vector of 4 packed `i64`.
pub struct v4i64(4 x pub(crate) i64);
/// LOONGARCH-specific 256-bit wide vector of 32 packed `u8`.
pub struct v32u8(32 x pub(crate) u8);
/// LOONGARCH-specific 256-bit wide vector of 16 packed `u16`.
pub struct v16u16(16 x pub(crate) u16);
/// LOONGARCH-specific 256-bit wide vector of 8 packed `u32`.
pub struct v8u32(8 x pub(crate) u32);
/// LOONGARCH-specific 256-bit wide vector of 4 packed `u64`.
pub struct v4u64(4 x pub(crate) u64);
/// LOONGARCH-specific 256-bit wide vector of 8 packed `f32`.
pub struct v8f32(8 x pub(crate) f32);
/// LOONGARCH-specific 256-bit wide vector of 4 packed `f64`.
pub struct v4f64(4 x pub(crate) f64);
}
// Crate-private `#[repr(simd)]` vectors, one per lane layout of a 256-bit
// register. `non_camel_case_types` is allowed because the `__vNxM` names are
// deliberately lowercase.
// NOTE(review): presumably these are the lane-typed views used internally when
// calling the intrinsics -- confirm against their users elsewhere in the crate.

/// Crate-private SIMD vector of 32 `i8` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v32i8([i8; 32]);
/// Crate-private SIMD vector of 16 `i16` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v16i16([i16; 16]);
/// Crate-private SIMD vector of 8 `i32` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v8i32([i32; 8]);
/// Crate-private SIMD vector of 4 `i64` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v4i64([i64; 4]);
/// Crate-private SIMD vector of 32 `u8` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v32u8([u8; 32]);
/// Crate-private SIMD vector of 16 `u16` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v16u16([u16; 16]);
/// Crate-private SIMD vector of 8 `u32` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v8u32([u32; 8]);
/// Crate-private SIMD vector of 4 `u64` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v4u64([u64; 4]);
/// Crate-private SIMD vector of 8 `f32` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v8f32([f32; 8]);
/// Crate-private SIMD vector of 4 `f64` lanes (256 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v4f64([f64; 4]);
// These type aliases are provided solely for transitional compatibility.
// They are temporary and will be removed when appropriate.
//
// Every integer lane layout, signed or unsigned, aliases the single `m256i`
// type; `v8f32` aliases `m256` and `v4f64` aliases `m256d`. Because they are
// plain aliases, the integer names are all the same type and are fully
// interchangeable.

/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v32i8 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v16i16 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v8i32 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v4i64 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v32u8 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v16u16 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v8u32 = m256i;
/// Transitional compatibility alias for `m256i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v4u64 = m256i;
/// Transitional compatibility alias for `m256`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v8f32 = m256;
/// Transitional compatibility alias for `m256d`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v4f64 = m256d;

View File

@@ -1,33 +1,140 @@
// 128-bit LoongArch SIMD vector types, declared through the `types!` macro.
// NOTE(review): the per-lane structs in this invocation (`v16i8`, `v8i16`,
// ..., `v2f64`) share their names with the transitional `pub type` aliases
// declared further down in this file. This looks like removed and added diff
// lines shown together -- confirm which declarations are live.
types! {
#![unstable(feature = "stdarch_loongarch", issue = "117427")]
/// LOONGARCH-specific 128-bit wide vector of 16 packed `i8`.
pub struct v16i8(16 x pub(crate) i8);
/// 128-bit wide integer vector type, LoongArch-specific
///
/// This type is the same as the `__m128i` type defined in `lsxintrin.h`,
/// representing a 128-bit SIMD register. Usage of this type typically
/// occurs in conjunction with the `lsx` and higher target features for
/// LoongArch.
///
/// Internally this type may be viewed as:
///
/// * `i8x16` - sixteen `i8` values packed together
/// * `i16x8` - eight `i16` values packed together
/// * `i32x4` - four `i32` values packed together
/// * `i64x2` - two `i64` values packed together
///
/// (as well as unsigned versions). Each intrinsic may interpret the
/// internal bits differently, check the documentation of the intrinsic
/// to see how it's being used.
///
/// The in-memory representation of this type is the same as the one of an
/// equivalent array (i.e. the in-memory order of elements is the same, and
/// there is no padding); however, the alignment is different and equal to
/// the size of the type. Note that the ABI for function calls may *not* be
/// the same.
///
/// Note that this means that an instance of `m128i` typically just means
/// a "bag of bits" which is left up to interpretation at the point of use.
///
/// Most intrinsics using `m128i` are prefixed with `lsx_` and the integer
/// types tend to correspond to suffixes like "b", "h", "w" or "d".
pub struct m128i(2 x i64);
/// LOONGARCH-specific 128-bit wide vector of 8 packed `i16`.
pub struct v8i16(8 x pub(crate) i16);
/// 128-bit wide set of four `f32` values, LoongArch-specific
///
/// This type is the same as the `__m128` type defined in `lsxintrin.h`,
/// representing a 128-bit SIMD register which internally consists of
/// four packed `f32` instances. Usage of this type typically occurs in
/// conjunction with the `lsx` and higher target features for LoongArch.
///
/// Note that unlike `m128i`, the integer version of the 128-bit registers,
/// this `m128` type has *one* interpretation. Each instance of `m128`
/// always corresponds to `f32x4`, or four `f32` values packed together.
///
/// The in-memory representation of this type is the same as the one of an
/// equivalent array (i.e. the in-memory order of elements is the same, and
/// there is no padding); however, the alignment is different and equal to
/// the size of the type. Note that the ABI for function calls may *not* be
/// the same.
///
/// Most intrinsics using `m128` are prefixed with `lsx_` and are suffixed
/// with "s".
pub struct m128(4 x f32);
/// LOONGARCH-specific 128-bit wide vector of 4 packed `i32`.
pub struct v4i32(4 x pub(crate) i32);
/// LOONGARCH-specific 128-bit wide vector of 2 packed `i64`.
pub struct v2i64(2 x pub(crate) i64);
/// LOONGARCH-specific 128-bit wide vector of 16 packed `u8`.
pub struct v16u8(16 x pub(crate) u8);
/// LOONGARCH-specific 128-bit wide vector of 8 packed `u16`.
pub struct v8u16(8 x pub(crate) u16);
/// LOONGARCH-specific 128-bit wide vector of 4 packed `u32`.
pub struct v4u32(4 x pub(crate) u32);
/// LOONGARCH-specific 128-bit wide vector of 2 packed `u64`.
pub struct v2u64(2 x pub(crate) u64);
/// LOONGARCH-specific 128-bit wide vector of 4 packed `f32`.
pub struct v4f32(4 x pub(crate) f32);
/// LOONGARCH-specific 128-bit wide vector of 2 packed `f64`.
pub struct v2f64(2 x pub(crate) f64);
/// 128-bit wide set of two `f64` values, LoongArch-specific
///
/// This type is the same as the `__m128d` type defined in `lsxintrin.h`,
/// representing a 128-bit SIMD register which internally consists of
/// two packed `f64` instances. Usage of this type typically occurs in
/// conjunction with the `lsx` and higher target features for LoongArch.
///
/// Note that unlike `m128i`, the integer version of the 128-bit registers,
/// this `m128d` type has *one* interpretation. Each instance of `m128d`
/// always corresponds to `f64x2`, or two `f64` values packed together.
///
/// The in-memory representation of this type is the same as the one of an
/// equivalent array (i.e. the in-memory order of elements is the same, and
/// there is no padding); however, the alignment is different and equal to
/// the size of the type. Note that the ABI for function calls may *not* be
/// the same.
///
/// Most intrinsics using `m128d` are prefixed with `lsx_` and are suffixed
/// with "d". Not to be confused with "d" which is used for `m128i`.
pub struct m128d(2 x f64);
}
// Crate-private `#[repr(simd)]` vectors, one per lane layout of a 128-bit
// register. `non_camel_case_types` is allowed because the `__vNxM` names are
// deliberately lowercase.
// NOTE(review): presumably these are the lane-typed views used internally when
// calling the intrinsics -- confirm against their users elsewhere in the crate.

/// Crate-private SIMD vector of 16 `i8` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v16i8([i8; 16]);
/// Crate-private SIMD vector of 8 `i16` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v8i16([i16; 8]);
/// Crate-private SIMD vector of 4 `i32` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v4i32([i32; 4]);
/// Crate-private SIMD vector of 2 `i64` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v2i64([i64; 2]);
/// Crate-private SIMD vector of 16 `u8` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v16u8([u8; 16]);
/// Crate-private SIMD vector of 8 `u16` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v8u16([u16; 8]);
/// Crate-private SIMD vector of 4 `u32` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v4u32([u32; 4]);
/// Crate-private SIMD vector of 2 `u64` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v2u64([u64; 2]);
/// Crate-private SIMD vector of 4 `f32` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v4f32([f32; 4]);
/// Crate-private SIMD vector of 2 `f64` lanes (128 bits).
#[allow(non_camel_case_types)]
#[repr(simd)]
pub(crate) struct __v2f64([f64; 2]);
// These type aliases are provided solely for transitional compatibility.
// They are temporary and will be removed when appropriate.
//
// Every integer lane layout, signed or unsigned, aliases the single `m128i`
// type; `v4f32` aliases `m128` and `v2f64` aliases `m128d`. Because they are
// plain aliases, the integer names are all the same type and are fully
// interchangeable.

/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v16i8 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v8i16 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v4i32 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v2i64 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v16u8 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v8u16 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v4u32 = m128i;
/// Transitional compatibility alias for `m128i`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v2u64 = m128i;
/// Transitional compatibility alias for `m128`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v4f32 = m128;
/// Transitional compatibility alias for `m128d`.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub type v2f64 = m128d;