#![cfg_attr(feature = "as_crate", no_std)] // 我们是 std!
#![cfg_attr(feature = "as_crate", feature(platform_intrinsics), feature(portable_simd))]
#[cfg(not(feature = "as_crate"))]
use core::simd;
#[cfg(feature = "as_crate")]
use core_simd::simd;

use simd::{LaneCount, Simd, SupportedLaneCount};

// Only compiled with the `as_crate` feature. This module is re-exported under the name
// `sealed` right below, so that the `crate::sealed::Sealed` path used by this file resolves.
#[cfg(feature = "as_crate")]
mod experimental {
    /// Marker supertrait of `StdFloat`.
    ///
    /// The enclosing module is private, so in this configuration the trait cannot be named
    /// (and therefore cannot be implemented) from outside this crate.
    pub trait Sealed {}
}

#[cfg(feature = "as_crate")]
use experimental as sealed;

use crate::sealed::Sealed;

// "Platform intrinsics" are essentially "codegen intrinsics": each of them can be
// scalarized and lowered to a libm call.
//
// Every declaration below is generic over `T` and returns the same type it is given.
// NOTE(review): presumably `T` must be a SIMD vector of floats — confirm against the
// compiler's intrinsic documentation.
extern "platform-intrinsic" {
    // ceil — called by `StdFloat::ceil`
    fn simd_ceil<T>(x: T) -> T;

    // floor — called by `StdFloat::floor`
    fn simd_floor<T>(x: T) -> T;

    // round — called by `StdFloat::round`
    fn simd_round<T>(x: T) -> T;

    // trunc — called by `StdFloat::trunc`
    fn simd_trunc<T>(x: T) -> T;

    // fsqrt — called by `StdFloat::sqrt`
    fn simd_fsqrt<T>(x: T) -> T;

    // fma — called by `StdFloat::mul_add`
    fn simd_fma<T>(x: T, y: T, z: T) -> T;
}

/// This trait provides a possibly-temporary implementation of float functions
/// that may, in the absence of hardware support, canonicalize to calling an
/// operating system's `math.h` dynamically-loaded library (also known as a
/// shared object).
///
/// As these conditionally require runtime support, they should only appear in
/// binaries built assuming OS support: `std`.
///
/// However, there is no reason SIMD types, in general, need OS support, as for
/// many architectures an embedded binary may simply configure that support
/// itself. This means these types must be visible in `core` but have these
/// functions available in `std`.
///
/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but due
/// to compiler limitations, it is harder to implement this approach for
/// abstract data types like [`Simd`]. From that need, this trait is born.
///
/// It is possible this trait will be replaced in some manner in the future,
/// when either the compiler or its supporting runtime functions are improved.
/// For now this trait is available to permit experimentation with SIMD float
/// operations that may lack hardware support, such as `mul_add`.
pub trait StdFloat: Sealed + Sized {
    /// Fused multiply-add.
    /// Computes `(self * a) + b` with only one rounding error, yielding a more
    /// accurate result than an unfused multiply-add.
    ///
    /// Using `mul_add` *may* be more performant than an unfused multiply-add if
    /// the target architecture has a dedicated `fma` CPU instruction. However,
    /// this is not always true, and will be heavily dependent on designing
    /// algorithms with specific target hardware in mind.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn mul_add(self, a: Self, b: Self) -> Self {
        // SAFETY: `Self: Sealed`, and the only `Sealed` impls in this file are for
        // `Simd<f32, N>` and `Simd<f64, N>`, so the intrinsic is instantiated with a
        // float vector (assumes no other `Sealed` impls exist elsewhere in the crate).
        unsafe { simd_fma(self, a, b) }
    }

    /// Produces a vector where every lane has the square root value
    /// of the equivalently-indexed lane in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn sqrt(self) -> Self {
        // SAFETY: `Self` is a sealed float vector type; see `mul_add`.
        unsafe { simd_fsqrt(self) }
    }

    /// Returns the smallest integer greater than or equal to each lane.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn ceil(self) -> Self {
        // SAFETY: `Self` is a sealed float vector type; see `mul_add`.
        unsafe { simd_ceil(self) }
    }

    /// Returns the largest integer value less than or equal to each lane.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn floor(self) -> Self {
        // SAFETY: `Self` is a sealed float vector type; see `mul_add`.
        unsafe { simd_floor(self) }
    }

    /// Rounds each lane to the nearest integer value.
    /// Halfway cases round away from zero (e.g. `0.5` becomes `1.0`, `-1.5` becomes `-2.0`).
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn round(self) -> Self {
        // SAFETY: `Self` is a sealed float vector type; see `mul_add`.
        unsafe { simd_round(self) }
    }

    /// Returns the floating point's integer value, with its fractional part removed.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn trunc(self) -> Self {
        // SAFETY: `Self` is a sealed float vector type; see `mul_add`.
        unsafe { simd_trunc(self) }
    }

    /// Returns the floating point's fractional value, with its integer part removed.
    ///
    /// This is the only required method; every other method has a default body.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn fract(self) -> Self;
}

// Seal `StdFloat`: `Sealed` is implemented here only for `f32` and `f64` vectors whose
// lane count is supported.
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}

// All provided methods can safely be used with their default bodies; only `fract`,
// the one required method, is written out.
impl<const N: usize> StdFloat for Simd<f32, N>
where
    LaneCount<N>: SupportedLaneCount,
{
    /// Returns the fractional part of each lane: the lane minus its truncated value.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn fract(self) -> Self {
        let whole = self.trunc();
        self - whole
    }
}

// Same as the `f32` case: the provided methods are inherited unchanged and only the
// required `fract` is defined.
impl<const N: usize> StdFloat for Simd<f64, N>
where
    LaneCount<N>: SupportedLaneCount,
{
    /// Returns the fractional part of each lane: the lane minus its truncated value.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn fract(self) -> Self {
        let whole = self.trunc();
        self - whole
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use simd::*;

    /// Exercises every `StdFloat` method on one vector and pins the lane-wise results
    /// that are exactly representable, so a wrong rounding direction or a broken
    /// lowering fails the test instead of merely compiling.
    #[test]
    fn everything_works() {
        let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);
        let x2 = x + x;

        assert_eq!(x.ceil().to_array(), [1.0, 1.0, 1.0, -1.0]);
        assert_eq!(x.floor().to_array(), [0.0, 0.0, 0.0, -2.0]);
        // The halfway lanes (0.5 and -1.5) round away from zero.
        assert_eq!(x.round().to_array(), [0.0, 1.0, 1.0, -2.0]);
        assert_eq!(x.trunc().to_array(), [0.0, 0.0, 0.0, -1.0]);
        // `fract` keeps the sign of its input: -1.5 - (-1.0) == -0.5.
        assert_eq!(x.fract().to_array(), [0.1, 0.5, 0.6, -0.5]);

        // Only the lanes whose fused result is exact are compared:
        // 0.5 * 0.5 + 0.5 == 0.75 and (-1.5) * (-1.5) + (-1.5) == 0.75.
        let fused = x.mul_add(x, x).to_array();
        assert_eq!(fused[1], 0.75);
        assert_eq!(fused[3], 0.75);

        // Square roots of the values in (0, 1) stay in (0, 1); the negative lane is NaN.
        let roots = x.sqrt().to_array();
        assert!(roots[..3].iter().all(|r| *r > 0.0 && *r < 1.0));
        assert!(roots[3].is_nan());

        // Smoke check that the ordinary vector operations still resolve next to `StdFloat`.
        let _ = x2.abs() * x2;
    }
}