1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
//! Carry-less Multiplication (CLMUL)
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241).
//!
//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//!

use crate::core_arch::x86::__m128i;

#[cfg(test)]
use stdarch_test::assert_instr;

// `improper_ctypes` is silenced for this declaration (presumably because the
// `__m128i` SIMD type is passed by value across the `extern "C"` boundary).
#[allow(improper_ctypes)]
extern "C" {
    /// Raw binding to the LLVM `llvm.x86.pclmulqdq` intrinsic.
    ///
    /// `a` and `b` are the two 128-bit source operands; `imm8` is the
    /// immediate byte forwarded unchanged by `_mm_clmulepi64_si128`.
    #[link_name = "llvm.x86.pclmulqdq"]
    fn pclmulqdq(a: __m128i, b: __m128i, imm8: u8) -> __m128i;
}

/// Performs a carry-less multiplication of two 64-bit polynomials with
/// coefficients in GF(2), returning their unreduced product.
///
/// One 64-bit half of `a` is multiplied by one 64-bit half of `b`; the
/// immediate byte `IMM8` determines which halves are used. Per Intel's
/// documentation, bit 0 selects the half of `a` and bit 4 selects the half of
/// `b` (0 = low 64 bits, 1 = high 64 bits). Immediate bits other than 0 and 4
/// are ignored.
///
/// # Safety
///
/// This function is compiled with the `pclmulqdq` target feature enabled;
/// callers are expected to ensure the running CPU supports that feature.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128)
#[inline]
#[target_feature(enable = "pclmulqdq")]
#[cfg_attr(all(test, not(target_os = "linux")), assert_instr(pclmulqdq, IMM8 = 0))]
#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqlqdq, IMM8 = 0))]
#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqlqdq, IMM8 = 1))]
#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqhqdq, IMM8 = 16))]
#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, IMM8 = 17))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clmulepi64_si128<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    // Compile-time check on the immediate; as the macro name indicates, it
    // requires `IMM8` to fit in 8 unsigned bits.
    static_assert_uimm_bits!(IMM8, 8);
    // The intrinsic takes the immediate as a `u8`.
    pclmulqdq(a, b, IMM8 as u8)
}

#[cfg(test)]
mod tests {
    // The literals used in these tests are raw bit patterns, not numbers.
    // Their sign carries no meaning; they only look signed because `__m128i`
    // happens to be defined in terms of signed integers.
    #![allow(overflowing_literals)]

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// Checks `_mm_clmulepi64_si128` for the four immediates 0x00, 0x10, 0x01
    /// and 0x11, plus one hand-checkable product.
    #[simd_test(enable = "pclmulqdq")]
    unsafe fn test_mm_clmulepi64_si128() {
        // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
        let lhs = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
        let rhs = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);

        // Each expected product is named after the immediate it belongs to.
        let want_imm00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(lhs, rhs), want_imm00);

        let want_imm10 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x10>(lhs, rhs), want_imm10);

        let want_imm01 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x01>(lhs, rhs), want_imm01);

        let want_imm11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x11>(lhs, rhs), want_imm11);

        // Multiplying the value with only bit 63 set by itself must yield a
        // result with only bit 126 set (x^63 * x^63 = x^126, nothing carries).
        let top_bit = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
        let want_square = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(top_bit, top_bit), want_square);
    }
}