From 65bd9446245c6581f908770c9fb99ef27968e10d Mon Sep 17 00:00:00 2001
From: Andreas Baumann
Date: Tue, 13 Mar 2018 21:11:14 +0100
Subject: nss: quite impossible to built, tons of SSE2 stuff drawn in, patches make situation worse done toolchain and glibc (gcc without ada for now) syslinux done linux: on the way to modularized kernel
---
 ...ganize-AES-GCM-source-code-based-on-hw-sw.patch | 401 +++++++++++++++++++++
 1 file changed, 401 insertions(+)
 create mode 100644 i486-stage3/nss/nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw.patch

(limited to 'i486-stage3/nss/nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw.patch')

diff --git a/i486-stage3/nss/nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw.patch b/i486-stage3/nss/nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw.patch
new file mode 100644
index 0000000..a4315d3
--- /dev/null
+++ b/i486-stage3/nss/nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw.patch
@@ -0,0 +1,401 @@
+diff -rauN nss-3.35/nss/lib/freebl/freebl_base.gypi nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/freebl_base.gypi
+--- nss-3.35/nss/lib/freebl/freebl_base.gypi 2018-01-18 15:19:59.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/freebl_base.gypi 2018-03-13 20:41:50.250412209 +0100
+@@ -38,6 +38,7 @@
+     'blinit.c',
+     'freeblver.c',
+     'gcm.c',
++    'gcm-hw.c',
+     'hmacct.c',
+     'jpake.c',
+     'ldvector.c',
+@@ -52,6 +53,7 @@
+     'pqg.c',
+     'rawhash.c',
+     'rijndael.c',
++    'rijndael-hw.c',
+     'rsa.c',
+     'rsapkcs.c',
+     'seed.c',
+diff -rauN nss-3.35/nss/lib/freebl/gcm-hw.c nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/gcm-hw.c
+--- nss-3.35/nss/lib/freebl/gcm-hw.c 1970-01-01 01:00:00.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/gcm-hw.c 2018-03-13 20:41:50.250412209 +0100
+@@ -0,0 +1,151 @@
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this
++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
++
++#ifdef FREEBL_NO_DEPEND
++#include "stubs.h"
++#endif
++#include "gcm.h"
++#include "secerr.h"
++
++#ifdef NSS_X86_OR_X64
++#include <wmmintrin.h> /* clmul */
++#endif
++
++#define WRITE64(x, bytes) \
++    (bytes)[0] = (x) >> 56; \
++    (bytes)[1] = (x) >> 48; \
++    (bytes)[2] = (x) >> 40; \
++    (bytes)[3] = (x) >> 32; \
++    (bytes)[4] = (x) >> 24; \
++    (bytes)[5] = (x) >> 16; \
++    (bytes)[6] = (x) >> 8; \
++    (bytes)[7] = (x);
++
++SECStatus
++gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf,
++                 unsigned int maxout)
++{
++#ifdef NSS_X86_OR_X64
++    uint64_t tmp_out[2];
++    _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
++    PORT_Assert(maxout >= 16);
++    WRITE64(tmp_out[0], outbuf + 8);
++    WRITE64(tmp_out[1], outbuf);
++    return SECSuccess;
++#else
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++#endif /* NSS_X86_OR_X64 */
++}
++
++SECStatus
++gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
++                unsigned int count)
++{
++#ifdef NSS_X86_OR_X64
++    size_t i;
++    pre_align __m128i z_high post_align;
++    pre_align __m128i z_low post_align;
++    pre_align __m128i C post_align;
++    pre_align __m128i D post_align;
++    pre_align __m128i E post_align;
++    pre_align __m128i F post_align;
++    pre_align __m128i bin post_align;
++    pre_align __m128i Ci post_align;
++    pre_align __m128i tmp post_align;
++
++    for (i = 0; i < count; i++, buf += 16) {
++        bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
++                            ((uint16_t)buf[2] << 8) | buf[3],
++                            ((uint16_t)buf[4] << 8) | buf[5],
++                            ((uint16_t)buf[6] << 8) | buf[7],
++                            ((uint16_t)buf[8] << 8) | buf[9],
++                            ((uint16_t)buf[10] << 8) | buf[11],
++                            ((uint16_t)buf[12] << 8) | buf[13],
++                            ((uint16_t)buf[14] << 8) | buf[15]);
++        Ci = _mm_xor_si128(bin, ghash->x);
++
++        /* Do binary mult ghash->X = Ci * ghash->H. */
++        C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
++        D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
++        E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
++        F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
++        tmp = _mm_xor_si128(E, F);
++        z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
++        z_high = _mm_unpackhi_epi64(z_high, D);
++        z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
++        z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
++
++        /* Shift one to the left (multiply by x) as gcm spec is stupid. */
++        C = _mm_slli_si128(z_low, 8);
++        E = _mm_srli_epi64(C, 63);
++        D = _mm_slli_si128(z_high, 8);
++        F = _mm_srli_epi64(D, 63);
++        /* Carry over */
++        C = _mm_srli_si128(z_low, 8);
++        D = _mm_srli_epi64(C, 63);
++        z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
++        z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
++
++        /* Reduce */
++        C = _mm_slli_si128(z_low, 8);
++        /* D = z_low << 127 */
++        D = _mm_slli_epi64(C, 63);
++        /* E = z_low << 126 */
++        E = _mm_slli_epi64(C, 62);
++        /* F = z_low << 121 */
++        F = _mm_slli_epi64(C, 57);
++        /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
++        z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
++        C = _mm_srli_si128(z_low, 8);
++        /* D = z_low >> 1 */
++        D = _mm_slli_epi64(C, 63);
++        D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
++        /* E = z_low >> 2 */
++        E = _mm_slli_epi64(C, 62);
++        E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
++        /* F = z_low >> 7 */
++        F = _mm_slli_epi64(C, 57);
++        F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
++        /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
++        ghash->x = _mm_xor_si128(_mm_xor_si128(
++                                     _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
++                                 F);
++    }
++    return SECSuccess;
++#else
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++#endif /* NSS_X86_OR_X64 */
++}
++
++SECStatus
++gcm_HashInit_hw(gcmHashContext *ghash)
++{
++#ifdef NSS_X86_OR_X64
++    ghash->ghash_mul = gcm_HashMult_hw;
++    ghash->x = _mm_setzero_si128();
++    /* MSVC requires __m64 to load epi64. */
++    ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
++                             ghash->h_low >> 32, (uint32_t)ghash->h_low);
++    ghash->hw = PR_TRUE;
++    return SECSuccess;
++#else
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++#endif /* NSS_X86_OR_X64 */
++}
++
++SECStatus
++gcm_HashZeroX_hw(gcmHashContext *ghash)
++{
++#ifdef NSS_X86_OR_X64
++    ghash->x = _mm_setzero_si128();
++    return SECSuccess;
++#else
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++#endif /* NSS_X86_OR_X64 */
++}
++
+diff -rauN nss-3.35/nss/lib/freebl/gcm.c nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/gcm.c
+--- nss-3.35/nss/lib/freebl/gcm.c 2018-01-18 15:19:59.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/gcm.c 2018-03-13 20:41:50.251412221 +0100
+@@ -303,7 +303,7 @@
+         ghash->x_high = z_high_h;
+         ghash->x_low = z_high_l;
+     }
+-    return SECSuccess;
++    return rv;
+ }
+ #endif /* HAVE_INT128_SUPPORT */
+ 
+diff -rauN nss-3.35/nss/lib/freebl/manifest.mn nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/manifest.mn
+--- nss-3.35/nss/lib/freebl/manifest.mn 2018-01-18 15:19:59.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/manifest.mn 2018-03-13 20:41:50.252412232 +0100
+@@ -135,8 +135,10 @@
+ 	blinit.c \
+ 	fipsfreebl.c \
+ 	gcm.c \
++	gcm-hw.c \
+ 	hmacct.c \
+ 	rijndael.c \
++	rijndael-hw.c \
+ 	aeskeywrap.c \
+ 	camellia.c \
+ 	dh.c \
+diff -rauN nss-3.35/nss/lib/freebl/rijndael-hw.c nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/rijndael-hw.c
+--- nss-3.35/nss/lib/freebl/rijndael-hw.c 1970-01-01 01:00:00.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/rijndael-hw.c 2018-03-13 20:41:50.252412232 +0100
+@@ -0,0 +1,170 @@
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this
++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
++
++#ifdef FREEBL_NO_DEPEND
++#include "stubs.h"
++#endif
++#include "rijndael.h"
++#include "secerr.h"
++
++#ifdef NSS_X86_OR_X64
++#include <wmmintrin.h> /* aes-ni */
++#endif
++
++#if defined(NSS_X86_OR_X64)
++#define EXPAND_KEY128(k, rcon, res) \
++    tmp_key = _mm_aeskeygenassist_si128(k, rcon); \
++    tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \
++    tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \
++    tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
++    tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
++    res = _mm_xor_si128(tmp, tmp_key)
++
++static void
++native_key_expansion128(AESContext *cx, const unsigned char *key)
++{
++    __m128i *keySchedule = cx->keySchedule;
++    pre_align __m128i tmp_key post_align;
++    pre_align __m128i tmp post_align;
++    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
++    EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);
++    EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);
++    EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);
++    EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);
++    EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);
++    EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);
++    EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);
++    EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);
++    EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);
++    EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);
++}
++
++#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \
++    tmp2 = _mm_slli_si128(k0, 4); \
++    tmp1 = _mm_xor_si128(k0, tmp2); \
++    tmp2 = _mm_slli_si128(tmp2, 4); \
++    tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
++    tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \
++    res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55))
++
++#define EXPAND_KEY192_PART2(res, k1, k2) \
++    tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \
++    res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF))
++
++#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \
++    EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \
++    EXPAND_KEY192_PART2(carry, res1, tmp3); \
++    res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \
++                                           _mm_castsi128_pd(tmp3), 0)); \
++    res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \
++                                           _mm_castsi128_pd(carry), 1)); \
++    EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2)
++
++static void
++native_key_expansion192(AESContext *cx, const unsigned char *key)
++{
++    __m128i *keySchedule = cx->keySchedule;
++    pre_align __m128i tmp1 post_align;
++    pre_align __m128i tmp2 post_align;
++    pre_align __m128i tmp3 post_align;
++    pre_align __m128i carry post_align;
++    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
++    keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
++    EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],
++                  keySchedule[3], carry, 0x1, 0x2);
++    EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);
++    EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],
++                  keySchedule[6], carry, 0x4, 0x8);
++    EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);
++    EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],
++                  keySchedule[9], carry, 0x10, 0x20);
++    EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);
++    EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],
++                  keySchedule[12], carry, 0x40, 0x80);
++}
++
++#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \
++    tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \
++    tmp2 = _mm_slli_si128(k1x, 4); \
++    tmp1 = _mm_xor_si128(k1x, tmp2); \
++    tmp2 = _mm_slli_si128(tmp2, 4); \
++    tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
++    res = _mm_xor_si128(tmp1, tmp_key);
++
++#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \
++    EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \
++    EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA)
++
++static void
++native_key_expansion256(AESContext *cx, const unsigned char *key)
++{
++    __m128i *keySchedule = cx->keySchedule;
++    pre_align __m128i tmp_key post_align;
++    pre_align __m128i tmp1 post_align;
++    pre_align __m128i tmp2 post_align;
++    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
++    keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
++    EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],
++                  keySchedule[1], 0x01);
++    EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],
++                  keySchedule[3], 0x02);
++    EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],
++                  keySchedule[5], 0x04);
++    EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],
++                  keySchedule[7], 0x08);
++    EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],
++                  keySchedule[9], 0x10);
++    EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],
++                  keySchedule[11], 0x20);
++    EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],
++                       keySchedule[13], 0xFF);
++}
++
++#endif /* NSS_X86_OR_X64 */
++
++/*
++ * AES key expansion using aes-ni instructions.
++ */
++void
++rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
++                              unsigned int Nk)
++{
++#ifdef NSS_X86_OR_X64
++    switch (Nk) {
++        case 4:
++            native_key_expansion128(cx, key);
++            return;
++        case 6:
++            native_key_expansion192(cx, key);
++            return;
++        case 8:
++            native_key_expansion256(cx, key);
++            return;
++        default:
++            /* This shouldn't happen. */
++            PORT_Assert(0);
++    }
++#else
++    PORT_Assert(0);
++#endif /* NSS_X86_OR_X64 */
++}
++
++void
++rijndael_native_encryptBlock(AESContext *cx,
++                             unsigned char *output,
++                             const unsigned char *input)
++{
++#ifdef NSS_X86_OR_X64
++    int i;
++    pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);
++    m = _mm_xor_si128(m, cx->keySchedule[0]);
++    for (i = 1; i < cx->Nr; ++i) {
++        m = _mm_aesenc_si128(m, cx->keySchedule[i]);
++    }
++    m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]);
++    _mm_storeu_si128((__m128i *)output, m);
++#else
++    PORT_Assert(0);
++#endif /* NSS_X86_OR_X64 */
++}
+diff -rauN nss-3.35/nss/lib/freebl/rijndael.c nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/rijndael.c
+--- nss-3.35/nss/lib/freebl/rijndael.c 2018-01-18 15:19:59.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/rijndael.c 2018-03-13 20:41:50.253412244 +0100
+@@ -55,6 +55,13 @@
+ }
+ #endif /* NSS_X86_OR_X64 */
+ 
++/* Forward declarations */
++void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
++                                   unsigned int Nk);
++void rijndael_native_encryptBlock(AESContext *cx,
++                                  unsigned char *output,
++                                  const unsigned char *input);
++
+ /*
+  * There are currently three ways to build this code, varying in performance
+  * and code size.
+diff -rauN nss-3.35/nss/lib/freebl/rijndael.h nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/rijndael.h
+--- nss-3.35/nss/lib/freebl/rijndael.h 2018-01-18 15:19:59.000000000 +0100
++++ nss-3.35-freebl-Reorganize-AES-GCM-source-code-based-on-hw-sw-patch/nss/lib/freebl/rijndael.h 2018-03-13 20:41:50.254412255 +0100
+@@ -80,4 +80,6 @@
+     void *mem; /* Start of the allocated memory to free. */
+ };
+ 
++SEC_END_PROTOS
++
+ #endif /* _RIJNDAEL_H_ */
-- 
cgit v1.2.3-70-g09d2