diff --git a/arm-missing-files.diff b/arm-missing-files.diff deleted file mode 100644 index 4f86fbe..0000000 --- a/arm-missing-files.diff +++ /dev/null @@ -1,1618 +0,0 @@ ---- cipher/blowfish-arm.S -+++ cipher/blowfish-arm.S -@@ -0,0 +1,743 @@ -+/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher -+ * -+ * Copyright (C) 2013 Jussi Kivilinna -+ * -+ * This file is part of Libgcrypt. -+ * -+ * Libgcrypt is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU Lesser General Public License as -+ * published by the Free Software Foundation; either version 2.1 of -+ * the License, or (at your option) any later version. -+ * -+ * Libgcrypt is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this program; if not, see . 
-+ */ -+ -+#include -+ -+#if defined(__ARMEL__) -+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS -+ -+.text -+ -+.syntax unified -+.arm -+ -+/* structure of crypto context */ -+#define s0 0 -+#define s1 (s0 + (1 * 256) * 4) -+#define s2 (s0 + (2 * 256) * 4) -+#define s3 (s0 + (3 * 256) * 4) -+#define p (s3 + (1 * 256) * 4) -+ -+/* register macros */ -+#define CTXs0 %r0 -+#define CTXs1 %r9 -+#define CTXs2 %r8 -+#define CTXs3 %r10 -+#define RMASK %lr -+#define RKEYL %r2 -+#define RKEYR %ip -+ -+#define RL0 %r3 -+#define RR0 %r4 -+ -+#define RL1 %r9 -+#define RR1 %r10 -+ -+#define RT0 %r11 -+#define RT1 %r7 -+#define RT2 %r5 -+#define RT3 %r6 -+ -+/* helper macros */ -+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ -+ ldrb rout, [rsrc, #((offs) + 0)]; \ -+ ldrb rtmp, [rsrc, #((offs) + 1)]; \ -+ orr rout, rout, rtmp, lsl #8; \ -+ ldrb rtmp, [rsrc, #((offs) + 2)]; \ -+ orr rout, rout, rtmp, lsl #16; \ -+ ldrb rtmp, [rsrc, #((offs) + 3)]; \ -+ orr rout, rout, rtmp, lsl #24; -+ -+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ -+ mov rtmp0, rin, lsr #8; \ -+ strb rin, [rdst, #((offs) + 0)]; \ -+ mov rtmp1, rin, lsr #16; \ -+ strb rtmp0, [rdst, #((offs) + 1)]; \ -+ mov rtmp0, rin, lsr #24; \ -+ strb rtmp1, [rdst, #((offs) + 2)]; \ -+ strb rtmp0, [rdst, #((offs) + 3)]; -+ -+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ -+ ldrb rout, [rsrc, #((offs) + 3)]; \ -+ ldrb rtmp, [rsrc, #((offs) + 2)]; \ -+ orr rout, rout, rtmp, lsl #8; \ -+ ldrb rtmp, [rsrc, #((offs) + 1)]; \ -+ orr rout, rout, rtmp, lsl #16; \ -+ ldrb rtmp, [rsrc, #((offs) + 0)]; \ -+ orr rout, rout, rtmp, lsl #24; -+ -+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \ -+ mov rtmp0, rin, lsr #8; \ -+ strb rin, [rdst, #((offs) + 3)]; \ -+ mov rtmp1, rin, lsr #16; \ -+ strb rtmp0, [rdst, #((offs) + 2)]; \ -+ mov rtmp0, rin, lsr #24; \ -+ strb rtmp1, [rdst, #((offs) + 1)]; \ -+ strb rtmp0, [rdst, #((offs) + 0)]; -+ -+#ifdef __ARMEL__ -+ #define ldr_unaligned_host ldr_unaligned_le -+ #define 
str_unaligned_host str_unaligned_le -+ -+ /* bswap on little-endian */ -+#ifdef HAVE_ARM_ARCH_V6 -+ #define host_to_be(reg, rtmp) \ -+ rev reg, reg; -+ #define be_to_host(reg, rtmp) \ -+ rev reg, reg; -+#else -+ #define host_to_be(reg, rtmp) \ -+ eor rtmp, reg, reg, ror #16; \ -+ mov rtmp, rtmp, lsr #8; \ -+ bic rtmp, rtmp, #65280; \ -+ eor reg, rtmp, reg, ror #8; -+ #define be_to_host(reg, rtmp) \ -+ eor rtmp, reg, reg, ror #16; \ -+ mov rtmp, rtmp, lsr #8; \ -+ bic rtmp, rtmp, #65280; \ -+ eor reg, rtmp, reg, ror #8; -+#endif -+#else -+ #define ldr_unaligned_host ldr_unaligned_be -+ #define str_unaligned_host str_unaligned_be -+ -+ /* nop on big-endian */ -+ #define host_to_be(reg, rtmp) /*_*/ -+ #define be_to_host(reg, rtmp) /*_*/ -+#endif -+ -+#define host_to_host(x, y) /*_*/ -+ -+/*********************************************************************** -+ * 1-way blowfish -+ ***********************************************************************/ -+#define F(l, r) \ -+ and RT0, RMASK, l, lsr#(24 - 2); \ -+ and RT1, RMASK, l, lsr#(16 - 2); \ -+ ldr RT0, [CTXs0, RT0]; \ -+ and RT2, RMASK, l, lsr#(8 - 2); \ -+ ldr RT1, [CTXs1, RT1]; \ -+ and RT3, RMASK, l, lsl#2; \ -+ ldr RT2, [CTXs2, RT2]; \ -+ add RT0, RT1; \ -+ ldr RT3, [CTXs3, RT3]; \ -+ eor RT0, RT2; \ -+ add RT0, RT3; \ -+ eor r, RT0; -+ -+#define load_roundkey_enc(n) \ -+ ldr RKEYL, [CTXs2, #((p - s2) + (4 * (n) + 0))]; \ -+ ldr RKEYR, [CTXs2, #((p - s2) + (4 * (n) + 4))]; -+ -+#define add_roundkey_enc() \ -+ eor RL0, RKEYL; \ -+ eor RR0, RKEYR; -+ -+#define round_enc(n) \ -+ add_roundkey_enc(); \ -+ load_roundkey_enc(n); \ -+ \ -+ F(RL0, RR0); \ -+ F(RR0, RL0); -+ -+#define load_roundkey_dec(n) \ -+ ldr RKEYL, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 4))]; \ -+ ldr RKEYR, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 0))]; -+ -+#define add_roundkey_dec() \ -+ eor RL0, RKEYL; \ -+ eor RR0, RKEYR; -+ -+#define round_dec(n) \ -+ add_roundkey_dec(); \ -+ load_roundkey_dec(n); \ -+ \ -+ F(RL0, RR0); \ -+ F(RR0, RL0); -+ 
-+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \ -+ ldr l0, [rin, #((offs) + 0)]; \ -+ ldr r0, [rin, #((offs) + 4)]; \ -+ convert(l0, rtmp); \ -+ convert(r0, rtmp); -+ -+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \ -+ convert(l0, rtmp); \ -+ convert(r0, rtmp); \ -+ str l0, [rout, #((offs) + 0)]; \ -+ str r0, [rout, #((offs) + 4)]; -+ -+#ifdef __ARM_FEATURE_UNALIGNED -+ /* unaligned word reads allowed */ -+ #define read_block(rin, offs, l0, r0, rtmp0) \ -+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0) -+ -+ #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \ -+ write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0) -+ -+ #define read_block_host(rin, offs, l0, r0, rtmp0) \ -+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0) -+ -+ #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \ -+ write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0) -+#else -+ /* need to handle unaligned reads by byte reads */ -+ #define read_block(rin, offs, l0, r0, rtmp0) \ -+ tst rin, #3; \ -+ beq 1f; \ -+ ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \ -+ ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \ -+ b 2f; \ -+ 1:;\ -+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \ -+ 2:; -+ -+ #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \ -+ tst rout, #3; \ -+ beq 1f; \ -+ str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \ -+ str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \ -+ b 2f; \ -+ 1:;\ -+ write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \ -+ 2:; -+ -+ #define read_block_host(rin, offs, l0, r0, rtmp0) \ -+ tst rin, #3; \ -+ beq 1f; \ -+ ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \ -+ ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \ -+ b 2f; \ -+ 1:;\ -+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \ -+ 2:; -+ -+ #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \ -+ tst rout, #3; \ -+ beq 1f; \ -+ str_unaligned_host(l0, rout, (offs) + 0, 
rtmp0, rtmp1); \ -+ str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \ -+ b 2f; \ -+ 1:;\ -+ write_block_aligned(rout, offs, l0, r0, host_to_host); \ -+ 2:; -+#endif -+ -+.align 3 -+.type __blowfish_enc_blk1,%function; -+ -+__blowfish_enc_blk1: -+ /* input: -+ * preloaded: CTX -+ * [RL0, RR0]: src -+ * output: -+ * [RR0, RL0]: dst -+ */ -+ push {%lr}; -+ -+ add CTXs1, CTXs0, #(s1 - s0); -+ add CTXs2, CTXs0, #(s2 - s0); -+ mov RMASK, #(0xff << 2); /* byte mask */ -+ add CTXs3, CTXs1, #(s3 - s1); -+ -+ load_roundkey_enc(0); -+ round_enc(2); -+ round_enc(4); -+ round_enc(6); -+ round_enc(8); -+ round_enc(10); -+ round_enc(12); -+ round_enc(14); -+ round_enc(16); -+ add_roundkey_enc(); -+ -+ pop {%pc}; -+.size __blowfish_enc_blk1,.-__blowfish_enc_blk1; -+ -+.align 8 -+.globl _gcry_blowfish_arm_do_encrypt -+.type _gcry_blowfish_arm_do_encrypt,%function; -+ -+_gcry_blowfish_arm_do_encrypt: -+ /* input: -+ * %r0: ctx, CTX -+ * %r1: u32 *ret_xl -+ * %r2: u32 *ret_xr -+ */ -+ push {%r2, %r4-%r11, %ip, %lr}; -+ -+ ldr RL0, [%r1]; -+ ldr RR0, [%r2]; -+ -+ bl __blowfish_enc_blk1; -+ -+ pop {%r2}; -+ str RR0, [%r1]; -+ str RL0, [%r2]; -+ -+ pop {%r4-%r11, %ip, %pc}; -+.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt; -+ -+.align 3 -+.globl _gcry_blowfish_arm_encrypt_block -+.type _gcry_blowfish_arm_encrypt_block,%function; -+ -+_gcry_blowfish_arm_encrypt_block: -+ /* input: -+ * %r0: ctx, CTX -+ * %r1: dst -+ * %r2: src -+ */ -+ push {%r4-%r11, %ip, %lr}; -+ -+ read_block(%r2, 0, RL0, RR0, RT0); -+ -+ bl __blowfish_enc_blk1; -+ -+ write_block(%r1, 0, RR0, RL0, RT0, RT1); -+ -+ pop {%r4-%r11, %ip, %pc}; -+.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block; -+ -+.align 3 -+.globl _gcry_blowfish_arm_decrypt_block -+.type _gcry_blowfish_arm_decrypt_block,%function; -+ -+_gcry_blowfish_arm_decrypt_block: -+ /* input: -+ * %r0: ctx, CTX -+ * %r1: dst -+ * %r2: src -+ */ -+ push {%r4-%r11, %ip, %lr}; -+ -+ add CTXs1, CTXs0, #(s1 - s0); 
-+ add CTXs2, CTXs0, #(s2 - s0); -+ mov RMASK, #(0xff << 2); /* byte mask */ -+ add CTXs3, CTXs1, #(s3 - s1); -+ -+ read_block(%r2, 0, RL0, RR0, RT0); -+ -+ load_roundkey_dec(17); -+ round_dec(15); -+ round_dec(13); -+ round_dec(11); -+ round_dec(9); -+ round_dec(7); -+ round_dec(5); -+ round_dec(3); -+ round_dec(1); -+ add_roundkey_dec(); -+ -+ write_block(%r1, 0, RR0, RL0, RT0, RT1); -+ -+ pop {%r4-%r11, %ip, %pc}; -+.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block; -+ -+/*********************************************************************** -+ * 2-way blowfish -+ ***********************************************************************/ -+#define F2(n, l0, r0, l1, r1, set_nextk, dec) \ -+ \ -+ and RT0, RMASK, l0, lsr#(24 - 2); \ -+ and RT1, RMASK, l0, lsr#(16 - 2); \ -+ and RT2, RMASK, l0, lsr#(8 - 2); \ -+ add RT1, #(s1 - s0); \ -+ \ -+ ldr RT0, [CTXs0, RT0]; \ -+ and RT3, RMASK, l0, lsl#2; \ -+ ldr RT1, [CTXs0, RT1]; \ -+ add RT3, #(s3 - s2); \ -+ ldr RT2, [CTXs2, RT2]; \ -+ add RT0, RT1; \ -+ ldr RT3, [CTXs2, RT3]; \ -+ \ -+ and RT1, RMASK, l1, lsr#(24 - 2); \ -+ eor RT0, RT2; \ -+ and RT2, RMASK, l1, lsr#(16 - 2); \ -+ add RT0, RT3; \ -+ add RT2, #(s1 - s0); \ -+ and RT3, RMASK, l1, lsr#(8 - 2); \ -+ eor r0, RT0; \ -+ \ -+ ldr RT1, [CTXs0, RT1]; \ -+ and RT0, RMASK, l1, lsl#2; \ -+ ldr RT2, [CTXs0, RT2]; \ -+ add RT0, #(s3 - s2); \ -+ ldr RT3, [CTXs2, RT3]; \ -+ add RT1, RT2; \ -+ ldr RT0, [CTXs2, RT0]; \ -+ \ -+ and RT2, RMASK, r0, lsr#(24 - 2); \ -+ eor RT1, RT3; \ -+ and RT3, RMASK, r0, lsr#(16 - 2); \ -+ add RT1, RT0; \ -+ add RT3, #(s1 - s0); \ -+ and RT0, RMASK, r0, lsr#(8 - 2); \ -+ eor r1, RT1; \ -+ \ -+ ldr RT2, [CTXs0, RT2]; \ -+ and RT1, RMASK, r0, lsl#2; \ -+ ldr RT3, [CTXs0, RT3]; \ -+ add RT1, #(s3 - s2); \ -+ ldr RT0, [CTXs2, RT0]; \ -+ add RT2, RT3; \ -+ ldr RT1, [CTXs2, RT1]; \ -+ \ -+ and RT3, RMASK, r1, lsr#(24 - 2); \ -+ eor RT2, RT0; \ -+ and RT0, RMASK, r1, lsr#(16 - 2); \ -+ add RT2, RT1; \ -+ add RT0, #(s1 - 
s0); \ -+ and RT1, RMASK, r1, lsr#(8 - 2); \ -+ eor l0, RT2; \ -+ \ -+ ldr RT3, [CTXs0, RT3]; \ -+ and RT2, RMASK, r1, lsl#2; \ -+ ldr RT0, [CTXs0, RT0]; \ -+ add RT2, #(s3 - s2); \ -+ ldr RT1, [CTXs2, RT1]; \ -+ eor l1, RKEYL; \ -+ ldr RT2, [CTXs2, RT2]; \ -+ \ -+ eor r0, RKEYR; \ -+ add RT3, RT0; \ -+ eor r1, RKEYR; \ -+ eor RT3, RT1; \ -+ eor l0, RKEYL; \ -+ add RT3, RT2; \ -+ set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \ -+ eor l1, RT3; \ -+ set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4))); -+ -+#define load_n_add_roundkey_enc2(n) \ -+ load_roundkey_enc(n); \ -+ eor RL0, RKEYL; \ -+ eor RR0, RKEYR; \ -+ eor RL1, RKEYL; \ -+ eor RR1, RKEYR; \ -+ load_roundkey_enc((n) + 2); -+ -+#define next_key(reg, offs) \ -+ ldr reg, [CTXs2, #(offs)]; -+ -+#define dummy(x, y) /* do nothing */ -+ -+#define round_enc2(n, load_next_key) \ -+ F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0); -+ -+#define load_n_add_roundkey_dec2(n) \ -+ load_roundkey_dec(n); \ -+ eor RL0, RKEYL; \ -+ eor RR0, RKEYR; \ -+ eor RL1, RKEYL; \ -+ eor RR1, RKEYR; \ -+ load_roundkey_dec((n) - 2); -+ -+#define round_dec2(n, load_next_key) \ -+ F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1); -+ -+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ -+ ldr l0, [rin, #(0)]; \ -+ ldr r0, [rin, #(4)]; \ -+ convert(l0, rtmp); \ -+ ldr l1, [rin, #(8)]; \ -+ convert(r0, rtmp); \ -+ ldr r1, [rin, #(12)]; \ -+ convert(l1, rtmp); \ -+ convert(r1, rtmp); -+ -+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ -+ convert(l0, rtmp); \ -+ convert(r0, rtmp); \ -+ convert(l1, rtmp); \ -+ str l0, [rout, #(0)]; \ -+ convert(r1, rtmp); \ -+ str r0, [rout, #(4)]; \ -+ str l1, [rout, #(8)]; \ -+ str r1, [rout, #(12)]; -+ -+#ifdef __ARM_FEATURE_UNALIGNED -+ /* unaligned word reads allowed */ -+ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ -+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) -+ -+ #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ -+ 
write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) -+ -+ #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ -+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) -+ -+ #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ -+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) -+#else -+ /* need to handle unaligned reads by byte reads */ -+ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ -+ tst rin, #3; \ -+ beq 1f; \ -+ ldr_unaligned_be(l0, rin, 0, rtmp0); \ -+ ldr_unaligned_be(r0, rin, 4, rtmp0); \ -+ ldr_unaligned_be(l1, rin, 8, rtmp0); \ -+ ldr_unaligned_be(r1, rin, 12, rtmp0); \ -+ b 2f; \ -+ 1:;\ -+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ -+ 2:; -+ -+ #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ -+ tst rout, #3; \ -+ beq 1f; \ -+ str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \ -+ str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \ -+ str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \ -+ str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ -+ b 2f; \ -+ 1:;\ -+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ -+ 2:; -+ -+ #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ -+ tst rin, #3; \ -+ beq 1f; \ -+ ldr_unaligned_host(l0, rin, 0, rtmp0); \ -+ ldr_unaligned_host(r0, rin, 4, rtmp0); \ -+ ldr_unaligned_host(l1, rin, 8, rtmp0); \ -+ ldr_unaligned_host(r1, rin, 12, rtmp0); \ -+ b 2f; \ -+ 1:;\ -+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ -+ 2:; -+ -+ #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ -+ tst rout, #3; \ -+ beq 1f; \ -+ str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \ -+ str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \ -+ str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \ -+ str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ -+ b 2f; \ -+ 1:;\ -+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ -+ 2:; -+#endif -+ -+.align 3 -+.type _gcry_blowfish_arm_enc_blk2,%function; -+ -+_gcry_blowfish_arm_enc_blk2: 
-+ /* input: -+ * preloaded: CTX -+ * [RL0, RR0], [RL1, RR1]: src -+ * output: -+ * [RR0, RL0], [RR1, RL1]: dst -+ */ -+ push {RT0,%lr}; -+ -+ add CTXs2, CTXs0, #(s2 - s0); -+ mov RMASK, #(0xff << 2); /* byte mask */ -+ -+ load_n_add_roundkey_enc2(0); -+ round_enc2(2, next_key); -+ round_enc2(4, next_key); -+ round_enc2(6, next_key); -+ round_enc2(8, next_key); -+ round_enc2(10, next_key); -+ round_enc2(12, next_key); -+ round_enc2(14, next_key); -+ round_enc2(16, dummy); -+ -+ host_to_be(RR0, RT0); -+ host_to_be(RL0, RT0); -+ host_to_be(RR1, RT0); -+ host_to_be(RL1, RT0); -+ -+ pop {RT0,%pc}; -+.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2; -+ -+.align 3 -+.globl _gcry_blowfish_arm_cfb_dec; -+.type _gcry_blowfish_arm_cfb_dec,%function; -+ -+_gcry_blowfish_arm_cfb_dec: -+ /* input: -+ * %r0: CTX -+ * %r1: dst (2 blocks) -+ * %r2: src (2 blocks) -+ * %r3: iv (64bit) -+ */ -+ push {%r2, %r4-%r11, %ip, %lr}; -+ -+ mov %lr, %r3; -+ -+ /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ -+ ldm %r3, {RL0, RR0}; -+ host_to_be(RL0, RT0); -+ host_to_be(RR0, RT0); -+ read_block(%r2, 0, RL1, RR1, RT0); -+ -+ /* Update IV, load src[1] and save to iv[0] */ -+ read_block_host(%r2, 8, %r5, %r6, RT0); -+ stm %lr, {%r5, %r6}; -+ -+ bl _gcry_blowfish_arm_enc_blk2; -+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ -+ -+ /* %r1: dst, %r0: %src */ -+ pop {%r0}; -+ -+ /* dst = src ^ result */ -+ read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); -+ eor %r5, %r4; -+ eor %r6, %r3; -+ eor %r7, %r10; -+ eor %r8, %r9; -+ write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); -+ -+ pop {%r4-%r11, %ip, %pc}; -+.ltorg -+.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec; -+ -+.align 3 -+.globl _gcry_blowfish_arm_ctr_enc; -+.type _gcry_blowfish_arm_ctr_enc,%function; -+ -+_gcry_blowfish_arm_ctr_enc: -+ /* input: -+ * %r0: CTX -+ * %r1: dst (2 blocks) -+ * %r2: src (2 blocks) -+ * %r3: iv (64bit, big-endian) -+ */ -+ push {%r2, %r4-%r11, %ip, %lr}; -+ -+ mov 
%lr, %r3; -+ -+ /* Load IV (big => host endian) */ -+ read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0); -+ -+ /* Construct IVs */ -+ adds RR1, RR0, #1; /* +1 */ -+ adc RL1, RL0, #0; -+ adds %r6, RR1, #1; /* +2 */ -+ adc %r5, RL1, #0; -+ -+ /* Store new IV (host => big-endian) */ -+ write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0); -+ -+ bl _gcry_blowfish_arm_enc_blk2; -+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ -+ -+ /* %r1: dst, %r0: %src */ -+ pop {%r0}; -+ -+ /* XOR key-stream with plaintext */ -+ read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); -+ eor %r5, %r4; -+ eor %r6, %r3; -+ eor %r7, %r10; -+ eor %r8, %r9; -+ write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); -+ -+ pop {%r4-%r11, %ip, %pc}; -+.ltorg -+.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc; -+ -+.align 3 -+.type _gcry_blowfish_arm_dec_blk2,%function; -+ -+_gcry_blowfish_arm_dec_blk2: -+ /* input: -+ * preloaded: CTX -+ * [RL0, RR0], [RL1, RR1]: src -+ * output: -+ * [RR0, RL0], [RR1, RL1]: dst -+ */ -+ add CTXs2, CTXs0, #(s2 - s0); -+ mov RMASK, #(0xff << 2); /* byte mask */ -+ -+ load_n_add_roundkey_dec2(17); -+ round_dec2(15, next_key); -+ round_dec2(13, next_key); -+ round_dec2(11, next_key); -+ round_dec2(9, next_key); -+ round_dec2(7, next_key); -+ round_dec2(5, next_key); -+ round_dec2(3, next_key); -+ round_dec2(1, dummy); -+ -+ host_to_be(RR0, RT0); -+ host_to_be(RL0, RT0); -+ host_to_be(RR1, RT0); -+ host_to_be(RL1, RT0); -+ -+ b .Ldec_cbc_tail; -+.ltorg -+.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2; -+ -+.align 3 -+.globl _gcry_blowfish_arm_cbc_dec; -+.type _gcry_blowfish_arm_cbc_dec,%function; -+ -+_gcry_blowfish_arm_cbc_dec: -+ /* input: -+ * %r0: CTX -+ * %r1: dst (2 blocks) -+ * %r2: src (2 blocks) -+ * %r3: iv (64bit) -+ */ -+ push {%r2-%r11, %ip, %lr}; -+ -+ read_block2(%r2, RL0, RR0, RL1, RR1, RT0); -+ -+ /* dec_blk2 is only used by cbc_dec, jump directly in/out instead -+ * of function call. 
*/ -+ b _gcry_blowfish_arm_dec_blk2; -+.Ldec_cbc_tail: -+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ -+ -+ /* %r0: %src, %r1: dst, %r2: iv */ -+ pop {%r0, %r2}; -+ -+ /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */ -+ read_block_host(%r0, 0, %r7, %r8, %r5); -+ /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */ -+ ldm %r2, {%r5, %r6}; -+ -+ /* out[1] ^= IV+1 */ -+ eor %r10, %r7; -+ eor %r9, %r8; -+ /* out[0] ^= IV */ -+ eor %r4, %r5; -+ eor %r3, %r6; -+ -+ /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */ -+ read_block_host(%r0, 8, %r7, %r8, %r5); -+ /* store IV+2 to iv[0] (aligned). */ -+ stm %r2, {%r7, %r8}; -+ -+ /* store result to dst[0-3]. Might be unaligned. */ -+ write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6); -+ -+ pop {%r4-%r11, %ip, %pc}; -+.ltorg -+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec; -+ -+#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ -+#endif /*__ARM_ARCH >= 6*/ ---- cipher/serpent-armv7-neon.S -+++ cipher/serpent-armv7-neon.S -@@ -0,0 +1,869 @@ -+/* serpent-armv7-neon.S - ARM/NEON assembly implementation of Serpent cipher -+ * -+ * Copyright (C) 2013 Jussi Kivilinna -+ * -+ * This file is part of Libgcrypt. -+ * -+ * Libgcrypt is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU Lesser General Public License as -+ * published by the Free Software Foundation; either version 2.1 of -+ * the License, or (at your option) any later version. -+ * -+ * Libgcrypt is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this program; if not, see . 
-+ */ -+ -+#include -+ -+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ -+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ -+ defined(HAVE_GCC_INLINE_ASM_NEON) -+ -+.text -+ -+.syntax unified -+.fpu neon -+.arm -+ -+/* ARM registers */ -+#define RROUND r0 -+ -+/* NEON vector registers */ -+#define RA0 q0 -+#define RA1 q1 -+#define RA2 q2 -+#define RA3 q3 -+#define RA4 q4 -+#define RB0 q5 -+#define RB1 q6 -+#define RB2 q7 -+#define RB3 q8 -+#define RB4 q9 -+ -+#define RT0 q10 -+#define RT1 q11 -+#define RT2 q12 -+#define RT3 q13 -+ -+#define RA0d0 d0 -+#define RA0d1 d1 -+#define RA1d0 d2 -+#define RA1d1 d3 -+#define RA2d0 d4 -+#define RA2d1 d5 -+#define RA3d0 d6 -+#define RA3d1 d7 -+#define RA4d0 d8 -+#define RA4d1 d9 -+#define RB0d0 d10 -+#define RB0d1 d11 -+#define RB1d0 d12 -+#define RB1d1 d13 -+#define RB2d0 d14 -+#define RB2d1 d15 -+#define RB3d0 d16 -+#define RB3d1 d17 -+#define RB4d0 d18 -+#define RB4d1 d19 -+#define RT0d0 d20 -+#define RT0d1 d21 -+#define RT1d0 d22 -+#define RT1d1 d23 -+#define RT2d0 d24 -+#define RT2d1 d25 -+ -+/********************************************************************** -+ helper macros -+ **********************************************************************/ -+ -+#define transpose_4x4(_q0, _q1, _q2, _q3) \ -+ vtrn.32 _q0, _q1; \ -+ vtrn.32 _q2, _q3; \ -+ vswp _q0##d1, _q2##d0; \ -+ vswp _q1##d1, _q3##d0; -+ -+/********************************************************************** -+ 8-way serpent -+ **********************************************************************/ -+ -+/* -+ * These are the S-Boxes of Serpent from following research paper. -+ * -+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, -+ * (New York, New York, USA), p. 317–329, National Institute of Standards and -+ * Technology, 2000. 
-+ * -+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf -+ * -+ */ -+#define SBOX0(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ veor a3, a3, a0; veor b3, b3, b0; vmov a4, a1; vmov b4, b1; \ -+ vand a1, a1, a3; vand b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ -+ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ -+ veor a0, a0, a4; veor b0, b0, b4; veor a4, a4, a3; veor b4, b4, b3; \ -+ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \ -+ veor a2, a2, a4; veor b2, b2, b4; vmvn a4, a4; vmvn b4, b4; \ -+ vorr a4, a4, a1; vorr b4, b4, b1; veor a1, a1, a3; veor b1, b1, b3; \ -+ veor a1, a1, a4; veor b1, b1, b4; vorr a3, a3, a0; vorr b3, b3, b0; \ -+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a3; veor b4, b3; -+ -+#define SBOX0_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmvn a2, a2; vmvn b2, b2; vmov a4, a1; vmov b4, b1; \ -+ vorr a1, a1, a0; vorr b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \ -+ veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ -+ veor a1, a1, a3; veor b1, b1, b3; veor a0, a0, a4; veor b0, b0, b4; \ -+ veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a3; vand b0, b0, b3; \ -+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a1; vorr b0, b0, b1; \ -+ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \ -+ veor a2, a2, a1; veor b2, b2, b1; veor a3, a3, a0; veor b3, b3, b0; \ -+ veor a3, a3, a1; veor b3, b3, b1;\ -+ vand a2, a2, a3; vand b2, b2, b3;\ -+ veor a4, a2; veor b4, b2; -+ -+#define SBOX1(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmvn a0, a0; vmvn b0, b0; vmvn a2, a2; vmvn b2, b2; \ -+ vmov a4, a0; vmov b4, b0; vand a0, a0, a1; vand b0, b0, b1; \ -+ veor a2, a2, a0; veor b2, b2, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ -+ veor a3, a3, a2; veor b3, b3, b2; veor a1, a1, a0; veor b1, b1, b0; \ -+ veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a1; vorr b4, b4, b1; \ -+ veor a1, a1, a3; veor b1, b1, b3; vorr a2, a2, a0; vorr b2, b2, b0; \ -+ vand a2, 
a2, a4; vand b2, b2, b4; veor a0, a0, a1; veor b0, b0, b1; \ -+ vand a1, a1, a2; vand b1, b1, b2;\ -+ veor a1, a1, a0; veor b1, b1, b0; vand a0, a0, a2; vand b0, b0, b2; \ -+ veor a0, a4; veor b0, b4; -+ -+#define SBOX1_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a1; vmov b4, b1; veor a1, a1, a3; veor b1, b1, b3; \ -+ vand a3, a3, a1; vand b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \ -+ veor a3, a3, a0; veor b3, b3, b0; vorr a0, a0, a1; vorr b0, b0, b1; \ -+ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a4; veor b0, b0, b4; \ -+ vorr a0, a0, a2; vorr b0, b0, b2; veor a1, a1, a3; veor b1, b1, b3; \ -+ veor a0, a0, a1; veor b0, b0, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ -+ veor a1, a1, a0; veor b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \ -+ veor a4, a4, a1; veor b4, b4, b1; vorr a1, a1, a0; vorr b1, b1, b0; \ -+ veor a1, a1, a0; veor b1, b1, b0;\ -+ vorr a1, a1, a4; vorr b1, b1, b4;\ -+ veor a3, a1; veor b3, b1; -+ -+#define SBOX2(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a0; vmov b4, b0; vand a0, a0, a2; vand b0, b0, b2; \ -+ veor a0, a0, a3; veor b0, b0, b3; veor a2, a2, a1; veor b2, b2, b1; \ -+ veor a2, a2, a0; veor b2, b2, b0; vorr a3, a3, a4; vorr b3, b3, b4; \ -+ veor a3, a3, a1; veor b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \ -+ vmov a1, a3; vmov b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \ -+ veor a3, a3, a0; veor b3, b3, b0; vand a0, a0, a1; vand b0, b0, b1; \ -+ veor a4, a4, a0; veor b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \ -+ veor a1, a1, a4; veor b1, b1, b4; vmvn a4, a4; vmvn b4, b4; -+ -+#define SBOX2_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ -+ vmov a4, a3; vmov b4, b3; vand a3, a3, a2; vand b3, b3, b2; \ -+ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a2; vorr b1, b1, b2; \ -+ veor a1, a1, a4; veor b1, b1, b4; vand a4, a4, a3; vand b4, b4, b3; \ -+ veor a2, a2, a3; veor b2, b2, b3; vand a4, a4, a0; vand b4, b4, b0; \ -+ veor a4, 
a4, a2; veor b4, b4, b2; vand a2, a2, a1; vand b2, b2, b1; \ -+ vorr a2, a2, a0; vorr b2, b2, b0; vmvn a3, a3; vmvn b3, b3; \ -+ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \ -+ vand a0, a0, a1; vand b0, b0, b1; veor a3, a3, a4; veor b3, b3, b4; \ -+ veor a3, a0; veor b3, b0; -+ -+#define SBOX3(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a0; vmov b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ -+ veor a3, a3, a1; veor b3, b3, b1; vand a1, a1, a4; vand b1, b1, b4; \ -+ veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a3; veor b2, b2, b3; \ -+ vand a3, a3, a0; vand b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \ -+ veor a3, a3, a4; veor b3, b3, b4; veor a0, a0, a1; veor b0, b0, b1; \ -+ vand a4, a4, a0; vand b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \ -+ veor a4, a4, a2; veor b4, b4, b2; vorr a1, a1, a0; vorr b1, b1, b0; \ -+ veor a1, a1, a2; veor b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \ -+ vmov a2, a1; vmov b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ -+ veor a1, a0; veor b1, b0; -+ -+#define SBOX3_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a2; vmov b4, b2; veor a2, a2, a1; veor b2, b2, b1; \ -+ veor a0, a0, a2; veor b0, b0, b2; vand a4, a4, a2; vand b4, b4, b2; \ -+ veor a4, a4, a0; veor b4, b4, b0; vand a0, a0, a1; vand b0, b0, b1; \ -+ veor a1, a1, a3; veor b1, b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \ -+ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \ -+ veor a1, a1, a4; veor b1, b1, b4; vand a3, a3, a2; vand b3, b3, b2; \ -+ veor a3, a3, a1; veor b3, b3, b1; veor a1, a1, a0; veor b1, b1, b0; \ -+ vorr a1, a1, a2; vorr b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \ -+ veor a1, a1, a4; veor b1, b1, b4;\ -+ veor a0, a1; veor b0, b1; -+ -+#define SBOX4(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ veor a1, a1, a3; veor b1, b1, b3; vmvn a3, a3; vmvn b3, b3; \ -+ veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ -+ vmov a4, a1; vmov b4, b1; vand a1, a1, a3; vand b1, 
b1, b3; \ -+ veor a1, a1, a2; veor b1, b1, b2; veor a4, a4, a3; veor b4, b4, b3; \ -+ veor a0, a0, a4; veor b0, b0, b4; vand a2, a2, a4; vand b2, b2, b4; \ -+ veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a1; vand b0, b0, b1; \ -+ veor a3, a3, a0; veor b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \ -+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ -+ veor a0, a0, a2; veor b0, b0, b2; vand a2, a2, a3; vand b2, b2, b3; \ -+ vmvn a0, a0; vmvn b0, b0; veor a4, a2; veor b4, b2; -+ -+#define SBOX4_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a2; vmov b4, b2; vand a2, a2, a3; vand b2, b2, b3; \ -+ veor a2, a2, a1; veor b2, b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ -+ vand a1, a1, a0; vand b1, b1, b0; veor a4, a4, a2; veor b4, b4, b2; \ -+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \ -+ vmvn a0, a0; vmvn b0, b0; veor a3, a3, a4; veor b3, b3, b4; \ -+ veor a1, a1, a3; veor b1, b1, b3; vand a3, a3, a0; vand b3, b3, b0; \ -+ veor a3, a3, a2; veor b3, b3, b2; veor a0, a0, a1; veor b0, b0, b1; \ -+ vand a2, a2, a0; vand b2, b2, b0; veor a3, a3, a0; veor b3, b3, b0; \ -+ veor a2, a2, a4; veor b2, b2, b4;\ -+ vorr a2, a2, a3; vorr b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ -+ veor a2, a1; veor b2, b1; -+ -+#define SBOX5(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ veor a0, a0, a1; veor b0, b0, b1; veor a1, a1, a3; veor b1, b1, b3; \ -+ vmvn a3, a3; vmvn b3, b3; vmov a4, a1; vmov b4, b1; \ -+ vand a1, a1, a0; vand b1, b1, b0; veor a2, a2, a3; veor b2, b2, b3; \ -+ veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ -+ veor a4, a4, a3; veor b4, b4, b3; vand a3, a3, a1; vand b3, b3, b1; \ -+ veor a3, a3, a0; veor b3, b3, b0; veor a4, a4, a1; veor b4, b4, b1; \ -+ veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a0; veor b2, b2, b0; \ -+ vand a0, a0, a3; vand b0, b0, b3; vmvn a2, a2; vmvn b2, b2; \ -+ veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a3; vorr b4, b4, b3; \ -+ veor a2, a4; veor b2, 
b4; -+ -+#define SBOX5_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmvn a1, a1; vmvn b1, b1; vmov a4, a3; vmov b4, b3; \ -+ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a0; vorr b3, b3, b0; \ -+ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \ -+ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \ -+ veor a2, a2, a4; veor b2, b2, b4; vorr a4, a4, a0; vorr b4, b4, b0; \ -+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \ -+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ -+ vand a3, a3, a4; vand b3, b3, b4; veor a4, a4, a1; veor b4, b4, b1; \ -+ veor a3, a3, a4; veor b3, b3, b4; vmvn a4, a4; vmvn b4, b4; \ -+ veor a3, a0; veor b3, b0; -+ -+#define SBOX6(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmvn a2, a2; vmvn b2, b2; vmov a4, a3; vmov b4, b3; \ -+ vand a3, a3, a0; vand b3, b3, b0; veor a0, a0, a4; veor b0, b0, b4; \ -+ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ -+ veor a1, a1, a3; veor b1, b1, b3; veor a2, a2, a0; veor b2, b2, b0; \ -+ vorr a0, a0, a1; vorr b0, b0, b1; veor a2, a2, a1; veor b2, b2, b1; \ -+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ -+ veor a0, a0, a2; veor b0, b0, b2; veor a4, a4, a3; veor b4, b4, b3; \ -+ veor a4, a4, a0; veor b4, b4, b0; vmvn a3, a3; vmvn b3, b3; \ -+ vand a2, a2, a4; vand b2, b2, b4;\ -+ veor a2, a3; veor b2, b3; -+ -+#define SBOX6_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ veor a0, a0, a2; veor b0, b0, b2; vmov a4, a2; vmov b4, b2; \ -+ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \ -+ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \ -+ veor a2, a2, a3; veor b2, b2, b3; vorr a4, a4, a0; vorr b4, b4, b0; \ -+ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \ -+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a3; vand b1, b1, b3; \ -+ veor a1, a1, a0; veor b1, b1, b0; veor a0, a0, a3; veor b0, b0, b3; \ -+ vorr a0, 
a0, a2; vorr b0, b0, b2; veor a3, a3, a1; veor b3, b3, b1; \ -+ veor a4, a0; veor b4, b0; -+ -+#define SBOX7(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a1; vmov b4, b1; vorr a1, a1, a2; vorr b1, b1, b2; \ -+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ -+ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a4; vorr b3, b3, b4; \ -+ vand a3, a3, a0; vand b3, b3, b0; veor a4, a4, a2; veor b4, b4, b2; \ -+ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a4; vorr b1, b1, b4; \ -+ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a4; vorr b0, b0, b4; \ -+ veor a0, a0, a2; veor b0, b0, b2; veor a1, a1, a4; veor b1, b1, b4; \ -+ veor a2, a2, a1; veor b2, b2, b1; vand a1, a1, a0; vand b1, b1, b0; \ -+ veor a1, a1, a4; veor b1, b1, b4; vmvn a2, a2; vmvn b2, b2; \ -+ vorr a2, a2, a0; vorr b2, b2, b0;\ -+ veor a4, a2; veor b4, b2; -+ -+#define SBOX7_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vmov a4, a2; vmov b4, b2; veor a2, a2, a0; veor b2, b2, b0; \ -+ vand a0, a0, a3; vand b0, b0, b3; vorr a4, a4, a3; vorr b4, b4, b3; \ -+ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \ -+ vorr a1, a1, a0; vorr b1, b1, b0; veor a0, a0, a2; veor b0, b0, b2; \ -+ vand a2, a2, a4; vand b2, b2, b4; vand a3, a3, a4; vand b3, b3, b4; \ -+ veor a1, a1, a2; veor b1, b1, b2; veor a2, a2, a0; veor b2, b2, b0; \ -+ vorr a0, a0, a2; vorr b0, b0, b2; veor a4, a4, a1; veor b4, b4, b1; \ -+ veor a0, a0, a3; veor b0, b0, b3; veor a3, a3, a4; veor b3, b3, b4; \ -+ vorr a4, a4, a0; vorr b4, b4, b0; veor a3, a3, a2; veor b3, b3, b2; \ -+ veor a4, a2; veor b4, b2; -+ -+/* Apply SBOX number WHICH to to the block. */ -+#define SBOX(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ SBOX##which (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) -+ -+/* Apply inverse SBOX number WHICH to to the block. 
*/ -+#define SBOX_INVERSE(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ SBOX##which##_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) -+ -+/* XOR round key into block state in a0,a1,a2,a3. a4 used as temporary. */ -+#define BLOCK_XOR_KEY(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vdup.32 RT3, RT0d0[0]; \ -+ vdup.32 RT1, RT0d0[1]; \ -+ vdup.32 RT2, RT0d1[0]; \ -+ vdup.32 RT0, RT0d1[1]; \ -+ veor a0, a0, RT3; veor b0, b0, RT3; \ -+ veor a1, a1, RT1; veor b1, b1, RT1; \ -+ veor a2, a2, RT2; veor b2, b2, RT2; \ -+ veor a3, a3, RT0; veor b3, b3, RT0; -+ -+#define BLOCK_LOAD_KEY_ENC() \ -+ vld1.8 {RT0d0, RT0d1}, [RROUND]!; -+ -+#define BLOCK_LOAD_KEY_DEC() \ -+ vld1.8 {RT0d0, RT0d1}, [RROUND]; \ -+ sub RROUND, RROUND, #16 -+ -+/* Apply the linear transformation to BLOCK. */ -+#define LINEAR_TRANSFORMATION(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vshl.u32 a4, a0, #13; vshl.u32 b4, b0, #13; \ -+ vshr.u32 a0, a0, #(32-13); vshr.u32 b0, b0, #(32-13); \ -+ veor a0, a0, a4; veor b0, b0, b4; \ -+ vshl.u32 a4, a2, #3; vshl.u32 b4, b2, #3; \ -+ vshr.u32 a2, a2, #(32-3); vshr.u32 b2, b2, #(32-3); \ -+ veor a2, a2, a4; veor b2, b2, b4; \ -+ veor a1, a0, a1; veor b1, b0, b1; \ -+ veor a1, a2, a1; veor b1, b2, b1; \ -+ vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \ -+ veor a3, a2, a3; veor b3, b2, b3; \ -+ veor a3, a4, a3; veor b3, b4, b3; \ -+ vshl.u32 a4, a1, #1; vshl.u32 b4, b1, #1; \ -+ vshr.u32 a1, a1, #(32-1); vshr.u32 b1, b1, #(32-1); \ -+ veor a1, a1, a4; veor b1, b1, b4; \ -+ vshl.u32 a4, a3, #7; vshl.u32 b4, b3, #7; \ -+ vshr.u32 a3, a3, #(32-7); vshr.u32 b3, b3, #(32-7); \ -+ veor a3, a3, a4; veor b3, b3, b4; \ -+ veor a0, a1, a0; veor b0, b1, b0; \ -+ veor a0, a3, a0; veor b0, b3, b0; \ -+ vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \ -+ veor a2, a3, a2; veor b2, b3, b2; \ -+ veor a2, a4, a2; veor b2, b4, b2; \ -+ vshl.u32 a4, a0, #5; vshl.u32 b4, b0, #5; \ -+ vshr.u32 a0, a0, #(32-5); vshr.u32 b0, b0, #(32-5); \ -+ veor a0, a0, a4; veor b0, b0, b4; \ -+ vshl.u32 
a4, a2, #22; vshl.u32 b4, b2, #22; \ -+ vshr.u32 a2, a2, #(32-22); vshr.u32 b2, b2, #(32-22); \ -+ veor a2, a2, a4; veor b2, b2, b4; -+ -+/* Apply the inverse linear transformation to BLOCK. */ -+#define LINEAR_TRANSFORMATION_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ -+ vshr.u32 a4, a2, #22; vshr.u32 b4, b2, #22; \ -+ vshl.u32 a2, a2, #(32-22); vshl.u32 b2, b2, #(32-22); \ -+ veor a2, a2, a4; veor b2, b2, b4; \ -+ vshr.u32 a4, a0, #5; vshr.u32 b4, b0, #5; \ -+ vshl.u32 a0, a0, #(32-5); vshl.u32 b0, b0, #(32-5); \ -+ veor a0, a0, a4; veor b0, b0, b4; \ -+ vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \ -+ veor a2, a3, a2; veor b2, b3, b2; \ -+ veor a2, a4, a2; veor b2, b4, b2; \ -+ veor a0, a1, a0; veor b0, b1, b0; \ -+ veor a0, a3, a0; veor b0, b3, b0; \ -+ vshr.u32 a4, a3, #7; vshr.u32 b4, b3, #7; \ -+ vshl.u32 a3, a3, #(32-7); vshl.u32 b3, b3, #(32-7); \ -+ veor a3, a3, a4; veor b3, b3, b4; \ -+ vshr.u32 a4, a1, #1; vshr.u32 b4, b1, #1; \ -+ vshl.u32 a1, a1, #(32-1); vshl.u32 b1, b1, #(32-1); \ -+ veor a1, a1, a4; veor b1, b1, b4; \ -+ vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \ -+ veor a3, a2, a3; veor b3, b2, b3; \ -+ veor a3, a4, a3; veor b3, b4, b3; \ -+ veor a1, a0, a1; veor b1, b0, b1; \ -+ veor a1, a2, a1; veor b1, b2, b1; \ -+ vshr.u32 a4, a2, #3; vshr.u32 b4, b2, #3; \ -+ vshl.u32 a2, a2, #(32-3); vshl.u32 b2, b2, #(32-3); \ -+ veor a2, a2, a4; veor b2, b2, b4; \ -+ vshr.u32 a4, a0, #13; vshr.u32 b4, b0, #13; \ -+ vshl.u32 a0, a0, #(32-13); vshl.u32 b0, b0, #(32-13); \ -+ veor a0, a0, a4; veor b0, b0, b4; -+ -+/* Apply a Serpent round to eight parallel blocks. This macro increments -+ `round'. 
*/ -+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ -+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ -+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ BLOCK_LOAD_KEY_ENC (); \ -+ SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); -+ -+/* Apply the last Serpent round to eight parallel blocks. This macro increments -+ `round'. */ -+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ -+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ -+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ BLOCK_LOAD_KEY_ENC (); \ -+ SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); -+ -+/* Apply an inverse Serpent round to eight parallel blocks. This macro -+ increments `round'. */ -+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \ -+ na0, na1, na2, na3, na4, \ -+ b0, b1, b2, b3, b4, \ -+ nb0, nb1, nb2, nb3, nb4) \ -+ LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \ -+ BLOCK_LOAD_KEY_DEC (); -+ -+/* Apply the first inverse Serpent round to eight parallel blocks. This macro -+ increments `round'. 
*/ -+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \ -+ na0, na1, na2, na3, na4, \ -+ b0, b1, b2, b3, b4, \ -+ nb0, nb1, nb2, nb3, nb4) \ -+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ BLOCK_LOAD_KEY_DEC (); \ -+ SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ -+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \ -+ BLOCK_LOAD_KEY_DEC (); -+ -+.align 3 -+.type __serpent_enc_blk8,%function; -+__serpent_enc_blk8: -+ /* input: -+ * r0: round key pointer -+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext -+ * blocks -+ * output: -+ * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel -+ * ciphertext blocks -+ */ -+ -+ transpose_4x4(RA0, RA1, RA2, RA3); -+ BLOCK_LOAD_KEY_ENC (); -+ transpose_4x4(RB0, RB1, RB2, RB3); -+ -+ ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, -+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); -+ ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, -+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); -+ ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, -+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); -+ ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, -+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); -+ ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, -+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); -+ ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, -+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); -+ ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, -+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); -+ ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, -+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); -+ ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0, -+ RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0); -+ ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0, -+ RB1, RB3, RB2, RB4, RB0, RB2, RB1, 
RB4, RB3, RB0); -+ ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2, -+ RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2); -+ ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4, -+ RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4); -+ ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0, -+ RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0); -+ ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0, -+ RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0); -+ ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3, -+ RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3); -+ ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0, -+ RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0); -+ ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4, -+ RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4); -+ ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4, -+ RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4); -+ ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2, -+ RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2); -+ ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3, -+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3); -+ ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4, -+ RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4); -+ ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4, -+ RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4); -+ ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0, -+ RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0); -+ ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4, -+ RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4); -+ ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, -+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); -+ ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, -+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); -+ ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, 
RA4, RA1, RA3, RA2, -+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); -+ ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, -+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); -+ ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, -+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); -+ ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, -+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); -+ ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, -+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); -+ ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, -+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); -+ -+ transpose_4x4(RA4, RA1, RA2, RA0); -+ transpose_4x4(RB4, RB1, RB2, RB0); -+ -+ bx lr; -+.size __serpent_enc_blk8,.-__serpent_enc_blk8; -+ -+.align 3 -+.type __serpent_dec_blk8,%function; -+__serpent_dec_blk8: -+ /* input: -+ * r0: round key pointer -+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel -+ * ciphertext blocks -+ * output: -+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext -+ * blocks -+ */ -+ -+ add RROUND, RROUND, #(32*16); -+ -+ transpose_4x4(RA0, RA1, RA2, RA3); -+ BLOCK_LOAD_KEY_DEC (); -+ transpose_4x4(RB0, RB1, RB2, RB3); -+ -+ ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4, -+ RA3, RA0, RA1, RA4, RA2, -+ RB0, RB1, RB2, RB3, RB4, -+ RB3, RB0, RB1, RB4, RB2); -+ ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3, -+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3); -+ ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0, -+ RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0); -+ ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3, -+ RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3); -+ ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3, -+ RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3); -+ ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4, 
-+ RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4); -+ ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3, -+ RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3); -+ ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1, -+ RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1); -+ ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2, -+ RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2); -+ ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0, -+ RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0); -+ ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4, -+ RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4); -+ ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0, -+ RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0); -+ ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0, -+ RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0); -+ ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1, -+ RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1); -+ ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0, -+ RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0); -+ ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3, -+ RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3); -+ ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2, -+ RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2); -+ ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4, -+ RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4); -+ ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1, -+ RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1); -+ ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4, -+ RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4); -+ ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4, -+ RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4); -+ 
ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3, -+ RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3); -+ ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4, -+ RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4); -+ ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0, -+ RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0); -+ ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2, -+ RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2); -+ ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1, -+ RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1); -+ ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3, -+ RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3); -+ ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1, -+ RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1); -+ ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1, -+ RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1); -+ ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0, -+ RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0); -+ ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1, -+ RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1); -+ ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4, -+ RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4); -+ -+ transpose_4x4(RA0, RA1, RA2, RA3); -+ transpose_4x4(RB0, RB1, RB2, RB3); -+ -+ bx lr; -+.size __serpent_dec_blk8,.-__serpent_dec_blk8; -+ -+.align 3 -+.globl _gcry_serpent_neon_ctr_enc -+.type _gcry_serpent_neon_ctr_enc,%function; -+_gcry_serpent_neon_ctr_enc: -+ /* input: -+ * r0: ctx, CTX -+ * r1: dst (8 blocks) -+ * r2: src (8 blocks) -+ * r3: iv -+ */ -+ -+ vmov.u8 RT1d0, #0xff; /* u64: -1 */ -+ push {r4,lr}; -+ vadd.u64 RT2d0, RT1d0, RT1d0; /* u64: -2 */ -+ vpush {RA4-RB2}; -+ -+ /* load IV and byteswap */ -+ vld1.8 {RA0}, [r3]; -+ vrev64.u8 RT0, RA0; /* be => le */ -+ ldr 
r4, [r3, #8]; -+ -+ /* construct IVs */ -+ vsub.u64 RA2d1, RT0d1, RT2d0; /* +2 */ -+ vsub.u64 RA1d1, RT0d1, RT1d0; /* +1 */ -+ cmp r4, #-1; -+ -+ vsub.u64 RB0d1, RA2d1, RT2d0; /* +4 */ -+ vsub.u64 RA3d1, RA2d1, RT1d0; /* +3 */ -+ ldr r4, [r3, #12]; -+ -+ vsub.u64 RB2d1, RB0d1, RT2d0; /* +6 */ -+ vsub.u64 RB1d1, RB0d1, RT1d0; /* +5 */ -+ -+ vsub.u64 RT2d1, RB2d1, RT2d0; /* +8 */ -+ vsub.u64 RB3d1, RB2d1, RT1d0; /* +7 */ -+ -+ vmov RA1d0, RT0d0; -+ vmov RA2d0, RT0d0; -+ vmov RA3d0, RT0d0; -+ vmov RB0d0, RT0d0; -+ rev r4, r4; -+ vmov RB1d0, RT0d0; -+ vmov RB2d0, RT0d0; -+ vmov RB3d0, RT0d0; -+ vmov RT2d0, RT0d0; -+ -+ /* check need for handling 64-bit overflow and carry */ -+ beq .Ldo_ctr_carry; -+ -+.Lctr_carry_done: -+ /* le => be */ -+ vrev64.u8 RA1, RA1; -+ vrev64.u8 RA2, RA2; -+ vrev64.u8 RA3, RA3; -+ vrev64.u8 RB0, RB0; -+ vrev64.u8 RT2, RT2; -+ vrev64.u8 RB1, RB1; -+ vrev64.u8 RB2, RB2; -+ vrev64.u8 RB3, RB3; -+ /* store new IV */ -+ vst1.8 {RT2}, [r3]; -+ -+ bl __serpent_enc_blk8; -+ -+ vld1.8 {RT0, RT1}, [r2]!; -+ vld1.8 {RT2, RT3}, [r2]!; -+ veor RA4, RA4, RT0; -+ veor RA1, RA1, RT1; -+ vld1.8 {RT0, RT1}, [r2]!; -+ veor RA2, RA2, RT2; -+ veor RA0, RA0, RT3; -+ vld1.8 {RT2, RT3}, [r2]!; -+ veor RB4, RB4, RT0; -+ veor RT0, RT0; -+ veor RB1, RB1, RT1; -+ veor RT1, RT1; -+ veor RB2, RB2, RT2; -+ veor RT2, RT2; -+ veor RB0, RB0, RT3; -+ veor RT3, RT3; -+ -+ vst1.8 {RA4}, [r1]!; -+ vst1.8 {RA1}, [r1]!; -+ veor RA1, RA1; -+ vst1.8 {RA2}, [r1]!; -+ veor RA2, RA2; -+ vst1.8 {RA0}, [r1]!; -+ veor RA0, RA0; -+ vst1.8 {RB4}, [r1]!; -+ veor RB4, RB4; -+ vst1.8 {RB1}, [r1]!; -+ vst1.8 {RB2}, [r1]!; -+ vst1.8 {RB0}, [r1]!; -+ -+ vpop {RA4-RB2}; -+ -+ /* clear the used registers */ -+ veor RA3, RA3; -+ veor RB3, RB3; -+ -+ pop {r4,pc}; -+ -+.Ldo_ctr_carry: -+ cmp r4, #-8; -+ blo .Lctr_carry_done; -+ beq .Lcarry_RT2; -+ -+ cmp r4, #-6; -+ blo .Lcarry_RB3; -+ beq .Lcarry_RB2; -+ -+ cmp r4, #-4; -+ blo .Lcarry_RB1; -+ beq .Lcarry_RB0; -+ -+ cmp r4, #-2; -+ blo .Lcarry_RA3; -+ 
beq .Lcarry_RA2; -+ -+ vsub.u64 RA1d0, RT1d0; -+.Lcarry_RA2: -+ vsub.u64 RA2d0, RT1d0; -+.Lcarry_RA3: -+ vsub.u64 RA3d0, RT1d0; -+.Lcarry_RB0: -+ vsub.u64 RB0d0, RT1d0; -+.Lcarry_RB1: -+ vsub.u64 RB1d0, RT1d0; -+.Lcarry_RB2: -+ vsub.u64 RB2d0, RT1d0; -+.Lcarry_RB3: -+ vsub.u64 RB3d0, RT1d0; -+.Lcarry_RT2: -+ vsub.u64 RT2d0, RT1d0; -+ -+ b .Lctr_carry_done; -+.size _gcry_serpent_neon_ctr_enc,.-_gcry_serpent_neon_ctr_enc; -+ -+.align 3 -+.globl _gcry_serpent_neon_cfb_dec -+.type _gcry_serpent_neon_cfb_dec,%function; -+_gcry_serpent_neon_cfb_dec: -+ /* input: -+ * r0: ctx, CTX -+ * r1: dst (8 blocks) -+ * r2: src (8 blocks) -+ * r3: iv -+ */ -+ -+ push {lr}; -+ vpush {RA4-RB2}; -+ -+ /* Load input */ -+ vld1.8 {RA0}, [r3]; -+ vld1.8 {RA1, RA2}, [r2]!; -+ vld1.8 {RA3}, [r2]!; -+ vld1.8 {RB0}, [r2]!; -+ vld1.8 {RB1, RB2}, [r2]!; -+ vld1.8 {RB3}, [r2]!; -+ -+ /* Update IV */ -+ vld1.8 {RT0}, [r2]!; -+ vst1.8 {RT0}, [r3]; -+ mov r3, lr; -+ sub r2, r2, #(8*16); -+ -+ bl __serpent_enc_blk8; -+ -+ vld1.8 {RT0, RT1}, [r2]!; -+ vld1.8 {RT2, RT3}, [r2]!; -+ veor RA4, RA4, RT0; -+ veor RA1, RA1, RT1; -+ vld1.8 {RT0, RT1}, [r2]!; -+ veor RA2, RA2, RT2; -+ veor RA0, RA0, RT3; -+ vld1.8 {RT2, RT3}, [r2]!; -+ veor RB4, RB4, RT0; -+ veor RT0, RT0; -+ veor RB1, RB1, RT1; -+ veor RT1, RT1; -+ veor RB2, RB2, RT2; -+ veor RT2, RT2; -+ veor RB0, RB0, RT3; -+ veor RT3, RT3; -+ -+ vst1.8 {RA4}, [r1]!; -+ vst1.8 {RA1}, [r1]!; -+ veor RA1, RA1; -+ vst1.8 {RA2}, [r1]!; -+ veor RA2, RA2; -+ vst1.8 {RA0}, [r1]!; -+ veor RA0, RA0; -+ vst1.8 {RB4}, [r1]!; -+ veor RB4, RB4; -+ vst1.8 {RB1}, [r1]!; -+ vst1.8 {RB2}, [r1]!; -+ vst1.8 {RB0}, [r1]!; -+ -+ vpop {RA4-RB2}; -+ -+ /* clear the used registers */ -+ veor RA3, RA3; -+ veor RB3, RB3; -+ -+ pop {pc}; -+.size _gcry_serpent_neon_cfb_dec,.-_gcry_serpent_neon_cfb_dec; -+ -+.align 3 -+.globl _gcry_serpent_neon_cbc_dec -+.type _gcry_serpent_neon_cbc_dec,%function; -+_gcry_serpent_neon_cbc_dec: -+ /* input: -+ * r0: ctx, CTX -+ * r1: dst (8 blocks) -+ 
* r2: src (8 blocks) -+ * r3: iv -+ */ -+ -+ push {lr}; -+ vpush {RA4-RB2}; -+ -+ vld1.8 {RA0, RA1}, [r2]!; -+ vld1.8 {RA2, RA3}, [r2]!; -+ vld1.8 {RB0, RB1}, [r2]!; -+ vld1.8 {RB2, RB3}, [r2]!; -+ sub r2, r2, #(8*16); -+ -+ bl __serpent_dec_blk8; -+ -+ vld1.8 {RB4}, [r3]; -+ vld1.8 {RT0, RT1}, [r2]!; -+ vld1.8 {RT2, RT3}, [r2]!; -+ veor RA0, RA0, RB4; -+ veor RA1, RA1, RT0; -+ veor RA2, RA2, RT1; -+ vld1.8 {RT0, RT1}, [r2]!; -+ veor RA3, RA3, RT2; -+ veor RB0, RB0, RT3; -+ vld1.8 {RT2, RT3}, [r2]!; -+ veor RB1, RB1, RT0; -+ veor RT0, RT0; -+ veor RB2, RB2, RT1; -+ veor RT1, RT1; -+ veor RB3, RB3, RT2; -+ veor RT2, RT2; -+ vst1.8 {RT3}, [r3]; /* store new IV */ -+ veor RT3, RT3; -+ -+ vst1.8 {RA0, RA1}, [r1]!; -+ veor RA0, RA0; -+ veor RA1, RA1; -+ vst1.8 {RA2, RA3}, [r1]!; -+ veor RA2, RA2; -+ vst1.8 {RB0, RB1}, [r1]!; -+ veor RA3, RA3; -+ vst1.8 {RB2, RB3}, [r1]!; -+ veor RB3, RB3; -+ -+ vpop {RA4-RB2}; -+ -+ /* clear the used registers */ -+ veor RB4, RB4; -+ -+ pop {pc}; -+.size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec; -+ -+#endif diff --git a/libgcrypt-1.6.0.tar.bz2 b/libgcrypt-1.6.0.tar.bz2 deleted file mode 100644 index c38d34f..0000000 --- a/libgcrypt-1.6.0.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:662d508600ad94cc334e665b7016137feedfcca5226998c65ddd56bdfca29708 -size 2499149 diff --git a/libgcrypt-1.6.0.tar.bz2.sig b/libgcrypt-1.6.0.tar.bz2.sig deleted file mode 100644 index ae69012..0000000 Binary files a/libgcrypt-1.6.0.tar.bz2.sig and /dev/null differ diff --git a/libgcrypt-1.6.1.tar.bz2 b/libgcrypt-1.6.1.tar.bz2 new file mode 100644 index 0000000..c3b1fca --- /dev/null +++ b/libgcrypt-1.6.1.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c3efea69f8ffe769f488b300ce190eeeb0c30de24a53f1c1b6e4202fdc2070 +size 2470885 diff --git a/libgcrypt-1.6.1.tar.bz2.sig b/libgcrypt-1.6.1.tar.bz2.sig new file mode 100644 index 0000000..c0530c0 Binary files /dev/null and 
b/libgcrypt-1.6.1.tar.bz2.sig differ diff --git a/libgcrypt.changes b/libgcrypt.changes index 0a3e75c..2d46582 100644 --- a/libgcrypt.changes +++ b/libgcrypt.changes @@ -1,3 +1,21 @@ +------------------------------------------------------------------- +Thu Jan 30 13:29:49 UTC 2014 - idonmez@suse.com + +- Drop arm-missing-files.diff, fixed upstream + +------------------------------------------------------------------- +Wed Jan 29 18:40:49 UTC 2014 - andreas.stieger@gmx.de + +- libgcrypt 1.6.1, a bugfix release with the following fixes: + * Added emulation for broken Whirlpool code prior to 1.6.0. + * Improved performance of KDF functions. + * Improved ECDSA compliance. + * Fixed message digest lookup by OID (regression in 1.6.0). + * Fixed memory leaks in ECC code. + * Fixed some asm build problems and feature detection bugs. + * Interface changes relative to the 1.6.0 release: + GCRY_MD_FLAG_BUGEMU1 NEW (minor API change). + ------------------------------------------------------------------- Fri Jan 3 16:36:21 UTC 2014 - dmueller@suse.com diff --git a/libgcrypt.spec b/libgcrypt.spec index 4d5789b..0b68a30 100644 --- a/libgcrypt.spec +++ b/libgcrypt.spec @@ -20,11 +20,11 @@ %define separate_hmac256_binary 0 %define libsoname %{name}20 -%define sosuffix 20.0.0 +%define sosuffix 20.0.1 Name: libgcrypt Url: http://directory.fsf.org/wiki/Libgcrypt -Version: 1.6.0 +Version: 1.6.1 Release: 0 Summary: The GNU Crypto Library License: GPL-2.0+ and LGPL-2.1+ and GPL-3.0+ @@ -47,7 +47,6 @@ Patch6: libgcrypt-1.5.0-etc_gcrypt_rngseed-symlink.diff Patch7: libgcrypt-1.5.0-LIBGCRYPT_FORCE_FIPS_MODE-env.diff #PATCH-FIX-UPSTREAM: internal functions are supposed to be used inside libgcrypt, mvyskocil@suse.com Patch8: libgcrypt-1.6.0-use-intenal-functions.patch -Patch9: arm-missing-files.diff BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: automake >= 1.11 BuildRequires: libgpg-error-devel >= 1.11 @@ -116,7 +115,6 @@ understanding of applied cryptography is required to 
use Libgcrypt. %patch6 -p1 %patch7 -p1 %patch8 -p1 -%patch9 %build echo building with build_hmac256 set to %{build_hmac256} @@ -124,14 +122,14 @@ echo building with build_hmac256 set to %{build_hmac256} autoreconf -fi export CFLAGS="%optflags $(getconf LFS_CFLAGS)" %configure --with-pic \ - --enable-noexecstack \ - --disable-static \ - --enable-m-guard \ + --enable-noexecstack \ + --disable-static \ + --enable-m-guard \ %ifarch %sparc - --disable-asm \ + --disable-asm \ %endif - --enable-hmac-binary-check \ - --enable-random=linux + --enable-hmac-binary-check \ + --enable-random=linux make %{?_smp_mflags} %if 0%{?build_hmac256}