forked from pool/openssl
254 lines
7.2 KiB
Diff
254 lines
7.2 KiB
Diff
|
Index: openssl-1.0.1c/engines/e_padlock.c
|
||
|
===================================================================
|
||
|
--- openssl-1.0.1c.orig/engines/e_padlock.c
|
||
|
+++ openssl-1.0.1c/engines/e_padlock.c
|
||
|
@@ -101,7 +101,10 @@
|
||
|
compiler choice is limited to GCC and Microsoft C. */
|
||
|
#undef COMPILE_HW_PADLOCK
|
||
|
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
|
||
|
-# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
|
||
|
+# if (defined(__GNUC__) && __GNUC__>=2 && \
|
||
|
+ (defined(__i386__) || defined(__i386) || \
|
||
|
+ defined(__x86_64__) || defined(__x86_64)) \
|
||
|
+ ) || \
|
||
|
(defined(_MSC_VER) && defined(_M_IX86))
|
||
|
# define COMPILE_HW_PADLOCK
|
||
|
# endif
|
||
|
@@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da
|
||
|
* =======================================================
|
||
|
*/
|
||
|
#if defined(__GNUC__) && __GNUC__>=2
|
||
|
+#if defined(__i386__) || defined(__i386)
|
||
|
/*
|
||
|
* As for excessive "push %ebx"/"pop %ebx" found all over.
|
||
|
* When generating position-independent code GCC won't let
|
||
|
@@ -383,21 +387,6 @@ padlock_available(void)
|
||
|
return padlock_use_ace + padlock_use_rng;
|
||
|
}
|
||
|
|
||
|
-#ifndef OPENSSL_NO_AES
|
||
|
-/* Our own htonl()/ntohl() */
|
||
|
-static inline void
|
||
|
-padlock_bswapl(AES_KEY *ks)
|
||
|
-{
|
||
|
- size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
|
||
|
- unsigned int *key = ks->rd_key;
|
||
|
-
|
||
|
- while (i--) {
|
||
|
- asm volatile ("bswapl %0" : "+r"(*key));
|
||
|
- key++;
|
||
|
- }
|
||
|
-}
|
||
|
-#endif
|
||
|
-
|
||
|
/* Force key reload from memory to the CPU microcode.
|
||
|
Loading EFLAGS from the stack clears EFLAGS[30]
|
||
|
which does the trick. */
|
||
|
@@ -456,11 +445,130 @@ static inline void *name(size_t cnt, \
|
||
|
return iv; \
|
||
|
}
|
||
|
|
||
|
+
|
||
|
+#endif
|
||
|
+
|
||
|
+#elif defined(__x86_64__) || defined(__x86_64)
|
||
|
+
|
||
|
+/* Load supported features of the CPU to see if
|
||
|
+ the PadLock is available. */
|
||
|
+ static int
|
||
|
+padlock_available(void)
|
||
|
+{
|
||
|
+ char vendor_string[16];
|
||
|
+ unsigned int eax, edx;
|
||
|
+
|
||
|
+ /* Are we running on the Centaur (VIA) CPU? */
|
||
|
+ eax = 0x00000000;
|
||
|
+ vendor_string[12] = 0;
|
||
|
+ asm volatile (
|
||
|
+ "cpuid\n"
|
||
|
+ "movl %%ebx,(%1)\n"
|
||
|
+ "movl %%edx,4(%1)\n"
|
||
|
+ "movl %%ecx,8(%1)\n"
|
||
|
+ : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
|
||
|
+ if (strcmp(vendor_string, "CentaurHauls") != 0)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ /* Check for Centaur Extended Feature Flags presence */
|
||
|
+ eax = 0xC0000000;
|
||
|
+ asm volatile ("cpuid"
|
||
|
+ : "+a"(eax) : : "rbx", "rcx", "rdx");
|
||
|
+ if (eax < 0xC0000001)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ /* Read the Centaur Extended Feature Flags */
|
||
|
+ eax = 0xC0000001;
|
||
|
+ asm volatile ("cpuid"
|
||
|
+ : "+a"(eax), "=d"(edx) : : "rbx", "rcx");
|
||
|
+
|
||
|
+ /* Fill up some flags */
|
||
|
+ padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
|
||
|
+ padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
|
||
|
+
|
||
|
+ return padlock_use_ace + padlock_use_rng;
|
||
|
+}
|
||
|
+
|
||
|
+/* Force key reload from memory to the CPU microcode.
|
||
|
+ Loading EFLAGS from the stack clears EFLAGS[30]
|
||
|
+ which does the trick. */
|
||
|
+ static inline void
|
||
|
+padlock_reload_key(void)
|
||
|
+{
|
||
|
+ asm volatile ("pushfq; popfq");
|
||
|
+}
|
||
|
+
|
||
|
+#ifndef OPENSSL_NO_AES
|
||
|
+/*
|
||
|
+ * This is heuristic key context tracing. At first one
|
||
|
+ * believes that one should use atomic swap instructions,
|
||
|
+ * but it's not actually necessary. Point is that if
|
||
|
+ * padlock_saved_context was changed by another thread
|
||
|
+ * after we've read it and before we compare it with cdata,
|
||
|
+ * our key *shall* be reloaded upon thread context switch
|
||
|
+ * and we are therefore set in either case...
|
||
|
+ */
|
||
|
+ static inline void
|
||
|
+padlock_verify_context(struct padlock_cipher_data *cdata)
|
||
|
+{
|
||
|
+ asm volatile (
|
||
|
+ "pushfq\n"
|
||
|
+ " btl $30,(%%rsp)\n"
|
||
|
+ " jnc 1f\n"
|
||
|
+ " cmpq %2,%1\n"
|
||
|
+ " je 1f\n"
|
||
|
+ " popfq\n"
|
||
|
+ " subq $8,%%rsp\n"
|
||
|
+ "1: addq $8,%%rsp\n"
|
||
|
+ " movq %2,%0"
|
||
|
+ :"+m"(padlock_saved_context)
|
||
|
+ : "r"(padlock_saved_context), "r"(cdata) : "cc");
|
||
|
+}
|
||
|
+
|
||
|
+/* Template for padlock_xcrypt_* modes */
|
||
|
+/* BIG FAT WARNING:
|
||
|
+ * The offsets used with 'leaq' instructions
|
||
|
+ * describe items of the 'padlock_cipher_data'
|
||
|
+ * structure.
|
||
|
+ */
|
||
|
+#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
|
||
|
+ static inline void *name(size_t cnt, \
|
||
|
+ struct padlock_cipher_data *cdata, \
|
||
|
+ void *out, const void *inp) \
|
||
|
+{ void *iv; \
|
||
|
+ asm volatile ( "leaq 16(%0),%%rdx\n" \
|
||
|
+ " leaq 32(%0),%%rbx\n" \
|
||
|
+ rep_xcrypt "\n" \
|
||
|
+ : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
|
||
|
+ : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
|
||
|
+ : "rbx", "rdx", "cc", "memory"); \
|
||
|
+ return iv; \
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
+#endif /* cpu */
|
||
|
+
|
||
|
+#ifndef OPENSSL_NO_AES
|
||
|
+
|
||
|
+
|
||
|
/* Generate all functions with appropriate opcodes */
|
||
|
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
|
||
|
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
|
||
|
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
|
||
|
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
|
||
|
+
|
||
|
+/* Our own htonl()/ntohl() */
|
||
|
+static inline void
|
||
|
+padlock_bswapl(AES_KEY *ks)
|
||
|
+{
|
||
|
+ size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
|
||
|
+ unsigned int *key = ks->rd_key;
|
||
|
+
|
||
|
+ while (i--) {
|
||
|
+ asm volatile ("bswapl %0" : "+r"(*key));
|
||
|
+ key++;
|
||
|
+ }
|
||
|
+}
|
||
|
#endif
|
||
|
|
||
|
/* The RNG call itself */
|
||
|
@@ -491,8 +599,8 @@ padlock_xstore(void *addr, unsigned int
|
||
|
static inline unsigned char *
|
||
|
padlock_memcpy(void *dst,const void *src,size_t n)
|
||
|
{
|
||
|
- long *d=dst;
|
||
|
- const long *s=src;
|
||
|
+ size_t *d=dst;
|
||
|
+ const size_t *s=src;
|
||
|
|
||
|
n /= sizeof(*d);
|
||
|
do { *d++ = *s++; } while (--n);
|
||
|
Index: openssl-1.0.1c/engines/e_padlock.c
|
||
|
===================================================================
|
||
|
--- openssl-1.0.1c.orig/engines/e_padlock.c
|
||
|
+++ openssl-1.0.1c/engines/e_padlock.c
|
||
|
@@ -457,30 +457,33 @@ padlock_available(void)
|
||
|
{
|
||
|
char vendor_string[16];
|
||
|
unsigned int eax, edx;
|
||
|
+ size_t scratch;
|
||
|
|
||
|
/* Are we running on the Centaur (VIA) CPU? */
|
||
|
eax = 0x00000000;
|
||
|
vendor_string[12] = 0;
|
||
|
asm volatile (
|
||
|
+ "movq %%rbx,%1\n"
|
||
|
"cpuid\n"
|
||
|
- "movl %%ebx,(%1)\n"
|
||
|
- "movl %%edx,4(%1)\n"
|
||
|
- "movl %%ecx,8(%1)\n"
|
||
|
- : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
|
||
|
+ "movl %%ebx,(%2)\n"
|
||
|
+ "movl %%edx,4(%2)\n"
|
||
|
+ "movl %%ecx,8(%2)\n"
|
||
|
+ "movq %1,%%rbx"
|
||
|
+ : "+a"(eax), "=&r"(scratch) : "r"(vendor_string) : "rcx", "rdx");
|
||
|
if (strcmp(vendor_string, "CentaurHauls") != 0)
|
||
|
return 0;
|
||
|
|
||
|
/* Check for Centaur Extended Feature Flags presence */
|
||
|
eax = 0xC0000000;
|
||
|
- asm volatile ("cpuid"
|
||
|
- : "+a"(eax) : : "rbx", "rcx", "rdx");
|
||
|
+ asm volatile ("movq %%rbx,%1; cpuid; movq %1,%%rbx"
|
||
|
+ : "+a"(eax), "=&r"(scratch) : : "rcx", "rdx");
|
||
|
if (eax < 0xC0000001)
|
||
|
return 0;
|
||
|
|
||
|
/* Read the Centaur Extended Feature Flags */
|
||
|
eax = 0xC0000001;
|
||
|
- asm volatile ("cpuid"
|
||
|
- : "+a"(eax), "=d"(edx) : : "rbx", "rcx");
|
||
|
+ asm volatile ("movq %%rbx,%2; cpuid; movq %2,%%rbx"
|
||
|
+ : "+a"(eax), "=d"(edx), "=&r"(scratch) : : "rcx");
|
||
|
|
||
|
/* Fill up some flags */
|
||
|
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
|
||
|
@@ -536,12 +539,15 @@ padlock_verify_context(struct padlock_ci
|
||
|
struct padlock_cipher_data *cdata, \
|
||
|
void *out, const void *inp) \
|
||
|
{ void *iv; \
|
||
|
- asm volatile ( "leaq 16(%0),%%rdx\n" \
|
||
|
+ size_t scratch; \
|
||
|
+ asm volatile ( "movq %%rbx,%4\n" \
|
||
|
+ " leaq 16(%0),%%rdx\n" \
|
||
|
" leaq 32(%0),%%rbx\n" \
|
||
|
rep_xcrypt "\n" \
|
||
|
- : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
|
||
|
+ " movq %4,%%rbx" \
|
||
|
+ : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp), "=&r"(scratch) \
|
||
|
: "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
|
||
|
- : "rbx", "rdx", "cc", "memory"); \
|
||
|
+ : "rdx", "cc", "memory"); \
|
||
|
return iv; \
|
||
|
}
|
||
|
#endif
|