forked from pool/ffmpeg-6
60 lines
1.6 KiB
Diff
diff -rup a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
--- a/libavcodec/x86/mathops.h	2023-10-01 13:02:26.829463017 +0200
+++ b/libavcodec/x86/mathops.h	2023-10-01 13:05:19.219502582 +0200
@@ -35,12 +35,20 @@
 static av_always_inline av_const int MULL(int a, int b, unsigned shift)
 {
     int rt, dummy;
+    if (__builtin_constant_p(shift))
     __asm__ (
         "imull %3               \n\t"
         "shrdl %4, %%edx, %%eax \n\t"
         :"=a"(rt), "=d"(dummy)
-        :"a"(a), "rm"(b), "ci"((uint8_t)shift)
+        :"a"(a), "rm"(b), "i"(shift & 0x1F)
     );
+    else
+        __asm__ (
+            "imull %3               \n\t"
+            "shrdl %4, %%edx, %%eax \n\t"
+            :"=a"(rt), "=d"(dummy)
+            :"a"(a), "rm"(b), "c"((uint8_t)shift)
+        );
     return rt;
 }
 
@@ -113,19 +121,31 @@ __asm__ volatile(\
 
 // avoid +32 for shift optimization (gcc should do that ...)
 #define NEG_SSR32 NEG_SSR32
 static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    if (__builtin_constant_p(s))
     __asm__ ("sarl %1, %0\n\t"
          : "+r" (a)
-         : "ic" ((uint8_t)(-s))
+         : "i" (-s & 0x1F)
     );
+    else
+        __asm__ ("sarl %1, %0\n\t"
+            : "+r" (a)
+            : "c" ((uint8_t)(-s))
+        );
     return a;
 }
 
 #define NEG_USR32 NEG_USR32
 static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    if (__builtin_constant_p(s))
     __asm__ ("shrl %1, %0\n\t"
          : "+r" (a)
-         : "ic" ((uint8_t)(-s))
+         : "i" (-s & 0x1F)
     );
+    else
+        __asm__ ("shrl %1, %0\n\t"
+            : "+r" (a)
+            : "c" ((uint8_t)(-s))
+        );
     return a;
 }