libvpx/version_1.1.0_to_1.2.0.diff

diff --git a/.gitignore b/.gitignore
index 110146d..4074b0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,8 @@
/ivfdec.dox
/ivfenc
/ivfenc.dox
+/libvpx.so*
+/libvpx.ver
/obj_int_extract
/postproc
/postproc.c
@@ -43,6 +45,7 @@
/simple_encoder
/simple_encoder.c
/simple_encoder.dox
+/test_libvpx
/twopass_encoder
/twopass_encoder.c
/twopass_encoder.dox
@@ -55,7 +58,14 @@
/vp8cx_set_ref
/vp8cx_set_ref.c
/vp8cx_set_ref.dox
+/vpx.pc
/vpx_config.c
/vpx_config.h
+/vpx_rtcd.h
/vpx_version.h
+/vpxdec
+/vpxenc
TAGS
+.cproject
+.project
+.settings
diff --git a/CHANGELOG b/CHANGELOG
index dcb9f73..ef64a96 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,32 @@
+2012-12-21 v1.2.0
+ This release acts as a checkpoint for a large amount of internal refactoring
+ and testing. It also contains a number of small bugfixes, so all users are
+ encouraged to upgrade.
+
+ - Upgrading:
+ This release is ABI and API compatible with Duclair (v1.0.0). Users
+ of older releases should refer to the Upgrading notes in this
+ document for that release.
+
+ - Enhancements:
+ VP8 optimizations for MIPS dspr2
+ vpxenc: add -quiet option
+
+ - Speed:
+ Encoder and decoder speed is consistent with the Eider release.
+
+ - Quality:
+ In general, quality is consistent with the Eider release.
+
+ Minor tweaks to ARNR filtering
+ Minor improvements to real time encoding with multiple temporal layers
+
+ - Bug Fixes:
+ Fixes multithreaded encoder race condition in loopfilter
+ Fixes multi-resolution threaded encoding
+ Fixes potential encoder deadlock after picture resize
+
+
2012-05-09 v1.1.0 "Eider"
This introduces a number of enhancements, mostly focused on real-time
encoding. In addition, it fixes a decoder bug (first introduced in
diff --git a/README b/README
index 0dfb0fe..0475dad 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
vpx Multi-Format Codec SDK
-README - 19 May 2010
+README - 21 June 2012
Welcome to the WebM VP8 Codec SDK!
@@ -15,11 +15,19 @@ COMPILING THE APPLICATIONS/LIBRARIES:
* Building the documentation requires PHP[3] and Doxygen[4]. If you do not
have these packages, you must pass --disable-install-docs to the
configure script.
+ * Downloading the data for the unit tests requires curl[5] and sha1sum.
+ sha1sum is provided via the GNU coreutils, installed by default on
+ many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
+ available, a compatible version of sha1sum can be built from
+ source[6]. These requirements are optional if not running the unit
+ tests.
[1]: http://www.tortall.net/projects/yasm
[2]: http://www.cygwin.com
[3]: http://php.net
[4]: http://www.doxygen.org
+ [5]: http://curl.haxx.se
+ [6]: http://www.microbrew.org/tools/md5sha1sum/
2. Out-of-tree builds
Out of tree builds are a supported method of building the application. For
@@ -94,5 +102,5 @@ COMPILING THE APPLICATIONS/LIBRARIES:
SUPPORT
This library is an open source project supported by its community. Please
- please email webm-users@webmproject.org for help.
+ email webm-discuss@webmproject.org for help.
diff --git a/build/make/Android.mk b/build/make/Android.mk
index 6fcd4ae..c6b9cf9 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -27,15 +27,22 @@
# Android.mk file in the libvpx directory:
# LOCAL_PATH := $(call my-dir)
# include $(CLEAR_VARS)
-# include libvpx/build/make/Android.mk
+# include jni/libvpx/build/make/Android.mk
#
# There are currently two TARGET_ARCH_ABI targets for ARM.
# armeabi and armeabi-v7a. armeabi-v7a is selected by creating an
# Application.mk in the jni directory that contains:
# APP_ABI := armeabi-v7a
#
+# By default libvpx will detect at runtime the existence of the NEON extension.
+# For this we import the 'cpufeatures' module from the NDK sources.
+# libvpx can also be configured without this runtime detection method.
+# Configuring with --disable-runtime-cpu-detect will assume presence of NEON.
+# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any
+# NEON dependency.
+
# To change to building armeabi, run ./libvpx/configure again, but with
-# --target=arm5te-android-gcc and and modify the Application.mk file to
+# --target=arm5te-android-gcc and modify the Application.mk file to
# set APP_ABI := armeabi
#
# Running ndk-build will build libvpx and include it in your project.
@@ -166,7 +173,9 @@ LOCAL_MODULE := libvpx
LOCAL_LDLIBS := -llog
-LOCAL_STATIC_LIBRARIES := cpufeatures
+ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
+ LOCAL_STATIC_LIBRARIES := cpufeatures
+endif
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h
@@ -196,4 +205,7 @@ ifeq ($(CONFIG_VP8_ENCODER), yes)
$(LIBVPX_PATH)/vp8/encoder/asm_enc_offsets.c))
endif
+ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
$(call import-module,cpufeatures)
+endif
+
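
The three NEON configurations described in the new Android.mk comments map to
configure invocations along these lines (a sketch; the target name and NDK
path are illustrative, not taken from this diff):

    # default: detect NEON at runtime via the NDK 'cpufeatures' module
    cd jni/libvpx
    ./configure --target=armv7-android-gcc --sdk-path=/path/to/android-ndk

    # assume NEON is always present (drops the cpufeatures dependency)
    ./configure --target=armv7-android-gcc --sdk-path=/path/to/android-ndk \
        --disable-runtime-cpu-detect

    # remove any NEON dependency
    ./configure --target=armv7-android-gcc --sdk-path=/path/to/android-ndk \
        --disable-runtime-cpu-detect --disable-neon
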
diff --git a/build/make/Makefile b/build/make/Makefile
index b6cf320..1088c84 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -21,6 +21,7 @@ all: .DEFAULT
clean:: .DEFAULT
install:: .DEFAULT
test:: .DEFAULT
+testdata:: .DEFAULT
# Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -66,6 +67,7 @@ endif
BUILD_ROOT?=.
VPATH=$(SRC_PATH_BARE)
CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
+CXXFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/
DIST_DIR?=dist
HOSTCC?=gcc
@@ -98,6 +100,8 @@ dist:
install::
.PHONY: test
test::
+.PHONY: testdata
+testdata::
$(BUILD_PFX)%.c.d: %.c
$(if $(quiet),@echo " [DEP] $@")
@@ -111,11 +115,11 @@ $(BUILD_PFX)%.c.o: %.c
$(BUILD_PFX)%.cc.d: %.cc
$(if $(quiet),@echo " [DEP] $@")
$(qexec)mkdir -p $(dir $@)
- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -M $< | $(fmt_deps) > $@
+ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@
$(BUILD_PFX)%.cc.o: %.cc
$(if $(quiet),@echo " [CXX] $@")
- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $<
+ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<
$(BUILD_PFX)%.asm.d: %.asm
$(if $(quiet),@echo " [DEP] $@")
@@ -213,7 +217,7 @@ define linkerxx_template
$(1): $(filter-out -%,$(2))
$(1):
$(if $(quiet),@echo " [LD] $$@")
- $(qexec)g++ $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs))
+ $(qexec)$$(CXX) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs))
endef
# make-3.80 has a bug with expanding large input strings to the eval function,
# which was triggered in some cases by the following component of
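
With the hard-coded g++ replaced by $(CXX)/$(CXXFLAGS), the C++ tool used for
the gtest objects and the test driver now follows the configured toolchain
instead of whatever g++ happens to be on PATH. A sketch, assuming a GNU cross
toolchain (names illustrative):

    CROSS=arm-linux-gnueabi- ../libvpx/configure --target=armv7-linux-gcc
    make    # .cc compiles and links now use ${CROSS}g++ with CXXFLAGS
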
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index c55ed0f..95be467 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -26,12 +26,22 @@ print "\t.equ DO1STROUNDING, 0\n";
while (<STDIN>)
{
+ undef $comment;
+ undef $line;
+ $comment_char = ";";
+ $comment_sub = "@";
+
+ # Handle comments.
+ if (/$comment_char/)
+ {
+ $comment = "";
+ ($line, $comment) = /(.*?)$comment_char(.*)/;
+ $_ = $line;
+ }
+
# Load and store alignment
s/@/,:/g;
- # Comment character
- s/;/@/g;
-
# Hexadecimal constants prefaced by 0x
s/#&/#0x/g;
@@ -51,16 +61,27 @@ while (<STDIN>)
s/:SHR:/ >> /g;
# Convert ELSE to .else
- s/ELSE/.else/g;
+ s/\bELSE\b/.else/g;
# Convert ENDIF to .endif
- s/ENDIF/.endif/g;
+ s/\bENDIF\b/.endif/g;
# Convert ELSEIF to .elseif
- s/ELSEIF/.elseif/g;
+ s/\bELSEIF\b/.elseif/g;
# Convert LTORG to .ltorg
- s/LTORG/.ltorg/g;
+ s/\bLTORG\b/.ltorg/g;
+
+ # Convert endfunc to nothing.
+ s/\bendfunc\b//ig;
+
+ # Convert FUNCTION to nothing.
+ s/\bFUNCTION\b//g;
+ s/\bfunction\b//g;
+
+ s/\bENTRY\b//g;
+ s/\bMSARMASM\b/0/g;
+ s/^\s+end\s+$//g;
# Convert IF :DEF:to .if
# gcc doesn't have the ability to do a conditional
@@ -106,6 +127,7 @@ while (<STDIN>)
if (s/RN\s+([Rr]\d+|lr)/.req $1/)
{
print;
+ print "$comment_sub$comment\n" if defined $comment;
next;
}
@@ -114,6 +136,9 @@ while (<STDIN>)
s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
s/IMPORT\s+\|([\$\w]*)\|/.global $1/;
+ s/EXPORT\s+([\$\w]*)/.global $1/;
+ s/export\s+([\$\w]*)/.global $1/;
+
# No vertical bars required; make additional symbol with prepended
# underscore
s/^\|(\$?\w+)\|/_$1\n\t$1:/g;
@@ -124,11 +149,19 @@ while (<STDIN>)
s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;
# ALIGN directive
- s/ALIGN/.balign/g;
+ s/\bALIGN\b/.balign/g;
# ARM code
s/\sARM/.arm/g;
+ # push/pop
+ s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g;
+ s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g;
+
+ # NEON code
+ s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g;
+ s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g;
+
# eabi_attributes numerical equivalents can be found in the
# "ARM IHI 0045C" document.
@@ -157,10 +190,10 @@ while (<STDIN>)
}
# EQU directive
- s/(.*)EQU(.*)/.equ $1, $2/;
+ s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/;
# Begin macro definition
- if (/MACRO/) {
+ if (/\bMACRO\b/) {
$_ = <STDIN>;
s/^/.macro/;
s/\$//g; # remove formal param reference
@@ -169,9 +202,10 @@ while (<STDIN>)
# For macros, use \ to reference formal params
s/\$/\\/g; # End macro definition
- s/MEND/.endm/; # No need to tell it where to stop assembling
+ s/\bMEND\b/.endm/; # No need to tell it where to stop assembling
next if /^\s*END\s*$/;
print;
+ print "$comment_sub$comment\n" if defined $comment;
}
# Mark that this object doesn't need an executable stack.
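
The new comment handling can be checked end to end with a made-up input line
(the preamble and trailer emitted by the script are omitted, and spacing is
approximate):

    $ echo 'loop  MOV r0, #&10  ; load constant' | perl build/make/ads2gas.pl
    loop:  MOV r0, #0x10  @ load constant

The ';' comment is split off before any substitution can mangle it, the '#&'
constant becomes '#0x', and the comment is re-emitted with the GAS '@'
comment character.
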
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 3c772e5..c99a01c 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -166,6 +166,17 @@ is_in(){
add_cflags() {
CFLAGS="${CFLAGS} $@"
+ CXXFLAGS="${CXXFLAGS} $@"
+}
+
+
+add_cflags_only() {
+ CFLAGS="${CFLAGS} $@"
+}
+
+
+add_cxxflags_only() {
+ CXXFLAGS="${CXXFLAGS} $@"
}
@@ -277,6 +288,13 @@ check_cc() {
check_cmd ${CC} ${CFLAGS} "$@" -c -o ${TMP_O} ${TMP_C}
}
+check_cxx() {
+ log check_cxx "$@"
+ cat >${TMP_C}
+ log_file ${TMP_C}
+ check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_C}
+}
+
check_cpp() {
log check_cpp "$@"
cat > ${TMP_C}
@@ -310,8 +328,25 @@ int x;
EOF
}
+check_cxxflags() {
+ log check_cxxflags "$@"
+
+ # Catch CFLAGS that trigger CXX warnings
+ case "$CXX" in
+ *g++*) check_cxx -Werror "$@" <<EOF
+int x;
+EOF
+ ;;
+ *) check_cxx "$@" <<EOF
+int x;
+EOF
+ ;;
+ esac
+}
+
check_add_cflags() {
- check_cflags "$@" && add_cflags "$@"
+ check_cxxflags "$@" && add_cxxflags_only "$@"
+ check_cflags "$@" && add_cflags_only "$@"
}
check_add_asflags() {
@@ -367,7 +402,9 @@ true
write_common_target_config_mk() {
local CC=${CC}
+ local CXX=${CXX}
enabled ccache && CC="ccache ${CC}"
+ enabled ccache && CXX="ccache ${CXX}"
print_webm_license $1 "##" ""
cat >> $1 << EOF
@@ -379,6 +416,7 @@ TOOLCHAIN=${toolchain}
ASM_CONVERSION=${asm_conversion_cmd:-${source_path}/build/make/ads2gas.pl}
CC=${CC}
+CXX=${CXX}
AR=${AR}
LD=${LD}
AS=${AS}
@@ -386,6 +424,7 @@ STRIP=${STRIP}
NM=${NM}
CFLAGS = ${CFLAGS}
+CXXFLAGS = ${CXXFLAGS}
ARFLAGS = -rus\$(if \$(quiet),c,v)
LDFLAGS = ${LDFLAGS}
ASFLAGS = ${ASFLAGS}
@@ -538,6 +577,7 @@ post_process_cmdline() {
setup_gnu_toolchain() {
CC=${CC:-${CROSS}gcc}
+ CXX=${CXX:-${CROSS}g++}
AR=${AR:-${CROSS}ar}
LD=${LD:-${CROSS}${link_with_cc:-ld}}
AS=${AS:-${CROSS}as}
@@ -549,10 +589,19 @@ setup_gnu_toolchain() {
process_common_toolchain() {
if [ -z "$toolchain" ]; then
- gcctarget="$(gcc -dumpmachine 2> /dev/null)"
+ gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
# detect tgt_isa
case "$gcctarget" in
+ armv6*)
+ tgt_isa=armv6
+ ;;
+ armv7*)
+ tgt_isa=armv7
+ ;;
+ armv5te*)
+ tgt_isa=armv5te
+ ;;
*x86_64*|*amd64*)
tgt_isa=x86_64
;;
@@ -718,6 +767,7 @@ process_common_toolchain() {
;;
armv5te)
soft_enable edsp
+ disable fast_unaligned
;;
esac
@@ -733,17 +783,23 @@ process_common_toolchain() {
check_add_asflags --defsym ARCHITECTURE=${arch_int}
tune_cflags="-mtune="
if [ ${tgt_isa} == "armv7" ]; then
+ check_add_cflags -march=armv7-a -mfloat-abi=softfp
+ check_add_asflags -march=armv7-a -mfloat-abi=softfp
+
if enabled neon
then
check_add_cflags -mfpu=neon #-ftree-vectorize
check_add_asflags -mfpu=neon
fi
- check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp
- check_add_asflags -mcpu=cortex-a8 -mfloat-abi=softfp #-march=armv7-a
+
+ if [ -z "${tune_cpu}" ]; then
+ tune_cpu=cortex-a8
+ fi
else
check_add_cflags -march=${tgt_isa}
check_add_asflags -march=${tgt_isa}
fi
+
enabled debug && add_asflags -g
asm_conversion_cmd="${source_path}/build/make/ads2gas.pl"
;;
@@ -792,6 +848,7 @@ process_common_toolchain() {
-name "arm-linux-androideabi-gcc*" -print -quit`
TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
CC=${TOOLCHAIN_PATH}gcc
+ CXX=${TOOLCHAIN_PATH}g++
AR=${TOOLCHAIN_PATH}ar
LD=${TOOLCHAIN_PATH}gcc
AS=${TOOLCHAIN_PATH}as
@@ -810,12 +867,17 @@ process_common_toolchain() {
add_cflags "--sysroot=${alt_libc}"
add_ldflags "--sysroot=${alt_libc}"
- add_cflags "-I${SDK_PATH}/sources/android/cpufeatures/"
+ # linker flag that routes around a CPU bug in some
+ # Cortex-A8 implementations (NDK Dev Guide)
+ add_ldflags "-Wl,--fix-cortex-a8"
enable pic
soft_enable realtime_only
if [ ${tgt_isa} == "armv7" ]; then
- enable runtime_cpu_detect
+ soft_enable runtime_cpu_detect
+ fi
+ if enabled runtime_cpu_detect; then
+ add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
fi
;;
@@ -827,6 +889,7 @@ process_common_toolchain() {
SDK_PATH=${sdk_path}
fi
TOOLCHAIN_PATH=${SDK_PATH}/usr/bin
+ CXX=${TOOLCHAIN_PATH}/g++
CC=${TOOLCHAIN_PATH}/gcc
AR=${TOOLCHAIN_PATH}/ar
LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2
@@ -890,13 +953,16 @@ process_common_toolchain() {
esac
;;
mips*)
- CROSS=${CROSS:-mipsel-linux-uclibc-}
link_with_cc=gcc
setup_gnu_toolchain
tune_cflags="-mtune="
+ if enabled dspr2; then
+ check_add_cflags -mips32r2 -mdspr2
+ disable fast_unaligned
+ fi
check_add_cflags -march=${tgt_isa}
- check_add_asflags -march=${tgt_isa}
- check_add_asflags -KPIC
+ check_add_asflags -march=${tgt_isa}
+ check_add_asflags -KPIC
;;
ppc*)
enable ppc
@@ -924,6 +990,11 @@ process_common_toolchain() {
x86*)
bits=32
enabled x86_64 && bits=64
+ check_cpp <<EOF && bits=x32
+#ifndef __ILP32__
+#error "not x32"
+#endif
+EOF
soft_enable runtime_cpu_detect
soft_enable mmx
soft_enable sse
@@ -938,6 +1009,7 @@ process_common_toolchain() {
;;
solaris*)
CC=${CC:-${CROSS}gcc}
+ CXX=${CXX:-${CROSS}g++}
LD=${LD:-${CROSS}gcc}
CROSS=${CROSS:-g}
;;
@@ -965,16 +1037,22 @@ process_common_toolchain() {
tune_cflags="-march="
;;
esac
- ;;
+ ;;
gcc*)
- add_cflags -m${bits}
+ add_cflags -m${bits}
add_ldflags -m${bits}
link_with_cc=gcc
tune_cflags="-march="
setup_gnu_toolchain
#for 32 bit x86 builds, -O3 did not turn on this flag
enabled optimizations && check_add_cflags -fomit-frame-pointer
- ;;
+ ;;
+ vs*)
+ # When building with Microsoft Visual Studio the assembler is
+ # invoked directly. Checking at configure time is unnecessary.
+ # Skip the check by setting AS arbitrarily
+ AS=msvs
+ ;;
esac
case "${AS}" in
@@ -983,7 +1061,7 @@ process_common_toolchain() {
which yasm >/dev/null 2>&1 && AS=yasm
[ "${AS}" = auto -o -z "${AS}" ] \
&& die "Neither yasm nor nasm have been found"
- ;;
+ ;;
esac
log_echo " using $AS"
[ "${AS##*/}" = nasm ] && add_asflags -Ox
@@ -1065,7 +1143,7 @@ process_common_toolchain() {
# Work around longjmp interception on glibc >= 2.11, to improve binary
# compatibility. See http://code.google.com/p/webm/issues/detail?id=166
- enabled linux && check_add_cflags -D_FORTIFY_SOURCE=0
+ enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
# Check for strip utility variant
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip
@@ -1080,12 +1158,24 @@ EOF
# Almost every platform uses pthreads.
if enabled multithread; then
case ${toolchain} in
- *-win*);;
+ *-win*-vs*);;
*-android-gcc);;
*) check_header pthread.h && add_extralibs -lpthread
esac
fi
+ # only for MIPS platforms
+ case ${toolchain} in
+ mips*)
+ if enabled dspr2; then
+ if enabled big_endian; then
+ echo "dspr2 optimizations are available only for little endian platforms"
+ disable dspr2
+ fi
+ fi
+ ;;
+ esac
+
# for sysconf(3) and friends.
check_header unistd.h
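
The effect of the new helpers is that a flag lands only in the flag sets
whose compiler actually accepts it; a sketch of the flow for one flag (the
flag choice is illustrative):

    check_add_cflags -Wextra
    #   check_cxxflags -Wextra  -> ok: add_cxxflags_only (CXXFLAGS += -Wextra)
    #   check_cflags   -Wextra  -> ok: add_cflags_only   (CFLAGS   += -Wextra)
    # add_cflags, by contrast, appends to both CFLAGS and CXXFLAGS without
    # probing either compiler.
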
diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh
index 717f870..0b4e3aa 100755
--- a/build/make/gen_asm_deps.sh
+++ b/build/make/gen_asm_deps.sh
@@ -42,7 +42,7 @@ done
[ -n "$srcfile" ] || show_help
sfx=${sfx:-asm}
-includes=$(LC_ALL=C egrep -i "include +\"?+[a-z0-9_/]+\.${sfx}" $srcfile |
+includes=$(LC_ALL=C egrep -i "include +\"?[a-z0-9_/]+\.${sfx}" $srcfile |
perl -p -e "s;.*?([a-z0-9_/]+.${sfx}).*;\1;")
#" restore editor state
for inc in ${includes}; do
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index 04e14a6..bf317bd 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -680,7 +680,7 @@ int parse_coff(uint8_t *buf, size_t sz)
uint32_t symoffset;
char **sectionlist; //this array holds all section names in their correct order.
- //it is used to check if the symbol is in .bss or .data section.
+ //it is used to check if the symbol is in .bss or .rdata section.
nsections = get_le16(buf + 2);
symtab_ptr = get_le32(buf + 8);
@@ -725,15 +725,15 @@ int parse_coff(uint8_t *buf, size_t sz)
}
strcpy(sectionlist[i], sectionname);
- if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20);
+ if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20);
ptr += 40;
}
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
- //log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
+ //log_msg("COFF: raw data pointer offset for section .rdata is %u\n", sectionrawdata_ptr);
- /* The compiler puts the data with non-zero offset in .data section, but puts the data with
+ /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with
zero offset in .bss section. So, if the data is in the .bss section, set offset=0.
Note from Wiki: In an object module compiled from C, the bss section contains
the local variables (but not functions) that were declared with the static keyword,
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
index 1dffde5..ddf9e09 100755
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -211,6 +211,8 @@ common_top() {
$(process_forward_decls)
$(declare_function_pointers c $ALL_ARCHS)
+
+void ${symbol:-rtcd}(void);
EOF
}
@@ -231,11 +233,10 @@ x86() {
cat <<EOF
$(common_top)
-void ${symbol:-rtcd}(void);
#ifdef RTCD_C
#include "vpx_ports/x86.h"
-void ${symbol:-rtcd}(void)
+static void setup_rtcd_internal(void)
{
int flags = x86_simd_caps();
@@ -261,11 +262,9 @@ arm() {
$(common_top)
#include "vpx_config.h"
-void ${symbol:-rtcd}(void);
-
#ifdef RTCD_C
#include "vpx_ports/arm.h"
-void ${symbol:-rtcd}(void)
+static void setup_rtcd_internal(void)
{
int flags = arm_cpu_caps();
@@ -279,16 +278,34 @@ EOF
}
+mips() {
+ determine_indirection c $ALL_ARCHS
+ cat <<EOF
+$(common_top)
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+$(set_function_pointers c $ALL_ARCHS)
+#if HAVE_DSPR2
+void dsputil_static_init();
+dsputil_static_init();
+#endif
+}
+#endif
+$(common_bottom)
+EOF
+}
+
unoptimized() {
determine_indirection c
cat <<EOF
$(common_top)
#include "vpx_config.h"
-void ${symbol:-rtcd}(void);
-
#ifdef RTCD_C
-void ${symbol:-rtcd}(void)
+static void setup_rtcd_internal(void)
{
$(set_function_pointers c)
}
@@ -312,6 +329,15 @@ case $arch in
require $(filter $REQUIRES)
x86
;;
+ mips32)
+ ALL_ARCHS=$(filter mips32)
+ dspr2=$([ -f "$config_file" ] && eval echo $(grep HAVE_DSPR2 "$config_file"))
+ HAVE_DSPR2="${dspr2#*=}"
+ if [ "$HAVE_DSPR2" = "yes" ]; then
+ ALL_ARCHS=$(filter mips32 dspr2)
+ fi
+ mips
+ ;;
armv5te)
ALL_ARCHS=$(filter edsp)
arm
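
A sketch of exercising the new mips32 branch by hand; libs.mk normally runs
this, and the --sym/--config options and paths below are assumptions, since
that part of the invocation is elided from this diff:

    build/make/rtcd.sh --arch=mips32 --sym=vpx_rtcd \
        --config=vpx_config.mk vp8/common/rtcd_defs.sh > vpx_rtcd.h
    # if the config file says HAVE_DSPR2=yes, the generated
    # setup_rtcd_internal() also calls dsputil_static_init() to hook up
    # the dspr2 code paths
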
diff --git a/configure b/configure
index 62e1ffb..be36e56 100755
--- a/configure
+++ b/configure
@@ -20,9 +20,10 @@ show_help(){
show_help_pre
cat << EOF
Advanced options:
- ${toggle_libs} don't build libraries
- ${toggle_examples} don't build examples
- ${toggle_unit_tests} build unit tests
+ ${toggle_libs} libraries
+ ${toggle_examples} examples
+ ${toggle_docs} documentation
+ ${toggle_unit_tests} unit tests
--libc=PATH path to alternate libc
--as={yasm|nasm|auto} use specified assembler [auto, yasm preferred]
--sdk-path=PATH path to root of sdk (iOS, android builds only)
@@ -209,6 +210,7 @@ ARCH_EXT_LIST="
neon
mips32
+ dspr2
mmx
sse
@@ -292,6 +294,7 @@ CMDLINE_SELECT="
libs
examples
+ docs
libc
as
fast_unaligned
@@ -453,7 +456,13 @@ process_detect() {
# Can only build shared libs on a subset of platforms. Doing this check
# here rather than at option parse time because the target auto-detect
# magic happens after the command line has been parsed.
- enabled linux || die "--enable-shared only supported on ELF for now"
+ if ! enabled linux; then
+ if enabled gnu; then
+ echo "--enable-shared is only supported on ELF; assuming this is OK"
+ else
+ die "--enable-shared only supported on ELF for now"
+ fi
+ fi
fi
if [ -z "$CC" ]; then
echo "Bypassing toolchain for environment detection."
@@ -531,7 +540,7 @@ process_toolchain() {
check_add_cflags -Wimplicit-function-declaration
check_add_cflags -Wuninitialized
check_add_cflags -Wunused-variable
- check_add_cflags -Wunused-but-set-variable
+ check_add_cflags -Wunused-but-set-variable
enabled extra_warnings || check_add_cflags -Wno-unused-function
fi
@@ -586,6 +595,25 @@ process_toolchain() {
if enabled postproc_visualizer; then
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
fi
+
+ # Enable unit tests if we have a working C++ compiler
+ case "$toolchain" in
+ *-vs*)
+ soft_enable unit_tests
+ ;;
+ *-android-*)
+ # GTestLog must be modified to use Android logging utilities.
+ ;;
+ *-darwin-*)
+ # iOS/ARM builds do not work with gtest. This does not match
+ # x86 targets.
+ ;;
+ *)
+ check_cxx "$@" <<EOF && soft_enable unit_tests
+int z;
+EOF
+ ;;
+ esac
}
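
On a typical Linux host this means the unit tests are enabled automatically
whenever a working C++ compiler is found; they can also be requested
explicitly. A sketch of the resulting workflow:

    ./configure --enable-unit-tests
    make
    make testdata    # fetch and verify the test vectors (see libs.mk)
    make test        # depends on testdata, then runs ./test_libvpx
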
diff --git a/examples.mk b/examples.mk
index b93a16b..90913e6 100644
--- a/examples.mk
+++ b/examples.mk
@@ -98,7 +98,7 @@ vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
# C file is provided, not generated automatically.
-GEN_EXAMPLES-$(CONFIG_MULTI_RES_ENCODING) += vp8_multi_resolution_encoder.c
+UTILS-$(CONFIG_MULTI_RES_ENCODING) += vp8_multi_resolution_encoder.c
vp8_multi_resolution_encoder.SRCS \
+= third_party/libyuv/include/libyuv/basic_types.h \
third_party/libyuv/include/libyuv/cpu_id.h \
diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt
index 92a2c30..e652a63 100644
--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -48,8 +48,8 @@ for(plane=0; plane < 3; plane++) {
unsigned char *buf =img->planes[plane];
for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
- if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
- outfile));
+ (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
+ outfile);
buf += img->stride[plane];
}
}
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index cc70b00..e2b65ec 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile,
mem_put_le32(header+24, frame_cnt); /* length */
mem_put_le32(header+28, 0); /* unused */
- if(fwrite(header, 1, 32, outfile));
+ (void) fwrite(header, 1, 32, outfile);
}
@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile,
mem_put_le32(header+4, pts&0xFFFFFFFF);
mem_put_le32(header+8, pts >> 32);
- if(fwrite(header, 1, 12, outfile));
+ (void) fwrite(header, 1, 12, outfile);
}
int main(int argc, char **argv) {
diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt
index 0042071..1afbd8b 100644
--- a/examples/encoder_tmpl.txt
+++ b/examples/encoder_tmpl.txt
@@ -61,13 +61,14 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
case VPX_CODEC_CX_FRAME_PKT:
write_ivf_frame_header(outfile, pkt);
- if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
- outfile));
+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+ outfile);
break;
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
+vpx_img_free(&raw);
if(vpx_codec_destroy(&codec))
die_codec(&codec, "Failed to destroy codec");
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
diff --git a/examples/twopass_encoder.txt b/examples/twopass_encoder.txt
index 4683bc7..2f81a90 100644
--- a/examples/twopass_encoder.txt
+++ b/examples/twopass_encoder.txt
@@ -71,5 +71,17 @@ Pass Progress Reporting
It's sometimes helpful to see when each pass completes.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END
printf("Pass %d complete.\n", pass+1);
+ if(vpx_codec_destroy(&codec))
+ die_codec(&codec, "Failed to destroy codec");
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END
+
+
+Clean-up
+-----------------------------
+Destruction of the encoder instance must be done on each pass. The
+raw image should be destroyed at the end as usual.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
+vpx_img_free(&raw);
+free(stats.buf);
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h
index 3418e36..e3ce585 100644
--- a/libmkv/EbmlIDs.h
+++ b/libmkv/EbmlIDs.h
@@ -1,16 +1,16 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
#ifndef MKV_DEFS_HPP
#define MKV_DEFS_HPP 1
-//Commenting out values not available in webm, but available in matroska
+/* Commenting out values not available in webm, but available in matroska */
enum mkv
{
@@ -22,7 +22,7 @@ enum mkv
DocType = 0x4282,
DocTypeVersion = 0x4287,
DocTypeReadVersion = 0x4285,
-// CRC_32 = 0xBF,
+/* CRC_32 = 0xBF, */
Void = 0xEC,
SignatureSlot = 0x1B538667,
SignatureAlgo = 0x7E8A,
@@ -32,61 +32,61 @@ enum mkv
SignatureElements = 0x7E5B,
SignatureElementList = 0x7E7B,
SignedElement = 0x6532,
- //segment
+ /* segment */
Segment = 0x18538067,
- //Meta Seek Information
+ /* Meta Seek Information */
SeekHead = 0x114D9B74,
Seek = 0x4DBB,
SeekID = 0x53AB,
SeekPosition = 0x53AC,
- //Segment Information
+ /* Segment Information */
Info = 0x1549A966,
-// SegmentUID = 0x73A4,
-// SegmentFilename = 0x7384,
-// PrevUID = 0x3CB923,
-// PrevFilename = 0x3C83AB,
-// NextUID = 0x3EB923,
-// NextFilename = 0x3E83BB,
-// SegmentFamily = 0x4444,
-// ChapterTranslate = 0x6924,
-// ChapterTranslateEditionUID = 0x69FC,
-// ChapterTranslateCodec = 0x69BF,
-// ChapterTranslateID = 0x69A5,
+/* SegmentUID = 0x73A4, */
+/* SegmentFilename = 0x7384, */
+/* PrevUID = 0x3CB923, */
+/* PrevFilename = 0x3C83AB, */
+/* NextUID = 0x3EB923, */
+/* NextFilename = 0x3E83BB, */
+/* SegmentFamily = 0x4444, */
+/* ChapterTranslate = 0x6924, */
+/* ChapterTranslateEditionUID = 0x69FC, */
+/* ChapterTranslateCodec = 0x69BF, */
+/* ChapterTranslateID = 0x69A5, */
TimecodeScale = 0x2AD7B1,
Segment_Duration = 0x4489,
DateUTC = 0x4461,
-// Title = 0x7BA9,
+/* Title = 0x7BA9, */
MuxingApp = 0x4D80,
WritingApp = 0x5741,
- //Cluster
+ /* Cluster */
Cluster = 0x1F43B675,
Timecode = 0xE7,
-// SilentTracks = 0x5854,
-// SilentTrackNumber = 0x58D7,
-// Position = 0xA7,
+/* SilentTracks = 0x5854, */
+/* SilentTrackNumber = 0x58D7, */
+/* Position = 0xA7, */
PrevSize = 0xAB,
BlockGroup = 0xA0,
Block = 0xA1,
-// BlockVirtual = 0xA2,
-// BlockAdditions = 0x75A1,
-// BlockMore = 0xA6,
-// BlockAddID = 0xEE,
-// BlockAdditional = 0xA5,
+/* BlockVirtual = 0xA2, */
+/* BlockAdditions = 0x75A1, */
+/* BlockMore = 0xA6, */
+/* BlockAddID = 0xEE, */
+/* BlockAdditional = 0xA5, */
BlockDuration = 0x9B,
-// ReferencePriority = 0xFA,
+/* ReferencePriority = 0xFA, */
ReferenceBlock = 0xFB,
-// ReferenceVirtual = 0xFD,
-// CodecState = 0xA4,
-// Slices = 0x8E,
-// TimeSlice = 0xE8,
+/* ReferenceVirtual = 0xFD, */
+/* CodecState = 0xA4, */
+/* Slices = 0x8E, */
+/* TimeSlice = 0xE8, */
LaceNumber = 0xCC,
-// FrameNumber = 0xCD,
-// BlockAdditionID = 0xCB,
-// MkvDelay = 0xCE,
-// Cluster_Duration = 0xCF,
+/* FrameNumber = 0xCD, */
+/* BlockAdditionID = 0xCB, */
+/* MkvDelay = 0xCE, */
+/* Cluster_Duration = 0xCF, */
SimpleBlock = 0xA3,
-// EncryptedBlock = 0xAF,
- //Track
+/* EncryptedBlock = 0xAF, */
+ /* Track */
Tracks = 0x1654AE6B,
TrackEntry = 0xAE,
TrackNumber = 0xD7,
@@ -96,28 +96,28 @@ enum mkv
FlagDefault = 0x88,
FlagForced = 0x55AA,
FlagLacing = 0x9C,
-// MinCache = 0x6DE7,
-// MaxCache = 0x6DF8,
+/* MinCache = 0x6DE7, */
+/* MaxCache = 0x6DF8, */
DefaultDuration = 0x23E383,
-// TrackTimecodeScale = 0x23314F,
-// TrackOffset = 0x537F,
-// MaxBlockAdditionID = 0x55EE,
+/* TrackTimecodeScale = 0x23314F, */
+/* TrackOffset = 0x537F, */
+/* MaxBlockAdditionID = 0x55EE, */
Name = 0x536E,
Language = 0x22B59C,
CodecID = 0x86,
CodecPrivate = 0x63A2,
CodecName = 0x258688,
-// AttachmentLink = 0x7446,
-// CodecSettings = 0x3A9697,
-// CodecInfoURL = 0x3B4040,
-// CodecDownloadURL = 0x26B240,
-// CodecDecodeAll = 0xAA,
-// TrackOverlay = 0x6FAB,
-// TrackTranslate = 0x6624,
-// TrackTranslateEditionUID = 0x66FC,
-// TrackTranslateCodec = 0x66BF,
-// TrackTranslateTrackID = 0x66A5,
- //video
+/* AttachmentLink = 0x7446, */
+/* CodecSettings = 0x3A9697, */
+/* CodecInfoURL = 0x3B4040, */
+/* CodecDownloadURL = 0x26B240, */
+/* CodecDecodeAll = 0xAA, */
+/* TrackOverlay = 0x6FAB, */
+/* TrackTranslate = 0x6624, */
+/* TrackTranslateEditionUID = 0x66FC, */
+/* TrackTranslateCodec = 0x66BF, */
+/* TrackTranslateTrackID = 0x66A5, */
+ /* video */
Video = 0xE0,
FlagInterlaced = 0x9A,
StereoMode = 0x53B8,
@@ -131,101 +131,101 @@ enum mkv
DisplayHeight = 0x54BA,
DisplayUnit = 0x54B2,
AspectRatioType = 0x54B3,
-// ColourSpace = 0x2EB524,
-// GammaValue = 0x2FB523,
+/* ColourSpace = 0x2EB524, */
+/* GammaValue = 0x2FB523, */
FrameRate = 0x2383E3,
- //end video
- //audio
+ /* end video */
+ /* audio */
Audio = 0xE1,
SamplingFrequency = 0xB5,
OutputSamplingFrequency = 0x78B5,
Channels = 0x9F,
-// ChannelPositions = 0x7D7B,
+/* ChannelPositions = 0x7D7B, */
BitDepth = 0x6264,
- //end audio
- //content encoding
-// ContentEncodings = 0x6d80,
-// ContentEncoding = 0x6240,
-// ContentEncodingOrder = 0x5031,
-// ContentEncodingScope = 0x5032,
-// ContentEncodingType = 0x5033,
-// ContentCompression = 0x5034,
-// ContentCompAlgo = 0x4254,
-// ContentCompSettings = 0x4255,
-// ContentEncryption = 0x5035,
-// ContentEncAlgo = 0x47e1,
-// ContentEncKeyID = 0x47e2,
-// ContentSignature = 0x47e3,
-// ContentSigKeyID = 0x47e4,
-// ContentSigAlgo = 0x47e5,
-// ContentSigHashAlgo = 0x47e6,
- //end content encoding
- //Cueing Data
+ /* end audio */
+ /* content encoding */
+/* ContentEncodings = 0x6d80, */
+/* ContentEncoding = 0x6240, */
+/* ContentEncodingOrder = 0x5031, */
+/* ContentEncodingScope = 0x5032, */
+/* ContentEncodingType = 0x5033, */
+/* ContentCompression = 0x5034, */
+/* ContentCompAlgo = 0x4254, */
+/* ContentCompSettings = 0x4255, */
+/* ContentEncryption = 0x5035, */
+/* ContentEncAlgo = 0x47e1, */
+/* ContentEncKeyID = 0x47e2, */
+/* ContentSignature = 0x47e3, */
+/* ContentSigKeyID = 0x47e4, */
+/* ContentSigAlgo = 0x47e5, */
+/* ContentSigHashAlgo = 0x47e6, */
+ /* end content encoding */
+ /* Cueing Data */
Cues = 0x1C53BB6B,
CuePoint = 0xBB,
CueTime = 0xB3,
CueTrackPositions = 0xB7,
CueTrack = 0xF7,
CueClusterPosition = 0xF1,
- CueBlockNumber = 0x5378,
-// CueCodecState = 0xEA,
-// CueReference = 0xDB,
-// CueRefTime = 0x96,
-// CueRefCluster = 0x97,
-// CueRefNumber = 0x535F,
-// CueRefCodecState = 0xEB,
- //Attachment
-// Attachments = 0x1941A469,
-// AttachedFile = 0x61A7,
-// FileDescription = 0x467E,
-// FileName = 0x466E,
-// FileMimeType = 0x4660,
-// FileData = 0x465C,
-// FileUID = 0x46AE,
-// FileReferral = 0x4675,
- //Chapters
-// Chapters = 0x1043A770,
-// EditionEntry = 0x45B9,
-// EditionUID = 0x45BC,
-// EditionFlagHidden = 0x45BD,
-// EditionFlagDefault = 0x45DB,
-// EditionFlagOrdered = 0x45DD,
-// ChapterAtom = 0xB6,
-// ChapterUID = 0x73C4,
-// ChapterTimeStart = 0x91,
-// ChapterTimeEnd = 0x92,
-// ChapterFlagHidden = 0x98,
-// ChapterFlagEnabled = 0x4598,
-// ChapterSegmentUID = 0x6E67,
-// ChapterSegmentEditionUID = 0x6EBC,
-// ChapterPhysicalEquiv = 0x63C3,
-// ChapterTrack = 0x8F,
-// ChapterTrackNumber = 0x89,
-// ChapterDisplay = 0x80,
-// ChapString = 0x85,
-// ChapLanguage = 0x437C,
-// ChapCountry = 0x437E,
-// ChapProcess = 0x6944,
-// ChapProcessCodecID = 0x6955,
-// ChapProcessPrivate = 0x450D,
-// ChapProcessCommand = 0x6911,
-// ChapProcessTime = 0x6922,
-// ChapProcessData = 0x6933,
- //Tagging
-// Tags = 0x1254C367,
-// Tag = 0x7373,
-// Targets = 0x63C0,
-// TargetTypeValue = 0x68CA,
-// TargetType = 0x63CA,
-// Tagging_TrackUID = 0x63C5,
-// Tagging_EditionUID = 0x63C9,
-// Tagging_ChapterUID = 0x63C4,
-// AttachmentUID = 0x63C6,
-// SimpleTag = 0x67C8,
-// TagName = 0x45A3,
-// TagLanguage = 0x447A,
-// TagDefault = 0x4484,
-// TagString = 0x4487,
-// TagBinary = 0x4485,
+ CueBlockNumber = 0x5378
+/* CueCodecState = 0xEA, */
+/* CueReference = 0xDB, */
+/* CueRefTime = 0x96, */
+/* CueRefCluster = 0x97, */
+/* CueRefNumber = 0x535F, */
+/* CueRefCodecState = 0xEB, */
+ /* Attachment */
+/* Attachments = 0x1941A469, */
+/* AttachedFile = 0x61A7, */
+/* FileDescription = 0x467E, */
+/* FileName = 0x466E, */
+/* FileMimeType = 0x4660, */
+/* FileData = 0x465C, */
+/* FileUID = 0x46AE, */
+/* FileReferral = 0x4675, */
+ /* Chapters */
+/* Chapters = 0x1043A770, */
+/* EditionEntry = 0x45B9, */
+/* EditionUID = 0x45BC, */
+/* EditionFlagHidden = 0x45BD, */
+/* EditionFlagDefault = 0x45DB, */
+/* EditionFlagOrdered = 0x45DD, */
+/* ChapterAtom = 0xB6, */
+/* ChapterUID = 0x73C4, */
+/* ChapterTimeStart = 0x91, */
+/* ChapterTimeEnd = 0x92, */
+/* ChapterFlagHidden = 0x98, */
+/* ChapterFlagEnabled = 0x4598, */
+/* ChapterSegmentUID = 0x6E67, */
+/* ChapterSegmentEditionUID = 0x6EBC, */
+/* ChapterPhysicalEquiv = 0x63C3, */
+/* ChapterTrack = 0x8F, */
+/* ChapterTrackNumber = 0x89, */
+/* ChapterDisplay = 0x80, */
+/* ChapString = 0x85, */
+/* ChapLanguage = 0x437C, */
+/* ChapCountry = 0x437E, */
+/* ChapProcess = 0x6944, */
+/* ChapProcessCodecID = 0x6955, */
+/* ChapProcessPrivate = 0x450D, */
+/* ChapProcessCommand = 0x6911, */
+/* ChapProcessTime = 0x6922, */
+/* ChapProcessData = 0x6933, */
+ /* Tagging */
+/* Tags = 0x1254C367, */
+/* Tag = 0x7373, */
+/* Targets = 0x63C0, */
+/* TargetTypeValue = 0x68CA, */
+/* TargetType = 0x63CA, */
+/* Tagging_TrackUID = 0x63C5, */
+/* Tagging_EditionUID = 0x63C9, */
+/* Tagging_ChapterUID = 0x63C4, */
+/* AttachmentUID = 0x63C6, */
+/* SimpleTag = 0x67C8, */
+/* TagName = 0x45A3, */
+/* TagLanguage = 0x447A, */
+/* TagDefault = 0x4484, */
+/* TagString = 0x4487, */
+/* TagBinary = 0x4485, */
};
#endif
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
index fbf2c66..d70f06e 100644
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -1,12 +1,12 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
#include "EbmlWriter.h"
#include <stdlib.h>
#include <wchar.h>
@@ -18,11 +18,13 @@
#define LITERALU64(n) n##LLU
#endif
-void Ebml_WriteLen(EbmlGlobal *glob, long long val)
+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val)
{
- //TODO check and make sure we are not > than 0x0100000000000000LLU
- unsigned char size = 8; //size in bytes to output
- unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size
+ /* TODO check and make sure we are not > than 0x0100000000000000LLU */
+ unsigned char size = 8; /* size in bytes to output */
+
+ /* mask to compare for byte size */
+ int64_t minVal = 0xff;
for (size = 1; size < 8; size ++)
{
@@ -32,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val)
minVal = (minVal << 7);
}
- val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
+ val |= (((uint64_t)0x80) << ((size - 1) * 7));
Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
}
@@ -40,23 +42,25 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val)
void Ebml_WriteString(EbmlGlobal *glob, const char *str)
{
const size_t size_ = strlen(str);
- const unsigned long long size = size_;
+ const uint64_t size = size_;
Ebml_WriteLen(glob, size);
- //TODO: it's not clear from the spec whether the nul terminator
- //should be serialized too. For now we omit the null terminator.
- Ebml_Write(glob, str, size);
+ /* TODO: it's not clear from the spec whether the nul terminator
+ * should be serialized too. For now we omit the null terminator.
+ */
+ Ebml_Write(glob, str, (unsigned long)size);
}
void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
{
const size_t strlen = wcslen(wstr);
- //TODO: it's not clear from the spec whether the nul terminator
- //should be serialized too. For now we include it.
- const unsigned long long size = strlen;
+ /* TODO: it's not clear from the spec whether the nul terminator
+ * should be serialized too. For now we include it.
+ */
+ const uint64_t size = strlen;
Ebml_WriteLen(glob, size);
- Ebml_Write(glob, wstr, size);
+ Ebml_Write(glob, wstr, (unsigned long)size);
}
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
@@ -85,12 +89,12 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
{
- unsigned char size = 8; //size in bytes to output
+ unsigned char size = 8; /* size in bytes to output */
unsigned char sizeSerialized = 0;
unsigned long minVal;
Ebml_WriteID(glob, class_id);
- minVal = 0x7fLU; //mask to compare for byte size
+ minVal = 0x7fLU; /* mask to compare for byte size */
for (size = 1; size < 4; size ++)
{
@@ -106,7 +110,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
Ebml_Serialize(glob, &ui, sizeof(ui), size);
}
-//TODO: perhaps this is a poor name for this id serializer helper function
+/* TODO: perhaps this is a poor name for this id serializer helper function */
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
{
int size;
@@ -168,4 +172,4 @@ void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
}
}
-//TODO Serialize Date
+/* TODO Serialize Date */
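
Ebml_WriteLen emits an EBML variable-length integer: roughly, the smallest
size whose 7*size value bits hold val, with a marker bit OR-ed in above them.
A worked example for val = 500, checkable from a shell:

    printf '%X\n' $(( (0x80 << 7) | 500 ))    # size=2 marker | value
    # -> 41F4, serialized big-endian as 0x41 0xF4; a reader sees the
    # leading 01 bits of 0x41, infers a two-byte field, masks off the
    # marker, and recovers 500 (0x1F4)
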
diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h
index 324c9bc..b94f757 100644
--- a/libmkv/EbmlWriter.h
+++ b/libmkv/EbmlWriter.h
@@ -1,26 +1,30 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
#ifndef EBMLWRITER_HPP
#define EBMLWRITER_HPP
-
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-//note: you must define write and serialize functions as well as your own EBML_GLOBAL
-//These functions MUST be implemented
#include <stddef.h>
#include "vpx/vpx_integer.h"
+/* note: you must define write and serialize functions as well as your own
+ * EBML_GLOBAL
+ *
+ * These functions MUST be implemented
+ */
+
typedef struct EbmlGlobal EbmlGlobal;
void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long);
void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
-/////
+/*****/
-void Ebml_WriteLen(EbmlGlobal *glob, long long val);
+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val);
void Ebml_WriteString(EbmlGlobal *glob, const char *str);
void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
@@ -28,11 +32,11 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
-//TODO make this more generic to signed
+/* TODO make this more generic to signed */
void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
-//TODO need date function
+/* TODO need date function */
#endif
diff --git a/libs.mk b/libs.mk
index e2ba737..4115dd8 100644
--- a/libs.mk
+++ b/libs.mk
@@ -20,8 +20,16 @@ endif
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
+# If this is a universal (fat) binary, then all the subarchitectures have
+# already been built and our job is to stitch them together. The
+# BUILD_LIBVPX variable indicates whether we should be building
+# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
+# that we're stitching.
+$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
+
include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
+CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk
CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS))
@@ -29,17 +37,17 @@ CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS))
include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk
CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
+include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
+CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
+
ifeq ($(CONFIG_VP8_ENCODER),yes)
VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
- CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h
- CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
CODEC_DOC_SECTIONS += vp8 vp8_encoder
endif
@@ -48,10 +56,8 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
- CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
CODEC_DOC_SECTIONS += vp8 vp8_decoder
endif
@@ -66,6 +72,7 @@ endif
ifeq ($(CONFIG_MSVS),yes)
CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
+GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd)
# This variable uses deferred expansion intentionally, since the results of
# $(wildcard) may change during the course of the Make.
VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d))))
@@ -82,29 +89,10 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%)
INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%)
endif
-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_LIBVPX variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
-
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h
CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
-ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
-endif
-CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
-CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
@@ -146,7 +134,7 @@ ifeq ($(CONFIG_MSVS),yes)
obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
@cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat .
@echo " [CREATE] $@"
- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
--exe \
--target=$(TOOLCHAIN) \
--name=obj_int_extract \
@@ -162,14 +150,14 @@ PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
vpx.def: $(call enabled,CODEC_EXPORTS)
@echo " [CREATE] $@"
- $(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
--name=vpx\
--out=$@ $^
CLEAN-OBJS += vpx.def
vpx.vcproj: $(CODEC_SRCS) vpx.def
@echo " [CREATE] $@"
- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
--lib \
--target=$(TOOLCHAIN) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
@@ -242,6 +230,7 @@ vpx.pc: config.mk libs.mk
$(qexec)echo 'Requires:' >> $@
$(qexec)echo 'Conflicts:' >> $@
$(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@
+ $(qexec)echo 'Libs.private: -lm -lpthread' >> $@
$(qexec)echo 'Cflags: -I$${includedir}' >> $@
INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc
INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
@@ -284,38 +273,44 @@ OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
$(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
+ @echo " [CREATE] $@"
+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
$(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c
CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
$(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
+ @echo " [CREATE] $@"
+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
$(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c
CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
$(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
+ @echo " [CREATE] $@"
+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
$(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c
CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
else
ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
asm_com_offsets.asm: obj_int_extract
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
- ./obj_int_extract rvds $< $(ADS2GAS) > $@
+ @echo " [CREATE] $@"
+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
CLEAN-OBJS += asm_com_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
asm_enc_offsets.asm: obj_int_extract
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
- ./obj_int_extract rvds $< $(ADS2GAS) > $@
+ @echo " [CREATE] $@"
+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
CLEAN-OBJS += asm_enc_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
asm_dec_offsets.asm: obj_int_extract
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
- ./obj_int_extract rvds $< $(ADS2GAS) > $@
+ @echo " [CREATE] $@"
+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
CLEAN-OBJS += asm_dec_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
@@ -328,7 +323,6 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
#
# Rule to generate runtime cpu detection files
#
-$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h
$(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
@echo " [CREATE] $@"
$(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \
@@ -337,25 +331,43 @@ $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_S
$(RTCD_OPTIONS) $^ > $@
CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h
-CODEC_DOC_SRCS += vpx/vpx_codec.h \
- vpx/vpx_decoder.h \
- vpx/vpx_encoder.h \
- vpx/vpx_image.h
-
##
## libvpx test directives
##
-
ifeq ($(CONFIG_UNIT_TESTS),yes)
+LIBVPX_TEST_DATA_PATH ?= .
+
+include $(SRC_PATH_BARE)/test/test.mk
+LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
+LIBVPX_TEST_BINS=./test_libvpx
+LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
+ $(call enabled,LIBVPX_TEST_DATA))
+libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
+
+$(LIBVPX_TEST_DATA):
+ @echo " [DOWNLOAD] $@"
+ $(qexec)trap 'rm -f $@' INT TERM &&\
+ curl -L -o $@ $(call libvpx_test_data_url,$(@F))
+
+testdata:: $(LIBVPX_TEST_DATA)
+ $(qexec)if [ -x "$$(which sha1sum)" ]; then\
+ echo "Checking test data:";\
+ if [ -n "$(LIBVPX_TEST_DATA)" ]; then\
+ for f in $(call enabled,LIBVPX_TEST_DATA); do\
+ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
+ (cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c);\
+ done; \
+ fi; \
+ else\
+ echo "Skipping test data integrity check, sha1sum not found.";\
+ fi
+
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
ifeq ($(CONFIG_MSVS),yes)
-LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS))
-LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_SRCS:.cc.o=))
-
gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc
@echo " [CREATE] $@"
- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
--lib \
--target=$(TOOLCHAIN) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
@@ -368,27 +380,22 @@ gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc
PROJECTS-$(CONFIG_MSVS) += gtest.vcproj
-define unit_test_vcproj_template
-$(notdir $(1:.cc=.vcproj)): $(SRC_PATH_BARE)/$(1)
- @echo " [vcproj] $$@"
- $$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
- --exe\
- --target=$$(TOOLCHAIN)\
- --name=$(notdir $(1:.cc=))\
- --ver=$$(CONFIG_VS_VERSION)\
- $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \
- --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \
+test_libvpx.vcproj: $(LIBVPX_TEST_SRCS)
+ @echo " [CREATE] $@"
+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
+ --exe \
+ --target=$(TOOLCHAIN) \
+ --name=test_libvpx \
+ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
+ --ver=$(CONFIG_VS_VERSION) \
+ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
+ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
- -L. -lvpxmt -lwinmm -lgtestmt $$^
-endef
+ -L. -l$(CODEC_LIB) -lwinmm -l$(GTEST_LIB) $^
-$(foreach proj,$(LIBVPX_TEST_BINS),\
- $(eval $(call unit_test_vcproj_template,$(proj))))
+PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj
-PROJECTS-$(CONFIG_MSVS) += $(foreach proj,$(LIBVPX_TEST_BINS),\
- $(notdir $(proj:.cc=.vcproj)))
-
-test::
+test:: testdata
@set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done
endif
else
@@ -396,28 +403,35 @@ else
include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS))
GTEST_OBJS=$(call objs,$(GTEST_SRCS))
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
$(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)
-LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS))
-LIBVPX_TEST_OBJS=$(call objs,$(LIBVPX_TEST_SRCS))
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_OBJS:.cc.o=))
+LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
+BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
+
+# Install test sources only if codec source is included
+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
+ $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
+CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
$(foreach bin,$(LIBVPX_TEST_BINS),\
- $(if $(BUILD_LIBVPX),$(eval $(bin): libvpx.a libgtest.a ))\
+ $(if $(BUILD_LIBVPX),$(eval $(bin): \
+ lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
- $(bin).cc.o \
+ $(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest -lpthread -lm)\
)))\
$(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
-test:: $(LIBVPX_TEST_BINS)
+test:: $(LIBVPX_TEST_BINS) testdata
@set -e; for t in $(LIBVPX_TEST_BINS); do $$t; done
endif
@@ -435,3 +449,6 @@ libs.doxy: $(CODEC_DOC_SRCS)
@echo "PREDEFINED = VPX_CODEC_DISABLE_COMPAT" >> $@
@echo "INCLUDE_PATH += ." >> $@;
@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@
+
+## Generate vpx_rtcd.h for all objects
+$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h
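
Typical use of the new test-data targets from a configured build tree (the
cache directory is whatever LIBVPX_TEST_DATA_PATH points at):

    LIBVPX_TEST_DATA_PATH=/var/cache/libvpx-test-data make testdata
    # curl fetches each file named in test/test.mk from
    # downloads.webmproject.org, then sha1sum (when available) verifies it
    # against test/test-data.sha1
    make test    # runs ./test_libvpx against the downloaded vectors
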
diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c
index 63a0e83..cc87788 100644
--- a/nestegg/src/nestegg.c
+++ b/nestegg/src/nestegg.c
@@ -1272,7 +1272,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
if (total > block_size)
return -1;
- entry = ne_find_track_entry(ctx, track - 1);
+ entry = ne_find_track_entry(ctx, (unsigned int)(track - 1));
if (!entry)
return -1;
@@ -1291,7 +1291,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
pkt = ne_alloc(sizeof(*pkt));
pkt->track = track - 1;
- pkt->timecode = abs_timecode * tc_scale * track_scale;
+ pkt->timecode = (uint64_t)(abs_timecode * tc_scale * track_scale);
ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu",
block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames);
@@ -1774,35 +1774,35 @@ nestegg_track_video_params(nestegg * ctx, unsigned int track,
if (ne_get_uint(entry->video.pixel_width, &value) != 0)
return -1;
- params->width = value;
+ params->width = (unsigned int)value;
if (ne_get_uint(entry->video.pixel_height, &value) != 0)
return -1;
- params->height = value;
+ params->height = (unsigned int)value;
value = 0;
ne_get_uint(entry->video.pixel_crop_bottom, &value);
- params->crop_bottom = value;
+ params->crop_bottom = (unsigned int)value;
value = 0;
ne_get_uint(entry->video.pixel_crop_top, &value);
- params->crop_top = value;
+ params->crop_top = (unsigned int)value;
value = 0;
ne_get_uint(entry->video.pixel_crop_left, &value);
- params->crop_left = value;
+ params->crop_left = (unsigned int)value;
value = 0;
ne_get_uint(entry->video.pixel_crop_right, &value);
- params->crop_right = value;
+ params->crop_right = (unsigned int)value;
value = params->width;
ne_get_uint(entry->video.display_width, &value);
- params->display_width = value;
+ params->display_width = (unsigned int)value;
value = params->height;
ne_get_uint(entry->video.display_height, &value);
- params->display_height = value;
+ params->display_height = (unsigned int)value;
return 0;
}
@@ -1828,11 +1828,11 @@ nestegg_track_audio_params(nestegg * ctx, unsigned int track,
value = 1;
ne_get_uint(entry->audio.channels, &value);
- params->channels = value;
+ params->channels = (unsigned int)value;
value = 16;
ne_get_uint(entry->audio.bit_depth, &value);
- params->depth = value;
+ params->depth = (unsigned int)value;
return 0;
}
@@ -1888,7 +1888,7 @@ nestegg_free_packet(nestegg_packet * pkt)
int
nestegg_packet_track(nestegg_packet * pkt, unsigned int * track)
{
- *track = pkt->track;
+ *track = (unsigned int)pkt->track;
return 0;
}
diff --git a/solution.mk b/solution.mk
index 2de1d8d..948305f 100644
--- a/solution.mk
+++ b/solution.mk
@@ -8,18 +8,19 @@
## be found in the AUTHORS file in the root of the source tree.
##
+# libvpx reverse dependencies (targets that depend on libvpx)
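+# (e.g. a vpxenc.vcproj in $^ yields --dep=vpxenc:vpx; vpx, gtest and
+# obj_int_extract are excluded since they do not link against libvpx)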
+VPX_NONDEPS=$(addsuffix .vcproj,vpx gtest obj_int_extract)
+VPX_RDEPS=$(foreach vcp,\
+ $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.vcproj=):vpx)
vpx.sln: $(wildcard *.vcproj)
@echo " [CREATE] $@"
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
- $(if $(filter %vpx.vcproj,$^),\
- $(foreach vcp,$(filter-out %vpx.vcproj %gtest.vcproj %obj_int_extract.vcproj,$^),\
- --dep=$(vcp:.vcproj=):vpx) \
- $(foreach vcp,$(filter %_test.vcproj,$^),\
- --dep=$(vcp:.vcproj=):gtest)) \
- --dep=vpx:obj_int_extract \
- --ver=$(CONFIG_VS_VERSION)\
- --out=$@ $^
+ $(if $(filter vpx.vcproj,$^),$(VPX_RDEPS)) \
+ --dep=vpx:obj_int_extract \
+ --dep=test_libvpx:gtest \
+ --ver=$(CONFIG_VS_VERSION)\
+ --out=$@ $^
vpx.sln.mk: vpx.sln
@true
diff --git a/test/acm_random.h b/test/acm_random.h
new file mode 100644
index 0000000..514894e
--- /dev/null
+++ b/test/acm_random.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef LIBVPX_TEST_ACM_RANDOM_H_
+#define LIBVPX_TEST_ACM_RANDOM_H_
+
+#include <stdlib.h>
+
+#include "vpx/vpx_integer.h"
+
+namespace libvpx_test {
+
+class ACMRandom {
+ public:
+ ACMRandom() {
+ Reset(DeterministicSeed());
+ }
+
+ explicit ACMRandom(int seed) {
+ Reset(seed);
+ }
+
+ void Reset(int seed) {
+ srand(seed);
+ }
+
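+ // Rand8() and PseudoUniform() discard the low-order bits of rand(), which
+ // tend to be the least random in LCG-based implementations.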
+ uint8_t Rand8(void) {
+ return (rand() >> 8) & 0xff;
+ }
+
+ int PseudoUniform(int range) {
+ return (rand() >> 8) % range;
+ }
+
+ int operator()(int n) {
+ return PseudoUniform(n);
+ }
+
+ static int DeterministicSeed(void) {
+ return 0xbaba;
+ }
+};
+
+} // namespace libvpx_test
+
+#endif // LIBVPX_TEST_ACM_RANDOM_H_
diff --git a/test/altref_test.cc b/test/altref_test.cc
new file mode 100644
index 0000000..ca05577
--- /dev/null
+++ b/test/altref_test.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+
+namespace {
+
+// lookahead range: [kLookAheadMin, kLookAheadMax).
+const int kLookAheadMin = 5;
+const int kLookAheadMax = 26;
+
+class AltRefTest : public libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<int> {
+ protected:
+ AltRefTest() : altref_count_(0) {}
+ virtual ~AltRefTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(libvpx_test::kTwoPassGood);
+ }
+
+ virtual void BeginPassHook(unsigned int pass) {
+ altref_count_ = 0;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 1) {
+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+ encoder->Control(VP8E_SET_CPUUSED, 3);
+ }
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_;
+ }
+
+ int altref_count() const { return altref_count_; }
+
+ private:
+ int altref_count_;
+};
+
+TEST_P(AltRefTest, MonotonicTimestamps) {
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = 1000;
+ cfg_.g_lag_in_frames = GetParam();
+
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 30);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ EXPECT_GE(altref_count(), 1);
+}
+
+INSTANTIATE_TEST_CASE_P(NonZeroLag, AltRefTest,
+ ::testing::Range(kLookAheadMin, kLookAheadMax));
+} // namespace
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
new file mode 100644
index 0000000..4e21be8
--- /dev/null
+++ b/test/boolcoder_test.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+extern "C" {
+#include "vp8/encoder/boolhuff.h"
+#include "vp8/decoder/dboolhuff.h"
+}
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "test/acm_random.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_integer.h"
+
+namespace {
+const int num_tests = 10;
+} // namespace
+
+using libvpx_test::ACMRandom;
+
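+// Round-trip check: encode a bit stream whose per-bit probabilities are
+// drawn from several distributions, then decode it and verify every bit.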
+TEST(VP8, TestBitIO) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int n = 0; n < num_tests; ++n) {
+ for (int method = 0; method <= 7; ++method) { // generate various probability distributions
+ const int bits_to_test = 1000;
+ uint8_t probas[bits_to_test];
+
+ for (int i = 0; i < bits_to_test; ++i) {
+ const int parity = i & 1;
+ probas[i] =
+ (method == 0) ? 0 : (method == 1) ? 255 :
+ (method == 2) ? 128 :
+ (method == 3) ? rnd.Rand8() :
+ (method == 4) ? (parity ? 0 : 255) :
+ // alternate between low and high proba:
+ (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
+ (method == 6) ?
+ (parity ? rnd(64) : 255 - rnd(64)) :
+ (parity ? rnd(32) : 255 - rnd(32));
+ }
+ for (int bit_method = 0; bit_method <= 3; ++bit_method) {
+ const int random_seed = 6432;
+ const int buffer_size = 10000;
+ ACMRandom bit_rnd(random_seed);
+ BOOL_CODER bw;
+ uint8_t bw_buffer[buffer_size];
+ vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
+
+ int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
+ for (int i = 0; i < bits_to_test; ++i) {
+ if (bit_method == 2) {
+ bit = (i & 1);
+ } else if (bit_method == 3) {
+ bit = bit_rnd(2);
+ }
+ vp8_encode_bool(&bw, bit, static_cast<int>(probas[i]));
+ }
+
+ vp8_stop_encode(&bw);
+
+ BOOL_DECODER br;
+ vp8dx_start_decode(&br, bw_buffer, buffer_size);
+ bit_rnd.Reset(random_seed);
+ for (int i = 0; i < bits_to_test; ++i) {
+ if (bit_method == 2) {
+ bit = (i & 1);
+ } else if (bit_method == 3) {
+ bit = bit_rnd(2);
+ }
+ GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
+ << "pos: "<< i << " / " << bits_to_test
+ << " bit_method: " << bit_method
+ << " method: " << method;
+ }
+ }
+ }
+ }
+}
diff --git a/test/config_test.cc b/test/config_test.cc
new file mode 100644
index 0000000..c4da46e
--- /dev/null
+++ b/test/config_test.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/encode_test_driver.h"
+#include "test/video_source.h"
+
+namespace {
+
+class ConfigTest : public ::libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ public:
+ ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {}
+
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GetParam());
+ }
+
+ virtual void BeginPassHook(unsigned int /*pass*/) {
+ frame_count_in_ = 0;
+ frame_count_out_ = 0;
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) {
+ ++frame_count_in_;
+ abort_ |= (frame_count_in_ >= frame_count_max_);
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {
+ ++frame_count_out_;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ unsigned int frame_count_in_;
+ unsigned int frame_count_out_;
+ unsigned int frame_count_max_;
+};
+
+TEST_P(ConfigTest, LagIsDisabled) {
+ frame_count_max_ = 2;
+ cfg_.g_lag_in_frames = 15;
+
+ libvpx_test::DummyVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ EXPECT_EQ(frame_count_in_, frame_count_out_);
+}
+
+INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES);
+} // namespace
diff --git a/test/cq_test.cc b/test/cq_test.cc
new file mode 100644
index 0000000..42ee2a2
--- /dev/null
+++ b/test/cq_test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <cmath>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+
+// CQ level range: [kCQLevelMin, kCQLevelMax).
+const int kCQLevelMin = 4;
+const int kCQLevelMax = 63;
+const int kCQLevelStep = 8;
+const int kCQTargetBitrate = 2000;
+
+namespace {
+
+class CQTest : public libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<int> {
+ protected:
+ CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; }
+ virtual ~CQTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(libvpx_test::kTwoPassGood);
+ }
+
+ virtual void BeginPassHook(unsigned int /*pass*/) {
+ file_size_ = 0;
+ psnr_ = 0.0;
+ n_frames_ = 0;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 1) {
+ if (cfg_.rc_end_usage == VPX_CQ) {
+ encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_);
+ }
+ encoder->Control(VP8E_SET_CPUUSED, 3);
+ }
+ }
+
+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+ psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0);
+ n_frames_++;
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ file_size_ += pkt->data.frame.sz;
+ }
+
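+ // PSNR packets are accumulated in the linear domain (see PSNRPktHook), so
+ // this returns the mean linear PSNR divided by the compressed file size.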
+ double GetLinearPSNROverBitrate() const {
+ double avg_psnr = log10(psnr_ / n_frames_) * 10.0;
+ return pow(10.0, avg_psnr / 10.0) / file_size_;
+ }
+
+ int file_size() const { return file_size_; }
+ int n_frames() const { return n_frames_; }
+
+ private:
+ int cq_level_;
+ int file_size_;
+ double psnr_;
+ int n_frames_;
+};
+
+int prev_actual_bitrate = kCQTargetBitrate;
+TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = kCQTargetBitrate;
+ cfg_.g_lag_in_frames = 25;
+
+ cfg_.rc_end_usage = VPX_CQ;
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 30);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ const double cq_psnr_lin = GetLinearPSNROverBitrate();
+ const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000);
+ EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate);
+ EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate);
+ prev_actual_bitrate = cq_actual_bitrate;
+
+ // try targeting the approximate same bitrate with VBR mode
+ cfg_.rc_end_usage = VPX_VBR;
+ cfg_.rc_target_bitrate = cq_actual_bitrate;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ const double vbr_psnr_lin = GetLinearPSNROverBitrate();
+ EXPECT_GE(cq_psnr_lin, vbr_psnr_lin);
+}
+
+INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest,
+ ::testing::Range(kCQLevelMin, kCQLevelMax,
+ kCQLevelStep));
+} // namespace
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
new file mode 100644
index 0000000..6fbcb64
--- /dev/null
+++ b/test/datarate_test.cc
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+namespace {
+
+class DatarateTest : public ::libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GetParam());
+ ResetModel();
+ }
+
+ virtual void ResetModel() {
+ last_pts_ = 0;
+ bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
+ frame_number_ = 0;
+ first_drop_ = 0;
+ bits_total_ = 0;
+ duration_ = 0.0;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ const vpx_rational_t tb = video->timebase();
+ timebase_ = static_cast<double>(tb.num) / tb.den;
+ duration_ = 0;
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ // Time since last timestamp = duration.
+ vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+
+ // TODO(jimbankoski): Remove these lines when the issue:
+ // http://code.google.com/p/webm/issues/detail?id=496 is fixed.
+ // For now the codec assumes the buffer starts at the initial buffer level
+ // plus one frame's worth of time.
+ if (last_pts_ == 0)
+ duration = 1;
+
+ // Add to the buffer the bits we'd expect from a constant bitrate server.
+ bits_in_buffer_model_ += duration * timebase_ * cfg_.rc_target_bitrate
+ * 1000;
+
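+ // e.g. at 30fps (duration * timebase_ = 1/30 s) and a 600 kbps target,
+ // each frame interval adds 600 * 1000 / 30 = 20000 bits to the model.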
+ /* Test the buffer model here, before subtracting the frame. The leaky
+ * bucket model in libvpx allows the buffer to empty and then stops showing
+ * frames until enough bits have accumulated to show one. As noted below
+ * (issue 495), this does not currently apply to key frames, so key frames
+ * are excluded from the check below. */
+ bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true: false;
+ if (!key_frame) {
+ ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
+ << pkt->data.frame.pts;
+ }
+
+ const int frame_size_in_bits = pkt->data.frame.sz * 8;
+
+ // Subtract from the buffer the bits associated with a played back frame.
+ bits_in_buffer_model_ -= frame_size_in_bits;
+
+ // Update the running total of bits for end of test datarate checks.
+ bits_total_ += frame_size_in_bits;
+
+ // If first drop not set and we have a drop set it to this time.
+ if (!first_drop_ && duration > 1)
+ first_drop_ = last_pts_ + 1;
+
+ // Update the most recent pts.
+ last_pts_ = pkt->data.frame.pts;
+
+ // We update this so that we can calculate the datarate minus the last
+ // frame encoded in the file.
+ bits_in_last_frame_ = frame_size_in_bits;
+
+ ++frame_number_;
+ }
+
+ virtual void EndPassHook(void) {
+ if (bits_total_) {
+ const double file_size_in_kb = bits_total_ / 1000; /* bits to kilobits */
+
+ duration_ = (last_pts_ + 1) * timebase_;
+
+ // Effective file datarate includes the time spent prebuffering.
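+ // rc_buf_initial_sz is in milliseconds, so the denominator is the
+ // prebuffer time in seconds plus the clip duration; the last frame's bits
+ // are excluded (see bits_in_last_frame_ in FramePktHook).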
+ effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
+ / (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
+
+ file_datarate_ = file_size_in_kb / duration_;
+ }
+ }
+
+ vpx_codec_pts_t last_pts_;
+ int bits_in_buffer_model_;
+ double timebase_;
+ int frame_number_;
+ vpx_codec_pts_t first_drop_;
+ int64_t bits_total_;
+ double duration_;
+ double file_datarate_;
+ double effective_datarate_;
+ int bits_in_last_frame_;
+};
+
+TEST_P(DatarateTest, BasicBufferModel) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_dropframe_thresh = 1;
+ cfg_.rc_max_quantizer = 56;
+ cfg_.rc_end_usage = VPX_CBR;
+ // Two-pass CBR datarate control has a bug that is hidden by the small
+ // number of frames selected in this encode: even if the buffer is
+ // negative, a keyframe is produced on a cutscene, ignoring the datarate
+ // constraints.
+ // TODO(jimbankoski): Fix when issue
+ // http://code.google.com/p/webm/issues/detail?id=495 is addressed.
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 140);
+
+ // There is an issue for low bitrates in real-time mode, where the
+ // effective_datarate slightly overshoots the target bitrate.
+ // This is the same issue as noted above (#495).
+ // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100),
+ // when the issue is resolved.
+ for (int i = 100; i < 800; i += 200) {
+ cfg_.rc_target_bitrate = i;
+ ResetModel();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_)
+ << " The datarate for the file exceeds the target!";
+
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+ << " The datarate for the file missed the target!";
+ }
+}
+
+TEST_P(DatarateTest, ChangingDropFrameThresh) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_max_quantizer = 36;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.rc_target_bitrate = 200;
+ cfg_.kf_mode = VPX_KF_DISABLED;
+
+ const int frame_count = 40;
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, frame_count);
+
+ // Here we check that the first dropped frame gets earlier and earlier
+ // as the drop frame threshold is increased.
+
+ const int kDropFrameThreshTestStep = 30;
+ vpx_codec_pts_t last_drop = frame_count;
+ for (int i = 1; i < 91; i += kDropFrameThreshTestStep) {
+ cfg_.rc_dropframe_thresh = i;
+ ResetModel();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_LE(first_drop_, last_drop)
+ << " The first dropped frame for drop_thresh " << i
+ << " > first dropped frame for drop_thresh "
+ << i - kDropFrameThreshTestStep;
+ last_drop = first_drop_;
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES);
+} // namespace
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
new file mode 100644
index 0000000..84afe7f
--- /dev/null
+++ b/test/decode_test_driver.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/decode_test_driver.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+
+namespace libvpx_test {
+#if CONFIG_VP8_DECODER
+void Decoder::DecodeFrame(const uint8_t *cxdata, int size) {
+ if (!decoder_.priv) {
+ const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_,
+ &vpx_codec_vp8_dx_algo,
+ &cfg_, 0);
+ ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError();
+ }
+
+ vpx_codec_err_t res_dec;
+ REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_,
+ cxdata, size, NULL, 0));
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError();
+}
+
+void DecoderTest::RunLoop(CompressedVideoSource *video) {
+ vpx_codec_dec_cfg_t dec_cfg = {0};
+ Decoder decoder(dec_cfg, 0);
+
+ // Decode frames.
+ for (video->Begin(); video->cxdata(); video->Next()) {
+ decoder.DecodeFrame(video->cxdata(), video->frame_size());
+
+ DxDataIterator dec_iter = decoder.GetDxData();
+ const vpx_image_t *img = NULL;
+
+ // Get decompressed data
+ while ((img = dec_iter.Next()))
+ DecompressedFrameHook(*img, video->frame_number());
+ }
+}
+#endif
+} // namespace libvpx_test
diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h
new file mode 100644
index 0000000..6408bee
--- /dev/null
+++ b/test/decode_test_driver.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_DECODE_TEST_DRIVER_H_
+#define TEST_DECODE_TEST_DRIVER_H_
+#include <cstring>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx_config.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vp8dx.h"
+
+namespace libvpx_test {
+
+class CompressedVideoSource;
+
+// Provides an object to handle decoding output
+class DxDataIterator {
+ public:
+ explicit DxDataIterator(vpx_codec_ctx_t *decoder)
+ : decoder_(decoder), iter_(NULL) {}
+
+ const vpx_image_t *Next() {
+ return vpx_codec_get_frame(decoder_, &iter_);
+ }
+
+ private:
+ vpx_codec_ctx_t *decoder_;
+ vpx_codec_iter_t iter_;
+};
+
+// Provides a simplified interface to manage one video decoding.
+//
+// TODO: as with the Encoder class, the exact services provided will be
+// fleshed out as more tests are added.
+class Decoder {
+ public:
+ Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
+ : cfg_(cfg), deadline_(deadline) {
+ memset(&decoder_, 0, sizeof(decoder_));
+ }
+
+ ~Decoder() {
+ vpx_codec_destroy(&decoder_);
+ }
+
+ void DecodeFrame(const uint8_t *cxdata, int size);
+
+ DxDataIterator GetDxData() {
+ return DxDataIterator(&decoder_);
+ }
+
+ void set_deadline(unsigned long deadline) {
+ deadline_ = deadline;
+ }
+
+ void Control(int ctrl_id, int arg) {
+ const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
+ ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
+ }
+
+ protected:
+ const char *DecodeError() {
+ const char *detail = vpx_codec_error_detail(&decoder_);
+ return detail ? detail : vpx_codec_error(&decoder_);
+ }
+
+ vpx_codec_ctx_t decoder_;
+ vpx_codec_dec_cfg_t cfg_;
+ unsigned long deadline_;
+};
+
+// Common test functionality for all Decoder tests.
+class DecoderTest {
+ public:
+ // Main loop.
+ virtual void RunLoop(CompressedVideoSource *video);
+
+ // Hook to be called on every decompressed frame.
+ virtual void DecompressedFrameHook(const vpx_image_t& img,
+ const unsigned int frame_number) {}
+
+ protected:
+ DecoderTest() {}
+
+ virtual ~DecoderTest() {}
+};
+
+} // namespace libvpx_test
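+
+// A minimal usage sketch (hypothetical; compressed_buf/compressed_size are
+// placeholders, not part of this header):
+//
+//   libvpx_test::Decoder decoder(vpx_codec_dec_cfg_t(), 0);
+//   decoder.DecodeFrame(compressed_buf, compressed_size);
+//   libvpx_test::DxDataIterator it = decoder.GetDxData();
+//   while (const vpx_image_t *img = it.Next()) { /* inspect img */ }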
+
+#endif // TEST_DECODE_TEST_DRIVER_H_
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
new file mode 100644
index 0000000..56339ca
--- /dev/null
+++ b/test/encode_test_driver.cc
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+#include "test/encode_test_driver.h"
+#if CONFIG_VP8_DECODER
+#include "test/decode_test_driver.h"
+#endif
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace libvpx_test {
+void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
+ if (video->img())
+ EncodeFrameInternal(*video, frame_flags);
+ else
+ Flush();
+
+ // Handle twopass stats
+ CxDataIterator iter = GetCxData();
+
+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
+ if (pkt->kind != VPX_CODEC_STATS_PKT)
+ continue;
+
+ stats_->Append(*pkt);
+ }
+}
+
+void Encoder::EncodeFrameInternal(const VideoSource &video,
+ const unsigned long frame_flags) {
+ vpx_codec_err_t res;
+ const vpx_image_t *img = video.img();
+
+ // Handle first frame initialization
+ if (!encoder_.priv) {
+ cfg_.g_w = img->d_w;
+ cfg_.g_h = img->d_h;
+ cfg_.g_timebase = video.timebase();
+ cfg_.rc_twopass_stats_in = stats_->buf();
+ res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_,
+ init_flags_);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+ }
+
+ // Handle frame resizing
+ if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
+ cfg_.g_w = img->d_w;
+ cfg_.g_h = img->d_h;
+ res = vpx_codec_enc_config_set(&encoder_, &cfg_);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+ }
+
+ // Encode the frame
+ REGISTER_STATE_CHECK(
+ res = vpx_codec_encode(&encoder_,
+ video.img(), video.pts(), video.duration(),
+ frame_flags, deadline_));
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+}
+
+void Encoder::Flush() {
+ const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0,
+ deadline_);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+}
+
+void EncoderTest::SetMode(TestMode mode) {
+ switch (mode) {
+ case kRealTime:
+ deadline_ = VPX_DL_REALTIME;
+ break;
+
+ case kOnePassGood:
+ case kTwoPassGood:
+ deadline_ = VPX_DL_GOOD_QUALITY;
+ break;
+
+ case kOnePassBest:
+ case kTwoPassBest:
+ deadline_ = VPX_DL_BEST_QUALITY;
+ break;
+
+ default:
+ ASSERT_TRUE(false) << "Unexpected mode " << mode;
+ }
+
+ if (mode == kTwoPassGood || mode == kTwoPassBest)
+ passes_ = 2;
+ else
+ passes_ = 1;
+}
+// compare_img() is expected to return true most of the time, so no early
+// break-out is implemented in the match check.
+static bool compare_img(const vpx_image_t *img1,
+ const vpx_image_t *img2) {
+ bool match = (img1->fmt == img2->fmt) &&
+ (img1->d_w == img2->d_w) &&
+ (img1->d_h == img2->d_h);
+
+ const unsigned int width_y = img1->d_w;
+ const unsigned int height_y = img1->d_h;
+ unsigned int i;
+ for (i = 0; i < height_y; ++i)
+ match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
+ img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
+ width_y) == 0) && match;
+ const unsigned int width_uv = (img1->d_w + 1) >> 1;
+ const unsigned int height_uv = (img1->d_h + 1) >> 1;
+ for (i = 0; i < height_uv; ++i)
+ match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
+ img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
+ width_uv) == 0) && match;
+ for (i = 0; i < height_uv; ++i)
+ match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
+ img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
+ width_uv) == 0) && match;
+ return match;
+}
+
+void EncoderTest::RunLoop(VideoSource *video) {
+#if CONFIG_VP8_DECODER
+ vpx_codec_dec_cfg_t dec_cfg = {0};
+#endif
+
+ stats_.Reset();
+
+ for (unsigned int pass = 0; pass < passes_; pass++) {
+ last_pts_ = 0;
+
+ if (passes_ == 1)
+ cfg_.g_pass = VPX_RC_ONE_PASS;
+ else if (pass == 0)
+ cfg_.g_pass = VPX_RC_FIRST_PASS;
+ else
+ cfg_.g_pass = VPX_RC_LAST_PASS;
+
+ BeginPassHook(pass);
+ Encoder encoder(cfg_, deadline_, init_flags_, &stats_);
+#if CONFIG_VP8_DECODER
+ Decoder decoder(dec_cfg, 0);
+ bool has_cxdata = false;
+#endif
+ bool again;
+ for (again = true, video->Begin(); again; video->Next()) {
+ again = video->img() != NULL;
+
+ PreEncodeFrameHook(video);
+ PreEncodeFrameHook(video, &encoder);
+ encoder.EncodeFrame(video, frame_flags_);
+
+ CxDataIterator iter = encoder.GetCxData();
+
+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
+ again = true;
+
+ switch (pkt->kind) {
+ case VPX_CODEC_CX_FRAME_PKT:
+#if CONFIG_VP8_DECODER
+ has_cxdata = true;
+ decoder.DecodeFrame((const uint8_t*)pkt->data.frame.buf,
+ pkt->data.frame.sz);
+#endif
+ ASSERT_GE(pkt->data.frame.pts, last_pts_);
+ last_pts_ = pkt->data.frame.pts;
+ FramePktHook(pkt);
+ break;
+
+ case VPX_CODEC_PSNR_PKT:
+ PSNRPktHook(pkt);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+#if CONFIG_VP8_DECODER
+ if (has_cxdata) {
+ const vpx_image_t *img_enc = encoder.GetPreviewFrame();
+ DxDataIterator dec_iter = decoder.GetDxData();
+ const vpx_image_t *img_dec = dec_iter.Next();
+ if (img_enc && img_dec) {
+ const bool res = compare_img(img_enc, img_dec);
+ ASSERT_TRUE(res) << "Encoder/Decoder mismatch found.";
+ }
+ }
+#endif
+ if (!Continue())
+ break;
+ }
+
+ EndPassHook();
+
+ if (!Continue())
+ break;
+ }
+}
+} // namespace libvpx_test
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
new file mode 100644
index 0000000..0141fa9
--- /dev/null
+++ b/test/encode_test_driver.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_ENCODE_TEST_DRIVER_H_
+#define TEST_ENCODE_TEST_DRIVER_H_
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
+
+namespace libvpx_test {
+
+class VideoSource;
+
+enum TestMode {
+ kRealTime,
+ kOnePassGood,
+ kOnePassBest,
+ kTwoPassGood,
+ kTwoPassBest
+};
+#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \
+ ::libvpx_test::kOnePassGood, \
+ ::libvpx_test::kOnePassBest, \
+ ::libvpx_test::kTwoPassGood, \
+ ::libvpx_test::kTwoPassBest)
+
+#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \
+ ::libvpx_test::kOnePassGood, \
+ ::libvpx_test::kOnePassBest)
+
+
+// Provides an object to handle the libvpx get_cx_data() iteration pattern
+class CxDataIterator {
+ public:
+ explicit CxDataIterator(vpx_codec_ctx_t *encoder)
+ : encoder_(encoder), iter_(NULL) {}
+
+ const vpx_codec_cx_pkt_t *Next() {
+ return vpx_codec_get_cx_data(encoder_, &iter_);
+ }
+
+ private:
+ vpx_codec_ctx_t *encoder_;
+ vpx_codec_iter_t iter_;
+};
+
+// Implements an in-memory store for libvpx twopass statistics
+class TwopassStatsStore {
+ public:
+ void Append(const vpx_codec_cx_pkt_t &pkt) {
+ buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf),
+ pkt.data.twopass_stats.sz);
+ }
+
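+ // Note: the returned buffer points into buffer_ and is invalidated by a
+ // subsequent Append() or Reset().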
+ vpx_fixed_buf_t buf() {
+ const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() };
+ return buf;
+ }
+
+ void Reset() {
+ buffer_.clear();
+ }
+
+ protected:
+ std::string buffer_;
+};
+
+
+// Provides a simplified interface to manage one video encoding pass, given
+// a configuration and video source.
+//
+// TODO(jkoleszar): The exact services it provides and the appropriate
+// level of abstraction will be fleshed out as more tests are written.
+class Encoder {
+ public:
+ Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
+ const unsigned long init_flags, TwopassStatsStore *stats)
+ : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) {
+ memset(&encoder_, 0, sizeof(encoder_));
+ }
+
+ ~Encoder() {
+ vpx_codec_destroy(&encoder_);
+ }
+
+ CxDataIterator GetCxData() {
+ return CxDataIterator(&encoder_);
+ }
+
+ const vpx_image_t *GetPreviewFrame() {
+ return vpx_codec_get_preview_frame(&encoder_);
+ }
+ // This is a thin wrapper around vpx_codec_encode(), so refer to
+ // vpx_encoder.h for its semantics.
+ void EncodeFrame(VideoSource *video, const unsigned long frame_flags);
+
+ // Convenience wrapper for EncodeFrame()
+ void EncodeFrame(VideoSource *video) {
+ EncodeFrame(video, 0);
+ }
+
+ void Control(int ctrl_id, int arg) {
+ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+ }
+
+ void set_deadline(unsigned long deadline) {
+ deadline_ = deadline;
+ }
+
+ protected:
+ const char *EncoderError() {
+ const char *detail = vpx_codec_error_detail(&encoder_);
+ return detail ? detail : vpx_codec_error(&encoder_);
+ }
+
+ // Encode an image
+ void EncodeFrameInternal(const VideoSource &video,
+ const unsigned long frame_flags);
+
+ // Flush the encoder on EOS
+ void Flush();
+
+ vpx_codec_ctx_t encoder_;
+ vpx_codec_enc_cfg_t cfg_;
+ unsigned long deadline_;
+ unsigned long init_flags_;
+ TwopassStatsStore *stats_;
+};
+
+// Common test functionality for all Encoder tests.
+//
+// This class is a mixin which provides the main loop common to all
+// encoder tests. It provides hooks which can be overridden by subclasses
+// to implement each test's specific behavior, while centralizing the bulk
+// of the boilerplate. Note that it doesn't inherit the gtest testing
+// classes directly, so that tests can be parameterized differently.
+class EncoderTest {
+ protected:
+ EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0),
+ last_pts_(0) {}
+
+ virtual ~EncoderTest() {}
+
+ // Initialize the cfg_ member with the default configuration.
+ void InitializeConfig() {
+ const vpx_codec_err_t res = vpx_codec_enc_config_default(
+ &vpx_codec_vp8_cx_algo, &cfg_, 0);
+ ASSERT_EQ(VPX_CODEC_OK, res);
+ }
+
+ // Map the TestMode enum to the deadline_ and passes_ variables.
+ void SetMode(TestMode mode);
+
+ // Main loop.
+ virtual void RunLoop(VideoSource *video);
+
+ // Hook to be called at the beginning of a pass.
+ virtual void BeginPassHook(unsigned int pass) {}
+
+ // Hook to be called at the end of a pass.
+ virtual void EndPassHook() {}
+
+ // Hook to be called before encoding a frame.
+ virtual void PreEncodeFrameHook(VideoSource *video) {}
+ virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {}
+
+ // Hook to be called on every compressed data packet.
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {}
+
+ // Hook to be called on every PSNR packet.
+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}
+
+ // Hook to determine whether the encode loop should continue.
+ virtual bool Continue() const { return !abort_; }
+
+ bool abort_;
+ vpx_codec_enc_cfg_t cfg_;
+ unsigned int passes_;
+ unsigned long deadline_;
+ TwopassStatsStore stats_;
+ unsigned long init_flags_;
+ unsigned long frame_flags_;
+ vpx_codec_pts_t last_pts_;
+};
+
+} // namespace libvpx_test
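+
+// A minimal usage sketch (hypothetical subclass, not part of this header;
+// DummyVideoSource comes from test/video_source.h):
+//
+//   class MyTest : public libvpx_test::EncoderTest,
+//       public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+//     virtual void SetUp() { InitializeConfig(); SetMode(GetParam()); }
+//   };
+//   TEST_P(MyTest, RunsToCompletion) {
+//     libvpx_test::DummyVideoSource video;
+//     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+//   }
+//   INSTANTIATE_TEST_CASE_P(AllModes, MyTest, ALL_TEST_MODES);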
+
+#endif // TEST_ENCODE_TEST_DRIVER_H_
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
new file mode 100644
index 0000000..25c6731
--- /dev/null
+++ b/test/error_resilience_test.cc
@@ -0,0 +1,90 @@
+/*
+ Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+
+ Use of this source code is governed by a BSD-style license
+ that can be found in the LICENSE file in the root of the source
+ tree. An additional intellectual property rights grant can be found
+ in the file PATENTS. All contributing project authors may
+ be found in the AUTHORS file in the root of the source tree.
+*/
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+
+namespace {
+
+class ErrorResilienceTest : public libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<int> {
+ protected:
+ ErrorResilienceTest() {
+ psnr_ = 0.0;
+ nframes_ = 0;
+ encoding_mode_ = static_cast<libvpx_test::TestMode>(GetParam());
+ }
+ virtual ~ErrorResilienceTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(encoding_mode_);
+ }
+
+ virtual void BeginPassHook(unsigned int /*pass*/) {
+ psnr_ = 0.0;
+ nframes_ = 0;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+ psnr_ += pkt->data.psnr.psnr[0];
+ nframes_++;
+ }
+
+ double GetAveragePsnr() const {
+ if (nframes_)
+ return psnr_ / nframes_;
+ return 0.0;
+ }
+
+ private:
+ double psnr_;
+ unsigned int nframes_;
+ libvpx_test::TestMode encoding_mode_;
+};
+
+TEST_P(ErrorResilienceTest, OnVersusOff) {
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = 2000;
+ cfg_.g_lag_in_frames = 25;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 30);
+
+ // Error resilient mode OFF.
+ cfg_.g_error_resilient = 0;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ const double psnr_resilience_off = GetAveragePsnr();
+ EXPECT_GT(psnr_resilience_off, 25.0);
+
+ // Error resilient mode ON.
+ cfg_.g_error_resilient = 1;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ const double psnr_resilience_on = GetAveragePsnr();
+ EXPECT_GT(psnr_resilience_on, 25.0);
+
+ // Test that turning on error resilient mode hurts by 10% at most.
+ if (psnr_resilience_off > 0.0) {
+ const double psnr_ratio = psnr_resilience_on / psnr_resilience_off;
+ EXPECT_GE(psnr_ratio, 0.9);
+ EXPECT_LE(psnr_ratio, 1.1);
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest,
+ ONE_PASS_TEST_MODES);
+} // namespace
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
new file mode 100644
index 0000000..619b23d
--- /dev/null
+++ b/test/fdct4x4_test.cc
@@ -0,0 +1,169 @@
+/*
+* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+*
+* Use of this source code is governed by a BSD-style license
+* that can be found in the LICENSE file in the root of the source
+* tree. An additional intellectual property rights grant can be found
+* in the file PATENTS. All contributing project authors may
+* be found in the AUTHORS file in the root of the source tree.
+*/
+
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+
+extern "C" {
+#include "vpx_rtcd.h"
+}
+
+#include "test/acm_random.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_integer.h"
+
+
+namespace {
+
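+// Q16 fixed-point constants used by the VP8 inverse DCT:
+// 20091 ~ (cos(pi/8) * sqrt(2) - 1) * 2^16 and
+// 35468 ~ sin(pi/8) * sqrt(2) * 2^16.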
+const int cospi8sqrt2minus1 = 20091;
+const int sinpi8sqrt2 = 35468;
+
+void reference_idct4x4(const int16_t *input, int16_t *output) {
+ const int16_t *ip = input;
+ int16_t *op = output;
+
+ for (int i = 0; i < 4; ++i) {
+ const int a1 = ip[0] + ip[8];
+ const int b1 = ip[0] - ip[8];
+ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16;
+ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+ const int c1 = temp1 - temp2;
+ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
+ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16;
+ const int d1 = temp3 + temp4;
+ op[0] = a1 + d1;
+ op[12] = a1 - d1;
+ op[4] = b1 + c1;
+ op[8] = b1 - c1;
+ ++ip;
+ ++op;
+ }
+ ip = output;
+ op = output;
+ for (int i = 0; i < 4; ++i) {
+ const int a1 = ip[0] + ip[2];
+ const int b1 = ip[0] - ip[2];
+ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16;
+ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
+ const int c1 = temp1 - temp2;
+ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
+ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16;
+ const int d1 = temp3 + temp4;
+ op[0] = (a1 + d1 + 4) >> 3;
+ op[3] = (a1 - d1 + 4) >> 3;
+ op[1] = (b1 + c1 + 4) >> 3;
+ op[2] = (b1 - c1 + 4) >> 3;
+ ip += 4;
+ op += 4;
+ }
+}
+
+using libvpx_test::ACMRandom;
+
+TEST(Vp8FdctTest, SignBiasCheck) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int16_t test_input_block[16];
+ int16_t test_output_block[16];
+ const int pitch = 8;
+ int count_sign_block[16][2];
+ const int count_test_block = 1000000;
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 16; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
+
+ for (int j = 0; j < 16; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)
+ ++count_sign_block[j][1];
+ }
+ }
+
+ bool bias_acceptable = true;
+ for (int j = 0; j < 16; ++j)
+ bias_acceptable = bias_acceptable &&
+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000);
+
+ EXPECT_EQ(true, bias_acceptable)
+ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]";
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-15, 15].
+ for (int j = 0; j < 16; ++j)
+ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
+
+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
+
+ for (int j = 0; j < 16; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)
+ ++count_sign_block[j][1];
+ }
+ }
+
+ bias_acceptable = true;
+ for (int j = 0; j < 16; ++j)
+ bias_acceptable = bias_acceptable &&
+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000);
+
+ EXPECT_EQ(true, bias_acceptable)
+ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
+}
+
+TEST(Vp8FdctTest, RoundTripErrorCheck) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int max_error = 0;
+ double total_error = 0;
+ const int count_test_block = 1000000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t test_input_block[16];
+ int16_t test_temp_block[16];
+ int16_t test_output_block[16];
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 16; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ const int pitch = 8;
+ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
+ reference_idct4x4(test_temp_block, test_output_block);
+
+ for (int j = 0; j < 16; ++j) {
+ const int diff = test_input_block[j] - test_output_block[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+ }
+
+ EXPECT_GE(1, max_error)
+ << "Error: FDCT/IDCT has an individual roundtrip error > 1";
+
+ EXPECT_GE(count_test_block, total_error)
+ << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
+}
+
+} // namespace
diff --git a/test/i420_video_source.h b/test/i420_video_source.h
new file mode 100644
index 0000000..219bd33
--- /dev/null
+++ b/test/i420_video_source.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_I420_VIDEO_SOURCE_H_
+#define TEST_I420_VIDEO_SOURCE_H_
+#include <cstdio>
+#include <cstdlib>
+
+#include "test/video_source.h"
+
+namespace libvpx_test {
+
+// This class extends VideoSource to allow parsing of raw I420 data so
+// that we can do actual file encodes.
+class I420VideoSource : public VideoSource {
+ public:
+ I420VideoSource(const std::string &file_name,
+ unsigned int width, unsigned int height,
+ int rate_numerator, int rate_denominator,
+ unsigned int start, int limit)
+ : file_name_(file_name),
+ input_file_(NULL),
+ img_(NULL),
+ start_(start),
+ limit_(limit),
+ frame_(0),
+ width_(0),
+ height_(0),
+ framerate_numerator_(rate_numerator),
+ framerate_denominator_(rate_denominator) {
+
+ // This initializes raw_sz_, width_, height_ and allocates an img.
+ SetSize(width, height);
+ }
+
+ virtual ~I420VideoSource() {
+ vpx_img_free(img_);
+ if (input_file_)
+ fclose(input_file_);
+ }
+
+ virtual void Begin() {
+ if (input_file_)
+ fclose(input_file_);
+ input_file_ = OpenTestDataFile(file_name_);
+ ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
+ << file_name_;
+ if (start_) {
+ fseek(input_file_, raw_sz_ * start_, SEEK_SET);
+ }
+
+ frame_ = start_;
+ FillFrame();
+ }
+
+ virtual void Next() {
+ ++frame_;
+ FillFrame();
+ }
+
+ virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+ // Models a stream where Timebase = 1/FPS, so pts == frame.
+ virtual vpx_codec_pts_t pts() const { return frame_; }
+
+ virtual unsigned long duration() const { return 1; }
+
+ virtual vpx_rational_t timebase() const {
+ const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
+ return t;
+ }
+
+ virtual unsigned int frame() const { return frame_; }
+
+ virtual unsigned int limit() const { return limit_; }
+
+ void SetSize(unsigned int width, unsigned int height) {
+ if (width != width_ || height != height_) {
+ vpx_img_free(img_);
+ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1);
+ ASSERT_TRUE(img_ != NULL);
+ width_ = width;
+ height_ = height;
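+ // I420 is 4:2:0: a full-size Y plane plus quarter-size U and V planes,
+ // so a frame occupies width * height * 3 / 2 bytes.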
+ raw_sz_ = width * height * 3 / 2;
+ }
+ }
+
+ virtual void FillFrame() {
+ // Read a frame from input_file.
+ if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
+ limit_ = frame_;
+ }
+ }
+
+ protected:
+ std::string file_name_;
+ FILE *input_file_;
+ vpx_image_t *img_;
+ size_t raw_sz_;
+ unsigned int start_;
+ unsigned int limit_;
+ unsigned int frame_;
+ unsigned int width_;
+ unsigned int height_;
+ unsigned int framerate_numerator_;
+ unsigned int framerate_denominator_;
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_I420_VIDEO_SOURCE_H_
diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc
new file mode 100644
index 0000000..1be5fa0
--- /dev/null
+++ b/test/idctllm_test.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+extern "C" {
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+}
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
+ int pred_stride, unsigned char *dst_ptr,
+ int dst_stride);
+namespace {
+class IDCTTest : public ::testing::TestWithParam<idct_fn_t>
+{
+ protected:
+ virtual void SetUp()
+ {
+ int i;
+
+ UUT = GetParam();
+ memset(input, 0, sizeof(input));
+ /* Set up guard blocks */
+ for(i=0; i<256; i++)
+ output[i] = ((i&0xF)<4&&(i<64))?0:-1;
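+ /* The 4x4 block under test occupies the first 4 bytes of each of the
+ * first 4 16-byte rows; every other byte is a 0xFF guard value that
+ * must be left untouched by the function under test. */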
+ }
+
+ idct_fn_t UUT;
+ short input[16];
+ unsigned char output[256];
+ unsigned char predict[256];
+};
+
+TEST_P(IDCTTest, TestGuardBlocks)
+{
+ int i;
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(0, output[i]) << i;
+ else
+ EXPECT_EQ(255, output[i]);
+}
+
+TEST_P(IDCTTest, TestAllZeros)
+{
+ int i;
+
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(0, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestAllOnes)
+{
+ int i;
+
+ input[0] = 4;
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(1, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestAddOne)
+{
+ int i;
+
+ for(i=0; i<256; i++)
+ predict[i] = i;
+
+ input[0] = 4;
+ REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(i+1, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestWithData)
+{
+ int i;
+
+ for(i=0; i<16; i++)
+ input[i] = i;
+
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) > 3 || i>63)
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+ else if(i == 0)
+ EXPECT_EQ(11, output[i]) << "i==" << i;
+ else if(i == 34)
+ EXPECT_EQ(1, output[i]) << "i==" << i;
+ else if(i == 2 || i == 17 || i == 32)
+ EXPECT_EQ(3, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(0, output[i]) << "i==" << i;
+}
+
+INSTANTIATE_TEST_CASE_P(C, IDCTTest,
+ ::testing::Values(vp8_short_idct4x4llm_c));
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
+ ::testing::Values(vp8_short_idct4x4llm_mmx));
+#endif
+}
diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc
new file mode 100644
index 0000000..4c16c3f
--- /dev/null
+++ b/test/intrapred_test.cc
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+extern "C" {
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+namespace {
+
+using libvpx_test::ACMRandom;
+
+class IntraPredBase {
+ protected:
+ void SetupMacroblock(uint8_t *data, int block_size, int stride,
+ int num_planes) {
+ memset(&mb_, 0, sizeof(mb_));
+ memset(&mi_, 0, sizeof(mi_));
+ mb_.up_available = 1;
+ mb_.left_available = 1;
+ mb_.mode_info_context = &mi_;
+ stride_ = stride;
+ block_size_ = block_size;
+ num_planes_ = num_planes;
+ for (int p = 0; p < num_planes; p++)
+ data_ptr_[p] = data + stride * (block_size + 1) * p +
+ stride + block_size;
+ }
+
+ void FillRandom() {
+ // Fill edges with random data
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int p = 0; p < num_planes_; p++) {
+ for (int x = -1 ; x <= block_size_; x++)
+ data_ptr_[p][x - stride_] = rnd.Rand8();
+ for (int y = 0; y < block_size_; y++)
+ data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
+ }
+ }
+
+ virtual void Predict(MB_PREDICTION_MODE mode) = 0;
+
+ void SetLeftUnavailable() {
+ mb_.left_available = 0;
+ for (int p = 0; p < num_planes_; p++)
+ for (int i = -1; i < block_size_; ++i)
+ data_ptr_[p][stride_ * i - 1] = 129;
+ }
+
+ void SetTopUnavailable() {
+ mb_.up_available = 0;
+ for (int p = 0; p < num_planes_; p++)
+ memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
+ }
+
+ void SetTopLeftUnavailable() {
+ SetLeftUnavailable();
+ SetTopUnavailable();
+ }
+
+ int BlockSizeLog2Min1() const {
+ switch (block_size_) {
+ case 16:
+ return 3;
+ case 8:
+ return 2;
+ default:
+ return 0;
+ }
+ }
+
+ // check DC prediction output against a reference
+ void CheckDCPrediction() const {
+ for (int p = 0; p < num_planes_; p++) {
+ // calculate expected DC
+ int expected;
+ if (mb_.up_available || mb_.left_available) {
+ int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available +
+ mb_.left_available;
+ if (mb_.up_available)
+ for (int x = 0; x < block_size_; x++)
+ sum += data_ptr_[p][x - stride_];
+ if (mb_.left_available)
+ for (int y = 0; y < block_size_; y++)
+ sum += data_ptr_[p][y * stride_ - 1];
+ expected = (sum + (1 << (shift - 1))) >> shift;
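+ // e.g. a 16x16 block with both edges available averages 32 edge pixels:
+ // shift = 3 + 1 + 1 = 5, so expected = (sum + 16) >> 5.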
+ } else
+ expected = 0x80;
+
+ // check that all subsequent lines are equal to the first
+ for (int y = 1; y < block_size_; ++y)
+ ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
+ block_size_));
+ // within the first line, ensure that each pixel has the same value
+ for (int x = 1; x < block_size_; ++x)
+ ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
+ // now ensure that that pixel has the expected (DC) value
+ ASSERT_EQ(expected, data_ptr_[p][0]);
+ }
+ }
+
+ // check V prediction output against a reference
+ void CheckVPrediction() const {
+ // check that all lines equal the top border
+ for (int p = 0; p < num_planes_; p++)
+ for (int y = 0; y < block_size_; y++)
+ ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
+ &data_ptr_[p][y * stride_], block_size_));
+ }
+
+ // check H prediction output against a reference
+ void CheckHPrediction() const {
+ // for each line, ensure that each pixel is equal to the left border
+ for (int p = 0; p < num_planes_; p++)
+ for (int y = 0; y < block_size_; y++)
+ for (int x = 0; x < block_size_; x++)
+ ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
+ data_ptr_[p][x + y * stride_]);
+ }
+
+ static int ClipByte(int value) {
+ if (value > 255)
+ return 255;
+ else if (value < 0)
+ return 0;
+ return value;
+ }
+
+ // check TM prediction output against a reference
+ void CheckTMPrediction() const {
+ for (int p = 0; p < num_planes_; p++)
+ for (int y = 0; y < block_size_; y++)
+ for (int x = 0; x < block_size_; x++) {
+ const int expected = ClipByte(data_ptr_[p][x - stride_]
+ + data_ptr_[p][stride_ * y - 1]
+ - data_ptr_[p][-1 - stride_]);
+ ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
+ }
+ }
+
+ // Actual test
+ void RunTest() {
+ {
+ SCOPED_TRACE("DC_PRED");
+ FillRandom();
+ Predict(DC_PRED);
+ CheckDCPrediction();
+ }
+ {
+ SCOPED_TRACE("DC_PRED LEFT");
+ FillRandom();
+ SetLeftUnavailable();
+ Predict(DC_PRED);
+ CheckDCPrediction();
+ }
+ {
+ SCOPED_TRACE("DC_PRED TOP");
+ FillRandom();
+ SetTopUnavailable();
+ Predict(DC_PRED);
+ CheckDCPrediction();
+ }
+ {
+ SCOPED_TRACE("DC_PRED TOP_LEFT");
+ FillRandom();
+ SetTopLeftUnavailable();
+ Predict(DC_PRED);
+ CheckDCPrediction();
+ }
+ {
+ SCOPED_TRACE("H_PRED");
+ FillRandom();
+ Predict(H_PRED);
+ CheckHPrediction();
+ }
+ {
+ SCOPED_TRACE("V_PRED");
+ FillRandom();
+ Predict(V_PRED);
+ CheckVPrediction();
+ }
+ {
+ SCOPED_TRACE("TM_PRED");
+ FillRandom();
+ Predict(TM_PRED);
+ CheckTMPrediction();
+ }
+ }
+
+ MACROBLOCKD mb_;
+ MODE_INFO mi_;
+ uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
+ int stride_;
+ int block_size_;
+ int num_planes_;
+};
+
+typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x,
+ uint8_t *yabove_row,
+ uint8_t *yleft,
+ int left_stride,
+ uint8_t *ypred_ptr,
+ int y_stride);
+
+class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
+ protected IntraPredBase {
+ public:
+ static void SetUpTestCase() {
+ data_array_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kDataBufferSize));
+ }
+
+ static void TearDownTestCase() {
+ vpx_free(data_array_);
+ data_array_ = NULL;
+ }
+
+ protected:
+ static const int kBlockSize = 16;
+ static const int kDataAlignment = 16;
+ static const int kStride = kBlockSize * 3;
+ // We use 48 so that the data pointer of the first pixel in each row of
+ // each macroblock is 16-byte aligned, and this gives us access to the
+ // top-left and top-right corner pixels belonging to the top-left/right
+ // macroblocks.
+ // We use 17 lines so we have one line above us for top-prediction.
+ static const int kDataBufferSize = kStride * (kBlockSize + 1);
+
+ virtual void SetUp() {
+ pred_fn_ = GetParam();
+ SetupMacroblock(data_array_, kBlockSize, kStride, 1);
+ }
+
+ virtual void Predict(MB_PREDICTION_MODE mode) {
+ mb_.mode_info_context->mbmi.mode = mode;
+ REGISTER_STATE_CHECK(pred_fn_(&mb_,
+ data_ptr_[0] - kStride,
+ data_ptr_[0] - 1, kStride,
+ data_ptr_[0], kStride));
+ }
+
+ intra_pred_y_fn_t pred_fn_;
+ static uint8_t* data_array_;
+};
+
+uint8_t* IntraPredYTest::data_array_ = NULL;
+
+TEST_P(IntraPredYTest, IntraPredTests) {
+ RunTest();
+}
+
+INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
+ ::testing::Values(
+ vp8_build_intra_predictors_mby_s_c));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
+ ::testing::Values(
+ vp8_build_intra_predictors_mby_s_sse2));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
+ ::testing::Values(
+ vp8_build_intra_predictors_mby_s_ssse3));
+#endif
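Each INSTANTIATE_TEST_CASE_P above registers the same IntraPredTests body once per implementation, so the C, SSE2 and SSSE3 variants are all exercised against identical reference checks. With standard googletest behavior, one set can be run in isolation via the filter flag, e.g. ./test_libvpx --gtest_filter=SSE2/IntraPredYTest.* (shell quoting may be needed for the asterisk).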
+
+typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x,
+ uint8_t *uabove_row,
+ uint8_t *vabove_row,
+ uint8_t *uleft,
+ uint8_t *vleft,
+ int left_stride,
+ uint8_t *upred_ptr,
+ uint8_t *vpred_ptr,
+ int pred_stride);
+
+class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
+ protected IntraPredBase {
+ public:
+ static void SetUpTestCase() {
+ data_array_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kDataBufferSize));
+ }
+
+ static void TearDownTestCase() {
+ vpx_free(data_array_);
+ data_array_ = NULL;
+ }
+
+ protected:
+ static const int kBlockSize = 8;
+ static const int kDataAlignment = 8;
+ static const int kStride = kBlockSize * 3;
+ // We use 24 so that the data pointer of the first pixel in each row of
+ // each macroblock is 8-byte aligned, and this gives us access to the
+ // top-left and top-right corner pixels belonging to the top-left/right
+ // macroblocks.
+ // We use 9 lines so we have one line above us for top-prediction.
+ // [0] = U, [1] = V
+ static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
+
+ virtual void SetUp() {
+ pred_fn_ = GetParam();
+ SetupMacroblock(data_array_, kBlockSize, kStride, 2);
+ }
+
+ virtual void Predict(MB_PREDICTION_MODE mode) {
+ mb_.mode_info_context->mbmi.uv_mode = mode;
+ pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
+ data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
+ data_ptr_[0], data_ptr_[1], kStride);
+ }
+
+ intra_pred_uv_fn_t pred_fn_;
+ static uint8_t* data_array_;
+};
+
+uint8_t* IntraPredUVTest::data_array_ = NULL;
+
+TEST_P(IntraPredUVTest, IntraPredTests) {
+ RunTest();
+}
+
+INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
+ ::testing::Values(
+ vp8_build_intra_predictors_mbuv_s_c));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
+ ::testing::Values(
+ vp8_build_intra_predictors_mbuv_s_sse2));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
+ ::testing::Values(
+ vp8_build_intra_predictors_mbuv_s_ssse3));
+#endif
+
+} // namespace
diff --git a/test/ivf_video_source.h b/test/ivf_video_source.h
new file mode 100644
index 0000000..48c3a7d
--- /dev/null
+++ b/test/ivf_video_source.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_IVF_VIDEO_SOURCE_H_
+#define TEST_IVF_VIDEO_SOURCE_H_
+#include <cstdio>
+#include <cstdlib>
+#include <new>
+#include <string>
+#include "test/video_source.h"
+
+namespace libvpx_test {
+const unsigned int kCodeBufferSize = 256 * 1024;
+const unsigned int kIvfFileHdrSize = 32;
+const unsigned int kIvfFrameHdrSize = 12;
+
+static unsigned int MemGetLe32(const uint8_t *mem) {
+ return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]);
+}
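For orientation, here is a sketch of the IVF layout that these constants and MemGetLe32() assume, following the usual IVF conventions; the MemGetLe16 helper below is hypothetical, added only for illustration:

  // Hypothetical 16-bit analogue of MemGetLe32 above.
  static unsigned int MemGetLe16(const uint8_t *mem) {
    return (mem[1] << 8) | mem[0];
  }

  // 32-byte IVF file header (kIvfFileHdrSize):
  //   bytes  0-3   signature, must be "DKIF"
  //   bytes  4-5   version (0), LE16
  //   bytes  6-7   header length (32), LE16
  //   bytes  8-11  codec FourCC, e.g. "VP80"
  //   bytes 12-13  frame width, LE16  -> MemGetLe16(hdr + 12)
  //   bytes 14-15  frame height, LE16 -> MemGetLe16(hdr + 14)
  //   bytes 16-19  timebase denominator, LE32
  //   bytes 20-23  timebase numerator, LE32
  //   bytes 24-27  frame count, LE32 (often unreliable in practice)
  //   bytes 28-31  unused
  //
  // 12-byte frame header (kIvfFrameHdrSize), as parsed in FillFrame():
  //   bytes  0-3   frame payload size, LE32 -> MemGetLe32(frame_hdr)
  //   bytes  4-11  presentation timestamp, LE64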
+
+// This class extends CompressedVideoSource to allow parsing of IVF files,
+// so that we can do actual file decodes.
+class IVFVideoSource : public CompressedVideoSource {
+ public:
+ IVFVideoSource(const std::string &file_name)
+ : file_name_(file_name),
+ input_file_(NULL),
+ compressed_frame_buf_(NULL),
+ frame_sz_(0),
+ frame_(0),
+ end_of_file_(false) {
+ }
+
+ virtual ~IVFVideoSource() {
+ delete[] compressed_frame_buf_;
+
+ if (input_file_)
+ fclose(input_file_);
+ }
+
+ virtual void Init() {
+ // Allocate a buffer to read the compressed video frame into.
+ compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
+ ASSERT_TRUE(compressed_frame_buf_) << "Failed to allocate frame buffer";
+ }
+
+ virtual void Begin() {
+ input_file_ = OpenTestDataFile(file_name_);
+ ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
+ << file_name_;
+
+ // Read file header
+ uint8_t file_hdr[kIvfFileHdrSize];
+ ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
+ << "File header read failed.";
+ // Check file header
+ ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I'
+ && file_hdr[3] == 'F') << "Input is not an IVF file.";
+
+ FillFrame();
+ }
+
+ virtual void Next() {
+ ++frame_;
+ FillFrame();
+ }
+
+ void FillFrame() {
+ uint8_t frame_hdr[kIvfFrameHdrSize];
+ // Check frame header and read a frame from input_file.
+ if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)
+ != kIvfFrameHdrSize) {
+ end_of_file_ = true;
+ } else {
+ end_of_file_ = false;
+
+ frame_sz_ = MemGetLe32(frame_hdr);
+ ASSERT_LE(frame_sz_, kCodeBufferSize)
+ << "Frame is too big for allocated code buffer";
+ ASSERT_EQ(frame_sz_,
+ fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
+ << "Failed to read complete frame";
+ }
+ }
+
+ virtual const uint8_t *cxdata() const {
+ return end_of_file_ ? NULL : compressed_frame_buf_;
+ }
+ virtual unsigned int frame_size() const { return frame_sz_; }
+ virtual unsigned int frame_number() const { return frame_; }
+
+ protected:
+ std::string file_name_;
+ FILE *input_file_;
+ uint8_t *compressed_frame_buf_;
+ unsigned int frame_sz_;
+ unsigned int frame_;
+ bool end_of_file_;
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_IVF_VIDEO_SOURCE_H_
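A minimal usage sketch for this class, modeled on how a decode loop consumes a CompressedVideoSource; the decoder object and its DecodeFrame() call are stand-ins here, not guaranteed names:

  libvpx_test::IVFVideoSource video("vp80-00-comprehensive-001.ivf");
  video.Init();
  for (video.Begin(); video.cxdata() != NULL; video.Next()) {
    // cxdata() returns NULL once FillFrame() runs out of input.
    decoder.DecodeFrame(video.cxdata(), video.frame_size());
  }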
diff --git a/test/keyframe_test.cc b/test/keyframe_test.cc
new file mode 100644
index 0000000..d0c81df
--- /dev/null
+++ b/test/keyframe_test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <climits>
+#include <vector>
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+class KeyframeTest : public ::libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GetParam());
+ kf_count_ = 0;
+ kf_count_max_ = INT_MAX;
+ kf_do_force_kf_ = false;
+ set_cpu_used_ = 0;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (kf_do_force_kf_)
+ frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF;
+ if (set_cpu_used_ && video->frame() == 1)
+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ kf_pts_list_.push_back(pkt->data.frame.pts);
+ kf_count_++;
+ abort_ |= kf_count_ > kf_count_max_;
+ }
+ }
+
+ bool kf_do_force_kf_;
+ int kf_count_;
+ int kf_count_max_;
+ std::vector<vpx_codec_pts_t> kf_pts_list_;
+ int set_cpu_used_;
+};
+
+TEST_P(KeyframeTest, TestRandomVideoSource) {
+ // Validate that encoding the RandomVideoSource produces multiple keyframes.
+ // This gives the TestDisableKeyframes result below its meaning.
+ kf_count_max_ = 2; // early exit successful tests.
+
+ ::libvpx_test::RandomVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // In realtime mode, auto-placed keyframes are exceedingly rare; don't
+ // bother with this check.
+ if (GetParam() > 0)
+ EXPECT_GT(kf_count_, 1);
+}
+
+TEST_P(KeyframeTest, TestDisableKeyframes) {
+ cfg_.kf_mode = VPX_KF_DISABLED;
+ kf_count_max_ = 1; // early exit failed tests.
+
+ ::libvpx_test::RandomVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ EXPECT_EQ(1, kf_count_);
+}
+
+TEST_P(KeyframeTest, TestForceKeyframe) {
+ cfg_.kf_mode = VPX_KF_DISABLED;
+ kf_do_force_kf_ = true;
+
+ ::libvpx_test::DummyVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // verify that every third frame is a keyframe.
+ for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
+ iter != kf_pts_list_.end(); ++iter) {
+ ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter;
+ }
+}
+
+TEST_P(KeyframeTest, TestKeyframeMaxDistance) {
+ cfg_.kf_max_dist = 25;
+
+ ::libvpx_test::DummyVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // verify that keyframe interval matches kf_max_dist
+ for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
+ iter != kf_pts_list_.end(); ++iter) {
+ ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter;
+ }
+}
+
+TEST_P(KeyframeTest, TestAutoKeyframe) {
+ cfg_.kf_mode = VPX_KF_AUTO;
+ kf_do_force_kf_ = false;
+
+ // Force a deterministic speed step in Real Time mode, as the faster modes
+ // may not produce a keyframe like we expect. This is necessary when running
+ // on very slow environments (like Valgrind). The step -11 was determined
+ // experimentally as the fastest mode that still throws the keyframe.
+ if (deadline_ == VPX_DL_REALTIME)
+ set_cpu_used_ = -11;
+
+ // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120.
+ // We check only the first 40 frames, so there should be keyframes at
+ // frames 0 and 30.
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 40);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // In realtime mode, auto-placed keyframes are exceedingly rare; don't
+ // bother with this check.
+ if (GetParam() > 0)
+ EXPECT_EQ(2u, kf_pts_list_.size()) << "Wrong number of keyframes";
+
+ // Verify that the detected keyframes match the scene cuts in the file.
+ for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
+ iter != kf_pts_list_.end(); ++iter) {
+ if (deadline_ == VPX_DL_REALTIME && *iter > 0)
+ EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
+ << *iter;
+ else
+ EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES);
+} // namespace
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
new file mode 100644
index 0000000..9227449
--- /dev/null
+++ b/test/pp_filter_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+extern "C" {
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+typedef void (*post_proc_func_t)(unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line,
+ int cols,
+ unsigned char *flimit,
+ int size);
+
+namespace {
+
+class Vp8PostProcessingFilterTest
+ : public ::testing::TestWithParam<post_proc_func_t> {};
+
+// Test routine for the VP8 post-processing function
+// vp8_post_proc_down_and_across_mb_row (C and SSE2 versions).
+
+TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
+ // Size of the underlying data block that will be filtered.
+ const int block_width = 16;
+ const int block_height = 16;
+
+ // 5-tap filter needs 2 padding rows above and below the block in the input.
+ const int input_width = block_width;
+ const int input_height = block_height + 4;
+ const int input_stride = input_width;
+ const int input_size = input_width * input_height;
+
+ // Filter extends output block by 8 samples at left and right edges.
+ const int output_width = block_width + 16;
+ const int output_height = block_height;
+ const int output_stride = output_width;
+ const int output_size = output_width * output_height;
+
+ uint8_t *const src_image =
+ reinterpret_cast<uint8_t*>(vpx_calloc(input_size, 1));
+ uint8_t *const dst_image =
+ reinterpret_cast<uint8_t*>(vpx_calloc(output_size, 1));
+
+ // Pointers to top-left pixel of block in the input and output images.
+ uint8_t *const src_image_ptr = src_image + (input_stride << 1);
+ uint8_t *const dst_image_ptr = dst_image + 8;
+ uint8_t *const flimits =
+ reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
+ (void)vpx_memset(flimits, 255, block_width);
+
+ // Initialize pixels in the input:
+ // block pixels to value 1,
+ // border pixels to value 10.
+ (void)vpx_memset(src_image, 10, input_size);
+ uint8_t *pixel_ptr = src_image_ptr;
+ for (int i = 0; i < block_height; ++i) {
+ for (int j = 0; j < block_width; ++j) {
+ pixel_ptr[j] = 1;
+ }
+ pixel_ptr += input_stride;
+ }
+
+ // Initialize pixels in the output to 99.
+ (void)vpx_memset(dst_image, 99, output_size);
+
+ REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride,
+ output_stride, block_width, flimits, 16));
+
+ static const uint8_t expected_data[block_height] = {
+ 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
+ };
+
+ pixel_ptr = dst_image_ptr;
+ for (int i = 0; i < block_height; ++i) {
+ for (int j = 0; j < block_width; ++j) {
+ EXPECT_EQ(expected_data[i], pixel_ptr[j])
+ << "Vp8PostProcessingFilterTest failed with invalid filter output";
+ }
+ pixel_ptr += output_stride;
+ }
+
+ vpx_free(src_image);
+ vpx_free(dst_image);
+ vpx_free(flimits);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest,
+ ::testing::Values(vp8_post_proc_down_and_across_mb_row_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest,
+ ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
+#endif
+
+} // namespace
diff --git a/test/register_state_check.h b/test/register_state_check.h
new file mode 100644
index 0000000..fb3f53b
--- /dev/null
+++ b/test/register_state_check.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_
+#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_
+
+#ifdef _WIN64
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <winnt.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace testing {
+namespace internal {
+
+inline bool operator==(const M128A& lhs, const M128A& rhs) {
+ return (lhs.Low == rhs.Low && lhs.High == rhs.High);
+}
+
+} // namespace internal
+} // namespace testing
+
+namespace libvpx_test {
+
+// Compares the state of xmm[6-15] at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// Windows x64.
+// Usage:
+// {
+// RegisterStateCheck reg_check;
+// FunctionToVerify();
+// }
+class RegisterStateCheck {
+ public:
+ RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
+ ~RegisterStateCheck() { EXPECT_TRUE(Check()); }
+
+ private:
+ static bool StoreRegisters(CONTEXT* const context) {
+ const HANDLE this_thread = GetCurrentThread();
+ EXPECT_TRUE(this_thread != NULL);
+ context->ContextFlags = CONTEXT_FLOATING_POINT;
+ const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
+ EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
+ return context_saved;
+ }
+
+ // Compares the register state. Returns true if the states match.
+ bool Check() const {
+ if (!initialized_) return false;
+ CONTEXT post_context;
+ if (!StoreRegisters(&post_context)) return false;
+
+ const M128A* xmm_pre = &pre_context_.Xmm6;
+ const M128A* xmm_post = &post_context.Xmm6;
+ for (int i = 6; i <= 15; ++i) {
+ EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
+ ++xmm_pre;
+ ++xmm_post;
+ }
+ return !testing::Test::HasNonfatalFailure();
+ }
+
+ bool initialized_;
+ CONTEXT pre_context_;
+};
+
+#define REGISTER_STATE_CHECK(statement) do { \
+ libvpx_test::RegisterStateCheck reg_check; \
+ statement; \
+} while (false)
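The do { ... } while (false) wrapper is the usual trick for making a multi-statement macro act as a single statement: it gives the reg_check declaration its own scope and keeps the caller's trailing semicolon from breaking if/else chains. A small illustration with a hypothetical caller:

  if (check_registers)
    REGISTER_STATE_CHECK(pred_fn(&mb));  // expands to one statement
  else
    pred_fn(&mb);                        // the else still binds correctly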
+
+} // namespace libvpx_test
+
+#else // !_WIN64
+
+namespace libvpx_test {
+
+class RegisterStateCheck {};
+#define REGISTER_STATE_CHECK(statement) statement
+
+} // namespace libvpx_test
+
+#endif // _WIN64
+
+#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_
diff --git a/test/resize_test.cc b/test/resize_test.cc
new file mode 100644
index 0000000..c846157
--- /dev/null
+++ b/test/resize_test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <climits>
+#include <vector>
+#include "test/encode_test_driver.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kInitialWidth = 320;
+const unsigned int kInitialHeight = 240;
+
+unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
+ if (frame < 10)
+ return val;
+ if (frame < 20)
+ return val / 2;
+ if (frame < 30)
+ return val * 2 / 3;
+ if (frame < 40)
+ return val / 4;
+ if (frame < 50)
+ return val * 7 / 8;
+ return val;
+}
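Applied to the 320x240 initial size, ScaleForFrameNumber() produces the following schedule (integer division, computed directly from the code above):

  frames  0-9 : 320x240
  frames 10-19: 160x120
  frames 20-29: 213x160
  frames 30-39:  80x60
  frames 40-49: 280x210
  frames 50-59: 320x240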
+
+class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
+ public:
+ ResizingVideoSource() {
+ SetSize(kInitialWidth, kInitialHeight);
+ limit_ = 60;
+ }
+
+ protected:
+ virtual void Next() {
+ ++frame_;
+ SetSize(ScaleForFrameNumber(frame_, kInitialWidth),
+ ScaleForFrameNumber(frame_, kInitialHeight));
+ FillFrame();
+ }
+};
+
+class ResizeTest : public ::libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ protected:
+ struct FrameInfo {
+ FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+ : pts(_pts), w(_w), h(_h) {}
+
+ vpx_codec_pts_t pts;
+ unsigned int w;
+ unsigned int h;
+ };
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GetParam());
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ const unsigned char *buf =
+ reinterpret_cast<const unsigned char *>(pkt->data.frame.buf);
+ const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff;
+ const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff;
+
+ frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h));
+ }
+ }
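The byte offsets above come from the VP8 keyframe layout (RFC 6386): bytes 0-2 hold the frame tag, bytes 3-5 the start code 0x9d 0x01 0x2a, and bytes 6-7 / 8-9 carry width / height as little-endian 16-bit values whose low 14 bits are the dimension, hence the 0x3fff masks (the top 2 bits encode an upscaling factor, unused here).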
+
+ std::vector<FrameInfo> frame_info_list_;
+};
+
+TEST_P(ResizeTest, TestExternalResizeWorks) {
+ ResizingVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ const vpx_codec_pts_t pts = info->pts;
+ const unsigned int expected_w = ScaleForFrameNumber(pts, kInitialWidth);
+ const unsigned int expected_h = ScaleForFrameNumber(pts, kInitialHeight);
+
+ EXPECT_EQ(expected_w, info->w)
+ << "Frame " << pts << " had unexpected width";
+ EXPECT_EQ(expected_h, info->h)
+ << "Frame " << pts << " had unexpected height";
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES);
+} // namespace
diff --git a/test/sad_test.cc b/test/sad_test.cc
new file mode 100644
index 0000000..5a0653b
--- /dev/null
+++ b/test/sad_test.cc
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+extern "C" {
+#include "./vpx_config.h"
+#include "./vpx_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+
+typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr,
+ int source_stride,
+ const unsigned char *reference_ptr,
+ int reference_stride,
+ unsigned int max_sad);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) {
+ public:
+ static void SetUpTestCase() {
+ source_data_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kDataBufferSize));
+ reference_data_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kDataBufferSize));
+ }
+
+ static void TearDownTestCase() {
+ vpx_free(source_data_);
+ source_data_ = NULL;
+ vpx_free(reference_data_);
+ reference_data_ = NULL;
+ }
+
+ protected:
+ static const int kDataAlignment = 16;
+ static const int kDataBufferSize = 16 * 32;
+
+ virtual void SetUp() {
+ sad_fn_ = GET_PARAM(2);
+ height_ = GET_PARAM(1);
+ width_ = GET_PARAM(0);
+ source_stride_ = width_ * 2;
+ reference_stride_ = width_ * 2;
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ sad_m_by_n_fn_t sad_fn_;
+ virtual unsigned int SAD(unsigned int max_sad) {
+ unsigned int ret;
+ REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_,
+ reference_data_, reference_stride_,
+ max_sad));
+ return ret;
+ }
+
+ // Sum of Absolute Differences. Given two blocks, calculate the absolute
+ // difference between corresponding pixels and accumulate.
+ unsigned int ReferenceSAD(unsigned int max_sad) {
+ unsigned int sad = 0;
+
+ for (int h = 0; h < height_; ++h) {
+ for (int w = 0; w < width_; ++w) {
+ sad += abs(source_data_[h * source_stride_ + w]
+ - reference_data_[h * reference_stride_ + w]);
+ }
+ if (sad > max_sad) {
+ break;
+ }
+ }
+ return sad;
+ }
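In symbols, ReferenceSAD() computes sad = sum over h, w of |source[h * source_stride + w] - reference[h * reference_stride + w]|, with an early exit once the running total passes max_sad. For example, comparing the 2x2 blocks {1, 2, 3, 4} and {4, 2, 0, 4} gives |1-4| + |2-2| + |3-0| + |4-4| = 6.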
+
+ void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
+ for (int h = 0; h < height_; ++h) {
+ for (int w = 0; w < width_; ++w) {
+ data[h * stride + w] = fill_constant;
+ }
+ }
+ }
+
+ void FillRandom(uint8_t *data, int stride) {
+ for (int h = 0; h < height_; ++h) {
+ for (int w = 0; w < width_; ++w) {
+ data[h * stride + w] = rnd_.Rand8();
+ }
+ }
+ }
+
+ void CheckSad(unsigned int max_sad) {
+ unsigned int reference_sad, exp_sad;
+
+ reference_sad = ReferenceSAD(max_sad);
+ exp_sad = SAD(max_sad);
+
+ if (reference_sad <= max_sad) {
+ ASSERT_EQ(exp_sad, reference_sad);
+ } else {
+ // Alternative implementations are not required to check max_sad
+ ASSERT_GE(exp_sad, reference_sad);
+ }
+ }
+
+ // Handle blocks up to 16x16 with stride up to 32
+ int height_, width_;
+ static uint8_t* source_data_;
+ int source_stride_;
+ static uint8_t* reference_data_;
+ int reference_stride_;
+
+ ACMRandom rnd_;
+};
+
+uint8_t* SADTest::source_data_ = NULL;
+uint8_t* SADTest::reference_data_ = NULL;
+
+TEST_P(SADTest, MaxRef) {
+ FillConstant(source_data_, source_stride_, 0);
+ FillConstant(reference_data_, reference_stride_, 255);
+ CheckSad(UINT_MAX);
+}
+
+TEST_P(SADTest, MaxSrc) {
+ FillConstant(source_data_, source_stride_, 255);
+ FillConstant(reference_data_, reference_stride_, 0);
+ CheckSad(UINT_MAX);
+}
+
+TEST_P(SADTest, ShortRef) {
+ int tmp_stride = reference_stride_;
+ reference_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ CheckSad(UINT_MAX);
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, UnalignedRef) {
+ // The reference frame, but not the source frame, may be unaligned for
+ // certain types of searches.
+ int tmp_stride = reference_stride_;
+ reference_stride_ -= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ CheckSad(UINT_MAX);
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, ShortSrc) {
+ int tmp_stride = source_stride_;
+ source_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ CheckSad(UINT_MAX);
+ source_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, MaxSAD) {
+ // Verify that, when max_sad is set, the implementation does not return a
+ // value lower than the reference.
+ FillConstant(source_data_, source_stride_, 255);
+ FillConstant(reference_data_, reference_stride_, 0);
+ CheckSad(128);
+}
+
+using std::tr1::make_tuple;
+
+const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c;
+const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c;
+const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c;
+const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c;
+const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c;
+INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values(
+ make_tuple(16, 16, sad_16x16_c),
+ make_tuple(8, 16, sad_8x16_c),
+ make_tuple(16, 8, sad_16x8_c),
+ make_tuple(8, 8, sad_8x8_c),
+ make_tuple(4, 4, sad_4x4_c)));
+
+// ARM tests
+#if HAVE_MEDIA
+const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6;
+INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values(
+ make_tuple(16, 16, sad_16x16_armv6)));
+
+#endif
+#if HAVE_NEON
+const sad_m_by_n_fn_t sad_16x16_neon = vp8_sad16x16_neon;
+const sad_m_by_n_fn_t sad_8x16_neon = vp8_sad8x16_neon;
+const sad_m_by_n_fn_t sad_16x8_neon = vp8_sad16x8_neon;
+const sad_m_by_n_fn_t sad_8x8_neon = vp8_sad8x8_neon;
+const sad_m_by_n_fn_t sad_4x4_neon = vp8_sad4x4_neon;
+INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
+ make_tuple(16, 16, sad_16x16_neon),
+ make_tuple(8, 16, sad_8x16_neon),
+ make_tuple(16, 8, sad_16x8_neon),
+ make_tuple(8, 8, sad_8x8_neon),
+ make_tuple(4, 4, sad_4x4_neon)));
+#endif
+
+// X86 tests
+#if HAVE_MMX
+const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx;
+const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx;
+const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx;
+const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx;
+const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx;
+INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values(
+ make_tuple(16, 16, sad_16x16_mmx),
+ make_tuple(8, 16, sad_8x16_mmx),
+ make_tuple(16, 8, sad_16x8_mmx),
+ make_tuple(8, 8, sad_8x8_mmx),
+ make_tuple(4, 4, sad_4x4_mmx)));
+#endif
+#if HAVE_SSE2
+const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt;
+const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt;
+const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt;
+const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
+const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
+INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values(
+ make_tuple(16, 16, sad_16x16_wmt),
+ make_tuple(8, 16, sad_8x16_wmt),
+ make_tuple(16, 8, sad_16x8_wmt),
+ make_tuple(8, 8, sad_8x8_wmt),
+ make_tuple(4, 4, sad_4x4_wmt)));
+#endif
+#if HAVE_SSSE3
+const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
+INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
+ make_tuple(16, 16, sad_16x16_sse3)));
+#endif
+
+} // namespace
diff --git a/test/set_roi.cc b/test/set_roi.cc
new file mode 100644
index 0000000..3b6112e
--- /dev/null
+++ b/test/set_roi.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+extern "C" {
+#include "vp8/encoder/onyx_int.h"
+}
+
+namespace {
+
+TEST(Vp8RoiMapTest, ParameterCheck) {
+ int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
+ int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
+ unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };
+
+ const int internalq_trans[] = {
+ 0, 1, 2, 3, 4, 5, 7, 8,
+ 9, 10, 12, 13, 15, 17, 18, 19,
+ 20, 21, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31, 33, 35, 37, 39, 41,
+ 43, 45, 47, 49, 51, 53, 55, 57,
+ 59, 61, 64, 67, 70, 73, 76, 79,
+ 82, 85, 88, 91, 94, 97, 100, 103,
+ 106, 109, 112, 115, 118, 121, 124, 127,
+ };
+
+ // Initialize elements of cpi with valid defaults.
+ VP8_COMP cpi;
+ cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA;
+ cpi.cyclic_refresh_mode_enabled = 0;
+ cpi.mb.e_mbd.segmentation_enabled = 0;
+ cpi.mb.e_mbd.update_mb_segmentation_map = 0;
+ cpi.mb.e_mbd.update_mb_segmentation_data = 0;
+ cpi.common.mb_rows = 240 >> 4;
+ cpi.common.mb_cols = 320 >> 4;
+ const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols);
+ vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));
+
+ // Segment map
+ cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
+
+ // Allocate memory for the source ROI map.
+ unsigned char *roi_map =
+ reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
+ vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
+ vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
+ vpx_memset(&roi_map[mbs - (mbs >> 2)], 3, (mbs >> 2));
+
+ // Do a test call with valid parameters.
+ int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
+ cpi.common.mb_cols, delta_q, delta_lf,
+ threshold);
+ EXPECT_EQ(0, roi_retval)
+ << "vp8_set_roimap roi failed with default test parameters";
+
+ // Check that the values in the cpi structure get set as expected.
+ if (roi_retval == 0) {
+ // Check that the segment map got set.
+ const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs);
+ EXPECT_EQ(0, mapcompare) << "segment map error";
+
+ // Check the q deltas (note the need to translate into
+ // the internal range of 0-127).
+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) {
+ const int transq = internalq_trans[abs(delta_q[i])];
+ if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) {
+ EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i])
+ << "segment delta_q error";
+ break;
+ }
+ }
+
+ // Check the loop filter deltas
+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) {
+ if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) {
+ EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i])
+ << "segment delta_lf error";
+ break;
+ }
+ }
+
+ // Check the breakout thresholds
+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) {
+ unsigned int breakout =
+ static_cast<unsigned int>(cpi.segment_encode_breakout[i]);
+
+ if (threshold[i] != breakout) {
+ EXPECT_EQ(threshold[i], breakout)
+ << "breakout threshold error";
+ break;
+ }
+ }
+
+ // Segmentation and segmentation update flags should be set.
+ EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled)
+ << "segmentation_enabled error";
+ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map)
+ << "update_mb_segmentation_map error";
+ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data)
+ << "update_mb_segmentation_data error";
+
+ // Try a range of delta q and lf parameters (some legal, some not)
+ for (int i = 0; i < 1000; ++i) {
+ int rand_deltas[4];
+ int deltas_valid;
+ rand_deltas[0] = (rand() % 160) - 80;
+ rand_deltas[1] = (rand() % 160) - 80;
+ rand_deltas[2] = (rand() % 160) - 80;
+ rand_deltas[3] = (rand() % 160) - 80;
+
+ deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
+ (abs(rand_deltas[1]) <= 63) &&
+ (abs(rand_deltas[2]) <= 63) &&
+ (abs(rand_deltas[3]) <= 63)) ? 0 : -1;
+
+ // Test with random delta q values.
+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
+ cpi.common.mb_cols, rand_deltas,
+ delta_lf, threshold);
+ EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error";
+
+ // One delta_q error shown at a time
+ if (deltas_valid != roi_retval)
+ break;
+
+ // Test with random loop filter values.
+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
+ cpi.common.mb_cols, delta_q,
+ rand_deltas, threshold);
+ EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error";
+
+ // One delta loop filter error shown at a time
+ if (deltas_valid != roi_retval)
+ break;
+ }
+
+ // Test that we report an error if cyclic refresh is enabled.
+ cpi.cyclic_refresh_mode_enabled = 1;
+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
+ cpi.common.mb_cols, delta_q,
+ delta_lf, threshold);
+ EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error";
+ cpi.cyclic_refresh_mode_enabled = 0;
+
+ // Test an invalid number of rows or columns.
+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1,
+ cpi.common.mb_cols, delta_q,
+ delta_lf, threshold);
+ EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error";
+
+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
+ cpi.common.mb_cols - 1, delta_q,
+ delta_lf, threshold);
+ EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error";
+ }
+
+ // Free allocated memory
+ if (cpi.segmentation_map)
+ vpx_free(cpi.segmentation_map);
+ if (roi_map)
+ vpx_free(roi_map);
+}
+
+} // namespace
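vp8_set_roimap() is the internal entry point behind the public VP8E_SET_ROI_MAP control, so the parameter rules checked above are what applications hit through vpx_codec_control(). A hedged sketch of that application-side path, following the vpx_roi_map_t declaration in vpx/vp8cx.h; the codec handle, the map buffer and die() are assumed placeholders:

  vpx_roi_map_t roi;
  memset(&roi, 0, sizeof(roi));
  roi.rows = 240 >> 4;            // frame height in macroblocks
  roi.cols = 320 >> 4;            // frame width in macroblocks
  roi.roi_map = map;              // one segment id (0..3) per macroblock
  roi.delta_q[1] = -25;           // per-segment quantizer delta, |dq| <= 63
  roi.delta_lf[1] = -25;          // per-segment loop filter delta, |dlf| <= 63
  roi.static_threshold[1] = 100;  // per-segment encode breakout threshold
  if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
    die("Failed to set ROI map");  // die() is a placeholder error handler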
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
new file mode 100644
index 0000000..c9dcceb
--- /dev/null
+++ b/test/sixtap_predict_test.cc
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+extern "C" {
+#include "./vpx_config.h"
+#include "./vpx_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+namespace {
+
+typedef void (*sixtap_predict_fn_t)(uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ uint8_t *dst_ptr,
+ int dst_pitch);
+
+class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) {
+ public:
+ static void SetUpTestCase() {
+ src_ = reinterpret_cast<uint8_t*>(vpx_memalign(kDataAlignment, kSrcSize));
+ dst_ = reinterpret_cast<uint8_t*>(vpx_memalign(kDataAlignment, kDstSize));
+ dst_c_ = reinterpret_cast<uint8_t*>(vpx_memalign(kDataAlignment, kDstSize));
+ }
+
+ static void TearDownTestCase() {
+ vpx_free(src_);
+ src_ = NULL;
+ vpx_free(dst_);
+ dst_ = NULL;
+ vpx_free(dst_c_);
+ dst_c_ = NULL;
+ }
+
+ protected:
+ // Make test arrays big enough for 16x16 functions. Six-tap filters
+ // need 5 extra pixels outside of the macroblock.
+ static const int kSrcStride = 21;
+ static const int kDstStride = 16;
+ static const int kDataAlignment = 16;
+ static const int kSrcSize = kSrcStride * kSrcStride + 1;
+ static const int kDstSize = kDstStride * kDstStride;
+
+ virtual void SetUp() {
+ width_ = GET_PARAM(0);
+ height_ = GET_PARAM(1);
+ sixtap_predict_ = GET_PARAM(2);
+ // src_, dst_ and dst_c_ are pointers, so sizeof() would only cover the
+ // pointer itself; clear the full buffers instead.
+ memset(src_, 0, kSrcSize);
+ memset(dst_, 0, kDstSize);
+ memset(dst_c_, 0, kDstSize);
+ }
+
+ int width_;
+ int height_;
+ sixtap_predict_fn_t sixtap_predict_;
+ // src_ stores the macroblock we will filter on, and is 1 byte larger than
+ // needed in order to test unaligned access. The result is stored in dst_
+ // and dst_c_ (the C reference code result).
+ static uint8_t* src_;
+ static uint8_t* dst_;
+ static uint8_t* dst_c_;
+};
+
+uint8_t* SixtapPredictTest::src_ = NULL;
+uint8_t* SixtapPredictTest::dst_ = NULL;
+uint8_t* SixtapPredictTest::dst_c_ = NULL;
+
+TEST_P(SixtapPredictTest, TestWithPresetData) {
+ // Test input
+ static const uint8_t test_data[kSrcSize] = {
+ 216, 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177,
+ 79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120,
+ 48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32,
+ 182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124,
+ 148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10,
+ 166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153,
+ 167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184,
+ 114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148,
+ 182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128,
+ 224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76,
+ 148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69,
+ 176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 238, 249, 176, 58, 142,
+ 64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181,
+ 235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42,
+ 148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184,
+ 35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77,
+ 209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221,
+ 223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170,
+ 24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13,
+ 93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69,
+ 49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215,
+ 135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36,
+ 119, 206, 231, 231, 11, 32, 209, 83, 27, 229, 204, 149, 155, 83, 109, 35,
+ 93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52,
+ 53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83,
+ 198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52,
+ 134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202,
+ 75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16
+ };
+
+ // Expected result
+ static const uint8_t expected_dst[kDstSize] = {
+ 117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38,
+ 105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79,
+ 208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102,
+ 159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231,
+ 208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184,
+ 100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133,
+ 113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240,
+ 255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185,
+ 200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189,
+ 76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191,
+ 185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193,
+ 161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95,
+ 15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196,
+ 84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 12, 131, 166,
+ 189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180,
+ 224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75,
+ 136, 85, 189, 46, 71, 200, 90
+ };
+
+ uint8_t *src = const_cast<uint8_t*>(test_data);
+
+ REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,
+ 2, 2, dst_, kDstStride));
+
+ for (int i = 0; i < height_; ++i)
+ for (int j = 0; j < width_; ++j)
+ ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j])
+ << "i==" << (i * width_ + j);
+}
+
+using libvpx_test::ACMRandom;
+
+TEST_P(SixtapPredictTest, TestWithRandomData) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int i = 0; i < kSrcSize; ++i)
+ src_[i] = rnd.Rand8();
+
+ // Run tests for all possible offsets.
+ for (int xoffset = 0; xoffset < 8; ++xoffset) {
+ for (int yoffset = 0; yoffset < 8; ++yoffset) {
+ // Call c reference function.
+ // Move start point to next pixel to test if the function reads
+ // unaligned data correctly.
+ vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,
+ xoffset, yoffset, dst_c_, kDstStride);
+
+ // Run test.
+ REGISTER_STATE_CHECK(
+ sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,
+ xoffset, yoffset, dst_, kDstStride));
+
+ for (int i = 0; i < height_; ++i)
+ for (int j = 0; j < width_; ++j)
+ ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j])
+ << "i==" << (i * width_ + j);
+ }
+ }
+}
+
+using std::tr1::make_tuple;
+
+const sixtap_predict_fn_t sixtap_16x16_c = vp8_sixtap_predict16x16_c;
+const sixtap_predict_fn_t sixtap_8x8_c = vp8_sixtap_predict8x8_c;
+const sixtap_predict_fn_t sixtap_8x4_c = vp8_sixtap_predict8x4_c;
+const sixtap_predict_fn_t sixtap_4x4_c = vp8_sixtap_predict4x4_c;
+INSTANTIATE_TEST_CASE_P(
+ C, SixtapPredictTest, ::testing::Values(
+ make_tuple(16, 16, sixtap_16x16_c),
+ make_tuple(8, 8, sixtap_8x8_c),
+ make_tuple(8, 4, sixtap_8x4_c),
+ make_tuple(4, 4, sixtap_4x4_c)));
+#if HAVE_MMX
+const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx;
+const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx;
+const sixtap_predict_fn_t sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx;
+const sixtap_predict_fn_t sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx;
+INSTANTIATE_TEST_CASE_P(
+ MMX, SixtapPredictTest, ::testing::Values(
+ make_tuple(16, 16, sixtap_16x16_mmx),
+ make_tuple(8, 8, sixtap_8x8_mmx),
+ make_tuple(8, 4, sixtap_8x4_mmx),
+ make_tuple(4, 4, sixtap_4x4_mmx)));
+#endif
+#if HAVE_SSE2
+const sixtap_predict_fn_t sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2;
+const sixtap_predict_fn_t sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2;
+const sixtap_predict_fn_t sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2;
+INSTANTIATE_TEST_CASE_P(
+ SSE2, SixtapPredictTest, ::testing::Values(
+ make_tuple(16, 16, sixtap_16x16_sse2),
+ make_tuple(8, 8, sixtap_8x8_sse2),
+ make_tuple(8, 4, sixtap_8x4_sse2)));
+#endif
+#if HAVE_SSSE3
+const sixtap_predict_fn_t sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3;
+const sixtap_predict_fn_t sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3;
+const sixtap_predict_fn_t sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3;
+const sixtap_predict_fn_t sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3;
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, SixtapPredictTest, ::testing::Values(
+ make_tuple(16, 16, sixtap_16x16_ssse3),
+ make_tuple(8, 8, sixtap_8x8_ssse3),
+ make_tuple(8, 4, sixtap_8x4_ssse3),
+ make_tuple(4, 4, sixtap_4x4_ssse3)));
+#endif
+} // namespace
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
new file mode 100644
index 0000000..60acf81
--- /dev/null
+++ b/test/subtract_test.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+extern "C" {
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vp8/encoder/block.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch);
+
+namespace {
+
+class SubtractBlockTest : public ::testing::TestWithParam<subtract_b_fn_t> {};
+
+using libvpx_test::ACMRandom;
+
+TEST_P(SubtractBlockTest, SimpleSubtract) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ BLOCK be;
+ BLOCKD bd;
+ // in libvpx, this stride is always 16
+ const int kDiffPredStride = 16;
+ const int kSrcStride[] = {32, 16, 8, 4, 0};
+ const int kBlockWidth = 4;
+ const int kBlockHeight = 4;
+
+ // Allocate buffers; align to 16 for MMX/SSE tests.
+ uint8_t *source = reinterpret_cast<uint8_t*>(
+ vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
+ be.src_diff = reinterpret_cast<int16_t*>(
+ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
+ bd.predictor = reinterpret_cast<unsigned char*>(
+ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
+
+ for (int i = 0; kSrcStride[i] > 0; ++i) {
+ // start at block0
+ be.src = 0;
+ be.base_src = &source;
+ be.src_stride = kSrcStride[i];
+
+ // set difference
+ int16_t *src_diff = be.src_diff;
+ for (int r = 0; r < kBlockHeight; ++r) {
+ for (int c = 0; c < kBlockWidth; ++c) {
+ src_diff[c] = 0xa5a5;
+ }
+ src_diff += kDiffPredStride;
+ }
+
+ // set destination
+ uint8_t *base_src = *be.base_src;
+ for (int r = 0; r < kBlockHeight; ++r) {
+ for (int c = 0; c < kBlockWidth; ++c) {
+ base_src[c] = rnd.Rand8();
+ }
+ base_src += be.src_stride;
+ }
+
+ // set predictor
+ uint8_t *predictor = bd.predictor;
+ for (int r = 0; r < kBlockHeight; ++r) {
+ for (int c = 0; c < kBlockWidth; ++c) {
+ predictor[c] = rnd.Rand8();
+ }
+ predictor += kDiffPredStride;
+ }
+
+ REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
+
+ base_src = *be.base_src;
+ src_diff = be.src_diff;
+ predictor = bd.predictor;
+ for (int r = 0; r < kBlockHeight; ++r) {
+ for (int c = 0; c < kBlockWidth; ++c) {
+ EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r
+ << ", c = " << c;
+ }
+ src_diff += kDiffPredStride;
+ predictor += kDiffPredStride;
+ base_src += be.src_stride;
+ }
+ }
+ vpx_free(be.src_diff);
+ vpx_free(source);
+ vpx_free(bd.predictor);
+}
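The verification loop relies on the defining identity of the subtract step: vp8_subtract_b computes src_diff = src - predictor per pixel, so adding the predictor back must reproduce the source exactly for every tested source stride; any SIMD variant that mishandles the stride or pitch fails the EXPECT_EQ above.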
+
+INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
+ ::testing::Values(vp8_subtract_b_c));
+
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
+ ::testing::Values(vp8_subtract_b_mmx));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest,
+ ::testing::Values(vp8_subtract_b_sse2));
+#endif
+
+} // namespace
diff --git a/test/test-data.sha1 b/test/test-data.sha1
new file mode 100644
index 0000000..c1b6a83
--- /dev/null
+++ b/test/test-data.sha1
@@ -0,0 +1,123 @@
+d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv
+5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
+65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
+906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf
+ec144b1af53af895db78355785650b96dd3f0ade vp80-00-comprehensive-004.ivf
+afc7091785c62f1c121c4554a2830c30704587d9 vp80-00-comprehensive-005.ivf
+42ea9d55c818145d06a9b633b8e85c6a6164fd3e vp80-00-comprehensive-006.ivf
+e5b3a73ab79fe024c14309d653d6bed92902ee3b vp80-00-comprehensive-007.ivf
+f3c50a58875930adfb84525c0ef59d7e4c08540c vp80-00-comprehensive-008.ivf
+4b2841fdb83db51ae322096ae468bbb9dc2c8362 vp80-00-comprehensive-009.ivf
+efbff736e3a91ab6a98c5bc2dce65d645944c7b1 vp80-00-comprehensive-010.ivf
+6b315102cae008d22a3d2c231be92cb704a222f8 vp80-00-comprehensive-011.ivf
+f3214a4fea14c2d5ec689936c1613f274c859ee8 vp80-00-comprehensive-012.ivf
+e4094e96d308c8a35b74c480a43d853c5294cd34 vp80-00-comprehensive-013.ivf
+5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a vp80-00-comprehensive-014.ivf
+e8467688ddf26b5000664f904faf0d70506aa653 vp80-00-comprehensive-015.ivf
+aab55582337dfd2a39ff54fb2576a91910d49337 vp80-00-comprehensive-016.ivf
+1ba24724f80203c9bae4f1d0f99d534721980016 vp80-00-comprehensive-017.ivf
+143a15512b46f436280ddb4d0e6411eb4af434f2 vp80-00-comprehensive-018.ivf
+c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b vp80-01-intra-1400.ivf
+f383955229afe3408453e316d11553d923ca60d5 vp80-01-intra-1411.ivf
+84e1f4343f174c9f3c83f834bac3196fb325bf2c vp80-01-intra-1416.ivf
+fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 vp80-01-intra-1417.ivf
+71ea772d3e9d315b8cbecf41207b8a237c34853b vp80-02-inter-1402.ivf
+d85dbc4271525dcd128c503f936fe69091d1f8d0 vp80-02-inter-1412.ivf
+d4e5d3ad56511867d025f93724d090f92ba6ec3d vp80-02-inter-1418.ivf
+91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa vp80-02-inter-1424.ivf
+17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 vp80-03-segmentation-01.ivf
+3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb vp80-03-segmentation-02.ivf
+c156778d5340967d4b369c490848076e92f1f875 vp80-03-segmentation-03.ivf
+d25dcff6c60e87a1af70945b8911b6b4998533b0 vp80-03-segmentation-04.ivf
+362baba2ce454c9db21218f35e81c27a5ed0b730 vp80-03-segmentation-1401.ivf
+d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 vp80-03-segmentation-1403.ivf
+033adf7f3a13836a3f1cffcb87c1972900f2b5c6 vp80-03-segmentation-1407.ivf
+4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f vp80-03-segmentation-1408.ivf
+f37a62b197c2600d75e0ccfbb31b60efdedac251 vp80-03-segmentation-1409.ivf
+eb25bd7bfba5b2f6935018a930f42d123b1e7fcd vp80-03-segmentation-1410.ivf
+b9d5c436663a30c27cfff84b53a002e501258843 vp80-03-segmentation-1413.ivf
+6da92b9d1a180cc3a8afe348ab12258f5a37be1a vp80-03-segmentation-1414.ivf
+a4f5842602886bd669f115f93d8a35c035cb0948 vp80-03-segmentation-1415.ivf
+f295dceb8ef278b77251b3f9df8aee22e161d547 vp80-03-segmentation-1425.ivf
+198dbf9f36f733200e432664cc8c5752d59779de vp80-03-segmentation-1426.ivf
+7704804e32f5de976803929934a7fafe101ac7b0 vp80-03-segmentation-1427.ivf
+831ccd862ea95ca025d2f3bd8b88678752f5416d vp80-03-segmentation-1432.ivf
+b3c11978529289f9109f2766fcaba3ebc40e11ef vp80-03-segmentation-1435.ivf
+a835a731f5520ebfc1002c40121264d0020559ac vp80-03-segmentation-1436.ivf
+1d1732942f773bb2a5775fcb9689b1579ce28eab vp80-03-segmentation-1437.ivf
+db04799adfe089dfdf74dbd43cc05ede7161f99e vp80-03-segmentation-1441.ivf
+7caf39b3f20cfd52b998210878062e52a5edf1e6 vp80-03-segmentation-1442.ivf
+3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 vp80-04-partitions-1404.ivf
+93cc323b6b6867f1b12dd48773424549c6960a6b vp80-04-partitions-1405.ivf
+047eedb14b865bdac8a3538e63801054e0295e9c vp80-04-partitions-1406.ivf
+0f1233bd2bc33f56ce5e495dbd455d122339f384 vp80-05-sharpness-1428.ivf
+51767fc136488a9535c2a4c38067c542ee2048df vp80-05-sharpness-1429.ivf
+9805aa107672de25d6fb8c35e20d06deca5efe18 vp80-05-sharpness-1430.ivf
+61db6b965f9c27aebe71b85bf2d5877e58e4bbdf vp80-05-sharpness-1431.ivf
+10420d266290d2923555f84af38eeb96edbd3ae8 vp80-05-sharpness-1433.ivf
+3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 vp80-05-sharpness-1434.ivf
+c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 vp80-05-sharpness-1438.ivf
+aff51d865c2621b60510459244ea83e958e4baed vp80-05-sharpness-1439.ivf
+da386e72b19b5485a6af199c5eb60ef25e510dd1 vp80-05-sharpness-1440.ivf
+6759a095203d96ccd267ce09b1b050b8cc4c2f1f vp80-05-sharpness-1443.ivf
+db55ec7fd02c864ba996ff060b25b1e08611330b vp80-00-comprehensive-001.ivf.md5
+29db0ad011cba1e45f856d5623cd38dac3e3bf19 vp80-00-comprehensive-002.ivf.md5
+e84f258f69e173e7d68f8f8c037a0a3766902182 vp80-00-comprehensive-003.ivf.md5
+eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 vp80-00-comprehensive-004.ivf.md5
+4206f71c94894bd5b5b376f6c09b3817dbc65206 vp80-00-comprehensive-005.ivf.md5
+4f89b356f6f2fecb928f330a10f804f00f5325f5 vp80-00-comprehensive-006.ivf.md5
+2813236a32964dd8007e17648bcf035a20fcda6c vp80-00-comprehensive-007.ivf.md5
+10746c72098f872803c900e17c5680e451f5f498 vp80-00-comprehensive-008.ivf.md5
+39a23d0692ce64421a7bb7cdf6ccec5928d37fff vp80-00-comprehensive-009.ivf.md5
+f6e3de8931a0cc659bda8fbc14050346955e72d4 vp80-00-comprehensive-010.ivf.md5
+101683ec195b6e944f7cd1e468fc8921439363e6 vp80-00-comprehensive-011.ivf.md5
+1f592751ce46d8688998fa0fa4fbdcda0fd4058c vp80-00-comprehensive-012.ivf.md5
+6066176f90ca790251e795fca1a5797d59999841 vp80-00-comprehensive-013.ivf.md5
+2656da94ba93691f23edc4d60b3a09e2be46c217 vp80-00-comprehensive-014.ivf.md5
+c6e0d5f5d61460c8ac8edfa4e701f10312c03133 vp80-00-comprehensive-015.ivf.md5
+ee60fee501d8493e34e8d6a1fe315b51ed09b24a vp80-00-comprehensive-016.ivf.md5
+9f1914ceffcad4546c0a29de3ef591d8bea304dc vp80-00-comprehensive-017.ivf.md5
+e0305178fe288a9fd8082b39e2d03181edb19054 vp80-00-comprehensive-018.ivf.md5
+612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 vp80-01-intra-1400.ivf.md5
+48ea06097ac8269c5e8c2131d3d0639f431fcf0e vp80-01-intra-1411.ivf.md5
+6e2ab4e7677ad0ba868083ca6bc387ee922b400c vp80-01-intra-1416.ivf.md5
+eca0a90348959ce3854142f8d8641b13050e8349 vp80-01-intra-1417.ivf.md5
+920feea203145d5c2258a91c4e6991934a79a99e vp80-02-inter-1402.ivf.md5
+f71d97909fe2b3dd65be7e1f56c72237f0cef200 vp80-02-inter-1412.ivf.md5
+e911254569a30bbb2a237ff8b79f69ed9da0672d vp80-02-inter-1418.ivf.md5
+58c789c50c9bb9cc90580bed291164a0939d28ba vp80-02-inter-1424.ivf.md5
+ff3e2f441327b9c20a0b37c524e0f5a48a36de7b vp80-03-segmentation-01.ivf.md5
+0791f417f076a542ae66fbc3426ab4d94cbd6c75 vp80-03-segmentation-02.ivf.md5
+722e50f1a6a91c34302d68681faffc1c26d1cc57 vp80-03-segmentation-03.ivf.md5
+c701f1885bcfb27fb8e70cc65606b289172ef889 vp80-03-segmentation-04.ivf.md5
+f79bc9ec189a2b4807632a3d0c5bf04a178b5300 vp80-03-segmentation-1401.ivf.md5
+b9aa4c74c0219b639811c44760d0b24cd8bb436a vp80-03-segmentation-1403.ivf.md5
+70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 vp80-03-segmentation-1407.ivf.md5
+265f962ee781531f9a93b9309461316fd32b2a1d vp80-03-segmentation-1408.ivf.md5
+0c4ecbbd6dc042d30e626d951b65f460dd6cd563 vp80-03-segmentation-1409.ivf.md5
+cf779af36a937f06570a0fca9db64ba133451dee vp80-03-segmentation-1410.ivf.md5
+0e6c5036d51ab078842f133934926c598a9cff02 vp80-03-segmentation-1413.ivf.md5
+eb3930aaf229116c80d507516c34759c3f6cdf69 vp80-03-segmentation-1414.ivf.md5
+123d6c0f72ee87911c4ae7538e87b7d163b22d6c vp80-03-segmentation-1415.ivf.md5
+e70551d1a38920e097a5d8782390b79ecaeb7505 vp80-03-segmentation-1425.ivf.md5
+44e8f4117e46dbb302b2cfd81171cc1a1846e431 vp80-03-segmentation-1426.ivf.md5
+52636e54aee5f95bbace37021bd67de5db767e9a vp80-03-segmentation-1427.ivf.md5
+b1ad3eff20215c28e295b15ef3636ed926d59cba vp80-03-segmentation-1432.ivf.md5
+24c22a552fa28a90e5978f67f57181cc2d7546d7 vp80-03-segmentation-1435.ivf.md5
+96c49c390abfced18a7a8c9b9ea10af778e10edb vp80-03-segmentation-1436.ivf.md5
+f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5
+1c0700ca27c9b0090a7747a4b0b4dc21d1843181 vp80-03-segmentation-1441.ivf.md5
+81d4f23ca32667ee958bae579c8f5e97ba72eb97 vp80-03-segmentation-1442.ivf.md5
+272efcef07a3a30fbca51bfd566063d8258ec0be vp80-04-partitions-1404.ivf.md5
+66ed219ab812ac801b256d35cf495d193d4cf478 vp80-04-partitions-1405.ivf.md5
+36083f37f56f502bd60ec5e07502ee9e6b8699b0 vp80-04-partitions-1406.ivf.md5
+6ca909bf168a64c09415626294665dc1be3d1973 vp80-05-sharpness-1428.ivf.md5
+1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a vp80-05-sharpness-1429.ivf.md5
+71bcbe5357d36a19df5b07fbe3e27bffa8893f0a vp80-05-sharpness-1430.ivf.md5
+89a09b1dffce2d55770a89e58d9925c70ef79bf8 vp80-05-sharpness-1431.ivf.md5
+08444a18b4e6ba3450c0796dd728d48c399a2dc9 vp80-05-sharpness-1433.ivf.md5
+6d6223719a90c13e848aa2a8a6642098cdb5977a vp80-05-sharpness-1434.ivf.md5
+41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5
+086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5
+d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5
+8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5
\ No newline at end of file
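Each line above pairs a SHA-1 digest with a file name, the format consumed by sha1sum's check mode, so the downloaded test data can be verified with something like sha1sum -c test-data.sha1, run from the directory holding the files.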
diff --git a/test/test.mk b/test/test.mk
new file mode 100644
index 0000000..982be5b
--- /dev/null
+++ b/test/test.mk
@@ -0,0 +1,179 @@
+LIBVPX_TEST_SRCS-yes += acm_random.h
+LIBVPX_TEST_SRCS-yes += register_state_check.h
+LIBVPX_TEST_SRCS-yes += test.mk
+LIBVPX_TEST_SRCS-yes += test_libvpx.cc
+LIBVPX_TEST_SRCS-yes += util.h
+LIBVPX_TEST_SRCS-yes += video_source.h
+
+##
+## BLACK BOX TESTS
+##
+## Black box tests only use the public API.
+##
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
+
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc
+##
+## WHITE BOX TESTS
+##
+## Whitebox tests invoke functions not exposed via the public API. Certain
+## shared library builds don't make these functions accessible.
+##
+ifeq ($(CONFIG_SHARED),)
+
+# These tests require both the encoder and decoder to be built.
+ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
+LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
+endif
+
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc
+LIBVPX_TEST_SRCS-yes += idctllm_test.cc
+LIBVPX_TEST_SRCS-yes += intrapred_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
+LIBVPX_TEST_SRCS-yes += sad_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
+LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
+
+endif
+
+
+##
+## TEST DATA
+##
+LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
new file mode 100644
index 0000000..cfd5d28
--- /dev/null
+++ b/test/test_libvpx.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <string>
+#include "vpx_config.h"
+#if ARCH_X86 || ARCH_X86_64
+extern "C" {
+#include "vpx_ports/x86.h"
+}
+#endif
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+static void append_gtest_filter(const char *str) {
+ std::string filter = ::testing::FLAGS_gtest_filter;
+ filter += str;
+ ::testing::FLAGS_gtest_filter = filter;
+}
+
+int main(int argc, char **argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+
+#if ARCH_X86 || ARCH_X86_64
+ const int simd_caps = x86_simd_caps();
+ if(!(simd_caps & HAS_MMX))
+ append_gtest_filter(":-MMX/*");
+ if(!(simd_caps & HAS_SSE))
+ append_gtest_filter(":-SSE/*");
+ if(!(simd_caps & HAS_SSE2))
+ append_gtest_filter(":-SSE2/*");
+ if(!(simd_caps & HAS_SSE3))
+ append_gtest_filter(":-SSE3/*");
+ if(!(simd_caps & HAS_SSSE3))
+ append_gtest_filter(":-SSSE3/*");
+ if(!(simd_caps & HAS_SSE4_1))
+ append_gtest_filter(":-SSE4_1/*");
+#endif
+
+ return RUN_ALL_TESTS();
+}
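
The negative filters built in main() lean on googletest's naming scheme:
INSTANTIATE_TEST_CASE_P prefixes every generated test with its first argument
plus a slash, so a pattern such as ":-SSE2/*" removes a whole instantiation at
once. A minimal sketch of that convention (SadTest and sad_c are invented for
illustration; they are not the sad_test.cc added elsewhere in this patch):

    #include "third_party/googletest/src/include/gtest/gtest.h"

    typedef int (*sad_fn_t)(const unsigned char *a, const unsigned char *b);

    class SadTest : public ::testing::TestWithParam<sad_fn_t> {};

    TEST_P(SadTest, Smoke) {
      const unsigned char zeros[16] = {0};
      EXPECT_EQ(0, GetParam()(zeros, zeros));
    }

    static int sad_c(const unsigned char *a, const unsigned char *b) {
      int sum = 0;
      for (int i = 0; i < 16; ++i)
        sum += (a[i] > b[i]) ? a[i] - b[i] : b[i] - a[i];
      return sum;
    }

    // Tests registered here are named "SSE2/SadTest.Smoke/0", which the
    // ":-SSE2/*" filter assembled in main() excludes on CPUs without SSE2.
    INSTANTIATE_TEST_CASE_P(SSE2, SadTest, ::testing::Values(sad_c));
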
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
new file mode 100644
index 0000000..938457b
--- /dev/null
+++ b/test/test_vector_test.cc
@@ -0,0 +1,144 @@
+/*
+ Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+
+ Use of this source code is governed by a BSD-style license
+ that can be found in the LICENSE file in the root of the source
+ tree. An additional intellectual property rights grant can be found
+ in the file PATENTS. All contributing project authors may
+ be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+extern "C" {
+#include "./md5_utils.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+#if defined(_MSC_VER)
+#define snprintf sprintf_s
+#endif
+
+namespace {
+// There are 61 test vectors in total.
+const char *kTestVectors[] = {
+ "vp80-00-comprehensive-001.ivf",
+ "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf",
+ "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf",
+ "vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf",
+ "vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf",
+ "vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf",
+ "vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf",
+ "vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf",
+ "vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf",
+ "vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf",
+ "vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf",
+ "vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf",
+ "vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf",
+ "vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf",
+ "vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf",
+ "vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf",
+ "vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf",
+ "vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf",
+ "vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf",
+ "vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf",
+ "vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf",
+ "vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf",
+ "vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf",
+ "vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf",
+ "vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf",
+ "vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf",
+ "vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf",
+ "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf",
+ "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf",
+ "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf",
+ "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf"
+};
+
+class TestVectorTest : public libvpx_test::DecoderTest,
+ public ::testing::TestWithParam<const char*> {
+ protected:
+ TestVectorTest() : md5_file_(NULL) {}
+
+ virtual ~TestVectorTest() {
+ if (md5_file_)
+ fclose(md5_file_);
+ }
+
+ void OpenMD5File(const std::string& md5_file_name_) {
+ md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
+ ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: "
+ << md5_file_name_;
+ }
+
+ virtual void DecompressedFrameHook(const vpx_image_t& img,
+ const unsigned int frame_number) {
+ char expected_md5[33];
+ char junk[128];
+
+    // Read the expected md5 checksum for this frame.
+ const int res = fscanf(md5_file_, "%s %s", expected_md5, junk);
+ ASSERT_NE(res, EOF) << "Read md5 data failed";
+ expected_md5[32] = '\0';
+
+ MD5Context md5;
+ MD5Init(&md5);
+
+    // Compute and update md5 for each row in the decompressed data.
+ for (int plane = 0; plane < 3; ++plane) {
+ uint8_t *buf = img.planes[plane];
+
+ for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h);
+ ++y) {
+ MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w));
+ buf += img.stride[plane];
+ }
+ }
+
+ uint8_t md5_sum[16];
+ MD5Final(md5_sum, &md5);
+
+ char actual_md5[33];
+    // Convert the digest to a hex string to get the actual md5.
+ for (int i = 0; i < 16; i++) {
+ snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x",
+ md5_sum[i]);
+ }
+ actual_md5[32] = '\0';
+
+ // Check md5 match.
+ ASSERT_STREQ(expected_md5, actual_md5)
+ << "Md5 checksums don't match: frame number = " << frame_number;
+ }
+
+ private:
+ FILE *md5_file_;
+};
+
+// This test runs through the whole set of test vectors and decodes them.
+// The md5 checksum is computed for each frame in the video file. If the
+// checksums match the expected md5 data, the test passes. Otherwise, the
+// test fails.
+TEST_P(TestVectorTest, MD5Match) {
+ const std::string filename = GetParam();
+ // Open compressed video file.
+ libvpx_test::IVFVideoSource video(filename);
+
+ video.Init();
+
+ // Construct md5 file name.
+ const std::string md5_filename = filename + ".md5";
+ OpenMD5File(md5_filename);
+
+  // Decode the frames and check that the md5 values match.
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest,
+ ::testing::ValuesIn(kTestVectors));
+
+} // namespace
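
The plane walk in DecompressedFrameHook hashes only the d_w visible bytes of
each row (skipping the stride padding) and halves both dimensions, rounding
up, for the two chroma planes, which is how vpxdec checksums I420 output. A
standalone restatement of the traversal, with MD5Update() replaced by a byte
count so the arithmetic is visible:

    #include <cstddef>

    // Bytes hashed for one I420 frame; substitute MD5Update() for the
    // accumulation to reproduce the checksum computed by the test above.
    size_t HashedBytes(unsigned int d_w, unsigned int d_h) {
      size_t total = 0;
      for (int plane = 0; plane < 3; ++plane) {
        const unsigned int w = plane ? (d_w + 1) >> 1 : d_w;
        const unsigned int h = plane ? (d_h + 1) >> 1 : d_h;
        for (unsigned int y = 0; y < h; ++y)
          total += w;  // one visible row; the stride padding is not hashed
      }
      return total;
    }
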
diff --git a/test/util.h b/test/util.h
new file mode 100644
index 0000000..06a70cc
--- /dev/null
+++ b/test/util.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_UTIL_H_
+#define TEST_UTIL_H_
+
+// Macros
+#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
+#define GET_PARAM(k) std::tr1::get< k >(GetParam())
+
+#endif // TEST_UTIL_H_
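
For reference, a hypothetical test built on these macros; PARAMS constructs
the TestWithParam base over a std::tr1::tuple and GET_PARAM extracts one
tuple element. SizeTest and its values are invented for illustration:

    #include "test/util.h"
    #include "third_party/googletest/src/include/gtest/gtest.h"

    // A test parameterized over (width, height) pairs.
    class SizeTest : public PARAMS(int, int) {};

    TEST_P(SizeTest, DimensionsAreMacroblockAligned) {
      const int width = GET_PARAM(0);
      const int height = GET_PARAM(1);
      EXPECT_EQ(0, width % 16);
      EXPECT_EQ(0, height % 16);
    }

    INSTANTIATE_TEST_CASE_P(Aligned, SizeTest,
                            ::testing::Combine(::testing::Values(176, 352),
                                               ::testing::Values(144, 288)));
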
diff --git a/test/video_source.h b/test/video_source.h
new file mode 100644
index 0000000..9772657
--- /dev/null
+++ b/test/video_source.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_VIDEO_SOURCE_H_
+#define TEST_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "test/acm_random.h"
+#include "vpx/vpx_encoder.h"
+
+namespace libvpx_test {
+
+static FILE *OpenTestDataFile(const std::string& file_name) {
+ std::string path_to_source = file_name;
+ const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH");
+
+ if (kDataPath) {
+ path_to_source = kDataPath;
+ path_to_source += "/";
+ path_to_source += file_name;
+ }
+
+ return fopen(path_to_source.c_str(), "rb");
+}
+
+// Abstract base class for test video sources, which provide a stream of
+// vpx_image_t images with associated timestamps and duration.
+class VideoSource {
+ public:
+ virtual ~VideoSource() {}
+
+ // Prepare the stream for reading, rewind/open as necessary.
+ virtual void Begin() = 0;
+
+ // Advance the cursor to the next frame
+ virtual void Next() = 0;
+
+ // Get the current video frame, or NULL on End-Of-Stream.
+ virtual vpx_image_t *img() const = 0;
+
+ // Get the presentation timestamp of the current frame.
+ virtual vpx_codec_pts_t pts() const = 0;
+
+ // Get the current frame's duration
+ virtual unsigned long duration() const = 0;
+
+ // Get the timebase for the stream
+ virtual vpx_rational_t timebase() const = 0;
+
+ // Get the current frame counter, starting at 0.
+ virtual unsigned int frame() const = 0;
+
+ // Get the current file limit.
+ virtual unsigned int limit() const = 0;
+};
+
+
+class DummyVideoSource : public VideoSource {
+ public:
+ DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) {
+ SetSize(80, 64);
+ }
+
+ virtual ~DummyVideoSource() { vpx_img_free(img_); }
+
+ virtual void Begin() {
+ frame_ = 0;
+ FillFrame();
+ }
+
+ virtual void Next() {
+ ++frame_;
+ FillFrame();
+ }
+
+ virtual vpx_image_t *img() const {
+ return (frame_ < limit_) ? img_ : NULL;
+ }
+
+ // Models a stream where Timebase = 1/FPS, so pts == frame.
+ virtual vpx_codec_pts_t pts() const { return frame_; }
+
+ virtual unsigned long duration() const { return 1; }
+
+ virtual vpx_rational_t timebase() const {
+ const vpx_rational_t t = {1, 30};
+ return t;
+ }
+
+ virtual unsigned int frame() const { return frame_; }
+
+ virtual unsigned int limit() const { return limit_; }
+
+ void SetSize(unsigned int width, unsigned int height) {
+ if (width != width_ || height != height_) {
+ vpx_img_free(img_);
+ raw_sz_ = ((width + 31)&~31) * height * 3 / 2;
+ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32);
+ width_ = width;
+ height_ = height;
+ }
+ }
+
+ protected:
+ virtual void FillFrame() { memset(img_->img_data, 0, raw_sz_); }
+
+ vpx_image_t *img_;
+ size_t raw_sz_;
+ unsigned int limit_;
+ unsigned int frame_;
+ unsigned int width_;
+ unsigned int height_;
+};
+
+
+class RandomVideoSource : public DummyVideoSource {
+ public:
+ RandomVideoSource(int seed = ACMRandom::DeterministicSeed())
+ : rnd_(seed),
+ seed_(seed) { }
+
+ protected:
+ // Reset the RNG to get a matching stream for the second pass
+ virtual void Begin() {
+ frame_ = 0;
+ rnd_.Reset(seed_);
+ FillFrame();
+ }
+
+ // 15 frames of noise, followed by 15 static frames. Reset to 0 rather
+  // than holding previous frames to encourage keyframes to be inserted.
+ virtual void FillFrame() {
+ if (frame_ % 30 < 15)
+ for (size_t i = 0; i < raw_sz_; ++i)
+ img_->img_data[i] = rnd_.Rand8();
+ else
+ memset(img_->img_data, 0, raw_sz_);
+ }
+
+ ACMRandom rnd_;
+ int seed_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// compressed data to the decoder.
+class CompressedVideoSource {
+ public:
+ virtual ~CompressedVideoSource() {}
+
+ virtual void Init() = 0;
+
+ // Prepare the stream for reading, rewind/open as necessary.
+ virtual void Begin() = 0;
+
+ // Advance the cursor to the next frame
+ virtual void Next() = 0;
+
+ virtual const uint8_t *cxdata() const = 0;
+
+ virtual const unsigned int frame_size() const = 0;
+
+ virtual const unsigned int frame_number() const = 0;
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_VIDEO_SOURCE_H_
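
The Begin()/Next()/img() contract implies the iteration idiom the test
drivers are expected to use: img() returning NULL marks end-of-stream. A
minimal consumer sketch, assuming nothing beyond the interface declared
above (run over a DummyVideoSource it would visit limit_, i.e. 100, frames):

    #include "test/video_source.h"

    // Pull every frame from a source and hand it to a per-frame callback.
    template <typename Handler>
    void ForEachFrame(libvpx_test::VideoSource *video, Handler handle) {
      for (video->Begin(); video->img() != NULL; video->Next())
        handle(video->img(), video->pts(), video->duration());
    }
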
diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c
index 930a7ae..c142a17 100644
--- a/third_party/libyuv/source/scale.c
+++ b/third_party/libyuv/source/scale.c
@@ -60,7 +60,7 @@ void SetUseReferenceImpl(int use) {
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SCALEROWDOWN2_NEON
-void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */,
+void ScaleRowDown2_NEON(const uint8* src_ptr, int src_stride,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
@@ -102,7 +102,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
}
#define HAS_SCALEROWDOWN4_NEON
-static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */,
+static void ScaleRowDown4_NEON(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"1: \n"
@@ -160,7 +160,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride,
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up the every 4th pixel into a 4 different registers.
// Point samples 32 pixels to 24 pixels.
-static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */,
+static void ScaleRowDown34_NEON(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"1: \n"
@@ -284,7 +284,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) =
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
// 32 -> 12
-static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
+static void ScaleRowDown38_NEON(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"vld1.u8 {q3}, [%3] \n"
diff --git a/tools/ftfy.sh b/tools/ftfy.sh
index 95fd397..c5cfdea 100755
--- a/tools/ftfy.sh
+++ b/tools/ftfy.sh
@@ -34,7 +34,7 @@ vpx_style() {
--align-pointer=name \
--indent-preprocessor --convert-tabs --indent-labels \
--suffix=none --quiet "$@"
- sed -i 's/[[:space:]]\{1,\},/,/g' "$@"
+ sed -i "" 's/[[:space:]]\{1,\},/,/g' "$@"
}
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index d58e49c..8af9e90 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -17,23 +17,6 @@
#include "entropymode.h"
#include "systemdependent.h"
-
-extern void vp8_init_scan_order_mask();
-
-static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
-{
- int i;
- vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
-
- for (i = 0; i < rows; i++)
- {
- /* TODO(holmer): Bug? This updates the last element of each row
- * rather than the border element!
- */
- vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
- }
-}
-
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
int i;
@@ -45,16 +28,20 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
if (oci->post_proc_buffer_int_used)
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
+
+ vpx_free(oci->pp_limits_buffer);
+ oci->pp_limits_buffer = NULL;
#endif
vpx_free(oci->above_context);
vpx_free(oci->mip);
+#if CONFIG_ERROR_CONCEALMENT
vpx_free(oci->prev_mip);
+ oci->prev_mip = NULL;
+#endif
- oci->above_context = 0;
- oci->mip = 0;
- oci->prev_mip = 0;
-
+ oci->above_context = NULL;
+ oci->mip = NULL;
}
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
@@ -76,10 +63,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->fb_idx_ref_cnt[i] = 0;
oci->yv12_fb[i].flags = 0;
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
- {
- vp8_de_alloc_frame_buffers(oci);
- return 1;
- }
+ goto allocation_fail;
}
oci->new_fb_idx = 0;
@@ -93,22 +77,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->fb_idx_ref_cnt[3] = 1;
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
- {
- vp8_de_alloc_frame_buffers(oci);
- return 1;
- }
-
-#if CONFIG_POSTPROC
- if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
- {
- vp8_de_alloc_frame_buffers(oci);
- return 1;
- }
-
- oci->post_proc_buffer_int_used = 0;
- vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
- vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
-#endif
+ goto allocation_fail;
oci->mb_rows = height >> 4;
oci->mb_cols = width >> 4;
@@ -117,44 +86,43 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
if (!oci->mip)
- {
- vp8_de_alloc_frame_buffers(oci);
- return 1;
- }
+ goto allocation_fail;
oci->mi = oci->mip + oci->mode_info_stride + 1;
- /* allocate memory for last frame MODE_INFO array */
-#if CONFIG_ERROR_CONCEALMENT
- oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
-
- if (!oci->prev_mip)
- {
- vp8_de_alloc_frame_buffers(oci);
- return 1;
- }
-
- oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
-#else
- oci->prev_mip = NULL;
- oci->prev_mi = NULL;
-#endif
+ /* Allocation of previous mode info will be done in vp8_decode_frame()
+     * as it is decoder-only data */
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
if (!oci->above_context)
- {
- vp8_de_alloc_frame_buffers(oci);
- return 1;
- }
+ goto allocation_fail;
- update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
-#if CONFIG_ERROR_CONCEALMENT
- update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols);
+#if CONFIG_POSTPROC
+ if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
+ goto allocation_fail;
+
+ oci->post_proc_buffer_int_used = 0;
+ vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
+ vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
+ oci->post_proc_buffer.frame_size);
+
+ /* Allocate buffer to store post-processing filter coefficients.
+ *
+ * Note: Round up mb_cols to support SIMD reads
+ */
+ oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
+ if (!oci->pp_limits_buffer)
+ goto allocation_fail;
#endif
return 0;
+
+allocation_fail:
+ vp8_de_alloc_frame_buffers(oci);
+ return 1;
}
+
void vp8_setup_version(VP8_COMMON *cm)
{
switch (cm->version)
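
The allocation_fail refactor above is the classic single-exit cleanup idiom
in C: every failed allocation jumps to one label that releases whatever was
acquired so far, which is safe because vp8_de_alloc_frame_buffers(), like
free(), tolerates pointers that were never allocated. A generic sketch of the
pattern, with hypothetical names:

    #include <stdlib.h>

    struct buffers { void *a; void *b; };

    /* Returns 0 on success, 1 on failure; the single failure path
     * releases whatever was acquired. */
    static int alloc_buffers(struct buffers *bufs, size_t n) {
      bufs->a = NULL;
      bufs->b = NULL;

      bufs->a = malloc(n);
      if (!bufs->a) goto allocation_fail;

      bufs->b = malloc(n);
      if (!bufs->b) goto allocation_fail;

      return 0;

    allocation_fail:
      free(bufs->a);  /* free(NULL) is a no-op, so order is irrelevant */
      free(bufs->b);
      return 1;
    }
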
diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm
index a974cd1..c5ec824 100644
--- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm
+++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm
@@ -18,15 +18,23 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_intra4x4_predict(unsigned char *src, int src_stride, int b_mode,
-; unsigned char *dst, int dst_stride)
-
+;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
+;                                int left_stride, B_PREDICTION_MODE b_mode,
+; unsigned char *dst, int dst_stride,
+; unsigned char top_left)
+
+; r0: *Above
+; r1: *yleft
+; r2: left_stride
+; r3: b_mode
+; sp + #40: dst
+; sp + #44: dst_stride
+; sp + #48: top_left
|vp8_intra4x4_predict_armv6| PROC
push {r4-r12, lr}
-
- cmp r2, #10
- addlt pc, pc, r2, lsl #2 ; position independent switch
+ cmp r3, #10
+ addlt pc, pc, r3, lsl #2 ; position independent switch
pop {r4-r12, pc} ; default
b b_dc_pred
b b_tm_pred
@@ -41,13 +49,13 @@
b_dc_pred
; load values
- ldr r8, [r0, -r1] ; Above
- ldrb r4, [r0, #-1]! ; Left[0]
+ ldr r8, [r0] ; Above
+ ldrb r4, [r1], r2 ; Left[0]
mov r9, #0
- ldrb r5, [r0, r1] ; Left[1]
- ldrb r6, [r0, r1, lsl #1]! ; Left[2]
+ ldrb r5, [r1], r2 ; Left[1]
+ ldrb r6, [r1], r2 ; Left[2]
usad8 r12, r8, r9
- ldrb r7, [r0, r1] ; Left[3]
+ ldrb r7, [r1] ; Left[3]
; calculate dc
add r4, r4, r5
@@ -55,31 +63,30 @@ b_dc_pred
add r4, r4, r7
add r4, r4, r12
add r4, r4, #4
- ldr r0, [sp, #40] ; load stride
+ ldr r0, [sp, #44] ; dst_stride
mov r12, r4, asr #3 ; (expected_dc + 4) >> 3
add r12, r12, r12, lsl #8
- add r3, r3, r0
+ ldr r3, [sp, #40] ; dst
add r12, r12, r12, lsl #16
; store values
- str r12, [r3, -r0]
+ str r12, [r3], r0
+ str r12, [r3], r0
+ str r12, [r3], r0
str r12, [r3]
- str r12, [r3, r0]
- str r12, [r3, r0, lsl #1]
pop {r4-r12, pc}
b_tm_pred
- sub r10, r0, #1 ; Left
- ldr r8, [r0, -r1] ; Above
- ldrb r9, [r10, -r1] ; top_left
- ldrb r4, [r0, #-1]! ; Left[0]
- ldrb r5, [r10, r1]! ; Left[1]
- ldrb r6, [r0, r1, lsl #1] ; Left[2]
- ldrb r7, [r10, r1, lsl #1] ; Left[3]
- ldr r0, [sp, #40] ; load stride
-
+ ldr r8, [r0] ; Above
+ ldrb r9, [sp, #48] ; top_left
+ ldrb r4, [r1], r2 ; Left[0]
+ ldrb r5, [r1], r2 ; Left[1]
+ ldrb r6, [r1], r2 ; Left[2]
+ ldrb r7, [r1] ; Left[3]
+ ldr r0, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
add r9, r9, r9, lsl #16 ; [tl|tl]
uxtb16 r10, r8 ; a[2|0]
@@ -126,25 +133,26 @@ b_tm_pred
str r12, [r3], r0
add r12, r4, r5, lsl #8 ; [3|2|1|0]
- str r12, [r3], r0
+ str r12, [r3]
pop {r4-r12, pc}
b_ve_pred
- ldr r8, [r0, -r1]! ; a[3|2|1|0]
+ ldr r8, [r0] ; a[3|2|1|0]
ldr r11, c00FF00FF
- ldrb r9, [r0, #-1] ; top_left
+ ldrb r9, [sp, #48] ; top_left
ldrb r10, [r0, #4] ; a[4]
ldr r0, c00020002
uxtb16 r4, r8 ; a[2|0]
uxtb16 r5, r8, ror #8 ; a[3|1]
- ldr r2, [sp, #40] ; stride
+ ldr r2, [sp, #44] ; dst_stride
pkhbt r9, r9, r5, lsl #16 ; a[1|-1]
add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ]
uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ]
+ ldr r3, [sp, #40] ; dst
uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
add r0, r0, r10, lsl #16 ;[a[4]+2 | 2]
@@ -154,25 +162,23 @@ b_ve_pred
and r9, r11, r9, asr #2
and r4, r11, r4, asr #2
- add r3, r3, r2 ; dst + dst_stride
add r9, r9, r4, lsl #8
; store values
- str r9, [r3, -r2]
+ str r9, [r3], r2
+ str r9, [r3], r2
+ str r9, [r3], r2
str r9, [r3]
- str r9, [r3, r2]
- str r9, [r3, r2, lsl #1]
pop {r4-r12, pc}
b_he_pred
- sub r10, r0, #1 ; Left
- ldrb r4, [r0, #-1]! ; Left[0]
- ldrb r8, [r10, -r1] ; top_left
- ldrb r5, [r10, r1]! ; Left[1]
- ldrb r6, [r0, r1, lsl #1] ; Left[2]
- ldrb r7, [r10, r1, lsl #1] ; Left[3]
+ ldrb r4, [r1], r2 ; Left[0]
+ ldrb r8, [sp, #48] ; top_left
+ ldrb r5, [r1], r2 ; Left[1]
+ ldrb r6, [r1], r2 ; Left[2]
+ ldrb r7, [r1] ; Left[3]
add r8, r8, r4 ; tl + l[0]
add r9, r4, r5 ; l[0] + l[1]
@@ -197,7 +203,8 @@ b_he_pred
pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2]
pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3]
- ldr r0, [sp, #40] ; stride
+ ldr r0, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
add r8, r8, r8, lsl #8 ; l[0|0|0|0]
add r9, r9, r9, lsl #8 ; l[1|1|1|1]
@@ -206,16 +213,16 @@ b_he_pred
; store values
str r8, [r3], r0
- str r9, [r3]
- str r10, [r3, r0]
- str r11, [r3, r0, lsl #1]
+ str r9, [r3], r0
+ str r10, [r3], r0
+ str r11, [r3]
pop {r4-r12, pc}
b_ld_pred
- ldr r4, [r0, -r1]! ; Above
+ ldr r4, [r0] ; Above[0-3]
ldr r12, c00020002
- ldr r5, [r0, #4]
+ ldr r5, [r0, #4] ; Above[4-7]
ldr lr, c00FF00FF
uxtb16 r6, r4 ; a[2|0]
@@ -225,7 +232,6 @@ b_ld_pred
pkhtb r10, r6, r8 ; a[2|4]
pkhtb r11, r7, r9 ; a[3|5]
-
add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1]
add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2]
uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
@@ -244,7 +250,8 @@ b_ld_pred
add r7, r7, r9, asr #16 ; [ a5+2*a6+a7]
uxtah r7, r7, r12 ; [ a5+2*a6+a7+2]
- ldr r0, [sp, #40] ; stride
+ ldr r0, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
; scale down
and r4, lr, r4, asr #2
@@ -266,18 +273,17 @@ b_ld_pred
mov r6, r6, lsr #16
mov r11, r10, lsr #8
add r11, r11, r6, lsl #24 ; [6|5|4|3]
- str r11, [r3], r0
+ str r11, [r3]
pop {r4-r12, pc}
b_rd_pred
- sub r12, r0, r1 ; Above = src - src_stride
- ldrb r7, [r0, #-1]! ; l[0] = pp[3]
- ldr lr, [r12] ; Above = pp[8|7|6|5]
- ldrb r8, [r12, #-1]! ; tl = pp[4]
- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2]
- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1]
- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0]
+ ldrb r7, [r1], r2 ; l[0] = pp[3]
+ ldr lr, [r0] ; Above = pp[8|7|6|5]
+ ldrb r8, [sp, #48] ; tl = pp[4]
+ ldrb r6, [r1], r2 ; l[1] = pp[2]
+ ldrb r5, [r1], r2 ; l[2] = pp[1]
+ ldrb r4, [r1], r2 ; l[3] = pp[0]
uxtb16 r9, lr ; p[7|5]
@@ -307,7 +313,8 @@ b_rd_pred
add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
- ldr r0, [sp, #40] ; stride
+ ldr r0, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
; scale down
and r7, lr, r7, asr #2
@@ -328,18 +335,17 @@ b_rd_pred
mov r11, r10, lsl #8 ; [3|2|1|-]
uxtab r11, r11, r4 ; [3|2|1|0]
- str r11, [r3], r0
+ str r11, [r3]
pop {r4-r12, pc}
b_vr_pred
- sub r12, r0, r1 ; Above = src - src_stride
- ldrb r7, [r0, #-1]! ; l[0] = pp[3]
- ldr lr, [r12] ; Above = pp[8|7|6|5]
- ldrb r8, [r12, #-1]! ; tl = pp[4]
- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2]
- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1]
- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0]
+ ldrb r7, [r1], r2 ; l[0] = pp[3]
+ ldr lr, [r0] ; Above = pp[8|7|6|5]
+ ldrb r8, [sp, #48] ; tl = pp[4]
+ ldrb r6, [r1], r2 ; l[1] = pp[2]
+ ldrb r5, [r1], r2 ; l[2] = pp[1]
+ ldrb r4, [r1] ; l[3] = pp[0]
add r5, r5, r7, lsl #16 ; p[3|1]
add r6, r6, r8, lsl #16 ; p[4|2]
@@ -376,7 +382,8 @@ b_vr_pred
add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
- ldr r0, [sp, #40] ; stride
+ ldr r0, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
; scale down
and r5, lr, r5, asr #2 ; [B|A]
@@ -397,14 +404,14 @@ b_vr_pred
pkhtb r10, r7, r5, asr #16 ; [-|H|-|B]
str r2, [r3], r0
add r12, r12, r10, lsl #8 ; [H|D|B|A]
- str r12, [r3], r0
+ str r12, [r3]
pop {r4-r12, pc}
b_vl_pred
- ldr r4, [r0, -r1]! ; [3|2|1|0]
+ ldr r4, [r0] ; [3|2|1|0] = Above[0-3]
ldr r12, c00020002
- ldr r5, [r0, #4] ; [7|6|5|4]
+ ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7]
ldr lr, c00FF00FF
ldr r2, c00010001
@@ -441,7 +448,8 @@ b_vl_pred
add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5]
uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
- ldr r0, [sp, #40] ; stride
+ ldr r0, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
; scale down
and r5, lr, r5, asr #2 ; [D|C]
@@ -449,7 +457,6 @@ b_vl_pred
and r8, lr, r8, asr #2 ; [I|D]
and r9, lr, r9, asr #2 ; [J|H]
-
add r10, r4, r6, lsl #8 ; [F|B|E|A]
str r10, [r3], r0
@@ -463,18 +470,17 @@ b_vl_pred
str r12, [r3], r0
add r10, r7, r10, lsl #8 ; [J|H|D|G]
- str r10, [r3], r0
+ str r10, [r3]
pop {r4-r12, pc}
b_hd_pred
- sub r12, r0, r1 ; Above = src - src_stride
- ldrb r7, [r0, #-1]! ; l[0] = pp[3]
- ldr lr, [r12] ; Above = pp[8|7|6|5]
- ldrb r8, [r12, #-1]! ; tl = pp[4]
- ldrb r6, [r0, r1] ; l[1] = pp[2]
- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1]
- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0]
+ ldrb r7, [r1], r2 ; l[0] = pp[3]
+ ldr lr, [r0] ; Above = pp[8|7|6|5]
+ ldrb r8, [sp, #48] ; tl = pp[4]
+ ldrb r6, [r1], r2 ; l[1] = pp[2]
+ ldrb r5, [r1], r2 ; l[2] = pp[1]
+ ldrb r4, [r1] ; l[3] = pp[0]
uxtb16 r9, lr ; p[7|5]
uxtb16 r10, lr, ror #8 ; p[8|6]
@@ -492,7 +498,6 @@ b_hd_pred
pkhtb r1, r9, r10 ; p[7|6]
pkhbt r10, r8, r10, lsl #16 ; p[6|5]
-
uadd16 r11, r4, r5 ; [p1+p2 | p0+p1]
uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
; [B|A]
@@ -518,7 +523,8 @@ b_hd_pred
and r5, lr, r5, asr #2 ; [H|G]
and r6, lr, r6, asr #2 ; [J|I]
- ldr lr, [sp, #40] ; stride
+ ldr lr, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
pkhtb r2, r0, r6 ; [-|F|-|I]
pkhtb r12, r6, r5, asr #16 ; [-|J|-|H]
@@ -527,7 +533,6 @@ b_hd_pred
mov r12, r12, ror #24 ; [J|I|H|F]
str r12, [r3], lr
-
mov r7, r11, asr #16 ; [-|-|-|B]
str r2, [r3], lr
add r7, r7, r0, lsl #16 ; [-|E|-|B]
@@ -536,21 +541,20 @@ b_hd_pred
str r7, [r3], lr
add r5, r11, r4, lsl #8 ; [D|B|C|A]
- str r5, [r3], lr
+ str r5, [r3]
pop {r4-r12, pc}
b_hu_pred
- ldrb r4, [r0, #-1]! ; Left[0]
+ ldrb r4, [r1], r2 ; Left[0]
ldr r12, c00020002
- ldrb r5, [r0, r1]! ; Left[1]
+ ldrb r5, [r1], r2 ; Left[1]
ldr lr, c00FF00FF
- ldrb r6, [r0, r1]! ; Left[2]
+ ldrb r6, [r1], r2 ; Left[2]
ldr r2, c00010001
- ldrb r7, [r0, r1] ; Left[3]
-
+ ldrb r7, [r1] ; Left[3]
add r4, r4, r5, lsl #16 ; [1|0]
add r5, r5, r6, lsl #16 ; [2|1]
@@ -563,7 +567,8 @@ b_hu_pred
add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2]
uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
- ldr r2, [sp, #40] ; stride
+ ldr r2, [sp, #44] ; dst_stride
+ ldr r3, [sp, #40] ; dst
and r4, lr, r4, asr #2 ; [D|C]
add r10, r6, r7 ; [p2+p3]
@@ -587,9 +592,9 @@ b_hu_pred
add r10, r11, lsl #8 ; [-|-|F|E]
add r10, r10, r9, lsl #16 ; [G|G|F|E]
- str r10, [r3]
+ str r10, [r3], r2
- str r7, [r3, r2]
+ str r7, [r3]
pop {r4-r12, pc}
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
index 65a4680..79ff02c 100644
--- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
@@ -46,7 +46,7 @@
vst1.32 {d2[1]}, [r3], r12
vst1.32 {d4[0]}, [r3], r12
vst1.32 {d4[1]}, [r3]
-
+
bx lr
ENDP
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index a4c1d92..f7ff577 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -161,22 +161,32 @@ typedef struct
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
} MB_MODE_INFO;
-typedef struct
+typedef struct modeinfo
{
MB_MODE_INFO mbmi;
union b_mode_info bmi[16];
} MODE_INFO;
#if CONFIG_MULTI_RES_ENCODING
-/* The information needed to be stored for higher-resolution encoder */
+/* The mb-level information to be stored for the higher-resolution encoder */
typedef struct
{
MB_PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame;
int_mv mv;
- //union b_mode_info bmi[16];
- int dissim; // dissimilarity level of the macroblock
-} LOWER_RES_INFO;
+ int dissim; /* dissimilarity level of the macroblock */
+} LOWER_RES_MB_INFO;
+
+/* The frame-level information to be stored for the higher-resolution
+ * encoder */
+typedef struct
+{
+ FRAME_TYPE frame_type;
+ int is_frame_dropped;
+    /* The frame number of each reference frame */
+ unsigned int low_res_ref_frames[MAX_REF_FRAMES];
+ LOWER_RES_MB_INFO *mb_info;
+} LOWER_RES_FRAME_INFO;
#endif
typedef struct blockd
@@ -216,12 +226,6 @@ typedef struct macroblockd
MODE_INFO *mode_info_context;
int mode_info_stride;
-#if CONFIG_TEMPORAL_DENOISING
- MB_PREDICTION_MODE best_sse_inter_mode;
- int_mv best_sse_mv;
- unsigned char need_to_clamp_best_mvs;
-#endif
-
FRAME_TYPE frame_type;
int up_available;
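
With the split into LOWER_RES_MB_INFO and LOWER_RES_FRAME_INFO, the
higher-resolution encoder receives frame-level flags plus one mb_info record
per macroblock of the lower-resolution frame. A plausible setup sketch; the
allocator below is not part of this patch and the sizing from the mb grid is
an assumption:

    #include <stdlib.h>
    #include "vp8/common/blockd.h"  /* LOWER_RES_* types
                                       (CONFIG_MULTI_RES_ENCODING builds) */

    static LOWER_RES_FRAME_INFO *alloc_low_res_info(int mb_rows, int mb_cols) {
      LOWER_RES_FRAME_INFO *info =
          (LOWER_RES_FRAME_INFO *)calloc(1, sizeof(*info));
      if (!info) return NULL;
      /* Assumed: one record per macroblock of the low-res frame. */
      info->mb_info = (LOWER_RES_MB_INFO *)calloc(
          (size_t)mb_rows * mb_cols, sizeof(*info->mb_info));
      if (!info->mb_info) {
        free(info);
        return NULL;
      }
      return info;
    }
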
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index a95a923..8c046a4 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -101,7 +101,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
/* vp8_coef_encodings generated with:
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
*/
-const vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
+vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
{
{2, 2},
{6, 3},
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index de7e828..091e4c7 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -160,9 +160,7 @@ const vp8_tree_index vp8_small_mvtree [14] =
void vp8_init_mbmode_probs(VP8_COMMON *x)
{
vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
- vpx_memcpy(x->kf_ymode_prob, vp8_kf_ymode_prob, sizeof(vp8_kf_ymode_prob));
vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
- vpx_memcpy(x->kf_uv_mode_prob, vp8_kf_uv_mode_prob, sizeof(vp8_kf_uv_mode_prob));
vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
}
@@ -171,7 +169,3 @@ void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
}
-void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1])
-{
- vpx_memcpy(p, vp8_kf_bmode_prob, sizeof(vp8_kf_bmode_prob));
-}
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index 70200cb..1df0f64 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -24,11 +24,11 @@ typedef enum
SUBMVREF_LEFT_ABOVE_ZED
} sumvfref_t;
-typedef const int vp8_mbsplit[16];
+typedef int vp8_mbsplit[16];
#define VP8_NUMMBSPLITS 4
-extern vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
+extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
@@ -67,9 +67,14 @@ extern const vp8_tree_index vp8_small_mvtree[];
extern const struct vp8_token_struct vp8_small_mvencodings[8];
-void vp8_init_mbmode_probs(VP8_COMMON *x);
+/* Key frame default mode probs */
+extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
+[VP8_BINTRAMODES-1];
+extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
+extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
-void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
+void vp8_init_mbmode_probs(VP8_COMMON *x);
+void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
#endif
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 9089e16..c9bdd21 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -116,7 +116,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
- // If the side is not touching the bounder then don't extend.
+    /* If the side is not touching the boundary then don't extend. */
if (srcy)
et = 0;
if (srcx)
@@ -157,7 +157,10 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
/* note the extension is only for the last row, for intra prediction purpose */
-void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
+void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
+ unsigned char *YPtr,
+ unsigned char *UPtr,
+ unsigned char *VPtr)
{
int i;
diff --git a/vp8/common/filter.h b/vp8/common/filter.h
index 0f225c2..b7591f2 100644
--- a/vp8/common/filter.h
+++ b/vp8/common/filter.h
@@ -19,4 +19,4 @@
extern const short vp8_bilinear_filters[8][2];
extern const short vp8_sub_pel_filters[8][6];
-#endif //FILTER_H
+#endif
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 2a30166..5a6ac7b 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -83,57 +83,6 @@ static int get_cpu_count()
#endif
-#if HAVE_PTHREAD_H
-#include <pthread.h>
-static void once(void (*func)(void))
-{
- static pthread_once_t lock = PTHREAD_ONCE_INIT;
- pthread_once(&lock, func);
-}
-
-
-#elif defined(_WIN32)
-static void once(void (*func)(void))
-{
- /* Using a static initializer here rather than InitializeCriticalSection()
- * since there's no race-free context in which to execute it. Protecting
- * it with an atomic op like InterlockedCompareExchangePointer introduces
- * an x86 dependency, and InitOnceExecuteOnce requires Vista.
- */
- static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0};
- static int done;
-
- EnterCriticalSection(&lock);
-
- if (!done)
- {
- func();
- done = 1;
- }
-
- LeaveCriticalSection(&lock);
-}
-
-
-#else
-/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
- * so as long as your platform provides atomic loads/stores of pointers
- * no synchronization is strictly necessary.
- */
-
-static void once(void (*func)(void))
-{
- static int done;
-
- if(!done)
- {
- func();
- done = 1;
- }
-}
-#endif
-
-
void vp8_machine_specific_config(VP8_COMMON *ctx)
{
#if CONFIG_MULTITHREAD
@@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
#elif ARCH_X86 || ARCH_X86_64
ctx->cpu_caps = x86_simd_caps();
#endif
-
- once(vpx_rtcd);
}
diff --git a/vp8/common/idctllm_test.cc b/vp8/common/idctllm_test.cc
deleted file mode 100755
index 0f6ebe7..0000000
--- a/vp8/common/idctllm_test.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
- extern "C" {
- void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
- int pred_stride, unsigned char *dst_ptr,
- int dst_stride);
-}
-
-#include "vpx_config.h"
-#include "idctllm_test.h"
-namespace
-{
-
-INSTANTIATE_TEST_CASE_P(C, IDCTTest,
- ::testing::Values(vp8_short_idct4x4llm_c));
-
-} // namespace
-
-int main(int argc, char **argv) {
- ::testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
diff --git a/vp8/common/idctllm_test.h b/vp8/common/idctllm_test.h
deleted file mode 100755
index a6a694b..0000000
--- a/vp8/common/idctllm_test.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
- #include "third_party/googletest/src/include/gtest/gtest.h"
-typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
- int pred_stride, unsigned char *dst_ptr,
- int dst_stride);
-namespace {
-class IDCTTest : public ::testing::TestWithParam<idct_fn_t>
-{
- protected:
- virtual void SetUp()
- {
- int i;
-
- UUT = GetParam();
- memset(input, 0, sizeof(input));
- /* Set up guard blocks */
- for(i=0; i<256; i++)
- output[i] = ((i&0xF)<4&&(i<64))?0:-1;
- }
-
- idct_fn_t UUT;
- short input[16];
- unsigned char output[256];
- unsigned char predict[256];
-};
-
-TEST_P(IDCTTest, TestGuardBlocks)
-{
- int i;
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(0, output[i]) << i;
- else
- EXPECT_EQ(255, output[i]);
-}
-
-TEST_P(IDCTTest, TestAllZeros)
-{
- int i;
-
- UUT(input, output, 16, output, 16);
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(0, output[i]) << "i==" << i;
- else
- EXPECT_EQ(255, output[i]) << "i==" << i;
-}
-
-TEST_P(IDCTTest, TestAllOnes)
-{
- int i;
-
- input[0] = 4;
- UUT(input, output, 16, output, 16);
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(1, output[i]) << "i==" << i;
- else
- EXPECT_EQ(255, output[i]) << "i==" << i;
-}
-
-TEST_P(IDCTTest, TestAddOne)
-{
- int i;
-
- for(i=0; i<256; i++)
- predict[i] = i;
-
- input[0] = 4;
- UUT(input, predict, 16, output, 16);
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(i+1, output[i]) << "i==" << i;
- else
- EXPECT_EQ(255, output[i]) << "i==" << i;
-}
-
-TEST_P(IDCTTest, TestWithData)
-{
- int i;
-
- for(i=0; i<16; i++)
- input[i] = i;
-
- UUT(input, output, 16, output, 16);
-
- for(i=0; i<256; i++)
- if((i&0xF) > 3 || i>63)
- EXPECT_EQ(255, output[i]) << "i==" << i;
- else if(i == 0)
- EXPECT_EQ(11, output[i]) << "i==" << i;
- else if(i == 34)
- EXPECT_EQ(1, output[i]) << "i==" << i;
- else if(i == 2 || i == 17 || i == 32)
- EXPECT_EQ(3, output[i]) << "i==" << i;
- else
- EXPECT_EQ(0, output[i]) << "i==" << i;
-}
-}
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 3f05efe..41b4f12 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -196,18 +196,122 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
}
}
-void vp8_loop_filter_frame
-(
- VP8_COMMON *cm,
- MACROBLOCKD *mbd
-)
+
+void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context,
+ int mb_row, int post_ystride, int post_uvstride,
+ unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr)
{
- YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+ int mb_col;
+ int filter_level;
loop_filter_info_n *lfi_n = &cm->lf_info;
loop_filter_info lfi;
-
FRAME_TYPE frame_type = cm->frame_type;
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ vp8_loop_filter_mbv
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bv
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_mbh
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bh
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
+
+}
+
+void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context,
+ int mb_row, int post_ystride, int post_uvstride,
+ unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr)
+{
+ int mb_col;
+ int filter_level;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ if (mb_col > 0)
+ vp8_loop_filter_simple_mbv
+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bv
+ (y_ptr, post_ystride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_simple_mbh
+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bh
+ (y_ptr, post_ystride, lfi_n->blim[filter_level]);
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
+
+}
+void vp8_loop_filter_frame(VP8_COMMON *cm,
+ MACROBLOCKD *mbd,
+ int frame_type)
+{
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+
int mb_row;
int mb_col;
int mb_rows = cm->mb_rows;
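
Both row filters share the same skip_lf test: a macroblock's interior 4x4
edges are filtered only when it coded residual or uses a sub-block prediction
mode, since a skipped whole-MB prediction cannot introduce blocking inside
the macroblock. The condition restated positively, as a hypothetical helper
over the types from vp8/common/blockd.h:

    #include "vp8/common/blockd.h"

    /* Interior (non-MB) edges need filtering only if the macroblock coded
     * residual, or predicts per 4x4 block (B_PRED) / per partition (SPLITMV).
     * Equivalent to !skip_lf in the row loops above. */
    static int needs_inner_edge_filter(MB_PREDICTION_MODE mode,
                                       int mb_skip_coeff) {
      return mode == B_PRED || mode == SPLITMV || !mb_skip_coeff;
    }
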
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index 0fa8375..b3af2d6 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -69,6 +69,7 @@ typedef void loop_filter_uvfunction
/* assorted loopfilter functions which get used elsewhere */
struct VP8Common;
struct macroblockd;
+struct modeinfo;
void vp8_loop_filter_init(struct VP8Common *cm);
@@ -76,7 +77,8 @@ void vp8_loop_filter_frame_init(struct VP8Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
-void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd);
+void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
+ int frame_type);
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
struct macroblockd *mbd,
@@ -89,4 +91,15 @@ void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl);
+void vp8_loop_filter_row_normal(struct VP8Common *cm,
+ struct modeinfo *mode_info_context,
+ int mb_row, int post_ystride, int post_uvstride,
+ unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr);
+
+void vp8_loop_filter_row_simple(struct VP8Common *cm,
+ struct modeinfo *mode_info_context,
+ int mb_row, int post_ystride, int post_uvstride,
+ unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr);
#endif
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
index ca67e91..3dff150 100644
--- a/vp8/common/mfqe.c
+++ b/vp8/common/mfqe.c
@@ -160,9 +160,9 @@ static void multiframe_quality_enhance_block
vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
vsad = (sse + 32)>>6;
#else
- sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8;
- usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, INT_MAX)+32)>>6;
- vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, INT_MAX)+32)>>6;
+ sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
+ usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
+ vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
#endif
}
else /* if (blksize == 8) */
@@ -177,16 +177,16 @@ static void multiframe_quality_enhance_block
vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
vsad = (sse + 8)>>4;
#else
- sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6;
- usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, INT_MAX)+8)>>4;
- vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, INT_MAX)+8)>>4;
+ sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
+ usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
+ vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
#endif
}
actrisk = (actd > act * 5);
- /* thr = qdiff/8 + log2(act) + log4(qprev) */
- thr = (qdiff >> 3);
+    /* thr = qdiff/16 + log2(actd) + log4(qprev) */
+ thr = (qdiff >> 4);
while (actd >>= 1) thr++;
while (qprev >>= 2) thr++;
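
The two shift loops are integer logarithms: the actd loop adds
floor(log2(actd)) and the qprev loop adds floor(log4(qprev)), so the
comment's formula is computed without any floating point. A worked
restatement (the function name is hypothetical):

    /* thr = qdiff/16 + floor(log2(actd)) + floor(log4(qprev)),
     * e.g. qdiff = 48, actd = 20, qprev = 70 gives 3 + 4 + 3 = 10. */
    static int mfqe_threshold(int qdiff, unsigned int actd,
                              unsigned int qprev) {
      int thr = qdiff >> 4;
      while (actd >>= 1) thr++;   /* + floor(log2(actd)) */
      while (qprev >>= 2) thr++;  /* + floor(log4(qprev)) */
      return thr;
    }
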
diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c
new file mode 100644
index 0000000..6823325
--- /dev/null
+++ b/vp8/common/mips/dspr2/dequantize_dspr2.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+#include "vpx_mem/vpx_mem.h"
+
+#if HAVE_DSPR2
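+/* Dequantize and inverse-transform one 4x4 block: scale the 16 coefficients
+ * by their quantizer step sizes, add the reconstructed residual to dest via
+ * the dspr2 IDCT, then clear the coefficient buffer (16 shorts, 32 bytes)
+ * for the next block. */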
+void vp8_dequant_idct_add_dspr2(short *input, short *dq,
+ unsigned char *dest, int stride)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ input[i] = dq[i] * input[i];
+ }
+
+ vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
+
+ vpx_memset(input, 0, 32);
+
+}
+
+#endif
diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c
new file mode 100644
index 0000000..71fdcd7
--- /dev/null
+++ b/vp8/common/mips/dspr2/filter_dspr2.c
@@ -0,0 +1,2823 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include "vpx_rtcd.h"
+#include "vpx_ports/mem.h"
+
+#if HAVE_DSPR2
+#define CROP_WIDTH 256
+unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
+
+static const unsigned short sub_pel_filterss[8][3] =
+{
+ { 0, 0, 0},
+ { 0, 0x0601, 0x7b0c},
+ { 0x0201, 0x0b08, 0x6c24},
+ { 0, 0x0906, 0x5d32},
+ { 0x0303, 0x1010, 0x4d4d},
+ { 0, 0x0609, 0x325d},
+ { 0x0102, 0x080b, 0x246c},
+ { 0, 0x0106, 0x0c7b},
+};
+
+
+static const int sub_pel_filters_int[8][3] =
+{
+ { 0, 0, 0},
+ { 0x0000fffa, 0x007b000c, 0xffff0000},
+ { 0x0002fff5, 0x006c0024, 0xfff80001},
+ { 0x0000fff7, 0x005d0032, 0xfffa0000},
+ { 0x0003fff0, 0x004d004d, 0xfff00003},
+ { 0x0000fffa, 0x0032005d, 0xfff70000},
+ { 0x0001fff8, 0x0024006c, 0xfff50002},
+ { 0x0000ffff, 0x000c007b, 0xfffa0000},
+};
+
+
+static const int sub_pel_filters_inv[8][3] =
+{
+ { 0, 0, 0},
+ { 0xfffa0000, 0x000c007b, 0x0000ffff},
+ { 0xfff50002, 0x0024006c, 0x0001fff8},
+ { 0xfff70000, 0x0032005d, 0x0000fffa},
+ { 0xfff00003, 0x004d004d, 0x0003fff0},
+ { 0xfffa0000, 0x005d0032, 0x0000fff7},
+ { 0xfff80001, 0x006c0024, 0x0002fff5},
+ { 0xffff0000, 0x007b000c, 0x0000fffa},
+};
+
+
+static const int sub_pel_filters_int_tap_4[8][2] =
+{
+ { 0, 0},
+ { 0xfffa007b, 0x000cffff},
+ { 0, 0},
+ { 0xfff7005d, 0x0032fffa},
+ { 0, 0},
+ { 0xfffa0032, 0x005dfff7},
+ { 0, 0},
+ { 0xffff000c, 0x007bfffa},
+};
+
+
+static const int sub_pel_filters_inv_tap_4[8][2] =
+{
+ { 0, 0},
+ { 0x007bfffa, 0xffff000c},
+ { 0, 0},
+ { 0x005dfff7, 0xfffa0032},
+ { 0, 0},
+ { 0x0032fffa, 0xfff7005d},
+ { 0, 0},
+ { 0x000cffff, 0xfffa007b},
+};
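+
+/* Each entry in the tables above packs a pair of 16-bit taps from the VP8
+ * sub-pel filter set into one word, so a single dpa.w.ph (signed _int/_inv
+ * tables) or dpau/dpsu (unsigned sub_pel_filterss magnitudes) instruction
+ * applies two taps at once. Odd offsets have zero outer taps and use the
+ * 4-tap tables; even, non-zero offsets use the full 6-tap filters. */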
+
+inline void prefetch_load(unsigned char *src)
+{
+ __asm__ __volatile__ (
+ "pref 0, 0(%[src]) \n\t"
+ :
+ : [src] "r" (src)
+ );
+}
+
+
+inline void prefetch_store(unsigned char *dst)
+{
+ __asm__ __volatile__ (
+ "pref 1, 0(%[dst]) \n\t"
+ :
+ : [dst] "r" (dst)
+ );
+}
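+
+/* pref hint 0 requests data for an upcoming load and hint 1 for an upcoming
+ * store; both are advisory, so they degrade to no-ops on cores that ignore
+ * them. */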
+
+void dsputil_static_init(void)
+{
+ int i;
+
+ for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i;
+
+ for (i = 0; i < CROP_WIDTH; i++)
+ {
+ ff_cropTbl[i] = 0;
+ ff_cropTbl[i + CROP_WIDTH + 256] = 255;
+ }
+}
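+
+/* cm = ff_cropTbl + CROP_WIDTH then acts as a saturating byte lookup:
+ * cm[-5] == 0, cm[128] == 128, cm[300] == 255. */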
+
+void vp8_filter_block2d_first_pass_4
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT dst_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int output_height,
+ int xoffset,
+ int pitch
+)
+{
+ unsigned int i;
+ int Temp1, Temp2, Temp3, Temp4;
+
+ unsigned int vector4a = 64;
+ int vector1b, vector2b, vector3b;
+ unsigned int tp1, tp2, tn1, tn2;
+ unsigned int p1, p2, p3;
+ unsigned int n1, n2, n3;
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ vector3b = sub_pel_filters_inv[xoffset][2];
+
+ /* if (xoffset == 0) we don't need any filtering */
+ if (vector3b == 0)
+ {
+ for (i = 0; i < output_height; i++)
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + src_pixels_per_line);
+ dst_ptr[0] = src_ptr[0];
+ dst_ptr[1] = src_ptr[1];
+ dst_ptr[2] = src_ptr[2];
+ dst_ptr[3] = src_ptr[3];
+
+ /* next row... */
+ src_ptr += src_pixels_per_line;
+ dst_ptr += 4;
+ }
+ }
+ else
+ {
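+ /* the upper halfword of sub_pel_filters_inv[xoffset][2] is non-zero
+ only for the 6-tap filters (even xoffset), so values above 65536
+ select the 6-tap path and smaller values the 4-tap path */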
+ if (vector3b > 65536)
+ {
+ /* 6 tap filter */
+
+ vector1b = sub_pel_filters_inv[xoffset][0];
+ vector2b = sub_pel_filters_inv[xoffset][1];
+
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + src_pixels_per_line);
+
+ for (i = output_height; i--;)
+ {
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp1], -2(%[src_ptr]) \n\t"
+ "ulw %[tp2], 2(%[src_ptr]) \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p1], %[tp2] \n\t"
+ "balign %[tp2], %[tp1], 3 \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
+
+ /* odd 1. pixel */
+ "ulw %[tn2], 3(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "preceu.ph.qbr %[n3], %[tn2] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n1], %[tn2] \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ /* clamp */
+ "lbux %[tp1], %[Temp1](%[cm]) \n\t"
+ "lbux %[tn1], %[Temp2](%[cm]) \n\t"
+ "lbux %[tp2], %[Temp3](%[cm]) \n\t"
+ "lbux %[n2], %[Temp4](%[cm]) \n\t"
+
+ /* store bytes */
+ "sb %[tp1], 0(%[dst_ptr]) \n\t"
+ "sb %[tn1], 1(%[dst_ptr]) \n\t"
+ "sb %[tp2], 2(%[dst_ptr]) \n\t"
+ "sb %[n2], 3(%[dst_ptr]) \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1),
+ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2),
+ [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2),
+ [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr),
+ [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr)
+ );
+
+ /* Next row... */
+ src_ptr += src_pixels_per_line;
+ dst_ptr += pitch;
+ }
+ }
+ else
+ {
+ /* 4 tap filter */
+
+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0];
+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1];
+
+ for (i = output_height; i--;)
+ {
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp1], -1(%[src_ptr]) \n\t"
+ "ulw %[tp2], 3(%[src_ptr]) \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 1. pixel */
+ "srl %[tn1], %[tp2], 8 \n\t"
+ "balign %[tp2], %[tp1], 3 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "preceu.ph.qbr %[n3], %[tn1] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ /* clamp and store results */
+ "lbux %[tp1], %[Temp1](%[cm]) \n\t"
+ "lbux %[tn1], %[Temp2](%[cm]) \n\t"
+ "lbux %[tp2], %[Temp3](%[cm]) \n\t"
+ "sb %[tp1], 0(%[dst_ptr]) \n\t"
+ "sb %[tn1], 1(%[dst_ptr]) \n\t"
+ "lbux %[n2], %[Temp4](%[cm]) \n\t"
+ "sb %[tp2], 2(%[dst_ptr]) \n\t"
+ "sb %[n2], 3(%[dst_ptr]) \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1),
+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr),
+ [src_ptr] "r" (src_ptr)
+ );
+ /* Next row... */
+ src_ptr += src_pixels_per_line;
+ dst_ptr += pitch;
+ }
+ }
+ }
+}
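+
+/* The sub-pel filters are separable: the first-pass routines filter rows into
+ * a narrow intermediate buffer and the second-pass routines further below
+ * filter that buffer vertically. */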
+
+void vp8_filter_block2d_first_pass_8_all
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT dst_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int output_height,
+ int xoffset,
+ int pitch
+)
+{
+ unsigned int i;
+ int Temp1, Temp2, Temp3, Temp4;
+
+ unsigned int vector4a = 64;
+ unsigned int vector1b, vector2b, vector3b;
+ unsigned int tp1, tp2, tn1, tn2;
+ unsigned int p1, p2, p3, p4;
+ unsigned int n1, n2, n3, n4;
+
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ /* if (xoffset == 0) we don't need any filtering */
+ if (xoffset == 0)
+ {
+ for (i = 0; i < output_height; i++)
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + src_pixels_per_line);
+
+ dst_ptr[0] = src_ptr[0];
+ dst_ptr[1] = src_ptr[1];
+ dst_ptr[2] = src_ptr[2];
+ dst_ptr[3] = src_ptr[3];
+ dst_ptr[4] = src_ptr[4];
+ dst_ptr[5] = src_ptr[5];
+ dst_ptr[6] = src_ptr[6];
+ dst_ptr[7] = src_ptr[7];
+
+ /* next row... */
+ src_ptr += src_pixels_per_line;
+ dst_ptr += 8;
+ }
+ }
+ else
+ {
+ vector3b = sub_pel_filters_inv[xoffset][2];
+
+ if (vector3b > 65536)
+ {
+ /* 6 tap filter */
+
+ vector1b = sub_pel_filters_inv[xoffset][0];
+ vector2b = sub_pel_filters_inv[xoffset][1];
+
+ for (i = output_height; i--;)
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + src_pixels_per_line);
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp1], -2(%[src_ptr]) \n\t"
+ "ulw %[tp2], 2(%[src_ptr]) \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p1], %[tp2] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
+
+ "balign %[tp2], %[tp1], 3 \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+ "ulw %[tn2], 3(%[src_ptr]) \n\t"
+
+ /* odd 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "preceu.ph.qbr %[n3], %[tn2] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n1], %[tn2] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
+ "ulw %[tp1], 6(%[src_ptr]) \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p2], %[tp1] \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2),
+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ dst_ptr[0] = cm[Temp1];
+ dst_ptr[1] = cm[Temp2];
+ dst_ptr[2] = cm[Temp3];
+ dst_ptr[3] = cm[Temp4];
+
+ /* next 4 pixels */
+ __asm__ __volatile__ (
+ /* even 3. pixel */
+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t"
+
+ /* even 4. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p4], %[tp1] \n\t"
+ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t"
+
+ "ulw %[tn1], 7(%[src_ptr]) \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 3. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n2], %[tn1] \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+
+ /* odd 4. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n4], %[tn1] \n\t"
+ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tn1] "=&r" (tn1), [n2] "=&r" (n2),
+ [p4] "=&r" (p4), [n4] "=&r" (n4),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2),
+ [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1),
+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b),
+ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ dst_ptr[4] = cm[Temp1];
+ dst_ptr[5] = cm[Temp2];
+ dst_ptr[6] = cm[Temp3];
+ dst_ptr[7] = cm[Temp4];
+
+ src_ptr += src_pixels_per_line;
+ dst_ptr += pitch;
+ }
+ }
+ else
+ {
+ /* 4 tap filter */
+
+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0];
+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1];
+
+ for (i = output_height; i--;)
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + src_pixels_per_line);
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp1], -1(%[src_ptr]) \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+
+ "ulw %[tp2], 3(%[src_ptr]) \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "preceu.ph.qbl %[p4], %[tp2] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ "balign %[tp2], %[tp1], 3 \n\t"
+
+ /* odd 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+
+ "ulw %[tn2], 4(%[src_ptr]) \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbr %[n3], %[tn2] \n\t"
+ "preceu.ph.qbl %[n4], %[tn2] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "ulw %[tp1], 7(%[src_ptr]) \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2),
+ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2),
+ [p3] "=&r" (p3), [p4] "=&r" (p4), [n1] "=&r" (n1),
+ [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ dst_ptr[0] = cm[Temp1];
+ dst_ptr[1] = cm[Temp2];
+ dst_ptr[2] = cm[Temp3];
+ dst_ptr[3] = cm[Temp4];
+
+ /* next 4 pixels */
+ __asm__ __volatile__ (
+ /* even 3. pixel */
+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t"
+
+ /* even 4. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbr %[p2], %[tp1] \n\t"
+ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 3. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t"
+ "ulw %[tn1], 8(%[src_ptr]) \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+
+ /* odd 4. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbr %[n2], %[tn1] \n\t"
+ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4),
+ [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr),
+ [n3] "r" (n3), [n4] "r" (n4)
+ );
+
+ /* clamp and store results */
+ dst_ptr[4] = cm[Temp1];
+ dst_ptr[5] = cm[Temp2];
+ dst_ptr[6] = cm[Temp3];
+ dst_ptr[7] = cm[Temp4];
+
+ /* next row... */
+ src_ptr += src_pixels_per_line;
+ dst_ptr += pitch;
+ }
+ }
+ }
+}
+
+
+void vp8_filter_block2d_first_pass16_6tap
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT dst_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int output_height,
+ int xoffset,
+ int pitch
+)
+{
+ unsigned int i;
+ int Temp1, Temp2, Temp3, Temp4;
+
+ unsigned int vector4a;
+ unsigned int vector1b, vector2b, vector3b;
+ unsigned int tp1, tp2, tn1, tn2;
+ unsigned int p1, p2, p3, p4;
+ unsigned int n1, n2, n3, n4;
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ vector1b = sub_pel_filters_inv[xoffset][0];
+ vector2b = sub_pel_filters_inv[xoffset][1];
+ vector3b = sub_pel_filters_inv[xoffset][2];
+ vector4a = 64;
+
+ for (i = output_height; i--;)
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + src_pixels_per_line);
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp1], -2(%[src_ptr]) \n\t"
+ "ulw %[tp2], 2(%[src_ptr]) \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p1], %[tp2] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
+
+ "balign %[tp2], %[tp1], 3 \n\t"
+ "ulw %[tn2], 3(%[src_ptr]) \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "preceu.ph.qbr %[n3], %[tn2] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n1], %[tn2] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
+ "ulw %[tp1], 6(%[src_ptr]) \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p2], %[tp1] \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2),
+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ dst_ptr[0] = cm[Temp1];
+ dst_ptr[1] = cm[Temp2];
+ dst_ptr[2] = cm[Temp3];
+ dst_ptr[3] = cm[Temp4];
+
+ /* next 4 pixels */
+ __asm__ __volatile__ (
+ /* even 3. pixel */
+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t"
+
+ /* even 4. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p4], %[tp1] \n\t"
+ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t"
+ "ulw %[tn1], 7(%[src_ptr]) \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 3. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n2], %[tn1] \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+
+ /* odd 4. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n4], %[tn1] \n\t"
+ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t"
+ "ulw %[tp2], 10(%[src_ptr]) \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp2] \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2),
+ [p4] "=&r" (p4), [n4] "=&r" (n4),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1),
+ [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b),
+ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ dst_ptr[4] = cm[Temp1];
+ dst_ptr[5] = cm[Temp2];
+ dst_ptr[6] = cm[Temp3];
+ dst_ptr[7] = cm[Temp4];
+
+ /* next 4 pixels */
+ __asm__ __volatile__ (
+ /* even 5. pixel */
+ "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t"
+
+ /* even 6. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t"
+
+ "ulw %[tn1], 11(%[src_ptr]) \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 5. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tn1] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+
+ /* odd 6. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n3], %[tn1] \n\t"
+ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t"
+ "ulw %[tp1], 14(%[src_ptr]) \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[p4], %[tp1] \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1),
+ [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2),
+ [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr),
+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b)
+ );
+
+ /* clamp and store results */
+ dst_ptr[8] = cm[Temp1];
+ dst_ptr[9] = cm[Temp2];
+ dst_ptr[10] = cm[Temp3];
+ dst_ptr[11] = cm[Temp4];
+
+ /* next 4 pixels */
+ __asm__ __volatile__ (
+ /* even 7. pixel */
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t"
+
+ /* even 8. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t"
+ "ulw %[tn1], 15(%[src_ptr]) \n\t"
+ "extp %[Temp1], $ac3, 9 \n\t"
+
+ /* odd 7. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "preceu.ph.qbr %[n4], %[tn1] \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t"
+ "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t"
+ "extp %[Temp3], $ac2, 9 \n\t"
+
+ /* odd 8. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "preceu.ph.qbl %[n2], %[tn1] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+ "extp %[Temp4], $ac2, 9 \n\t"
+
+ /* clamp and store results */
+ "lbux %[tp1], %[Temp1](%[cm]) \n\t"
+ "lbux %[tn1], %[Temp2](%[cm]) \n\t"
+ "lbux %[p2], %[Temp3](%[cm]) \n\t"
+ "sb %[tp1], 12(%[dst_ptr]) \n\t"
+ "sb %[tn1], 13(%[dst_ptr]) \n\t"
+ "lbux %[n2], %[Temp4](%[cm]) \n\t"
+ "sb %[p2], 14(%[dst_ptr]) \n\t"
+ "sb %[n2], 15(%[dst_ptr]) \n\t"
+
+ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1),
+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3),
+ [n3] "r" (n3), [src_ptr] "r" (src_ptr),
+ [cm] "r" (cm), [dst_ptr] "r" (dst_ptr)
+ );
+
+ src_ptr += src_pixels_per_line;
+ dst_ptr += pitch;
+ }
+}
+
+
+void vp8_filter_block2d_first_pass16_0
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT output_ptr,
+ unsigned int src_pixels_per_line
+)
+{
+ int Temp1, Temp2, Temp3, Temp4;
+ int i;
+
+ /* prefetch output_ptr data to cache memory */
+ prefetch_store(output_ptr + 32);
+
+ /* copy memory from src buffer to dst buffer */
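+ /* 7 iterations of 3 rows each copy the 21 rows of 16 pixels (16 output
+ rows plus the 5 extra rows the vertical 6-tap pass reads) */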
+ for (i = 0; i < 7; i++)
+ {
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "ulw %[Temp3], 8(%[src_ptr]) \n\t"
+ "ulw %[Temp4], 12(%[src_ptr]) \n\t"
+ "sw %[Temp1], 0(%[output_ptr]) \n\t"
+ "sw %[Temp2], 4(%[output_ptr]) \n\t"
+ "sw %[Temp3], 8(%[output_ptr]) \n\t"
+ "sw %[Temp4], 12(%[output_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr)
+ : [src_pixels_per_line] "r" (src_pixels_per_line),
+ [output_ptr] "r" (output_ptr)
+ );
+
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "ulw %[Temp3], 8(%[src_ptr]) \n\t"
+ "ulw %[Temp4], 12(%[src_ptr]) \n\t"
+ "sw %[Temp1], 16(%[output_ptr]) \n\t"
+ "sw %[Temp2], 20(%[output_ptr]) \n\t"
+ "sw %[Temp3], 24(%[output_ptr]) \n\t"
+ "sw %[Temp4], 28(%[output_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr)
+ : [src_pixels_per_line] "r" (src_pixels_per_line),
+ [output_ptr] "r" (output_ptr)
+ );
+
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "ulw %[Temp3], 8(%[src_ptr]) \n\t"
+ "ulw %[Temp4], 12(%[src_ptr]) \n\t"
+ "sw %[Temp1], 32(%[output_ptr]) \n\t"
+ "sw %[Temp2], 36(%[output_ptr]) \n\t"
+ "sw %[Temp3], 40(%[output_ptr]) \n\t"
+ "sw %[Temp4], 44(%[output_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr)
+ : [src_pixels_per_line] "r" (src_pixels_per_line),
+ [output_ptr] "r" (output_ptr)
+ );
+
+ output_ptr += 48;
+ }
+}
+
+
+void vp8_filter_block2d_first_pass16_4tap
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int output_width,
+ unsigned int output_height,
+ int xoffset,
+ int yoffset,
+ unsigned char *RESTRICT dst_ptr,
+ int pitch
+)
+{
+ unsigned int i, j;
+ int Temp1, Temp2, Temp3, Temp4;
+
+ unsigned int vector4a;
+ int vector1b, vector2b;
+ unsigned int tp1, tp2, tp3, tn1;
+ unsigned int p1, p2, p3;
+ unsigned int n1, n2, n3;
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ vector4a = 64;
+
+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0];
+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1];
+
+ /* if (yoffset == 0), no temp buffer is needed; data is written straight to dst_ptr */
+ if (yoffset == 0)
+ {
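+ /* skip the two context rows above the block and drop the five extra
+ rows from the row count, since no vertical pass follows */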
+ output_height -= 5;
+ src_ptr += (src_pixels_per_line + src_pixels_per_line);
+
+ for (i = output_height; i--;)
+ {
+ __asm__ __volatile__ (
+ "ulw %[tp3], -1(%[src_ptr]) \n\t"
+ : [tp3] "=&r" (tp3)
+ : [src_ptr] "r" (src_ptr)
+ );
+
+ /* processing 4 adjacent pixels */
+ for (j = 0; j < 16; j += 4)
+ {
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp2], 3(%[src_ptr]) \n\t"
+ "move %[tp1], %[tp3] \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "mthi $0, $ac3 \n\t"
+ "move %[tp3], %[tp2] \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "mthi $0, $ac2 \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "extr.w %[Temp1], $ac3, 7 \n\t"
+
+ /* odd 1. pixel */
+ "ulw %[tn1], 4(%[src_ptr]) \n\t"
+ "balign %[tp2], %[tp1], 3 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "mthi $0, $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "preceu.ph.qbr %[n3], %[tn1] \n\t"
+ "extr.w %[Temp3], $ac2, 7 \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "mthi $0, $ac2 \n\t"
+ "extr.w %[Temp2], $ac3, 7 \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "extr.w %[Temp4], $ac2, 7 \n\t"
+
+ /* clamp and store results */
+ "lbux %[tp1], %[Temp1](%[cm]) \n\t"
+ "lbux %[tn1], %[Temp2](%[cm]) \n\t"
+ "lbux %[tp2], %[Temp3](%[cm]) \n\t"
+ "sb %[tp1], 0(%[dst_ptr]) \n\t"
+ "sb %[tn1], 1(%[dst_ptr]) \n\t"
+ "lbux %[n2], %[Temp4](%[cm]) \n\t"
+ "sb %[tp2], 2(%[dst_ptr]) \n\t"
+ "sb %[n2], 3(%[dst_ptr]) \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3),
+ [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2),
+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ src_ptr += 4;
+ }
+
+ /* Next row... */
+ src_ptr += src_pixels_per_line - 16;
+ dst_ptr += pitch;
+ }
+ }
+ else
+ {
+ for (i = output_height; i--;)
+ {
+ /* processing 4 adjacent pixels */
+ for (j = 0; j < 16; j += 4)
+ {
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "ulw %[tp1], -1(%[src_ptr]) \n\t"
+ "ulw %[tp2], 3(%[src_ptr]) \n\t"
+
+ /* even 1. pixel */
+ "mtlo %[vector4a], $ac3 \n\t"
+ "mthi $0, $ac3 \n\t"
+ "preceu.ph.qbr %[p1], %[tp1] \n\t"
+ "preceu.ph.qbl %[p2], %[tp1] \n\t"
+ "preceu.ph.qbr %[p3], %[tp2] \n\t"
+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
+
+ /* even 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "mthi $0, $ac2 \n\t"
+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
+ "extr.w %[Temp1], $ac3, 7 \n\t"
+
+ /* odd 1. pixel */
+ "ulw %[tn1], 4(%[src_ptr]) \n\t"
+ "balign %[tp2], %[tp1], 3 \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "mthi $0, $ac3 \n\t"
+ "preceu.ph.qbr %[n1], %[tp2] \n\t"
+ "preceu.ph.qbl %[n2], %[tp2] \n\t"
+ "preceu.ph.qbr %[n3], %[tn1] \n\t"
+ "extr.w %[Temp3], $ac2, 7 \n\t"
+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
+
+ /* odd 2. pixel */
+ "mtlo %[vector4a], $ac2 \n\t"
+ "mthi $0, $ac2 \n\t"
+ "extr.w %[Temp2], $ac3, 7 \n\t"
+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
+ "extr.w %[Temp4], $ac2, 7 \n\t"
+
+ /* clamp and store results */
+ "lbux %[tp1], %[Temp1](%[cm]) \n\t"
+ "lbux %[tn1], %[Temp2](%[cm]) \n\t"
+ "lbux %[tp2], %[Temp3](%[cm]) \n\t"
+ "sb %[tp1], 0(%[output_ptr]) \n\t"
+ "sb %[tn1], 1(%[output_ptr]) \n\t"
+ "lbux %[n2], %[Temp4](%[cm]) \n\t"
+ "sb %[tp2], 2(%[output_ptr]) \n\t"
+ "sb %[n2], 3(%[output_ptr]) \n\t"
+
+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1),
+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector4a] "r" (vector4a), [cm] "r" (cm),
+ [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr)
+ );
+
+ src_ptr += 4;
+ }
+
+ /* next row... */
+ src_ptr += src_pixels_per_line;
+ output_ptr += output_width;
+ }
+ }
+}
+
+
+void vp8_filter_block2d_second_pass4
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT output_ptr,
+ int output_pitch,
+ int yoffset
+)
+{
+ unsigned int i;
+
+ int Temp1, Temp2, Temp3, Temp4;
+ unsigned int vector1b, vector2b, vector3b, vector4a;
+
+ unsigned char src_ptr_l2;
+ unsigned char src_ptr_l1;
+ unsigned char src_ptr_0;
+ unsigned char src_ptr_r1;
+ unsigned char src_ptr_r2;
+ unsigned char src_ptr_r3;
+
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ vector4a = 64;
+
+ /* load filter coefficients */
+ vector1b = sub_pel_filterss[yoffset][0];
+ vector2b = sub_pel_filterss[yoffset][2];
+ vector3b = sub_pel_filterss[yoffset][1];
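+
+ /* [0] holds the outer (+) tap pair, [2] the large centre (+) taps and
+ [1] the magnitudes of the negative taps, matching the dpau/dpau/dpsu
+ accumulate sequence below */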
+
+ if (vector1b)
+ {
+ /* 6 tap filter */
+
+ for (i = 2; i--;)
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr);
+
+ /* do not allow compiler to reorder instructions */
+ __asm__ __volatile__ (
+ ".set noreorder \n\t"
+ :
+ :
+ );
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp3], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac1, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+
+ output_ptr += output_pitch;
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp3], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac1, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+
+ src_ptr += 8;
+ output_ptr += output_pitch;
+ }
+ }
+ else
+ {
+ /* 4 tap filter */
+
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr);
+
+ for (i = 2; i--;)
+ {
+ /* do not allow compiler to reorder instructions */
+ __asm__ __volatile__ (
+ ".set noreorder \n\t"
+ :
+ :
+ );
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp3], $ac0, 9 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac1, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+
+ output_ptr += output_pitch;
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp3], $ac0, 9 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac1, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+
+ src_ptr += 8;
+ output_ptr += output_pitch;
+ }
+ }
+}
+
+
+void vp8_filter_block2d_second_pass_8
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT output_ptr,
+ int output_pitch,
+ unsigned int output_height,
+ unsigned int output_width,
+ unsigned int yoffset
+)
+{
+ unsigned int i;
+
+ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8;
+ unsigned int vector1b, vector2b, vector3b, vector4a;
+
+ unsigned char src_ptr_l2;
+ unsigned char src_ptr_l1;
+ unsigned char src_ptr_0;
+ unsigned char src_ptr_r1;
+ unsigned char src_ptr_r2;
+ unsigned char src_ptr_r3;
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ vector4a = 64;
+
+ vector1b = sub_pel_filterss[yoffset][0];
+ vector2b = sub_pel_filterss[yoffset][2];
+ vector3b = sub_pel_filterss[yoffset][1];
+
+ if (vector1b)
+ {
+ /* 6 tap filter */
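+
+ /* the intermediate buffer is 8 pixels wide, so the six vertical taps
+ for one output pixel sit at byte offsets -16, -8, 0, 8, 16 and 24 */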
+
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr);
+
+ for (i = output_height; i--;)
+ {
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -13(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp3], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* apply filter with vectors pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac1, 9 \n\t"
+
+ "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp5], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp6], $ac3, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp7], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp8], $ac1, 9 \n\t"
+
+ : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5),
+ [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
+ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+ output_ptr[4] = cm[Temp5];
+ output_ptr[5] = cm[Temp6];
+ output_ptr[6] = cm[Temp7];
+ output_ptr[7] = cm[Temp8];
+
+ src_ptr += 8;
+ output_ptr += output_pitch;
+ }
+ }
+ else
+ {
+ /* 4 tap filter */
+
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr);
+
+ for (i = output_height; i--;)
+ {
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
+ );
+
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ : [Temp1] "=r" (Temp1),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
+ );
+
+ src_ptr_l1 = src_ptr[-6];
+ src_ptr_0 = src_ptr[2];
+ src_ptr_r1 = src_ptr[10];
+ src_ptr_r2 = src_ptr[18];
+
+ __asm__ __volatile__ (
+ "mtlo %[vector4a], $ac0 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ : [Temp2] "=r" (Temp2)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
+ [vector4a] "r" (vector4a)
+ );
+
+ src_ptr_l1 = src_ptr[-5];
+ src_ptr_0 = src_ptr[3];
+ src_ptr_r1 = src_ptr[11];
+ src_ptr_r2 = src_ptr[19];
+
+ __asm__ __volatile__ (
+ "mtlo %[vector4a], $ac1 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp3], $ac0, 9 \n\t"
+
+ : [Temp3] "=r" (Temp3)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
+ [vector4a] "r" (vector4a)
+ );
+
+ src_ptr_l1 = src_ptr[-4];
+ src_ptr_0 = src_ptr[4];
+ src_ptr_r1 = src_ptr[12];
+ src_ptr_r2 = src_ptr[20];
+
+ __asm__ __volatile__ (
+ "mtlo %[vector4a], $ac2 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp4], $ac1, 9 \n\t"
+
+ : [Temp4] "=r" (Temp4)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
+ [vector4a] "r" (vector4a)
+ );
+
+ src_ptr_l1 = src_ptr[-3];
+ src_ptr_0 = src_ptr[5];
+ src_ptr_r1 = src_ptr[13];
+ src_ptr_r2 = src_ptr[21];
+
+ __asm__ __volatile__ (
+ "mtlo %[vector4a], $ac3 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp5], $ac2, 9 \n\t"
+
+ : [Temp5] "=&r" (Temp5)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
+ [vector4a] "r" (vector4a)
+ );
+
+ src_ptr_l1 = src_ptr[-2];
+ src_ptr_0 = src_ptr[6];
+ src_ptr_r1 = src_ptr[14];
+ src_ptr_r2 = src_ptr[22];
+
+ __asm__ __volatile__ (
+ "mtlo %[vector4a], $ac0 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp6], $ac3, 9 \n\t"
+
+ : [Temp6] "=r" (Temp6)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
+ [vector4a] "r" (vector4a)
+ );
+
+ src_ptr_l1 = src_ptr[-1];
+ src_ptr_0 = src_ptr[7];
+ src_ptr_r1 = src_ptr[15];
+ src_ptr_r2 = src_ptr[23];
+
+ __asm__ __volatile__ (
+ "mtlo %[vector4a], $ac1 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp7], $ac0, 9 \n\t"
+ "extp %[Temp8], $ac1, 9 \n\t"
+
+ : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
+ [vector4a] "r" (vector4a)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+ output_ptr[4] = cm[Temp5];
+ output_ptr[5] = cm[Temp6];
+ output_ptr[6] = cm[Temp7];
+ output_ptr[7] = cm[Temp8];
+
+ src_ptr += 8;
+ output_ptr += output_pitch;
+ }
+ }
+}
+
+
+void vp8_filter_block2d_second_pass161
+(
+ unsigned char *RESTRICT src_ptr,
+ unsigned char *RESTRICT output_ptr,
+ int output_pitch,
+ const unsigned short *vp8_filter
+)
+{
+ unsigned int i, j;
+
+ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8;
+ unsigned int vector4a;
+ unsigned int vector1b, vector2b, vector3b;
+
+ unsigned char src_ptr_l2;
+ unsigned char src_ptr_l1;
+ unsigned char src_ptr_0;
+ unsigned char src_ptr_r1;
+ unsigned char src_ptr_r2;
+ unsigned char src_ptr_r3;
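+    /* cm points into the middle of the crop table so that possibly negative
+     * filter sums index-clamp to [0, 255] */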
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
+ vector4a = 64;
+
+ vector1b = vp8_filter[0];
+ vector2b = vp8_filter[2];
+ vector3b = vp8_filter[1];
+
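+    /* vp8_filter[0] holds the outermost tap, which is zero for the 4-tap
+     * sub-pel filters (odd offsets), so testing it selects the cheaper path */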
+ if (vector1b == 0)
+ {
+ /* 4 tap filter */
+
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + 16);
+
+ for (i = 16; i--;)
+ {
+            /* inner loop unrolled: each iteration produces 8 output pixels */
+ for (j = 0; j < 16; j += 8)
+ {
+                /* apply filter with vector pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp2], $ac3, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp3], $ac1, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "extp %[Temp4], $ac3, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp5], $ac2, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp6], $ac3, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp7], $ac1, 9 \n\t"
+
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp8], $ac3, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4),
+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6),
+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[j] = cm[Temp1];
+ output_ptr[j + 1] = cm[Temp2];
+ output_ptr[j + 2] = cm[Temp3];
+ output_ptr[j + 3] = cm[Temp4];
+ output_ptr[j + 4] = cm[Temp5];
+ output_ptr[j + 5] = cm[Temp6];
+ output_ptr[j + 6] = cm[Temp7];
+ output_ptr[j + 7] = cm[Temp8];
+
+ src_ptr += 8;
+ }
+
+ output_ptr += output_pitch;
+ }
+ }
+ else
+ {
+        /* 6 tap filter */
+
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + 16);
+
+        /* the loop below is unrolled */
+ for (i = 16; i--;)
+ {
+            /* apply filter with vector pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp2], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp3], $ac1, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "extp %[Temp4], $ac3, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp5], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp6], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp7], $ac1, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp8], $ac3, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4),
+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6),
+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
+ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ /* clamp and store results */
+ output_ptr[0] = cm[Temp1];
+ output_ptr[1] = cm[Temp2];
+ output_ptr[2] = cm[Temp3];
+ output_ptr[3] = cm[Temp4];
+ output_ptr[4] = cm[Temp5];
+ output_ptr[5] = cm[Temp6];
+ output_ptr[6] = cm[Temp7];
+ output_ptr[7] = cm[Temp8];
+
+            /* apply filter with vector pairs */
+ __asm__ __volatile__ (
+ "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 40(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp1], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 42(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp2], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp3], $ac1, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac2 \n\t"
+ "extp %[Temp4], $ac3, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac0 \n\t"
+ "extp %[Temp5], $ac2, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 30(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac1 \n\t"
+ "extp %[Temp6], $ac0, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t"
+
+ "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t"
+ "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t"
+ "mtlo %[vector4a], $ac3 \n\t"
+ "extp %[Temp7], $ac1, 9 \n\t"
+
+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t"
+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t"
+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t"
+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t"
+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t"
+ "extp %[Temp8], $ac3, 9 \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4),
+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6),
+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
+ [src_ptr] "r" (src_ptr)
+ );
+
+ src_ptr += 16;
+ output_ptr[8] = cm[Temp1];
+ output_ptr[9] = cm[Temp2];
+ output_ptr[10] = cm[Temp3];
+ output_ptr[11] = cm[Temp4];
+ output_ptr[12] = cm[Temp5];
+ output_ptr[13] = cm[Temp6];
+ output_ptr[14] = cm[Temp7];
+ output_ptr[15] = cm[Temp8];
+
+ output_ptr += output_pitch;
+ }
+ }
+}
+
+
+void vp8_sixtap_predict4x4_dspr2
+(
+ unsigned char *RESTRICT src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *RESTRICT dst_ptr,
+ int dst_pitch
+)
+{
+    unsigned char FData[9 * 4];   /* Temp data buffer used in filtering */
+ unsigned int pos = 16;
+
+    /* bit position for extract from acc */
+ __asm__ __volatile__ (
+ "wrdsp %[pos], 1 \n\t"
+ :
+ : [pos] "r" (pos)
+ );
+
+ if (yoffset)
+ {
+ /* First filter 1-D horizontally... */
+ vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData,
+ src_pixels_per_line, 9, xoffset, 4);
+        /* then filter vertically... */
+ vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset);
+ }
+ else
+        /* if (yoffset == 0), vp8_filter_block2d_first_pass saves the data directly to dst_ptr */
+ vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line,
+ 4, xoffset, dst_pitch);
+}
+
+
+void vp8_sixtap_predict8x8_dspr2
+(
+ unsigned char *RESTRICT src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *RESTRICT dst_ptr,
+ int dst_pitch
+)
+{
+
+    unsigned char FData[13 * 8];   /* Temp data buffer used in filtering */
+ unsigned int pos, Temp1, Temp2;
+
+ pos = 16;
+
+    /* bit position for extract from acc */
+ __asm__ __volatile__ (
+ "wrdsp %[pos], 1 \n\t"
+ :
+ : [pos] "r" (pos)
+ );
+
+ if (yoffset)
+ {
+
+ src_ptr = src_ptr - (2 * src_pixels_per_line);
+
+ if (xoffset)
+ /* filter 1-D horizontally... */
+ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line,
+ 13, xoffset, 8);
+
+ else
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + 2 * src_pixels_per_line);
+
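+            /* xoffset == 0: no horizontal filtering needed; copy the 13 source
+             * rows (8 output rows plus 5 rows of context for the 6-tap
+             * vertical pass) into FData */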
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 0(%[FData]) \n\t"
+ "sw %[Temp2], 4(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 8(%[FData]) \n\t"
+ "sw %[Temp2], 12(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 16(%[FData]) \n\t"
+ "sw %[Temp2], 20(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 24(%[FData]) \n\t"
+ "sw %[Temp2], 28(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 32(%[FData]) \n\t"
+ "sw %[Temp2], 36(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 40(%[FData]) \n\t"
+ "sw %[Temp2], 44(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 48(%[FData]) \n\t"
+ "sw %[Temp2], 52(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 56(%[FData]) \n\t"
+ "sw %[Temp2], 60(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 64(%[FData]) \n\t"
+ "sw %[Temp2], 68(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 72(%[FData]) \n\t"
+ "sw %[Temp2], 76(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 80(%[FData]) \n\t"
+ "sw %[Temp2], 84(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 88(%[FData]) \n\t"
+ "sw %[Temp2], 92(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 96(%[FData]) \n\t"
+ "sw %[Temp2], 100(%[FData]) \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
+ : [FData] "r" (FData), [src_ptr] "r" (src_ptr),
+ [src_pixels_per_line] "r" (src_pixels_per_line)
+ );
+ }
+
+        /* filter vertically... */
+ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset);
+ }
+
+    /* if (yoffset == 0), vp8_filter_block2d_first_pass saves the data directly to dst_ptr */
+ else
+ {
+ if (xoffset)
+ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line,
+ 8, xoffset, dst_pitch);
+
+ else
+ {
+ /* copy from src buffer to dst buffer */
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 0(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 4(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 8(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 12(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 16(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 20(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 24(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 28(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 32(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 36(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 40(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 44(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 48(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 52(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 56(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 60(%[dst_ptr]) \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
+ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr),
+ [src_pixels_per_line] "r" (src_pixels_per_line)
+ );
+ }
+ }
+}
+
+
+void vp8_sixtap_predict8x4_dspr2
+(
+ unsigned char *RESTRICT src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *RESTRICT dst_ptr,
+ int dst_pitch
+)
+{
+    unsigned char FData[9 * 8];   /* Temp data buffer used in filtering */
+ unsigned int pos, Temp1, Temp2;
+
+ pos = 16;
+
+    /* bit position for extract from acc */
+ __asm__ __volatile__ (
+ "wrdsp %[pos], 1 \n\t"
+ :
+ : [pos] "r" (pos)
+ );
+
+ if (yoffset)
+ {
+
+ src_ptr = src_ptr - (2 * src_pixels_per_line);
+
+ if (xoffset)
+ /* filter 1-D horizontally... */
+ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line,
+ 9, xoffset, 8);
+
+ else
+ {
+ /* prefetch src_ptr data to cache memory */
+ prefetch_load(src_ptr + 2 * src_pixels_per_line);
+
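+            /* xoffset == 0: copy the 9 source rows (4 output rows plus 5 rows
+             * of context for the 6-tap vertical pass) into FData */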
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 0(%[FData]) \n\t"
+ "sw %[Temp2], 4(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 8(%[FData]) \n\t"
+ "sw %[Temp2], 12(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 16(%[FData]) \n\t"
+ "sw %[Temp2], 20(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 24(%[FData]) \n\t"
+ "sw %[Temp2], 28(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 32(%[FData]) \n\t"
+ "sw %[Temp2], 36(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 40(%[FData]) \n\t"
+ "sw %[Temp2], 44(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 48(%[FData]) \n\t"
+ "sw %[Temp2], 52(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 56(%[FData]) \n\t"
+ "sw %[Temp2], 60(%[FData]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 64(%[FData]) \n\t"
+ "sw %[Temp2], 68(%[FData]) \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
+ : [FData] "r" (FData), [src_ptr] "r" (src_ptr),
+ [src_pixels_per_line] "r" (src_pixels_per_line)
+ );
+ }
+
+        /* filter vertically... */
+ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset);
+ }
+
+    /* if (yoffset == 0), vp8_filter_block2d_first_pass saves the data directly to dst_ptr */
+ else
+ {
+ if (xoffset)
+ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line,
+ 4, xoffset, dst_pitch);
+
+ else
+ {
+ /* copy from src buffer to dst buffer */
+ __asm__ __volatile__ (
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 0(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 4(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 8(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 12(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 16(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 20(%[dst_ptr]) \n\t"
+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t"
+
+ "ulw %[Temp1], 0(%[src_ptr]) \n\t"
+ "ulw %[Temp2], 4(%[src_ptr]) \n\t"
+ "sw %[Temp1], 24(%[dst_ptr]) \n\t"
+ "sw %[Temp2], 28(%[dst_ptr]) \n\t"
+
+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
+ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr),
+ [src_pixels_per_line] "r" (src_pixels_per_line)
+ );
+ }
+ }
+}
+
+
+void vp8_sixtap_predict16x16_dspr2
+(
+ unsigned char *RESTRICT src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *RESTRICT dst_ptr,
+ int dst_pitch
+)
+{
+ const unsigned short *VFilter;
+    unsigned char FData[21 * 16];   /* Temp data buffer used in filtering */
+ unsigned int pos;
+
+ VFilter = sub_pel_filterss[yoffset];
+
+ pos = 16;
+
+    /* bit position for extract from acc */
+ __asm__ __volatile__ (
+ "wrdsp %[pos], 1 \n\t"
+ :
+ : [pos] "r" (pos)
+ );
+
+ if (yoffset)
+ {
+
+ src_ptr = src_ptr - (2 * src_pixels_per_line);
+
+ switch (xoffset)
+ {
+ /* filter 1-D horizontally... */
+ case 2:
+ case 4:
+ case 6:
+ /* 6 tap filter */
+ vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line,
+ 21, xoffset, 16);
+ break;
+
+ case 0:
+ /* only copy buffer */
+ vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line);
+ break;
+
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ /* 4 tap filter */
+ vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16,
+ 21, xoffset, yoffset, dst_ptr, dst_pitch);
+ break;
+ }
+
+        /* filter vertically... */
+ vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter);
+ }
+ else
+ {
+        /* if (yoffset == 0), vp8_filter_block2d_first_pass saves the data directly to dst_ptr */
+ switch (xoffset)
+ {
+ case 2:
+ case 4:
+ case 6:
+ /* 6 tap filter */
+ vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line,
+ 16, xoffset, dst_pitch);
+ break;
+
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ /* 4 tap filter */
+ vp8_filter_block2d_first_pass16_4tap(src_ptr, dst_ptr, src_pixels_per_line, 16,
+ 21, xoffset, yoffset, dst_ptr, dst_pitch);
+ break;
+ }
+ }
+}
+
+#endif
diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c
new file mode 100644
index 0000000..1e0ebd1
--- /dev/null
+++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+
+#if HAVE_DSPR2
+
+void vp8_dequant_idct_add_y_block_dspr2
+(short *q, short *dq,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i, j;
+
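+    /* 16 luma 4x4 sub-blocks; *eobs > 1 requires the full inverse transform,
+     * otherwise only the DC coefficient is non-zero */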
+ for (i = 0; i < 4; i++)
+ {
+ for (j = 0; j < 4; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
+ else
+ {
+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ dst += 4;
+ }
+
+ dst += 4 * stride - 16;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_dspr2
+(short *q, short *dq,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i, j;
+
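+    /* four 4x4 sub-blocks for each chroma plane, U first then V */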
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 2; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
+ else
+ {
+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ dstu += 4;
+ }
+
+ dstu += 4 * stride - 8;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 2; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
+ else
+ {
+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ dstv += 4;
+ }
+
+ dstv += 4 * stride - 8;
+ }
+}
+
+#endif
+
diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c
new file mode 100644
index 0000000..25b7936
--- /dev/null
+++ b/vp8/common/mips/dspr2/idctllm_dspr2.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_rtcd.h"
+
+#if HAVE_DSPR2
+#define CROP_WIDTH 256
+
+/******************************************************************************
+ * Notes:
+ *
+ * This implementation makes use of 16 bit fixed point versions of two
+ * multiply constants:
+ * 1. sqrt(2) * cos (pi/8)
+ * 2. sqrt(2) * sin (pi/8)
+ * Since the first constant is greater than 1, to maintain the same 16 bit
+ * fixed point precision as the second one, we use the identity
+ * x * a = x + x * (a - 1)
+ * so
+ * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) * cos(pi/8) - 1).
+ ****************************************************************************/
+extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
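+/* Q16 values:
+ *   20091 = round((sqrt(2) * cos(pi/8) - 1) * 65536)
+ *   35468 = round(sqrt(2) * sin(pi/8) * 65536)
+ * e.g. x * sqrt(2) * cos(pi/8) is computed as x + ((x * 20091) >> 16) */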
+static const int cospi8sqrt2minus1 = 20091;
+static const int sinpi8sqrt2 = 35468;
+
+inline void prefetch_load_short(short *src)
+{
+ __asm__ __volatile__ (
+ "pref 0, 0(%[src]) \n\t"
+ :
+ : [src] "r" (src)
+ );
+}
+
+void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
+ int pred_stride, unsigned char *dst_ptr,
+ int dst_stride)
+{
+ int r, c;
+ int a1, b1, c1, d1;
+ short output[16];
+ short *ip = input;
+ short *op = output;
+ int temp1, temp2;
+ int shortpitch = 4;
+
+ int c2, d2;
+ int temp3, temp4;
+ unsigned char *cm = ff_cropTbl + CROP_WIDTH;
+
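+    /* two passes: the first transforms columns, the second transforms rows
+     * and applies the final (x + 4) >> 3 rounding; the result is added to the
+     * prediction and clamped through the crop table */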
+ /* prepare data for load */
+ prefetch_load_short(ip + 8);
+
+ /* first loop is unrolled */
+ a1 = ip[0] + ip[8];
+ b1 = ip[0] - ip[8];
+
+ temp1 = (ip[4] * sinpi8sqrt2) >> 16;
+ temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+ c1 = temp1 - temp2;
+
+ temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
+ temp2 = (ip[12] * sinpi8sqrt2) >> 16;
+ d1 = temp1 + temp2;
+
+ temp3 = (ip[5] * sinpi8sqrt2) >> 16;
+ temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
+ c2 = temp3 - temp4;
+
+ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
+ temp4 = (ip[13] * sinpi8sqrt2) >> 16;
+ d2 = temp3 + temp4;
+
+ op[0] = a1 + d1;
+ op[12] = a1 - d1;
+ op[4] = b1 + c1;
+ op[8] = b1 - c1;
+
+ a1 = ip[1] + ip[9];
+ b1 = ip[1] - ip[9];
+
+ op[1] = a1 + d2;
+ op[13] = a1 - d2;
+ op[5] = b1 + c2;
+ op[9] = b1 - c2;
+
+ a1 = ip[2] + ip[10];
+ b1 = ip[2] - ip[10];
+
+ temp1 = (ip[6] * sinpi8sqrt2) >> 16;
+ temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
+ c1 = temp1 - temp2;
+
+ temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
+ temp2 = (ip[14] * sinpi8sqrt2) >> 16;
+ d1 = temp1 + temp2;
+
+ temp3 = (ip[7] * sinpi8sqrt2) >> 16;
+ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
+ c2 = temp3 - temp4;
+
+ temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
+ temp4 = (ip[15] * sinpi8sqrt2) >> 16;
+ d2 = temp3 + temp4;
+
+ op[2] = a1 + d1;
+ op[14] = a1 - d1;
+ op[6] = b1 + c1;
+ op[10] = b1 - c1;
+
+ a1 = ip[3] + ip[11];
+ b1 = ip[3] - ip[11];
+
+ op[3] = a1 + d2;
+ op[15] = a1 - d2;
+ op[7] = b1 + c2;
+ op[11] = b1 - c2;
+
+ ip = output;
+
+ /* prepare data for load */
+ prefetch_load_short(ip + shortpitch);
+
+ /* second loop is unrolled */
+ a1 = ip[0] + ip[2];
+ b1 = ip[0] - ip[2];
+
+ temp1 = (ip[1] * sinpi8sqrt2) >> 16;
+ temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
+ c1 = temp1 - temp2;
+
+ temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
+ temp2 = (ip[3] * sinpi8sqrt2) >> 16;
+ d1 = temp1 + temp2;
+
+ temp3 = (ip[5] * sinpi8sqrt2) >> 16;
+ temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
+ c2 = temp3 - temp4;
+
+ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
+ temp4 = (ip[7] * sinpi8sqrt2) >> 16;
+ d2 = temp3 + temp4;
+
+ op[0] = (a1 + d1 + 4) >> 3;
+ op[3] = (a1 - d1 + 4) >> 3;
+ op[1] = (b1 + c1 + 4) >> 3;
+ op[2] = (b1 - c1 + 4) >> 3;
+
+ a1 = ip[4] + ip[6];
+ b1 = ip[4] - ip[6];
+
+ op[4] = (a1 + d2 + 4) >> 3;
+ op[7] = (a1 - d2 + 4) >> 3;
+ op[5] = (b1 + c2 + 4) >> 3;
+ op[6] = (b1 - c2 + 4) >> 3;
+
+ a1 = ip[8] + ip[10];
+ b1 = ip[8] - ip[10];
+
+ temp1 = (ip[9] * sinpi8sqrt2) >> 16;
+ temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
+ c1 = temp1 - temp2;
+
+ temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
+ temp2 = (ip[11] * sinpi8sqrt2) >> 16;
+ d1 = temp1 + temp2;
+
+ temp3 = (ip[13] * sinpi8sqrt2) >> 16;
+ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
+ c2 = temp3 - temp4;
+
+ temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
+ temp4 = (ip[15] * sinpi8sqrt2) >> 16;
+ d2 = temp3 + temp4;
+
+ op[8] = (a1 + d1 + 4) >> 3;
+ op[11] = (a1 - d1 + 4) >> 3;
+ op[9] = (b1 + c1 + 4) >> 3;
+ op[10] = (b1 - c1 + 4) >> 3;
+
+ a1 = ip[12] + ip[14];
+ b1 = ip[12] - ip[14];
+
+ op[12] = (a1 + d2 + 4) >> 3;
+ op[15] = (a1 - d2 + 4) >> 3;
+ op[13] = (b1 + c2 + 4) >> 3;
+ op[14] = (b1 - c2 + 4) >> 3;
+
+ ip = output;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ short a = ip[c] + pred_ptr[c] ;
+ dst_ptr[c] = cm[a] ;
+ }
+
+ ip += 4;
+ dst_ptr += dst_stride;
+ pred_ptr += pred_stride;
+ }
+}
+
+void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
+{
+ int a1;
+ int i, absa1;
+ int t2, vector_a1, vector_a;
+
+ /* a1 = ((input_dc + 4) >> 3); */
+ __asm__ __volatile__ (
+ "addi %[a1], %[input_dc], 4 \n\t"
+ "sra %[a1], %[a1], 3 \n\t"
+ : [a1] "=r" (a1)
+ : [input_dc] "r" (input_dc)
+ );
+
+ if (a1 < 0)
+ {
+        /* use quad-byte operations;
+         * input and output memory are four byte aligned
+         */
+ __asm__ __volatile__ (
+ "abs %[absa1], %[a1] \n\t"
+ "replv.qb %[vector_a1], %[absa1] \n\t"
+ : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
+ : [a1] "r" (a1)
+ );
+
+        /* a1 is negative: use (pred_ptr[c] - |a1|) instead of (a1 + pred_ptr[c]) */
+ for (i = 4; i--;)
+ {
+ __asm__ __volatile__ (
+ "lw %[t2], 0(%[pred_ptr]) \n\t"
+ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
+ "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
+ "sw %[vector_a], 0(%[dst_ptr]) \n\t"
+ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
+ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
+ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
+ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
+ );
+ }
+ }
+ else
+ {
+        /* use quad-byte operations;
+         * input and output memory are four byte aligned
+         */
+ __asm__ __volatile__ (
+ "replv.qb %[vector_a1], %[a1] \n\t"
+ : [vector_a1] "=r" (vector_a1)
+ : [a1] "r" (a1)
+ );
+
+ for (i = 4; i--;)
+ {
+ __asm__ __volatile__ (
+ "lw %[t2], 0(%[pred_ptr]) \n\t"
+ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
+ "addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
+ "sw %[vector_a], 0(%[dst_ptr]) \n\t"
+ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
+ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
+ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
+ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
+ );
+ }
+ }
+
+}
+
+void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
+{
+ short output[16];
+ int i;
+ int a1, b1, c1, d1;
+ int a2, b2, c2, d2;
+ short *ip = input;
+ short *op = output;
+
+ prefetch_load_short(ip);
+
+ for (i = 4; i--;)
+ {
+ a1 = ip[0] + ip[12];
+ b1 = ip[4] + ip[8];
+ c1 = ip[4] - ip[8];
+ d1 = ip[0] - ip[12];
+
+ op[0] = a1 + b1;
+ op[4] = c1 + d1;
+ op[8] = a1 - b1;
+ op[12] = d1 - c1;
+
+ ip++;
+ op++;
+ }
+
+ ip = output;
+ op = output;
+
+ prefetch_load_short(ip);
+
+ for (i = 4; i--;)
+ {
+ a1 = ip[0] + ip[3] + 3;
+ b1 = ip[1] + ip[2];
+ c1 = ip[1] - ip[2];
+ d1 = ip[0] - ip[3] + 3;
+
+ a2 = a1 + b1;
+ b2 = d1 + c1;
+ c2 = a1 - b1;
+ d2 = d1 - c1;
+
+ op[0] = a2 >> 3;
+ op[1] = b2 >> 3;
+ op[2] = c2 >> 3;
+ op[3] = d2 >> 3;
+
+ ip += 4;
+ op += 4;
+ }
+
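+    /* scatter the 16 results to the DC position of each 4x4 block
+     * (every 16th coefficient) */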
+ for (i = 0; i < 16; i++)
+ {
+ mb_dqcoeff[i * 16] = output[i];
+ }
+}
+
+void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
+{
+ int a1;
+
+ a1 = ((input[0] + 3) >> 3);
+
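+    /* write the single DC value into the DC slot of each of the 16 blocks;
+     * the slots are 16 shorts (32 bytes) apart */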
+ __asm__ __volatile__ (
+ "sh %[a1], 0(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 32(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 64(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 96(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 128(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 160(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 192(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 224(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 256(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 288(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 320(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 352(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 384(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 416(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 448(%[mb_dqcoeff]) \n\t"
+ "sh %[a1], 480(%[mb_dqcoeff]) \n\t"
+
+ :
+ : [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
+ );
+}
+
+#endif
diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
new file mode 100644
index 0000000..b8e5e4d
--- /dev/null
+++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
@@ -0,0 +1,2622 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include "vpx_rtcd.h"
+#include "vp8/common/onyxc_int.h"
+
+#if HAVE_DSPR2
+typedef unsigned char uc;
+
+/* prefetch data for load */
+inline void prefetch_load_lf(unsigned char *src)
+{
+ __asm__ __volatile__ (
+ "pref 0, 0(%[src]) \n\t"
+ :
+ : [src] "r" (src)
+ );
+}
+
+
+/* prefetch data for store */
+inline void prefetch_store_lf(unsigned char *dst)
+{
+ __asm__ __volatile__ (
+ "pref 1, 0(%[dst]) \n\t"
+ :
+ : [dst] "r" (dst)
+ );
+}
+
+/* process 4 pixels at the same time;
+ * compute hev and mask in the same function
+ */
+static __inline void vp8_filter_mask_vec_mips
+(
+ uint32_t limit,
+ uint32_t flimit,
+ uint32_t p1,
+ uint32_t p0,
+ uint32_t p3,
+ uint32_t p2,
+ uint32_t q0,
+ uint32_t q1,
+ uint32_t q2,
+ uint32_t q3,
+ uint32_t thresh,
+ uint32_t *hev,
+ uint32_t *mask
+)
+{
+ uint32_t c, r, r3, r_k;
+ uint32_t s1, s2, s3;
+ uint32_t ones = 0xFFFFFFFF;
+ uint32_t hev1;
+
+ __asm__ __volatile__ (
+ /* mask |= (abs(p3 - p2) > limit) */
+ "subu_s.qb %[c], %[p3], %[p2] \n\t"
+ "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
+ "or %[r_k], %[r_k], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
+ "or %[r], $0, %[c] \n\t"
+
+ /* mask |= (abs(p2 - p1) > limit) */
+ "subu_s.qb %[c], %[p2], %[p1] \n\t"
+ "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
+ "or %[r_k], %[r_k], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
+ "or %[r], %[r], %[c] \n\t"
+
+ /* mask |= (abs(p1 - p0) > limit)
+ * hev |= (abs(p1 - p0) > thresh)
+ */
+ "subu_s.qb %[c], %[p1], %[p0] \n\t"
+ "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
+ "or %[r_k], %[r_k], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
+ "or %[r3], $0, %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
+ "or %[r], %[r], %[c] \n\t"
+
+ /* mask |= (abs(q1 - q0) > limit)
+ * hev |= (abs(q1 - q0) > thresh)
+ */
+ "subu_s.qb %[c], %[q1], %[q0] \n\t"
+ "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
+ "or %[r_k], %[r_k], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
+ "or %[r3], %[r3], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
+ "or %[r], %[r], %[c] \n\t"
+
+ /* mask |= (abs(q2 - q1) > limit) */
+ "subu_s.qb %[c], %[q2], %[q1] \n\t"
+ "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
+ "or %[r_k], %[r_k], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
+ "or %[r], %[r], %[c] \n\t"
+ "sll %[r3], %[r3], 24 \n\t"
+
+ /* mask |= (abs(q3 - q2) > limit) */
+ "subu_s.qb %[c], %[q3], %[q2] \n\t"
+ "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
+ "or %[r_k], %[r_k], %[c] \n\t"
+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
+ "or %[r], %[r], %[c] \n\t"
+
+ : [c] "=&r" (c), [r_k] "=&r" (r_k),
+ [r] "=&r" (r), [r3] "=&r" (r3)
+ : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2),
+ [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0),
+ [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh)
+ );
+
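+    /* the comparison results gathered in r are moved into the DSPControl
+     * condition bits with wrdsp; pick.qb then expands them into per-byte
+     * 0x00/0xFF masks */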
+ __asm__ __volatile__ (
+ /* abs(p0 - q0) */
+ "subu_s.qb %[c], %[p0], %[q0] \n\t"
+ "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
+ "wrdsp %[r3] \n\t"
+ "or %[s1], %[r_k], %[c] \n\t"
+
+ /* abs(p1 - q1) */
+ "subu_s.qb %[c], %[p1], %[q1] \n\t"
+ "addu_s.qb %[s3], %[s1], %[s1] \n\t"
+ "pick.qb %[hev1], %[ones], $0 \n\t"
+ "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
+ "or %[s2], %[r_k], %[c] \n\t"
+
+ /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */
+ "shrl.qb %[s2], %[s2], 1 \n\t"
+ "addu_s.qb %[s1], %[s2], %[s3] \n\t"
+ "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
+ "or %[r], %[r], %[c] \n\t"
+ "sll %[r], %[r], 24 \n\t"
+
+ "wrdsp %[r] \n\t"
+ "pick.qb %[s2], $0, %[ones] \n\t"
+
+ : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1),
+ [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3)
+ : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3),
+ [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit)
+ );
+
+ *hev = hev1;
+ *mask = s2;
+}
+
+
+/* inputs & outputs are quad-byte vectors */
+static __inline void vp8_filter_mips
+(
+ uint32_t mask,
+ uint32_t hev,
+ uint32_t *ps1,
+ uint32_t *ps0,
+ uint32_t *qs0,
+ uint32_t *qs1
+)
+{
+ int32_t vp8_filter_l, vp8_filter_r;
+ int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
+ int32_t subr_r, subr_l;
+ uint32_t t1, t2, HWM, t3;
+ uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
+
+ int32_t vps1, vps0, vqs0, vqs1;
+ int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
+ uint32_t N128;
+
+ N128 = 0x80808080;
+ t1 = 0x03000300;
+ t2 = 0x04000400;
+ t3 = 0x01000100;
+ HWM = 0xFF00FF00;
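+    /* pixel data is kept in the high byte of each halfword, so the +3, +4
+     * and +1 rounding constants are pre-shifted left by 8 */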
+
+ vps0 = (*ps0) ^ N128;
+ vps1 = (*ps1) ^ N128;
+ vqs0 = (*qs0) ^ N128;
+ vqs1 = (*qs1) ^ N128;
+
+    /* use halfword pairs instead of quad-bytes for accuracy */
+ vps0_l = vps0 & HWM;
+ vps0_r = vps0 << 8;
+ vps0_r = vps0_r & HWM;
+
+ vps1_l = vps1 & HWM;
+ vps1_r = vps1 << 8;
+ vps1_r = vps1_r & HWM;
+
+ vqs0_l = vqs0 & HWM;
+ vqs0_r = vqs0 << 8;
+ vqs0_r = vqs0_r & HWM;
+
+ vqs1_l = vqs1 & HWM;
+ vqs1_r = vqs1 << 8;
+ vqs1_r = vqs1_r & HWM;
+
+ mask_l = mask & HWM;
+ mask_r = mask << 8;
+ mask_r = mask_r & HWM;
+
+ hev_l = hev & HWM;
+ hev_r = hev << 8;
+ hev_r = hev_r & HWM;
+
+ __asm__ __volatile__ (
+ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+
+ /* qs0 - ps0 */
+ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
+ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
+
+ /* vp8_filter &= hev; */
+ "and %[vp8_filter_l], %[vp8_filter_l], %[hev_l] \n\t"
+ "and %[vp8_filter_r], %[vp8_filter_r], %[hev_r] \n\t"
+
+ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t"
+ "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t"
+ "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t"
+
+ /* vp8_filter &= mask; */
+ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t"
+ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t"
+
+ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=&r" (vp8_filter_r),
+ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
+ [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
+
+ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
+ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r),
+ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r),
+ [mask_l] "r" (mask_l), [mask_r] "r" (mask_r),
+ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r),
+ [HWM] "r" (HWM)
+ );
+
+ /* save bottom 3 bits so that we round one side +4 and the other +3 */
+ __asm__ __volatile__ (
+        /* Filter1 = vp8_signed_char_clamp(vp8_filter + 4) >> 3; */
+ "addq_s.ph %[Filter1_l], %[vp8_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vp8_filter_r], %[t2] \n\t"
+
+        /* Filter2 = vp8_signed_char_clamp(vp8_filter + 3) >> 3; */
+ "addq_s.ph %[Filter2_l], %[vp8_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vp8_filter_r], %[t1] \n\t"
+ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
+ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
+
+ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
+ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"
+
+ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
+ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"
+
+ /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
+ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
+ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"
+
+ /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"
+
+ : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r),
+ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r),
+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
+
+ : [t1] "r" (t1), [t2] "r" (t2),
+ [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r),
+ [HWM] "r" (HWM)
+ );
+
+ __asm__ __volatile__ (
+ /* (vp8_filter += 1) >>= 1 */
+ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
+ "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
+
+ /* vp8_filter &= ~hev; */
+ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
+ "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
+
+ /* vps1 = vp8_signed_char_clamp(ps1 + vp8_filter); */
+ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
+ "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
+
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vp8_filter); */
+ "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
+ "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
+
+ : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r),
+ [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r),
+ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r)
+
+ : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r)
+ );
+
+ /* Create quad-bytes from halfword pairs */
+ vqs0_l = vqs0_l & HWM;
+ vqs1_l = vqs1_l & HWM;
+ vps0_l = vps0_l & HWM;
+ vps1_l = vps1_l & HWM;
+
+ __asm__ __volatile__ (
+ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
+ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
+ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
+ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"
+
+ : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r),
+ [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r)
+ :
+ );
+
+ vqs0 = vqs0_l | vqs0_r;
+ vqs1 = vqs1_l | vqs1_r;
+ vps0 = vps0_l | vps0_r;
+ vps1 = vps1_l | vps1_r;
+
+ *ps0 = vps0 ^ N128;
+ *ps1 = vps1 ^ N128;
+ *qs0 = vqs0 ^ N128;
+ *qs1 = vqs1 ^ N128;
+}
+
+void vp8_loop_filter_horizontal_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+ uint32_t mask;
+ uint32_t hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
+
+ mask = 0;
+ hev = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+
+ /* prefetch data for store */
+ prefetch_store_lf(s);
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+ sm1 = s - (p << 2);
+ s0 = s - p - p - p;
+ s1 = s - p - p ;
+ s2 = s - p;
+ s3 = s;
+ s4 = s + p;
+ s5 = s + p + p;
+ s6 = s + p + p + p;
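+    /* sm1..s6 address the eight rows p3, p2, p1, p0, q0, q1, q2, q3
+     * around the horizontal edge */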
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood */
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood */
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood */
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood */
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ }
+ }
+}
+
+void vp8_loop_filter_uvhorizontal_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+ uint32_t mask;
+ uint32_t hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
+
+ mask = 0;
+ hev = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+ sm1 = s - (p << 2);
+ s0 = s - p - p - p;
+    s1 = s - p - p;
+ s2 = s - p;
+ s3 = s;
+ s4 = s + p;
+ s5 = s + p + p;
+ s6 = s + p + p + p;
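+    /* sm1..s6 span the eight rows around the horizontal edge: four above
+     * (sm1, s0, s1, s2) and four below (s3..s6). The body is unrolled twice,
+     * 4 pixels per step, to cover the 8-pixel-wide chroma block; the count
+     * argument is unused here.
+     */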
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood */
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood */
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ }
+ }
+}
+
+void vp8_loop_filter_vertical_edge_mips
+(
+ unsigned char *s,
+ int p,
+ const unsigned int flimit,
+ const unsigned int limit,
+ const unsigned int thresh,
+ int count
+)
+{
+ int i;
+ uint32_t mask, hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *s1, *s2, *s3, *s4;
+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
+
+ hev = 0;
+ mask = 0;
+ i = 0;
+ pm1 = 0;
+ p0 = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+ p5 = 0;
+ p6 = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+    /* apply filter on 4 pixels at the same time */
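+    /* each pass of the loop below filters two 4-row strips, so i advances by 8 */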
+ do
+ {
+
+ /* prefetch data for store */
+ prefetch_store_lf(s + p);
+
+ s1 = s;
+ s2 = s + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+ s = s4 + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
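+        /* the precrq/precr pairs pick the high/low bytes of each halfword and
+         * the precrq.ph.w/append steps merge the halves, giving a 4x4 byte
+         * transpose: the four row registers now hold the four columns
+         */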
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+            /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood
+ * don't use transpose on output data
+ * because memory isn't aligned
+ */
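+                /* each sb group writes the four filtered pixels of one row
+                 * (offsets -2..1 around the edge); the srl-by-8 blocks move
+                 * the next row's bytes into the low byte position
+                 */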
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ : [p1] "+r" (p1)
+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+ }
+ }
+
+ s1 = s;
+ s2 = s + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+ s = s4 + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+            /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood
+ * don't use transpose on output data
+ * because memory isn't aligned
+ */
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ : [p1] "+r" (p1)
+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+ }
+ }
+
+ i += 8;
+    } while (i < count);
+}
+
+void vp8_loop_filter_uvvertical_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+ uint32_t mask, hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *s1, *s2, *s3, *s4;
+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+    /* apply filter on 4 pixels at the same time */
+
+ s1 = s;
+ s2 = s + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood
+ * don't use transpose on output data
+ * because memory isn't aligned
+ */
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ : [p1] "+r" (p1)
+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ :
+            : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+              [p2] "r" (p2), [p1] "r" (p1)
+ );
+ }
+ }
+
+ s1 = s4 + p;
+ s2 = s1 + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
+
+ /* unpack processed 4x4 neighborhood
+ * don't use transpose on output data
+ * because memory isn't aligned
+ */
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ : [p1] "+r" (p1)
+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ :
+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+ [p2] "r" (p2), [p1] "r" (p1)
+ );
+ }
+ }
+}
+
+/* inputs & outputs are quad-byte vectors */
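+/* Wide (macroblock-edge) filter: pixels with high edge variance get the
+ * normal +4/+3 adjustment on ps0/qs0 only; otherwise the filter value is
+ * spread across three pixels on each side using the 27, 18 and 9 taps (each
+ * rounded by 63 and shifted right by 7) computed below.
+ */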
+static __inline void vp8_mbfilter_mips
+(
+ uint32_t mask,
+ uint32_t hev,
+ uint32_t *ps2,
+ uint32_t *ps1,
+ uint32_t *ps0,
+ uint32_t *qs0,
+ uint32_t *qs1,
+ uint32_t *qs2
+)
+{
+ int32_t vps2, vps1, vps0, vqs0, vqs1, vqs2;
+ int32_t vps2_l, vps1_l, vps0_l, vqs0_l, vqs1_l, vqs2_l;
+ int32_t vps2_r, vps1_r, vps0_r, vqs0_r, vqs1_r, vqs2_r;
+ uint32_t HWM, vp8_filter_l, vp8_filter_r, mask_l, mask_r, hev_l, hev_r, subr_r, subr_l;
+ uint32_t Filter2_l, Filter2_r, t1, t2, Filter1_l, Filter1_r, invhev_l, invhev_r;
+ uint32_t N128, R63;
+ uint32_t u1_l, u1_r, u2_l, u2_r, u3_l, u3_r;
+
+ R63 = 0x003F003F;
+ HWM = 0xFF00FF00;
+ N128 = 0x80808080;
+ t1 = 0x03000300;
+ t2 = 0x04000400;
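+    /* R63: rounding constant 63 in each halfword;
+     * HWM: mask selecting the high byte of each halfword;
+     * N128: 0x80 per byte, XORed on entry/exit to map unsigned pixels to
+     *       signed values;
+     * t1/t2: the +3/+4 rounding terms, pre-shifted into the high bytes
+     */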
+
+ vps0 = (*ps0) ^ N128;
+ vps1 = (*ps1) ^ N128;
+ vps2 = (*ps2) ^ N128;
+ vqs0 = (*qs0) ^ N128;
+ vqs1 = (*qs1) ^ N128;
+ vqs2 = (*qs2) ^ N128;
+
+    /* use halfword pairs instead of quad-bytes: each byte sits in the high
+     * half of a 16-bit lane, so the saturating halfword instructions
+     * (addq_s.ph/subq_s.ph) emulate signed-byte saturation without losing
+     * precision
+     */
+ vps0_l = vps0 & HWM;
+ vps0_r = vps0 << 8;
+ vps0_r = vps0_r & HWM;
+
+ vqs0_l = vqs0 & HWM;
+ vqs0_r = vqs0 << 8;
+ vqs0_r = vqs0_r & HWM;
+
+ vps1_l = vps1 & HWM;
+ vps1_r = vps1 << 8;
+ vps1_r = vps1_r & HWM;
+
+ vqs1_l = vqs1 & HWM;
+ vqs1_r = vqs1 << 8;
+ vqs1_r = vqs1_r & HWM;
+
+ vqs2_l = vqs2 & HWM;
+ vqs2_r = vqs2 << 8;
+ vqs2_r = vqs2_r & HWM;
+
+ __asm__ __volatile__ (
+ /* qs0 - ps0 */
+ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
+ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
+
+ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+
+ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=r" (vp8_filter_r),
+ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r)
+ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
+ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r),
+ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r)
+ );
+
+ vps2_l = vps2 & HWM;
+ vps2_r = vps2 << 8;
+ vps2_r = vps2_r & HWM;
+
+ /* add outer taps if we have high edge variance */
+ __asm__ __volatile__ (
+ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t"
+ "and %[mask_l], %[HWM], %[mask] \n\t"
+ "sll %[mask_r], %[mask], 8 \n\t"
+ "and %[mask_r], %[HWM], %[mask_r] \n\t"
+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t"
+ "and %[hev_l], %[HWM], %[hev] \n\t"
+ "sll %[hev_r], %[hev], 8 \n\t"
+ "and %[hev_r], %[HWM], %[hev_r] \n\t"
+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t"
+
+ /* vp8_filter &= mask; */
+ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t"
+ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t"
+
+ /* Filter2 = vp8_filter & hev; */
+ "and %[Filter2_l], %[vp8_filter_l], %[hev_l] \n\t"
+ "and %[Filter2_r], %[vp8_filter_r], %[hev_r] \n\t"
+
+ : [vp8_filter_l] "+r" (vp8_filter_l), [vp8_filter_r] "+r" (vp8_filter_r),
+ [hev_l] "=&r" (hev_l), [hev_r] "=&r" (hev_r),
+ [mask_l] "=&r" (mask_l), [mask_r] "=&r" (mask_r),
+ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r)
+ : [subr_l] "r" (subr_l), [subr_r] "r" (subr_r),
+ [HWM] "r" (HWM), [hev] "r" (hev), [mask] "r" (mask)
+ );
+
+ /* save bottom 3 bits so that we round one side +4 and the other +3 */
+ __asm__ __volatile__ (
+ /* Filter1 = vp8_signed_char_clamp(Filter2 + 4) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[Filter2_l], %[t2] \n\t"
+ "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
+ "addq_s.ph %[Filter1_r], %[Filter2_r], %[t2] \n\t"
+
+ /* Filter2 = vp8_signed_char_clamp(Filter2 + 3) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[Filter2_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[Filter2_r], %[t1] \n\t"
+
+ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
+ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
+
+ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
+ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"
+ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
+ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"
+ "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
+
+ /* qs0 = vp8_signed_char_clamp(qs0 - Filter1); */
+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"
+
+ /* ps0 = vp8_signed_char_clamp(ps0 + Filter2); */
+ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
+ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"
+
+ : [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r),
+ [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r),
+ [Filter2_l] "+r" (Filter2_l), [Filter2_r] "+r" (Filter2_r),
+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
+ : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
+ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r)
+ );
+
+ /* only apply wider filter if not high edge variance */
+ __asm__ __volatile__ (
+ /* vp8_filter &= ~hev; */
+ "and %[Filter2_l], %[vp8_filter_l], %[invhev_l] \n\t"
+ "and %[Filter2_r], %[vp8_filter_r], %[invhev_r] \n\t"
+
+ "shra.ph %[Filter2_l], %[Filter2_l], 8 \n\t"
+ "shra.ph %[Filter2_r], %[Filter2_r], 8 \n\t"
+
+ : [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r)
+ : [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r),
+ [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r)
+ );
+
+ /* roughly 3/7th difference across boundary */
+ __asm__ __volatile__ (
+ "shll.ph %[u3_l], %[Filter2_l], 3 \n\t"
+ "shll.ph %[u3_r], %[Filter2_r], 3 \n\t"
+
+ "addq.ph %[u3_l], %[u3_l], %[Filter2_l] \n\t"
+ "addq.ph %[u3_r], %[u3_r], %[Filter2_r] \n\t"
+
+ "shll.ph %[u2_l], %[u3_l], 1 \n\t"
+ "shll.ph %[u2_r], %[u3_r], 1 \n\t"
+
+ "addq.ph %[u1_l], %[u3_l], %[u2_l] \n\t"
+ "addq.ph %[u1_r], %[u3_r], %[u2_r] \n\t"
+
+ "addq.ph %[u2_l], %[u2_l], %[R63] \n\t"
+ "addq.ph %[u2_r], %[u2_r], %[R63] \n\t"
+
+ "addq.ph %[u3_l], %[u3_l], %[R63] \n\t"
+ "addq.ph %[u3_r], %[u3_r], %[R63] \n\t"
+
+ /* vp8_signed_char_clamp((63 + Filter2 * 27) >> 7)
+ * vp8_signed_char_clamp((63 + Filter2 * 18) >> 7)
+ */
+ "addq.ph %[u1_l], %[u1_l], %[R63] \n\t"
+ "addq.ph %[u1_r], %[u1_r], %[R63] \n\t"
+ "shra.ph %[u1_l], %[u1_l], 7 \n\t"
+ "shra.ph %[u1_r], %[u1_r], 7 \n\t"
+ "shra.ph %[u2_l], %[u2_l], 7 \n\t"
+ "shra.ph %[u2_r], %[u2_r], 7 \n\t"
+ "shll.ph %[u1_l], %[u1_l], 8 \n\t"
+ "shll.ph %[u1_r], %[u1_r], 8 \n\t"
+ "shll.ph %[u2_l], %[u2_l], 8 \n\t"
+ "shll.ph %[u2_r], %[u2_r], 8 \n\t"
+
+ /* vqs0 = vp8_signed_char_clamp(qs0 - u); */
+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[u1_l] \n\t"
+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[u1_r] \n\t"
+
+ /* vps0 = vp8_signed_char_clamp(ps0 + u); */
+ "addq_s.ph %[vps0_l], %[vps0_l], %[u1_l] \n\t"
+ "addq_s.ph %[vps0_r], %[vps0_r], %[u1_r] \n\t"
+
+ : [u1_l] "=&r" (u1_l), [u1_r] "=&r" (u1_r), [u2_l] "=&r" (u2_l),
+ [u2_r] "=&r" (u2_r), [u3_l] "=&r" (u3_l), [u3_r] "=&r" (u3_r),
+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
+ : [R63] "r" (R63),
+ [Filter2_l] "r" (Filter2_l), [Filter2_r] "r" (Filter2_r)
+ );
+
+ __asm__ __volatile__ (
+ /* vqs1 = vp8_signed_char_clamp(qs1 - u); */
+ "subq_s.ph %[vqs1_l], %[vqs1_l], %[u2_l] \n\t"
+ "addq_s.ph %[vps1_l], %[vps1_l], %[u2_l] \n\t"
+
+ /* vps1 = vp8_signed_char_clamp(ps1 + u); */
+ "addq_s.ph %[vps1_r], %[vps1_r], %[u2_r] \n\t"
+ "subq_s.ph %[vqs1_r], %[vqs1_r], %[u2_r] \n\t"
+
+ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r),
+ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r)
+ : [u2_l] "r" (u2_l), [u2_r] "r" (u2_r)
+ );
+
+ /* roughly 1/7th difference across boundary */
+ __asm__ __volatile__ (
+ /* u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); */
+ "shra.ph %[u3_l], %[u3_l], 7 \n\t"
+ "shra.ph %[u3_r], %[u3_r], 7 \n\t"
+ "shll.ph %[u3_l], %[u3_l], 8 \n\t"
+ "shll.ph %[u3_r], %[u3_r], 8 \n\t"
+
+ /* vqs2 = vp8_signed_char_clamp(qs2 - u); */
+ "subq_s.ph %[vqs2_l], %[vqs2_l], %[u3_l] \n\t"
+ "subq_s.ph %[vqs2_r], %[vqs2_r], %[u3_r] \n\t"
+
+ /* vps2 = vp8_signed_char_clamp(ps2 + u); */
+ "addq_s.ph %[vps2_l], %[vps2_l], %[u3_l] \n\t"
+ "addq_s.ph %[vps2_r], %[vps2_r], %[u3_r] \n\t"
+
+ : [u3_l] "+r" (u3_l), [u3_r] "+r" (u3_r), [vps2_l] "+r" (vps2_l),
+ [vps2_r] "+r" (vps2_r), [vqs2_l] "+r" (vqs2_l), [vqs2_r] "+r" (vqs2_r)
+ :
+ );
+
+ /* Create quad-bytes from halfword pairs */
+ __asm__ __volatile__ (
+ "and %[vqs0_l], %[vqs0_l], %[HWM] \n\t"
+ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
+
+ "and %[vps0_l], %[vps0_l], %[HWM] \n\t"
+ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
+
+ "and %[vqs1_l], %[vqs1_l], %[HWM] \n\t"
+ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
+
+ "and %[vps1_l], %[vps1_l], %[HWM] \n\t"
+ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"
+
+ "and %[vqs2_l], %[vqs2_l], %[HWM] \n\t"
+ "shrl.ph %[vqs2_r], %[vqs2_r], 8 \n\t"
+
+ "and %[vps2_l], %[vps2_l], %[HWM] \n\t"
+ "shrl.ph %[vps2_r], %[vps2_r], 8 \n\t"
+
+ "or %[vqs0_r], %[vqs0_l], %[vqs0_r] \n\t"
+ "or %[vps0_r], %[vps0_l], %[vps0_r] \n\t"
+ "or %[vqs1_r], %[vqs1_l], %[vqs1_r] \n\t"
+ "or %[vps1_r], %[vps1_l], %[vps1_r] \n\t"
+ "or %[vqs2_r], %[vqs2_l], %[vqs2_r] \n\t"
+ "or %[vps2_r], %[vps2_l], %[vps2_r] \n\t"
+
+ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), [vqs1_l] "+r" (vqs1_l),
+ [vqs1_r] "+r" (vqs1_r), [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r), [vqs2_l] "+r" (vqs2_l),
+ [vqs2_r] "+r" (vqs2_r), [vps2_r] "+r" (vps2_r), [vps2_l] "+r" (vps2_l)
+ : [HWM] "r" (HWM)
+ );
+
+ *ps0 = vps0_r ^ N128;
+ *ps1 = vps1_r ^ N128;
+ *ps2 = vps2_r ^ N128;
+ *qs0 = vqs0_r ^ N128;
+ *qs1 = vqs1_r ^ N128;
+ *qs2 = vqs2_r ^ N128;
+}
+
+void vp8_mbloop_filter_horizontal_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+ int i;
+ uint32_t mask, hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
+
+ mask = 0;
+ hev = 0;
+ i = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+ sm1 = s - (p << 2);
+ s0 = s - p - p - p;
+ s1 = s - p - p;
+ s2 = s - p;
+ s3 = s;
+ s4 = s + p;
+ s5 = s + p + p;
+ s6 = s + p + p + p;
+
+ /* prefetch data for load */
+ prefetch_load_lf(s + p);
+
+    /* apply filter on 4 pixels at the same time */
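+    /* each pass filters two adjacent groups of 4 pixels, so i advances by 8 */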
+ do
+ {
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+            /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* unpack processed 4x4 neighborhood
+ * memory is 4 byte aligned
+ */
+ *((uint32_t *)s0) = p0;
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ *((uint32_t *)s5) = p5;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+            /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* unpack processed 4x4 neighborhood
+ * memory is 4 byte aligned
+ */
+ *((uint32_t *)s0) = p0;
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ *((uint32_t *)s5) = p5;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ i += 8;
+    } while (i < count);
+}
+
+void vp8_mbloop_filter_uvhorizontal_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+ uint32_t mask, hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
+
+ mask = 0;
+ hev = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+ sm1 = s - (p << 2);
+ s0 = s - p - p - p;
+ s1 = s - p - p;
+ s2 = s - p;
+ s3 = s;
+ s4 = s + p;
+ s5 = s + p + p;
+ s6 = s + p + p + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+        vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+                                 thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+        if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* unpack processed 4x4 neighborhood
+ * memory is 4 byte aligned
+ */
+ *((uint32_t *)s0) = p0;
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ *((uint32_t *)s5) = p5;
+ }
+ }
+
+ sm1 += 4;
+ s0 += 4;
+ s1 += 4;
+ s2 += 4;
+ s3 += 4;
+ s4 += 4;
+ s5 += 4;
+ s6 += 4;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p1 = *((uint32_t *)(s1));
+ p2 = *((uint32_t *)(s2));
+ p3 = *((uint32_t *)(s3));
+ p4 = *((uint32_t *)(s4));
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ pm1 = *((uint32_t *)(sm1));
+ p0 = *((uint32_t *)(s0));
+ p5 = *((uint32_t *)(s5));
+ p6 = *((uint32_t *)(s6));
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* unpack processed 4x4 neighborhood
+ * memory is 4 byte aligned
+ */
+ *((uint32_t *)s0) = p0;
+ *((uint32_t *)s1) = p1;
+ *((uint32_t *)s2) = p2;
+ *((uint32_t *)s3) = p3;
+ *((uint32_t *)s4) = p4;
+ *((uint32_t *)s5) = p5;
+ }
+ }
+}
+
+
+void vp8_mbloop_filter_vertical_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+
+ int i;
+ uint32_t mask, hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *s1, *s2, *s3, *s4;
+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
+
+ mask = 0;
+ hev = 0;
+ i = 0;
+ pm1 = 0;
+ p0 = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+ p5 = 0;
+ p6 = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+    /* apply filter on 4 pixels at the same time */
+ do
+ {
+ s1 = s;
+ s2 = s + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+ s = s4 + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+            /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* don't use transpose on output data
+ * because memory isn't aligned
+ */
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s4]) \n\t"
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ "sb %[p0], -3(%[s4]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s3]) \n\t"
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ "sb %[p0], -3(%[s3]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s2]) \n\t"
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ "sb %[p0], -3(%[s2]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s1]) \n\t"
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ "sb %[p0], -3(%[s1]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+ }
+ }
+
+ i += 4;
+    } while (i < count);
+}
+
+void vp8_mbloop_filter_uvvertical_edge_mips
+(
+ unsigned char *s,
+ int p,
+ unsigned int flimit,
+ unsigned int limit,
+ unsigned int thresh,
+ int count
+)
+{
+ uint32_t mask, hev;
+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
+ unsigned char *s1, *s2, *s3, *s4;
+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
+
+ mask = 0;
+ hev = 0;
+ pm1 = 0;
+ p0 = 0;
+ p1 = 0;
+ p2 = 0;
+ p3 = 0;
+ p4 = 0;
+ p5 = 0;
+ p6 = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+
+    /* apply filter on 4 pixels at the same time */
+
+ s1 = s;
+ s2 = s + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+
+ /* prefetch data for load */
+ prefetch_load_lf(s + 2 * p);
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+ thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* don't use transpose on output data
+ * because memory isn't aligned
+ */
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s4]) \n\t"
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ "sb %[p0], -3(%[s4]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s3]) \n\t"
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ "sb %[p0], -3(%[s3]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s2]) \n\t"
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ "sb %[p0], -3(%[s2]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s1]) \n\t"
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ "sb %[p0], -3(%[s1]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+ }
+ }
+
+ s1 = s4 + p;
+ s2 = s1 + p;
+ s3 = s2 + p;
+ s4 = s3 + p;
+
+ /* load quad-byte vectors
+ * memory is 4 byte aligned
+ */
+ p2 = *((uint32_t *)(s1 - 4));
+ p6 = *((uint32_t *)(s1));
+ p1 = *((uint32_t *)(s2 - 4));
+ p5 = *((uint32_t *)(s2));
+ p0 = *((uint32_t *)(s3 - 4));
+ p4 = *((uint32_t *)(s3));
+ pm1 = *((uint32_t *)(s4 - 4));
+ p3 = *((uint32_t *)(s4));
+
+ /* transpose pm1, p0, p1, p2 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
+
+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
+ "append %[p1], %[sec3], 16 \n\t"
+ "append %[pm1], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* transpose p3, p4, p5, p6 */
+ __asm__ __volatile__ (
+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
+
+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
+
+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
+ "append %[p5], %[sec3], 16 \n\t"
+ "append %[p3], %[sec4], 16 \n\t"
+
+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2),
+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4),
+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4)
+ :
+ );
+
+ /* if (p1 - p4 == 0) and (p2 - p3 == 0)
+ * mask will be zero and filtering is not needed
+ */
+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0)))
+ {
+
+        vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
+                                 thresh, &hev, &mask);
+
+        /* if mask == 0, filtering is not needed */
+ if (mask)
+ {
+ /* filtering */
+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5);
+
+ /* don't use transpose on output data
+ * because memory isn't aligned
+ */
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s4]) \n\t"
+ "sb %[p4], 1(%[s4]) \n\t"
+ "sb %[p3], 0(%[s4]) \n\t"
+ "sb %[p2], -1(%[s4]) \n\t"
+ "sb %[p1], -2(%[s4]) \n\t"
+ "sb %[p0], -3(%[s4]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s3]) \n\t"
+ "sb %[p4], 1(%[s3]) \n\t"
+ "sb %[p3], 0(%[s3]) \n\t"
+ "sb %[p2], -1(%[s3]) \n\t"
+ "sb %[p1], -2(%[s3]) \n\t"
+ "sb %[p0], -3(%[s3]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s2]) \n\t"
+ "sb %[p4], 1(%[s2]) \n\t"
+ "sb %[p3], 0(%[s2]) \n\t"
+ "sb %[p2], -1(%[s2]) \n\t"
+ "sb %[p1], -2(%[s2]) \n\t"
+ "sb %[p0], -3(%[s2]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+
+ __asm__ __volatile__ (
+ "srl %[p5], %[p5], 8 \n\t"
+ "srl %[p4], %[p4], 8 \n\t"
+ "srl %[p3], %[p3], 8 \n\t"
+ "srl %[p2], %[p2], 8 \n\t"
+ "srl %[p1], %[p1], 8 \n\t"
+ "srl %[p0], %[p0], 8 \n\t"
+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3),
+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0)
+ :
+ );
+
+ __asm__ __volatile__ (
+ "sb %[p5], 2(%[s1]) \n\t"
+ "sb %[p4], 1(%[s1]) \n\t"
+ "sb %[p3], 0(%[s1]) \n\t"
+ "sb %[p2], -1(%[s1]) \n\t"
+ "sb %[p1], -2(%[s1]) \n\t"
+ "sb %[p0], -3(%[s1]) \n\t"
+ :
+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1),
+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0)
+ );
+ }
+ }
+}
+
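+/* The _dspr2 entry points below are what the rest of the codec calls: each
+ * broadcasts the scalar filter limits into quad-byte vectors with replv.qb
+ * and dispatches to the matching edge filters above.
+ */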
+/* Horizontal MB Filtering */
+void vp8_loop_filter_mbh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+ unsigned int thresh_vec, flimit_vec, limit_vec;
+ unsigned char thresh, flimit, limit, flimit_temp;
+
+    /* use direct values instead of pointers */
+ limit = *(lfi->lim);
+ flimit_temp = *(lfi->mblim);
+ thresh = *(lfi->hev_thr);
+ flimit = flimit_temp;
+
+ /* create quad-byte */
+ __asm__ __volatile__ (
+ "replv.qb %[thresh_vec], %[thresh] \n\t"
+ "replv.qb %[flimit_vec], %[flimit] \n\t"
+ "replv.qb %[limit_vec], %[limit] \n\t"
+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec)
+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit)
+ );
+
+ vp8_mbloop_filter_horizontal_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+
+ if (u_ptr)
+ {
+ vp8_mbloop_filter_uvhorizontal_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+ }
+
+ if (v_ptr)
+ {
+ vp8_mbloop_filter_uvhorizontal_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+ }
+}
+
+
+/* Vertical MB Filtering */
+void vp8_loop_filter_mbv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+ unsigned int thresh_vec, flimit_vec, limit_vec;
+ unsigned char thresh, flimit, limit, flimit_temp;
+
+    /* use direct values instead of pointers */
+ limit = *(lfi->lim);
+ flimit_temp = *(lfi->mblim);
+ thresh = *(lfi->hev_thr);
+ flimit = flimit_temp;
+
+ /* create quad-byte */
+ __asm__ __volatile__ (
+ "replv.qb %[thresh_vec], %[thresh] \n\t"
+ "replv.qb %[flimit_vec], %[flimit] \n\t"
+ "replv.qb %[limit_vec], %[limit] \n\t"
+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec)
+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit)
+ );
+
+ vp8_mbloop_filter_vertical_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+
+ if (u_ptr)
+ vp8_mbloop_filter_uvvertical_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+
+ if (v_ptr)
+ vp8_mbloop_filter_uvvertical_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+}
+
+
+/* Horizontal B Filtering */
+void vp8_loop_filter_bh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+ unsigned int thresh_vec, flimit_vec, limit_vec;
+ unsigned char thresh, flimit, limit, flimit_temp;
+
+    /* use direct values instead of pointers */
+ limit = *(lfi->lim);
+ flimit_temp = *(lfi->blim);
+ thresh = *(lfi->hev_thr);
+ flimit = flimit_temp;
+
+ /* create quad-byte */
+ __asm__ __volatile__ (
+ "replv.qb %[thresh_vec], %[thresh] \n\t"
+ "replv.qb %[flimit_vec], %[flimit] \n\t"
+ "replv.qb %[limit_vec], %[limit] \n\t"
+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec)
+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit)
+ );
+
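+    /* three interior horizontal edges of the 16x16 luma block (rows 4, 8, 12) */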
+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 4 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 8 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 12 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+
+ if (u_ptr)
+ vp8_loop_filter_uvhorizontal_edge_mips(u_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+
+ if (v_ptr)
+ vp8_loop_filter_uvhorizontal_edge_mips(v_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+}
+
+
+/* Vertical B Filtering */
+void vp8_loop_filter_bv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+ unsigned int thresh_vec, flimit_vec, limit_vec;
+ unsigned char thresh, flimit, limit, flimit_temp;
+
+    /* use direct values instead of pointers */
+ limit = *(lfi->lim);
+ flimit_temp = *(lfi->blim);
+ thresh = *(lfi->hev_thr);
+ flimit = flimit_temp;
+
+ /* create quad-byte */
+ __asm__ __volatile__ (
+ "replv.qb %[thresh_vec], %[thresh] \n\t"
+ "replv.qb %[flimit_vec], %[flimit] \n\t"
+ "replv.qb %[limit_vec], %[limit] \n\t"
+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec)
+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit)
+ );
+
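+    /* three interior vertical edges of the 16x16 luma block (columns 4, 8, 12) */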
+ vp8_loop_filter_vertical_edge_mips(y_ptr + 4, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+ vp8_loop_filter_vertical_edge_mips(y_ptr + 8, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+ vp8_loop_filter_vertical_edge_mips(y_ptr + 12, y_stride, flimit_vec, limit_vec, thresh_vec, 16);
+
+ if (u_ptr)
+ vp8_loop_filter_uvvertical_edge_mips(u_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+
+ if (v_ptr)
+ vp8_loop_filter_uvvertical_edge_mips(v_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0);
+}
+
+#endif
diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c
new file mode 100644
index 0000000..a5239a3
--- /dev/null
+++ b/vp8/common/mips/dspr2/reconinter_dspr2.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#if HAVE_DSPR2
+inline void prefetch_load_int(unsigned char *src)
+{
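+    /* "pref" with hint 0 requests the cache line be fetched for a load */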
+ __asm__ __volatile__ (
+ "pref 0, 0(%[src]) \n\t"
+ :
+ : [src] "r" (src)
+ );
+}
+
+
+__inline void vp8_copy_mem16x16_dspr2(
+ unsigned char *RESTRICT src,
+ int src_stride,
+ unsigned char *RESTRICT dst,
+ int dst_stride)
+{
+ int r;
+ unsigned int a0, a1, a2, a3;
+
+ for (r = 16; r--;)
+ {
+        /* prefetch the next row of src into the cache */
+ prefetch_load_int(src + src_stride);
+
+        /* unaligned word loads from src; dst is assumed word-aligned for sw */
+ __asm__ __volatile__ (
+ "ulw %[a0], 0(%[src]) \n\t"
+ "ulw %[a1], 4(%[src]) \n\t"
+ "ulw %[a2], 8(%[src]) \n\t"
+ "ulw %[a3], 12(%[src]) \n\t"
+ "sw %[a0], 0(%[dst]) \n\t"
+ "sw %[a1], 4(%[dst]) \n\t"
+ "sw %[a2], 8(%[dst]) \n\t"
+ "sw %[a3], 12(%[dst]) \n\t"
+ : [a0] "=&r" (a0), [a1] "=&r" (a1),
+ [a2] "=&r" (a2), [a3] "=&r" (a3)
+ : [src] "r" (src), [dst] "r" (dst)
+ );
+
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+
+__inline void vp8_copy_mem8x8_dspr2(
+ unsigned char *RESTRICT src,
+ int src_stride,
+ unsigned char *RESTRICT dst,
+ int dst_stride)
+{
+ int r;
+ unsigned int a0, a1;
+
+ /* load src data in cache memory */
+ prefetch_load_int(src + src_stride);
+
+ for (r = 8; r--;)
+ {
+ /* use unaligned memory load and store */
+ __asm__ __volatile__ (
+ "ulw %[a0], 0(%[src]) \n\t"
+ "ulw %[a1], 4(%[src]) \n\t"
+ "sw %[a0], 0(%[dst]) \n\t"
+ "sw %[a1], 4(%[dst]) \n\t"
+ : [a0] "=&r" (a0), [a1] "=&r" (a1)
+ : [src] "r" (src), [dst] "r" (dst)
+ );
+
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+
+__inline void vp8_copy_mem8x4_dspr2(
+ unsigned char *RESTRICT src,
+ int src_stride,
+ unsigned char *RESTRICT dst,
+ int dst_stride)
+{
+ int r;
+ unsigned int a0, a1;
+
+ /* load src data in cache memory */
+ prefetch_load_int(src + src_stride);
+
+ for (r = 4; r--;)
+ {
+ /* use unaligned memory load and store */
+ __asm__ __volatile__ (
+ "ulw %[a0], 0(%[src]) \n\t"
+ "ulw %[a1], 4(%[src]) \n\t"
+ "sw %[a0], 0(%[dst]) \n\t"
+ "sw %[a1], 4(%[dst]) \n\t"
+ : [a0] "=&r" (a0), [a1] "=&r" (a1)
+ : [src] "r" (src), [dst] "r" (dst)
+ );
+
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+#endif
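
These copies pair "ulw", the DSPr2 unaligned word load, with plain "sw" stores, since the destination is known to be word-aligned. A rough portable sketch of the 8x4 variant, assuming only that memcpy lets the compiler pick the best access sequence for the target (hypothetical helper, not library code):

    #include <string.h>

    static void copy_mem8x4_portable(const unsigned char *src, int src_stride,
                                     unsigned char *dst, int dst_stride)
    {
        int r;
        for (r = 0; r < 4; r++)
        {
            memcpy(dst, src, 8);  /* one 8-pixel row, alignment-agnostic */
            src += src_stride;
            dst += dst_stride;
        }
    }
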
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 2e282f6..766b4ea 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -39,14 +39,6 @@ extern "C"
typedef enum
{
- VP8_LAST_FLAG = 1,
- VP8_GOLD_FLAG = 2,
- VP8_ALT_FLAG = 4
- } VP8_REFFRAME;
-
-
- typedef enum
- {
USAGE_STREAM_FROM_SERVER = 0x0,
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2
@@ -102,83 +94,101 @@ extern "C"
typedef struct
{
- int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode
- int Width; // width of data passed to the compressor
- int Height; // height of data passed to the compressor
+ /* 4 versions of bitstream defined:
+ * 0 best quality/slowest decode, 3 lowest quality/fastest decode
+ */
+ int Version;
+ int Width;
+ int Height;
struct vpx_rational timebase;
- int target_bandwidth; // bandwidth to be used in kilobits per second
+ unsigned int target_bandwidth; /* kilobits per second */
+
+ /* parameter used for applying pre processing blur: recommendation 0 */
+ int noise_sensitivity;
- int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
- int Sharpness; // parameter used for sharpening output: recommendation 0:
+    /* parameter used for sharpening output: recommendation 0 */
+ int Sharpness;
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
- // mode ->
- //(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing
- // a television signal or feed from a live camera). ( speed setting controls how fast )
- //(1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to
- // encode the output. ( speed setting controls how fast )
- //(2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding
- // speed. The output is compressed at the highest possible quality. This option takes the longest
- // amount of time to encode. ( speed setting ignored )
- //(3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding
- // pass. ( speed setting controls how fast )
- //(4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding
- // pass to create the compressed output. ( speed setting controls how fast )
- //(5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first
- // encoding pass to create the compressed output using the highest possible quality, and taking a
- // longer amount of time to encode.. ( speed setting ignored )
- int Mode; //
-
- // Key Framing Operations
- int auto_key; // automatically detect cut scenes and set the keyframes
- int key_freq; // maximum distance to key frame.
-
- int allow_lag; // allow lagged compression (if 0 lagin frames is ignored)
- int lag_in_frames; // how many frames lag before we start encoding
-
- //----------------------------------------------------------------
- // DATARATE CONTROL OPTIONS
-
- int end_usage; // vbr or cbr
-
- // buffer targeting aggressiveness
+ /* mode ->
+        *(0)=Realtime/Live Encoding. This mode is optimized for realtime
+ * encoding (for example, capturing a television signal or feed
+ * from a live camera). ( speed setting controls how fast )
+ *(1)=Good Quality Fast Encoding. The encoder balances quality with
+ * the amount of time it takes to encode the output. ( speed
+ * setting controls how fast )
+ *(2)=One Pass - Best Quality. The encoder places priority on the
+ * quality of the output over encoding speed. The output is
+ * compressed at the highest possible quality. This option takes
+        *    the longest amount of time to encode. ( speed setting ignored )
+ *(3)=Two Pass - First Pass. The encoder generates a file of
+ * statistics for use in the second encoding pass. ( speed
+ * setting controls how fast )
+ *(4)=Two Pass - Second Pass. The encoder uses the statistics that
+ * were generated in the first encoding pass to create the
+ * compressed output. ( speed setting controls how fast )
+ *(5)=Two Pass - Second Pass Best. The encoder uses the statistics
+ * that were generated in the first encoding pass to create the
+ * compressed output using the highest possible quality, and
+        *    taking a longer amount of time to encode. ( speed setting
+ * ignored )
+ */
+ int Mode;
+
+ /* Key Framing Operations */
+ int auto_key; /* automatically detect cut scenes */
+ int key_freq; /* maximum distance to key frame. */
+
+ /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
+ int allow_lag;
+ int lag_in_frames; /* how many frames lag before we start encoding */
+
+ /*
+ * DATARATE CONTROL OPTIONS
+ */
+
+ int end_usage; /* vbr or cbr */
+
+ /* buffer targeting aggressiveness */
int under_shoot_pct;
int over_shoot_pct;
- // buffering parameters
- int64_t starting_buffer_level; // in bytes
+ /* buffering parameters */
+ int64_t starting_buffer_level;
int64_t optimal_buffer_level;
int64_t maximum_buffer_size;
- int64_t starting_buffer_level_in_ms; // in milli-seconds
+ int64_t starting_buffer_level_in_ms;
int64_t optimal_buffer_level_in_ms;
int64_t maximum_buffer_size_in_ms;
- // controlling quality
+ /* controlling quality */
int fixed_q;
int worst_allowed_q;
int best_allowed_q;
int cq_level;
- // allow internal resizing ( currently disabled in the build !!!!!)
+ /* allow internal resizing */
int allow_spatial_resampling;
int resample_down_water_mark;
int resample_up_water_mark;
- // allow internal frame rate alterations
+ /* allow internal frame rate alterations */
int allow_df;
int drop_frames_water_mark;
- // two pass datarate control
- int two_pass_vbrbias; // two pass datarate control tweaks
+ /* two pass datarate control */
+ int two_pass_vbrbias;
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
- // END DATARATE CONTROL OPTIONS
- //----------------------------------------------------------------
+ /*
+ * END DATARATE CONTROL OPTIONS
+ */
- // these parameters aren't to be used in final build don't use!!!
+    /* these parameters aren't to be used in the final build; don't use!!! */
int play_alternate;
int alt_freq;
int alt_q;
@@ -186,26 +196,28 @@ extern "C"
int gold_q;
- int multi_threaded; // how many threads to run the encoder on
- int token_partitions; // how many token partitions to create for multi core decoding
- int encode_breakout; // early breakout encode threshold : for video conf recommend 800
+ int multi_threaded; /* how many threads to run the encoder on */
+ int token_partitions; /* how many token partitions to create */
+
+ /* early breakout threshold: for video conf recommend 800 */
+ int encode_breakout;
- unsigned int error_resilient_mode; // Bitfield defining the error
- // resiliency features to enable. Can provide
- // decodable frames after losses in previous
- // frames and decodable partitions after
- // losses in the same frame.
+ /* Bitfield defining the error resiliency features to enable.
+ * Can provide decodable frames after losses in previous
+ * frames and decodable partitions after losses in the same frame.
+ */
+ unsigned int error_resilient_mode;
int arnr_max_frames;
- int arnr_strength ;
- int arnr_type ;
+ int arnr_strength;
+ int arnr_type;
- struct vpx_fixed_buf two_pass_stats_in;
+ struct vpx_fixed_buf two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
- // Temporal scaling parameters
+ /* Temporal scaling parameters */
unsigned int number_of_layers;
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
@@ -236,16 +248,14 @@ extern "C"
void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
-// receive a frames worth of data caller can assume that a copy of this frame is made
-// and not just a copy of the pointer..
int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
- int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
- int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8_update_entropy(struct VP8_COMP* comp, int update);
int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
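
The Mode values documented above are internal to VP8_CONFIG; applications reach the equivalent behaviour through the public encoder API, where the pass is selected via cfg.g_pass. A hedged sketch of initializing the first of the two passes (Mode (3) above), assuming the standard libvpx entry points:

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    static int init_first_pass(vpx_codec_ctx_t *codec,
                               unsigned int w, unsigned int h)
    {
        vpx_codec_enc_cfg_t cfg;

        if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &cfg, 0))
            return -1;

        cfg.g_w = w;
        cfg.g_h = h;
        cfg.g_pass = VPX_RC_FIRST_PASS;  /* analogous to Mode (3) above */

        return vpx_codec_enc_init(codec, vpx_codec_vp8_cx(), &cfg, 0) ? -1 : 0;
    }
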
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index c3215c0..5325bac 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -42,7 +42,6 @@ typedef struct frame_contexts
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
MV_CONTEXT mvc[2];
- MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */
} FRAME_CONTEXT;
typedef enum
@@ -59,12 +58,6 @@ typedef enum
RECON_CLAMP_NOTREQUIRED = 1
} CLAMP_TYPE;
-typedef enum
-{
- SIXTAP = 0,
- BILINEAR = 1
-} INTERPOLATIONFILTERTYPE;
-
typedef struct VP8Common
{
@@ -94,6 +87,7 @@ typedef struct VP8Common
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
int post_proc_buffer_int_used;
+ unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
#endif
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
@@ -114,7 +108,6 @@ typedef struct VP8Common
int full_pixel;
int base_qindex;
- int last_kf_gf_q; /* Q used on the last GF or KF */
int y1dc_delta_q;
int y2dc_delta_q;
@@ -130,11 +123,11 @@ typedef struct VP8Common
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
+#if CONFIG_ERROR_CONCEALMENT
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
+#endif
-
- INTERPOLATIONFILTERTYPE mcomp_filter_type;
LOOPFILTERTYPE filter_type;
loop_filter_info_n lf_info;
@@ -158,14 +151,6 @@ typedef struct VP8Common
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
-
- /* keyframe block modes are predicted by their above, left neighbors */
-
- vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
- vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */
- vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
-
-
FRAME_CONTEXT lfc; /* last frame entropy */
FRAME_CONTEXT fc; /* this frame entropy */
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 35a8b6e..fd7e051 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -22,6 +22,7 @@ extern "C"
#include "ppflags.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_codec.h"
+#include "vpx/vp8.h"
struct VP8D_COMP;
@@ -35,12 +36,6 @@ extern "C"
int error_concealment;
int input_fragments;
} VP8D_CONFIG;
- typedef enum
- {
- VP8_LAST_FLAG = 1,
- VP8_GOLD_FLAG = 2,
- VP8_ALT_FLAG = 4
- } VP8_REFFRAME;
typedef enum
{
@@ -53,11 +48,13 @@ extern "C"
int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
- int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
+ int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
+ size_t size, const uint8_t *dest,
+ int64_t time_stamp);
int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
- vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
- vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+ vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
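
With the duplicated VP8_REFFRAME enums removed, both encoder and decoder now take enum vpx_ref_frame_type from vpx/vp8.h, the same type applications see through the control interface. A small sketch of copying the last reference frame out of a decoder (assumes an initialized context and an image already allocated in ref->img):

    #include "vpx/vp8.h"
    #include "vpx/vpx_decoder.h"

    static vpx_codec_err_t grab_last_ref(vpx_codec_ctx_t *decoder,
                                         vpx_ref_frame_t *ref)
    {
        ref->frame_type = VP8_LAST_FRAME;  /* a vpx_ref_frame_type value */
        return vpx_codec_control(decoder, VP8_COPY_REFERENCE, ref);
    }
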
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index ccf6ad7..80fa530 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -127,27 +127,24 @@ extern void vp8_blit_text(const char *msg, unsigned char *address, const int pit
extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
/***********************************************************************************************************
*/
-void vp8_post_proc_down_and_across_c
+void vp8_post_proc_down_and_across_mb_row_c
(
unsigned char *src_ptr,
unsigned char *dst_ptr,
int src_pixels_per_line,
int dst_pixels_per_line,
- int rows,
int cols,
- int flimit
+ unsigned char *f,
+ int size
)
{
unsigned char *p_src, *p_dst;
int row;
int col;
- int i;
- int v;
- int pitch = src_pixels_per_line;
- unsigned char d[8];
- (void)dst_pixels_per_line;
+ unsigned char v;
+ unsigned char d[4];
- for (row = 0; row < rows; row++)
+ for (row = 0; row < size; row++)
{
/* post_proc_down for one row */
p_src = src_ptr;
@@ -155,20 +152,23 @@ void vp8_post_proc_down_and_across_c
for (col = 0; col < cols; col++)
{
+ unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line];
+ unsigned char p_above1 = p_src[col - src_pixels_per_line];
+ unsigned char p_below1 = p_src[col + src_pixels_per_line];
+ unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line];
- int kernel = 4;
- int v = p_src[col];
+ v = p_src[col];
- for (i = -2; i <= 2; i++)
+ if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col])
+ && (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col]))
{
- if (abs(v - p_src[col+i*pitch]) > flimit)
- goto down_skip_convolve;
-
- kernel += kernel5[2+i] * p_src[col+i*pitch];
+ unsigned char k1, k2, k3;
+ k1 = (p_above2 + p_above1 + 1) >> 1;
+ k2 = (p_below2 + p_below1 + 1) >> 1;
+ k3 = (k1 + k2 + 1) >> 1;
+ v = (k3 + v + 1) >> 1;
}
- v = (kernel >> 3);
- down_skip_convolve:
p_dst[col] = v;
}
@@ -176,45 +176,38 @@ void vp8_post_proc_down_and_across_c
p_src = dst_ptr;
p_dst = dst_ptr;
- for (i = -8; i<0; i++)
- p_src[i]=p_src[0];
-
- for (i = cols; i<cols+8; i++)
- p_src[i]=p_src[cols-1];
-
- for (i = 0; i < 8; i++)
- d[i] = p_src[i];
+ p_src[-2] = p_src[-1] = p_src[0];
+ p_src[cols] = p_src[cols + 1] = p_src[cols - 1];
for (col = 0; col < cols; col++)
{
- int kernel = 4;
v = p_src[col];
- d[col&7] = v;
-
- for (i = -2; i <= 2; i++)
+ if ((abs(v - p_src[col - 2]) < f[col])
+ && (abs(v - p_src[col - 1]) < f[col])
+ && (abs(v - p_src[col + 1]) < f[col])
+ && (abs(v - p_src[col + 2]) < f[col]))
{
- if (abs(v - p_src[col+i]) > flimit)
- goto across_skip_convolve;
-
- kernel += kernel5[2+i] * p_src[col+i];
+ unsigned char k1, k2, k3;
+ k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1;
+ k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1;
+ k3 = (k1 + k2 + 1) >> 1;
+ v = (k3 + v + 1) >> 1;
}
- d[col&7] = (kernel >> 3);
- across_skip_convolve:
+ d[col & 3] = v;
if (col >= 2)
- p_dst[col-2] = d[(col-2)&7];
+ p_dst[col - 2] = d[(col - 2) & 3];
}
/* handle the last two pixels */
- p_dst[col-2] = d[(col-2)&7];
- p_dst[col-1] = d[(col-1)&7];
-
+ p_dst[col - 2] = d[(col - 2) & 3];
+ p_dst[col - 1] = d[(col - 1) & 3];
/* next row */
- src_ptr += pitch;
- dst_ptr += pitch;
+ src_ptr += src_pixels_per_line;
+ dst_ptr += dst_pixels_per_line;
}
}
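
The rewrite replaces the kernel-accumulate-and-goto loop with a chain of rounded averages, which reproduces the old (1,1,4,1,1)/8 weighting up to rounding while staying in unsigned byte arithmetic. A standalone sketch of the per-pixel step, with n2/n1 the two neighbours on one side and p1/p2 on the other (hypothetical helper for illustration):

    #include <stdlib.h>  /* abs() */

    static unsigned char deblock_pixel(unsigned char v,
                                       unsigned char n2, unsigned char n1,
                                       unsigned char p1, unsigned char p2,
                                       unsigned char f)
    {
        if (abs(v - n2) < f && abs(v - n1) < f &&
            abs(v - p1) < f && abs(v - p2) < f)
        {
            unsigned char k1 = (n2 + n1 + 1) >> 1;  /* avg of one side  */
            unsigned char k2 = (p2 + p1 + 1) >> 1;  /* avg of the other */
            unsigned char k3 = (k1 + k2 + 1) >> 1;
            v = (k3 + v + 1) >> 1;                  /* ~(n2+n1+4v+p1+p2)/8 */
        }
        return v;
    }
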
@@ -240,8 +233,9 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co
for (i = -8; i<0; i++)
s[i]=s[0];
- // 17 avoids valgrind warning - we buffer values in c in d
- // and only write them when we've read 8 ahead...
+ /* 17 avoids valgrind warning - we buffer values in c in d
+ * and only write them when we've read 8 ahead...
+ */
for (i = cols; i<cols+17; i++)
s[i]=s[cols-1];
@@ -275,9 +269,6 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co
}
-
-
-
void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit)
{
int r, c, i;
@@ -294,8 +285,9 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i
for (i = -8; i < 0; i++)
s[i*pitch]=s[0];
- // 17 avoids valgrind warning - we buffer values in c in d
- // and only write them when we've read 8 ahead...
+ /* 17 avoids valgrind warning - we buffer values in c in d
+ * and only write them when we've read 8 ahead...
+ */
for (i = rows; i < rows+17; i++)
s[i*pitch]=s[(rows-1)*pitch];
@@ -322,28 +314,17 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i
}
}
-
-static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag)
+static void vp8_de_mblock(YV12_BUFFER_CONFIG *post,
+ int q)
{
- double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
- int ppl = (int)(level + .5);
- (void) low_var_thresh;
- (void) flag;
-
- vp8_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl);
- vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
- vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
-
- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
-
+ vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
+ post->y_width, q2mbl(q));
+ vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
+ post->y_width, q2mbl(q));
}
-void vp8_deblock(YV12_BUFFER_CONFIG *source,
+void vp8_deblock(VP8_COMMON *cm,
+ YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
@@ -351,16 +332,64 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source,
{
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
+
+ const MODE_INFO *mode_info_context = cm->mi;
+ int mbr, mbc;
+
+    /* The pixel thresholds are adjusted according to whether or not the
+     * macroblock is a skipped block. */
+ unsigned char *ylimits = cm->pp_limits_buffer;
+ unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols;
(void) low_var_thresh;
(void) flag;
- vp8_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl);
- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
+ if (ppl > 0)
+ {
+ for (mbr = 0; mbr < cm->mb_rows; mbr++)
+ {
+ unsigned char *ylptr = ylimits;
+ unsigned char *uvlptr = uvlimits;
+ for (mbc = 0; mbc < cm->mb_cols; mbc++)
+ {
+ unsigned char mb_ppl;
+
+ if (mode_info_context->mbmi.mb_skip_coeff)
+ mb_ppl = (unsigned char)ppl >> 1;
+ else
+ mb_ppl = (unsigned char)ppl;
+
+ vpx_memset(ylptr, mb_ppl, 16);
+ vpx_memset(uvlptr, mb_ppl, 8);
+
+ ylptr += 16;
+ uvlptr += 8;
+ mode_info_context++;
+ }
+ mode_info_context++;
+
+ vp8_post_proc_down_and_across_mb_row(
+ source->y_buffer + 16 * mbr * source->y_stride,
+ post->y_buffer + 16 * mbr * post->y_stride, source->y_stride,
+ post->y_stride, source->y_width, ylimits, 16);
+
+ vp8_post_proc_down_and_across_mb_row(
+ source->u_buffer + 8 * mbr * source->uv_stride,
+ post->u_buffer + 8 * mbr * post->uv_stride, source->uv_stride,
+ post->uv_stride, source->uv_width, uvlimits, 8);
+ vp8_post_proc_down_and_across_mb_row(
+ source->v_buffer + 8 * mbr * source->uv_stride,
+ post->v_buffer + 8 * mbr * post->uv_stride, source->uv_stride,
+ post->uv_stride, source->uv_width, uvlimits, 8);
+ }
+ } else
+ {
+ vp8_yv12_copy_frame(source, post);
+ }
}
#if !(CONFIG_TEMPORAL_DENOISING)
-void vp8_de_noise(YV12_BUFFER_CONFIG *source,
+void vp8_de_noise(VP8_COMMON *cm,
+ YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
@@ -368,33 +397,33 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source,
{
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
+    int mb_rows = source->y_height >> 4;
+    int mb_cols = source->y_width >> 4;
+    unsigned char *limits = cm->pp_limits_buffer;
+ int mbr, mbc;
(void) post;
(void) low_var_thresh;
(void) flag;
- vp8_post_proc_down_and_across(
- source->y_buffer + 2 * source->y_stride + 2,
- source->y_buffer + 2 * source->y_stride + 2,
- source->y_stride,
- source->y_stride,
- source->y_height - 4,
- source->y_width - 4,
- ppl);
- vp8_post_proc_down_and_across(
- source->u_buffer + 2 * source->uv_stride + 2,
- source->u_buffer + 2 * source->uv_stride + 2,
- source->uv_stride,
- source->uv_stride,
- source->uv_height - 4,
- source->uv_width - 4, ppl);
- vp8_post_proc_down_and_across(
- source->v_buffer + 2 * source->uv_stride + 2,
- source->v_buffer + 2 * source->uv_stride + 2,
- source->uv_stride,
- source->uv_stride,
- source->uv_height - 4,
- source->uv_width - 4, ppl);
+ vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols);
+    /* TODO: The original code doesn't filter the 2 outer rows and columns. */
+ for (mbr = 0; mbr < mb_rows; mbr++)
+ {
+ vp8_post_proc_down_and_across_mb_row(
+ source->y_buffer + 16 * mbr * source->y_stride,
+ source->y_buffer + 16 * mbr * source->y_stride,
+ source->y_stride, source->y_stride, source->y_width, limits, 16);
+
+ vp8_post_proc_down_and_across_mb_row(
+ source->u_buffer + 8 * mbr * source->uv_stride,
+ source->u_buffer + 8 * mbr * source->uv_stride,
+ source->uv_stride, source->uv_stride, source->uv_width, limits, 8);
+ vp8_post_proc_down_and_across_mb_row(
+ source->v_buffer + 8 * mbr * source->uv_stride,
+ source->v_buffer + 8 * mbr * source->uv_stride,
+ source->uv_stride, source->uv_stride, source->uv_width, limits, 8);
+ }
}
#endif
@@ -441,7 +470,7 @@ static void fillrd(struct postproc_state *state, int q, int a)
}
- for (next = next; next < 256; next++)
+ for (; next < 256; next++)
char_dist[next] = 0;
}
@@ -731,21 +760,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
oci->post_proc_buffer_int_used = 1;
- // insure that postproc is set to all 0's so that post proc
- // doesn't pull random data in from edge
+            /* ensure that postproc is set to all 0's so that post proc
+ * doesn't pull random data in from edge
+ */
vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
}
}
-#if ARCH_X86||ARCH_X86_64
- vpx_reset_mmx_state();
-#endif
+ vp8_clear_system_state();
if ((flags & VP8D_MFQE) &&
oci->postproc_state.last_frame_valid &&
oci->current_video_frame >= 2 &&
- oci->base_qindex - oci->postproc_state.last_base_qindex >= 10)
+ oci->postproc_state.last_base_qindex < 60 &&
+ oci->base_qindex - oci->postproc_state.last_base_qindex >= 20)
{
vp8_multiframe_quality_enhance(oci);
if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) &&
@@ -754,12 +783,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int);
if (flags & VP8D_DEMACROBLOCK)
{
- vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer,
+ vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer,
q + (deblock_level - 5) * 10, 1, 0);
+ vp8_de_mblock(&oci->post_proc_buffer,
+ q + (deblock_level - 5) * 10);
}
else if (flags & VP8D_DEBLOCK)
{
- vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer,
+ vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer,
q, 1, 0);
}
}
@@ -768,13 +799,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
}
else if (flags & VP8D_DEMACROBLOCK)
{
- vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer,
- q + (deblock_level - 5) * 10, 1, 0);
+ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer,
+ q + (deblock_level - 5) * 10, 1, 0);
+ vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10);
+
oci->postproc_state.last_base_qindex = oci->base_qindex;
}
else if (flags & VP8D_DEBLOCK)
{
- vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer,
+ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer,
q, 1, 0);
oci->postproc_state.last_base_qindex = oci->base_qindex;
}
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 6ac788c..495a2c9 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -30,13 +30,15 @@ int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
vp8_ppflags_t *flags);
-void vp8_de_noise(YV12_BUFFER_CONFIG *source,
+void vp8_de_noise(struct VP8Common *oci,
+ YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
int flag);
-void vp8_deblock(YV12_BUFFER_CONFIG *source,
+void vp8_deblock(struct VP8Common *oci,
+ YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 7046a63..87f4cac 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -19,14 +19,14 @@ void (*vp8_short_idct4x4)(short *input, short *output, int pitch);
void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch);
void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch);
-extern void (*vp8_post_proc_down_and_across)(
+extern void (*vp8_post_proc_down_and_across_mb_row)(
unsigned char *src_ptr,
unsigned char *dst_ptr,
int src_pixels_per_line,
int dst_pixels_per_line,
- int rows,
int cols,
- int flimit
+ unsigned char *f,
+ int size
);
extern void (*vp8_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit);
@@ -34,15 +34,15 @@ extern void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int
extern void (*vp8_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit);
extern void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit);
-extern void vp8_post_proc_down_and_across_c
+extern void vp8_post_proc_down_and_across_mb_row_c
(
unsigned char *src_ptr,
unsigned char *dst_ptr,
int src_pixels_per_line,
int dst_pixels_per_line,
- int rows,
int cols,
- int flimit
+ unsigned char *f,
+ int size
);
void vp8_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a);
@@ -158,7 +158,7 @@ void vp8_machine_specific_config(void)
vp8_lf_mbhsimple = loop_filter_mbhs_ppc;
vp8_lf_bhsimple = loop_filter_bhs_ppc;
- vp8_post_proc_down_and_across = vp8_post_proc_down_and_across_c;
+ vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c;
vp8_mbpost_proc_down = vp8_mbpost_proc_down_c;
vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c;
vp8_plane_add_noise = vp8_plane_add_noise_c;
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index e9833fe..05f9210 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -109,7 +109,10 @@ int vp8_ac2quant(int QIndex, int Delta)
else if (QIndex < 0)
QIndex = 0;
- retval = (ac_qlookup[ QIndex ] * 155) / 100;
+    /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+     * The lowest precision that still matches is '(x*6349) >> 12', but 16
+     * bits is a convenient word size. */
+ retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
if (retval < 8)
retval = 8;
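
The fixed-point claim in the new comment is easy to verify exhaustively, since the ac_qlookup values stay within [0..284]. A throwaway self-check (not library code):

    #include <assert.h>

    static void check_ac2quant_fixed_point(void)
    {
        int x;
        for (x = 0; x <= 284; x++)
            assert((x * 155) / 100 == (x * 101581) >> 16);
    }
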
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index dcc35ec..7bb8d0a 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -13,11 +13,11 @@
#include "vpx_rtcd.h"
#include "blockd.h"
-void vp8_intra4x4_predict_d_c(unsigned char *Above,
- unsigned char *yleft, int left_stride,
- int b_mode,
- unsigned char *dst, int dst_stride,
- unsigned char top_left)
+void vp8_intra4x4_predict_c(unsigned char *Above,
+ unsigned char *yleft, int left_stride,
+ B_PREDICTION_MODE b_mode,
+ unsigned char *dst, int dst_stride,
+ unsigned char top_left)
{
int i, r, c;
@@ -290,19 +290,8 @@ void vp8_intra4x4_predict_d_c(unsigned char *Above,
}
break;
+ default:
+ break;
}
}
-
-void vp8_intra4x4_predict_c(unsigned char *src, int src_stride,
- int b_mode,
- unsigned char *dst, int dst_stride)
-{
- unsigned char *Above = src - src_stride;
-
- vp8_intra4x4_predict_d_c(Above,
- src - 1, src_stride,
- b_mode,
- dst, dst_stride,
- Above[-1]);
-}
diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
index 232640d..01dad46 100644
--- a/vp8/common/rtcd.c
+++ b/vp8/common/rtcd.c
@@ -10,3 +10,96 @@
#include "vpx_config.h"
#define RTCD_C
#include "vpx_rtcd.h"
+
+#if CONFIG_MULTITHREAD && defined(_WIN32)
+#include <windows.h>
+#include <stdlib.h>
+static void once(void (*func)(void))
+{
+ static CRITICAL_SECTION *lock;
+ static LONG waiters;
+ static int done;
+ void *lock_ptr = &lock;
+
+ /* If the initialization is complete, return early. This isn't just an
+ * optimization, it prevents races on the destruction of the global
+ * lock.
+ */
+ if(done)
+ return;
+
+ InterlockedIncrement(&waiters);
+
+ /* Get a lock. We create one and try to make it the one-true-lock,
+ * throwing it away if we lost the race.
+ */
+
+ {
+ /* Scope to protect access to new_lock */
+ CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(new_lock);
+ if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
+ {
+ DeleteCriticalSection(new_lock);
+ free(new_lock);
+ }
+ }
+
+ /* At this point, we have a lock that can be synchronized on. We don't
+ * care which thread actually performed the allocation.
+ */
+
+ EnterCriticalSection(lock);
+
+ if (!done)
+ {
+ func();
+ done = 1;
+ }
+
+ LeaveCriticalSection(lock);
+
+ /* Last one out should free resources. The destructed objects are
+ * protected by checking if(done) above.
+ */
+ if(!InterlockedDecrement(&waiters))
+ {
+ DeleteCriticalSection(lock);
+ free(lock);
+ lock = NULL;
+ }
+}
+
+
+#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
+#include <pthread.h>
+static void once(void (*func)(void))
+{
+ static pthread_once_t lock = PTHREAD_ONCE_INIT;
+ pthread_once(&lock, func);
+}
+
+
+#else
+/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
+ * so as long as your platform provides atomic loads/stores of pointers
+ * no synchronization is strictly necessary.
+ */
+
+static void once(void (*func)(void))
+{
+ static int done;
+
+ if(!done)
+ {
+ func();
+ done = 1;
+ }
+}
+#endif
+
+
+void vpx_rtcd()
+{
+ once(setup_rtcd_internal);
+}
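
Because once() serializes the first call, vpx_rtcd() can safely be invoked from any number of threads: the first caller runs setup_rtcd_internal() and the rest either wait on the lock or fall through the done flag. A minimal sketch of racing initializers, assuming the pthread path above:

    #include <pthread.h>

    void vpx_rtcd(void);  /* from vpx_rtcd.h */

    static void *init_thread(void *arg)
    {
        (void)arg;
        vpx_rtcd();  /* safe to race: first caller initializes, rest no-op */
        return NULL;
    }

    static void demo_racing_init(void)
    {
        pthread_t t[4];
        int i;
        for (i = 0; i < 4; i++)
            pthread_create(&t[i], NULL, init_thread, NULL);
        for (i = 0; i < 4; i++)
            pthread_join(t[i], NULL);
    }
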
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index 33bf08b..0f950f8 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -1,5 +1,7 @@
common_forward_decls() {
cat <<EOF
+#include "vp8/common/blockd.h"
+
struct blockd;
struct macroblockd;
struct loop_filter_info;
@@ -22,35 +24,42 @@ specialize vp8_dequantize_b mmx media neon
vp8_dequantize_b_media=vp8_dequantize_b_v6
prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"
-specialize vp8_dequant_idct_add mmx media neon
+specialize vp8_dequant_idct_add mmx media neon dspr2
vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6
+vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2
prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"
-specialize vp8_dequant_idct_add_y_block mmx sse2 media neon
+specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2
vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6
+vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"
-specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon
+specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2
vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6
+vp8_dequant_idct_add_uv_block_dspr2=vp8_dequant_idct_add_uv_block_dspr2
#
# Loopfilter
#
prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbv mmx sse2 media neon
+specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2
vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6
+vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2
prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bv mmx sse2 media neon
+specialize vp8_loop_filter_bv mmx sse2 media neon dspr2
vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6
+vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2
prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbh mmx sse2 media neon
+specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2
vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6
+vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2
prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bh mmx sse2 media neon
+specialize vp8_loop_filter_bh mmx sse2 media neon dspr2
vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6
+vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2
prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
@@ -90,37 +99,45 @@ vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon
#
#idct16
prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"
-specialize vp8_short_idct4x4llm mmx media neon
+specialize vp8_short_idct4x4llm mmx media neon dspr2
vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual
+vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2
#iwalsh1
prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output"
+specialize vp8_short_inv_walsh4x4_1 dspr2
+vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2
# no asm yet
#iwalsh16
prototype void vp8_short_inv_walsh4x4 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4 mmx sse2 media neon
+specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2
vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6
+vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2
#idct1_scalar_add
prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"
-specialize vp8_dc_only_idct_add mmx media neon
+specialize vp8_dc_only_idct_add mmx media neon dspr2
vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6
+vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2
#
# RECON
#
prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem16x16 mmx sse2 media neon
+specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2
vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6
+vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2
prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x8 mmx media neon
+specialize vp8_copy_mem8x8 mmx media neon dspr2
vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6
+vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2
prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x4 mmx media neon
+specialize vp8_copy_mem8x4 mmx media neon dspr2
vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6
+vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2
prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"
specialize vp8_build_intra_predictors_mby_s sse2 ssse3
@@ -129,8 +146,7 @@ specialize vp8_build_intra_predictors_mby_s sse2 ssse3
prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
-prototype void vp8_intra4x4_predict_d "unsigned char *above, unsigned char *left, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-prototype void vp8_intra4x4_predict "unsigned char *src, int src_stride, int b_mode, unsigned char *dst, int dst_stride"
+prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
specialize vp8_intra4x4_predict media
vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
@@ -146,9 +162,8 @@ if [ "$CONFIG_POSTPROC" = "yes" ]; then
specialize vp8_mbpost_proc_across_ip sse2
vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm
- prototype void vp8_post_proc_down_and_across "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int rows, int cols, int flimit"
- specialize vp8_post_proc_down_and_across mmx sse2
- vp8_post_proc_down_and_across_sse2=vp8_post_proc_down_and_across_xmm
+ prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"
+ specialize vp8_post_proc_down_and_across_mb_row sse2
prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"
specialize vp8_plane_add_noise mmx sse2
@@ -177,20 +192,24 @@ fi
# Subpixel
#
prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon
+specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2
vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6
+vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2
prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon
+specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2
vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6
+vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2
prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon
+specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2
vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6
+vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2
prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict4x4 mmx ssse3 media neon
+specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2
vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6
+vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2
prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon
@@ -276,23 +295,23 @@ vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6
#
# Single block SAD
#
-prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp8_sad4x4 mmx sse2 neon
vp8_sad4x4_sse2=vp8_sad4x4_wmt
-prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp8_sad8x8 mmx sse2 neon
vp8_sad8x8_sse2=vp8_sad8x8_wmt
-prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp8_sad8x16 mmx sse2 neon
vp8_sad8x16_sse2=vp8_sad8x16_wmt
-prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp8_sad16x8 mmx sse2 neon
vp8_sad16x8_sse2=vp8_sad16x8_wmt
-prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp8_sad16x16 mmx sse2 sse3 media neon
vp8_sad16x16_sse2=vp8_sad16x16_wmt
vp8_sad16x16_media=vp8_sad16x16_armv6
@@ -300,59 +319,59 @@ vp8_sad16x16_media=vp8_sad16x16_armv6
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
-prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp8_sad4x4x3 sse3
-prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp8_sad8x8x3 sse3
-prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp8_sad8x16x3 sse3
-prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp8_sad16x8x3 sse3 ssse3
-prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp8_sad16x16x3 sse3 ssse3
# Note the only difference in the following prototypes is that they return into
# an array of short
-prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
specialize vp8_sad4x4x8 sse4_1
vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4
-prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
specialize vp8_sad8x8x8 sse4_1
vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4
-prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
specialize vp8_sad8x16x8 sse4_1
vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4
-prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
specialize vp8_sad16x8x8 sse4_1
vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4
-prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
specialize vp8_sad16x16x8 sse4_1
vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4
#
# Multi-block SAD, comparing a reference to N independent blocks
#
-prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr[4], int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp8_sad4x4x4d sse3
-prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr[4], int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp8_sad8x8x4d sse3
-prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr[4], int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp8_sad8x16x4d sse3
-prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr[4], int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp8_sad16x8x4d sse3
-prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr[4], int ref_stride, unsigned int *sad_array"
+prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp8_sad16x16x4d sse3
#
@@ -501,6 +520,14 @@ fi
prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
specialize vp8_yv12_copy_partial_frame neon
+#
+# Denoiser filter
+#
+if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
+ prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
+ specialize vp8_denoiser_filter sse2
+fi
+
# End of encoder only functions
fi
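
For orientation, prototype/specialize drive the build-time generation of vpx_rtcd.h: each function gets per-ISA declarations plus either a compile-time alias (for ISAs fixed at configure time, like dspr2) or a pointer that setup_rtcd_internal() fills in at runtime (like the x86 variants). Roughly, and only as an illustration of the pattern rather than the literal generated header:

    /* declarations for every specialized variant */
    void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u,
                               unsigned char *v, int ystride, int uv_stride,
                               struct loop_filter_info *lfi);
    void vp8_loop_filter_mbv_dspr2(unsigned char *y, unsigned char *u,
                                   unsigned char *v, int ystride, int uv_stride,
                                   struct loop_filter_info *lfi);

    /* configure-time ISA: a plain alias...
     *   #define vp8_loop_filter_mbv vp8_loop_filter_mbv_dspr2
     * ...or, for runtime-detected ISAs, a pointer assigned inside
     * setup_rtcd_internal() once the CPU capability flags are known. */
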
diff --git a/vp8/common/sad_c.c b/vp8/common/sad_c.c
index 6a3e889..5f36fc9 100644
--- a/vp8/common/sad_c.c
+++ b/vp8/common/sad_c.c
@@ -9,21 +9,15 @@
*/
+#include <limits.h>
#include <stdlib.h>
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
-static
-unsigned int sad_mx_n_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad,
- int m,
- int n)
+static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int max_sad, int m, int n)
{
-
int r, c;
unsigned int sad = 0;
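
The body of sad_mx_n_c is split across hunks here; for orientation, a complete sketch of the m x n SAD with its once-per-row early out, matching the refactored signature (and, as the comment just below notes, honoring max_sad is optional for implementations):

    /* uses abs() from <stdlib.h>, which the file already includes */
    static unsigned int sad_mx_n_sketch(const unsigned char *src_ptr,
                                        int src_stride,
                                        const unsigned char *ref_ptr,
                                        int ref_stride,
                                        unsigned int max_sad, int m, int n)
    {
        int r, c;
        unsigned int sad = 0;

        for (r = 0; r < n; r++)
        {
            for (c = 0; c < m; c++)
                sad += abs(src_ptr[c] - ref_ptr[c]);

            if (sad > max_sad)
                break;  /* budget exceeded; caller treats result as too big */

            src_ptr += src_stride;
            ref_ptr += ref_stride;
        }
        return sad;
    }
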
@@ -48,298 +42,211 @@ unsigned int sad_mx_n_c(
* implementations of these functions are not required to check it.
*/
-unsigned int vp8_sad16x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad)
+unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int max_sad)
{
-
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16);
}
-
-unsigned int vp8_sad8x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad)
+unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int max_sad)
{
-
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8);
}
-
-unsigned int vp8_sad16x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad)
+unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int max_sad)
{
-
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8);
}
-
-unsigned int vp8_sad8x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad)
+unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int max_sad)
{
-
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16);
}
-
-unsigned int vp8_sad4x4_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad)
+unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int max_sad)
{
-
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4);
}
-void vp8_sad16x16x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
-void vp8_sad16x16x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned short *sad_array
-)
+void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned short *sad_array)
{
- sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
- sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
- sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
- sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
- sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
- sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+ sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
+ sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
+ sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
+ sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
+ sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
+ sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
-void vp8_sad16x8x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
-void vp8_sad16x8x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned short *sad_array
-)
+void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned short *sad_array)
{
- sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
- sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
- sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
- sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
- sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
- sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+ sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
+ sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
+ sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
+ sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
+ sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
+ sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
-void vp8_sad8x8x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
-void vp8_sad8x8x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned short *sad_array
-)
+void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned short *sad_array)
{
- sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
- sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
- sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
- sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
- sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
- sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+ sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
+ sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
+ sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
+ sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
+ sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
+ sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
-void vp8_sad8x16x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
-void vp8_sad8x16x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned short *sad_array
-)
+void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned short *sad_array)
{
- sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
- sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
- sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
- sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
- sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
- sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+ sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
+ sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
+ sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
+ sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
+ sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
+ sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
-void vp8_sad4x4x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
-void vp8_sad4x4x8_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned short *sad_array
-)
+void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned short *sad_array)
{
- sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff);
- sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
- sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
- sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
- sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
- sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
- sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+ sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
+ sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
+ sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
+ sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
+ sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
+ sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
+ sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
+ sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
-void vp8_sad16x16x4d_c(
- const unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr[],
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char * const ref_ptr[], int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
+ sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
-void vp8_sad16x8x4d_c(
- const unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr[],
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char * const ref_ptr[], int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
+ sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
-void vp8_sad8x8x4d_c(
- const unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr[],
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char * const ref_ptr[], int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
+ sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
-void vp8_sad8x16x4d_c(
- const unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr[],
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char * const ref_ptr[], int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
+ sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
-void vp8_sad4x4x4d_c(
- const unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr[],
- int ref_stride,
- unsigned int *sad_array
-)
+void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
+ const unsigned char * const ref_ptr[], int ref_stride,
+ unsigned int *sad_array)
{
- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
+ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
+ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
+ sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
/* Copy 2 macroblocks to a buffer */
-void vp8_copy32xn_c(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *dst_ptr,
- int dst_stride,
- int height)
+void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
+ unsigned char *dst_ptr, int dst_stride,
+ int height)
{
int r;
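The hunks above finish migrating the C SAD helpers from the signed sentinel 0x7fffffff to UINT_MAX, matching the max_sad parameter's move to unsigned int. For reference, a minimal scalar sketch of what a width-by-height SAD kernel with the early-exit bound computes; the helper name and shape are illustrative, not the library's actual implementation:

    #include <stdlib.h>

    /* Illustrative WxH sum of absolute differences with an early-out bound:
     * once the running sum exceeds max_sad the caller will discard the
     * result anyway, so bailing out of the row loop is safe. */
    static unsigned int sad_wxh(const unsigned char *src, int src_stride,
                                const unsigned char *ref, int ref_stride,
                                int w, int h, unsigned int max_sad)
    {
        unsigned int sad = 0;
        int r, c;
        for (r = 0; r < h; r++) {
            for (c = 0; c < w; c++)
                sad += abs(src[c] - ref[c]);
            if (sad > max_sad)      /* unsigned compare, hence UINT_MAX */
                return sad;
            src += src_stride;
            ref += ref_stride;
        }
        return sad;
    }

With max_sad = UINT_MAX the bound can never fire, which is why the x3/x8/x4d wrappers above pass it unconditionally.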
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index 7976e25..60afe51 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -30,3 +30,10 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
}
+
+void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
+{
+ vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
+ vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
+ vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
+}
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index 5264fd0..e515c3a 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -11,3 +11,23 @@
#include "vpx_scale/yv12config.h"
extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
+extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
+
+static
+void setup_intra_recon_left(unsigned char *y_buffer,
+ unsigned char *u_buffer,
+ unsigned char *v_buffer,
+ int y_stride,
+ int uv_stride)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ y_buffer[y_stride *i] = (unsigned char) 129;
+
+ for (i = 0; i < 8; i++)
+ u_buffer[uv_stride *i] = (unsigned char) 129;
+
+ for (i = 0; i < 8; i++)
+ v_buffer[uv_stride *i] = (unsigned char) 129;
+}
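The two new helpers split the border seeding previously done in one pass by vp8_setup_intra_recon: the row above is primed with 127 and the column to the left with 129, the neighbor values VP8 assumes where real pixels are unavailable. A self-contained sketch of the same seeding for a single 16x16 luma block (the +5 over-run for above-right prediction follows the code above; the function name is illustrative):

    #include <string.h>

    /* Seed the intra-prediction borders of one 16x16 luma block: 127 across
     * the row above (including the top-left corner and a few above-right
     * pixels), 129 down the left column. */
    static void seed_intra_borders(unsigned char *y, int y_stride)
    {
        int i;
        memset(y - 1 - y_stride, 127, 16 + 5);
        for (i = 0; i < 16; i++)
            y[y_stride * i - 1] = 129;
    }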
diff --git a/vp8/common/variance.h b/vp8/common/variance.h
index b77aa28..01193b8 100644
--- a/vp8/common/variance.h
+++ b/vp8/common/variance.h
@@ -12,14 +12,14 @@
#ifndef VARIANCE_H
#define VARIANCE_H
-typedef unsigned int(*vp8_sad_fn_t)
- (
+#include "vpx_config.h"
+
+typedef unsigned int(*vp8_sad_fn_t)(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride,
- int max_sad
- );
+ unsigned int max_sad);
typedef void (*vp8_copy32xn_fn_t)(
const unsigned char *src_ptr,
@@ -48,7 +48,7 @@ typedef void (*vp8_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr[4],
+ const unsigned char * const ref_ptr[],
int ref_stride,
unsigned int *sad_array
);
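Widening max_sad to unsigned int in vp8_sad_fn_t, and const-qualifying the four-candidate reference array, brings the typedefs in line with the C kernels above and with the assembly early-exit paths fixed later in this diff. A small sketch of dispatching through the widened pointer type (sad_nolimit is a hypothetical helper):

    #include <limits.h>

    typedef unsigned int (*vp8_sad_fn_t)(const unsigned char *src_ptr,
                                         int source_stride,
                                         const unsigned char *ref_ptr,
                                         int ref_stride,
                                         unsigned int max_sad);

    /* UINT_MAX is the "no limit" sentinel; it is only well defined as a
     * bound now that max_sad is unsigned. */
    static unsigned int sad_nolimit(vp8_sad_fn_t sad_fn,
                                    const unsigned char *src, int src_stride,
                                    const unsigned char *ref, int ref_stride)
    {
        return sad_fn(src, src_stride, ref, ref_stride, UINT_MAX);
    }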
diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c
index 996404d..da08aff 100644
--- a/vp8/common/variance_c.c
+++ b/vp8/common/variance_c.c
@@ -205,14 +205,14 @@ static void var_filter_block2d_bil_first_pass
{
for (j = 0; j < output_width; j++)
{
- // Apply bilinear filter
+ /* Apply bilinear filter */
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
- // Next row...
+ /* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@@ -264,15 +264,15 @@ static void var_filter_block2d_bil_second_pass
{
for (j = 0; j < output_width; j++)
{
- // Apply filter
- Temp = ((int)src_ptr[0] * vp8_filter[0]) +
+ /* Apply filter */
+ Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
- // Next row...
+ /* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@@ -292,15 +292,15 @@ unsigned int vp8_sub_pixel_variance4x4_c
{
unsigned char temp2[20*16];
const short *HFilter, *VFilter;
- unsigned short FData3[5*4]; // Temp data bufffer used in filtering
+    unsigned short FData3[5*4]; /* Temp data buffer used in filtering */
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
- // First filter 1d Horizontal
+ /* First filter 1d Horizontal */
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
- // Now filter Verticaly
+    /* Now filter Vertically */
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
@@ -318,7 +318,7 @@ unsigned int vp8_sub_pixel_variance8x8_c
unsigned int *sse
)
{
- unsigned short FData3[9*8]; // Temp data bufffer used in filtering
+    unsigned short FData3[9*8]; /* Temp data buffer used in filtering */
unsigned char temp2[20*16];
const short *HFilter, *VFilter;
@@ -342,7 +342,7 @@ unsigned int vp8_sub_pixel_variance16x16_c
unsigned int *sse
)
{
- unsigned short FData3[17*16]; // Temp data bufffer used in filtering
+    unsigned short FData3[17*16]; /* Temp data buffer used in filtering */
unsigned char temp2[20*16];
const short *HFilter, *VFilter;
@@ -418,7 +418,7 @@ unsigned int vp8_sub_pixel_variance16x8_c
unsigned int *sse
)
{
- unsigned short FData3[16*9]; // Temp data bufffer used in filtering
+    unsigned short FData3[16*9]; /* Temp data buffer used in filtering */
unsigned char temp2[20*16];
const short *HFilter, *VFilter;
@@ -442,7 +442,7 @@ unsigned int vp8_sub_pixel_variance8x16_c
unsigned int *sse
)
{
- unsigned short FData3[9*16]; // Temp data bufffer used in filtering
+    unsigned short FData3[9*16]; /* Temp data buffer used in filtering */
unsigned char temp2[20*16];
const short *HFilter, *VFilter;
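All the sub-pixel variance paths above share one scheme: a horizontal bilinear pass into an intermediate unsigned short buffer, then a vertical pass back to bytes, using VP8_FILTER_WEIGHT 128 and VP8_FILTER_SHIFT 7. A sketch of the single tap both passes are built from (filter pairs come from vp8_bilinear_filters and sum to 128, e.g. {112, 16} for a one-eighth-pel offset; the real second pass reads the unsigned short intermediate rather than bytes):

    #define VP8_FILTER_WEIGHT 128
    #define VP8_FILTER_SHIFT  7

    /* One bilinear tap: a rounded, weighted average of two neighbouring
     * samples. pixel_step is 1 in the horizontal pass and the block width
     * in the vertical pass, matching the loops above. */
    static unsigned short bilinear_tap(const unsigned char *src,
                                       int pixel_step,
                                       const short *vp8_filter)
    {
        int sum = (int)src[0] * vp8_filter[0] +
                  (int)src[pixel_step] * vp8_filter[1] +
                  (VP8_FILTER_WEIGHT / 2);
        return (unsigned short)(sum >> VP8_FILTER_SHIFT);
    }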
diff --git a/vp8/common/vp8_entropymodedata.h b/vp8/common/vp8_entropymodedata.h
old mode 100755
new mode 100644
diff --git a/vp8/common/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm
index de9eba8..4e551f0 100644
--- a/vp8/common/x86/dequantize_mmx.asm
+++ b/vp8/common/x86/dequantize_mmx.asm
@@ -13,7 +13,7 @@
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
-global sym(vp8_dequantize_b_impl_mmx)
+global sym(vp8_dequantize_b_impl_mmx) PRIVATE
sym(vp8_dequantize_b_impl_mmx):
push rbp
mov rbp, rsp
@@ -55,7 +55,7 @@ sym(vp8_dequantize_b_impl_mmx):
;short *dq, 1
;unsigned char *dest, 2
;int stride) 3
-global sym(vp8_dequant_idct_add_mmx)
+global sym(vp8_dequant_idct_add_mmx) PRIVATE
sym(vp8_dequant_idct_add_mmx):
push rbp
mov rbp, rsp
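This hunk, and the many like it below, append PRIVATE to every global directive. In vpx_ports/x86_abi_support.asm PRIVATE expands to the platform's hidden-visibility decoration, so the assembly symbols remain callable inside the library but are no longer exported from the shared object. A rough C analogue, assuming GCC/Clang visibility attributes:

    /* Approximate C equivalent of "global sym(...) PRIVATE": the function
     * links normally within the shared object but does not appear in its
     * dynamic export table. The name is illustrative. */
    __attribute__((visibility("hidden")))
    void vp8_internal_kernel(void)
    {
        /* ... kernel body ... */
    }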
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 0c9c205..96fa2c6 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -34,7 +34,7 @@
;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
;int pitch, unsigned char *dest,int stride)
-global sym(vp8_short_idct4x4llm_mmx)
+global sym(vp8_short_idct4x4llm_mmx) PRIVATE
sym(vp8_short_idct4x4llm_mmx):
push rbp
mov rbp, rsp
@@ -224,7 +224,7 @@ sym(vp8_short_idct4x4llm_mmx):
;int pred_stride,
;unsigned char *dst_ptr,
;int stride)
-global sym(vp8_dc_only_idct_add_mmx)
+global sym(vp8_dc_only_idct_add_mmx) PRIVATE
sym(vp8_dc_only_idct_add_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/idctllm_mmx_test.cc b/vp8/common/x86/idctllm_mmx_test.cc
deleted file mode 100755
index 8c11533..0000000
--- a/vp8/common/x86/idctllm_mmx_test.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
- extern "C" {
- void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred_ptr,
- int pred_stride, unsigned char *dst_ptr,
- int dst_stride);
-}
-
-#include "vp8/common/idctllm_test.h"
-
-namespace
-{
-
-INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
- ::testing::Values(vp8_short_idct4x4llm_mmx));
-
-} // namespace
-
-int main(int argc, char **argv) {
- ::testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index abeb0b6..bf8e2c4 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -19,7 +19,7 @@
; int dst_stride - 3
; )
-global sym(vp8_idct_dequant_0_2x_sse2)
+global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE
sym(vp8_idct_dequant_0_2x_sse2):
push rbp
mov rbp, rsp
@@ -101,7 +101,7 @@ sym(vp8_idct_dequant_0_2x_sse2):
; unsigned char *dst - 2
; int dst_stride - 3
; )
-global sym(vp8_idct_dequant_full_2x_sse2)
+global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE
sym(vp8_idct_dequant_full_2x_sse2):
push rbp
mov rbp, rsp
@@ -358,7 +358,7 @@ sym(vp8_idct_dequant_full_2x_sse2):
; int dst_stride - 3
; short *dc - 4
; )
-global sym(vp8_idct_dequant_dc_0_2x_sse2)
+global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE
sym(vp8_idct_dequant_dc_0_2x_sse2):
push rbp
mov rbp, rsp
@@ -434,7 +434,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2):
; int dst_stride - 3
; short *dc - 4
; )
-global sym(vp8_idct_dequant_dc_full_2x_sse2)
+global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE
sym(vp8_idct_dequant_dc_full_2x_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 6582687..4aac094 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
-global sym(vp8_short_inv_walsh4x4_mmx)
+global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE
sym(vp8_short_inv_walsh4x4_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 51cb5e2..06e86a8 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
-global sym(vp8_short_inv_walsh4x4_sse2)
+global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm
index 4918eb5..3d45c61 100644
--- a/vp8/common/x86/loopfilter_block_sse2.asm
+++ b/vp8/common/x86/loopfilter_block_sse2.asm
@@ -133,7 +133,7 @@
; const char *limit,
; const char *thresh
;)
-global sym(vp8_loop_filter_bh_y_sse2)
+global sym(vp8_loop_filter_bh_y_sse2) PRIVATE
sym(vp8_loop_filter_bh_y_sse2):
%ifidn __OUTPUT_FORMAT__,x64
@@ -150,6 +150,7 @@ sym(vp8_loop_filter_bh_y_sse2):
push rbp
mov rbp, rsp
+ SAVE_XMM 11
push r12
push r13
mov thresh, arg(4)
@@ -258,6 +259,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
%ifidn __OUTPUT_FORMAT__,x64
pop r13
pop r12
+ RESTORE_XMM
pop rbp
%endif
@@ -273,7 +275,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
; const char *thresh
;)
-global sym(vp8_loop_filter_bv_y_sse2)
+global sym(vp8_loop_filter_bv_y_sse2) PRIVATE
sym(vp8_loop_filter_bv_y_sse2):
%ifidn __OUTPUT_FORMAT__,x64
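Besides the PRIVATE change, this hunk fixes a Win64 ABI bug: under the Microsoft x64 calling convention xmm6 through xmm15 are callee-saved, and the x64 path of vp8_loop_filter_bh_y_sse2 uses registers up to xmm11, so returning without spilling them would corrupt the caller's state. A sketch of the frame pattern, with SAVE_XMM/RESTORE_XMM semantics assumed from vpx_ports/x86_abi_support.asm:

    ; Win64-safe frame: SAVE_XMM n reserves stack space and stores
    ; xmm6..xmmN; RESTORE_XMM reloads them before the frame is torn down.
        push    rbp
        mov     rbp, rsp
        SAVE_XMM 11             ; the body below clobbers xmm8 and xmm11
        ; ... filter body ...
        RESTORE_XMM
        pop     rbp
        ret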
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 697a5de..f388d24 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -21,7 +21,7 @@
; const char *thresh,
; int count
;)
-global sym(vp8_loop_filter_horizontal_edge_mmx)
+global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE
sym(vp8_loop_filter_horizontal_edge_mmx):
push rbp
mov rbp, rsp
@@ -233,7 +233,7 @@ sym(vp8_loop_filter_horizontal_edge_mmx):
; const char *thresh,
; int count
;)
-global sym(vp8_loop_filter_vertical_edge_mmx)
+global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE
sym(vp8_loop_filter_vertical_edge_mmx):
push rbp
mov rbp, rsp
@@ -603,7 +603,7 @@ sym(vp8_loop_filter_vertical_edge_mmx):
; const char *thresh,
; int count
;)
-global sym(vp8_mbloop_filter_horizontal_edge_mmx)
+global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE
sym(vp8_mbloop_filter_horizontal_edge_mmx):
push rbp
mov rbp, rsp
@@ -920,7 +920,7 @@ sym(vp8_mbloop_filter_horizontal_edge_mmx):
; const char *thresh,
; int count
;)
-global sym(vp8_mbloop_filter_vertical_edge_mmx)
+global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE
sym(vp8_mbloop_filter_vertical_edge_mmx):
push rbp
mov rbp, rsp
@@ -1384,7 +1384,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx):
; int src_pixel_step,
; const char *blimit
;)
-global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
+global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
push rbp
mov rbp, rsp
@@ -1500,7 +1500,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
; int src_pixel_step,
; const char *blimit
;)
-global sym(vp8_loop_filter_simple_vertical_edge_mmx)
+global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE
sym(vp8_loop_filter_simple_vertical_edge_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 9944c33..a66753b 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -286,7 +286,7 @@
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_loop_filter_horizontal_edge_sse2)
+global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE
sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
@@ -334,7 +334,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
; const char *thresh,
; int count
;)
-global sym(vp8_loop_filter_horizontal_edge_uv_sse2)
+global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE
sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -561,7 +561,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_mbloop_filter_horizontal_edge_sse2)
+global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE
sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
@@ -607,7 +607,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
; const char *thresh,
; unsigned char *v
;)
-global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE
sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -928,7 +928,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_loop_filter_vertical_edge_sse2)
+global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE
sym(vp8_loop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
@@ -993,7 +993,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
; const char *thresh,
; unsigned char *v
;)
-global sym(vp8_loop_filter_vertical_edge_uv_sse2)
+global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE
sym(vp8_loop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -1142,7 +1142,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_mbloop_filter_vertical_edge_sse2)
+global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE
sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
@@ -1209,7 +1209,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
; const char *thresh,
; unsigned char *v
;)
-global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE
sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -1269,7 +1269,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
; int src_pixel_step,
; const char *blimit,
;)
-global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
+global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
@@ -1374,7 +1374,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
; int src_pixel_step,
; const char *blimit,
;)
-global sym(vp8_loop_filter_simple_vertical_edge_sse2)
+global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE
sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm
index 10d21f3..c1d2174 100644
--- a/vp8/common/x86/mfqe_sse2.asm
+++ b/vp8/common/x86/mfqe_sse2.asm
@@ -19,7 +19,7 @@
; int dst_stride,
; int src_weight
;)
-global sym(vp8_filter_by_weight16x16_sse2)
+global sym(vp8_filter_by_weight16x16_sse2) PRIVATE
sym(vp8_filter_by_weight16x16_sse2):
push rbp
mov rbp, rsp
@@ -97,7 +97,7 @@ sym(vp8_filter_by_weight16x16_sse2):
; int dst_stride,
; int src_weight
;)
-global sym(vp8_filter_by_weight8x8_sse2)
+global sym(vp8_filter_by_weight8x8_sse2) PRIVATE
sym(vp8_filter_by_weight8x8_sse2):
push rbp
mov rbp, rsp
@@ -165,7 +165,7 @@ sym(vp8_filter_by_weight8x8_sse2):
; unsigned int *variance, 4
; unsigned int *sad, 5
;)
-global sym(vp8_variance_and_sad_16x16_sse2)
+global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE
sym(vp8_variance_and_sad_16x16_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index d24f740..966c586 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -14,275 +14,10 @@
%define VP8_FILTER_WEIGHT 128
%define VP8_FILTER_SHIFT 7
-;void vp8_post_proc_down_and_across_mmx
-;(
-; unsigned char *src_ptr,
-; unsigned char *dst_ptr,
-; int src_pixels_per_line,
-; int dst_pixels_per_line,
-; int rows,
-; int cols,
-; int flimit
-;)
-global sym(vp8_post_proc_down_and_across_mmx)
-sym(vp8_post_proc_down_and_across_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- ; move the global rd onto the stack, since we don't have enough registers
- ; to do PIC addressing
- movq mm0, [GLOBAL(rd)]
- sub rsp, 8
- movq [rsp], mm0
-%define RD [rsp]
-%else
-%define RD [GLOBAL(rd)]
-%endif
-
- push rbx
- lea rbx, [GLOBAL(Blur)]
- movd mm2, dword ptr arg(6) ;flimit
- punpcklwd mm2, mm2
- punpckldq mm2, mm2
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;dst_ptr
-
- movsxd rcx, DWORD PTR arg(4) ;rows
- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch?
- pxor mm0, mm0 ; mm0 = 00000000
-
-.nextrow:
-
- xor rdx, rdx ; clear out rdx for use as loop counter
-.nextcol:
-
- pxor mm7, mm7 ; mm7 = 00000000
- movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps
- movq mm3, [rsi] ; mm4 = r0 p0..p7
- punpcklbw mm3, mm0 ; mm3 = p0..p3
- movq mm1, mm3 ; mm1 = p0..p3
- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers
-
- movq mm6, [rbx + 48] ; mm6 = kernel 3 taps
- movq mm5, [rsi + rax] ; mm4 = r1 p0..p7
- punpcklbw mm5, mm0 ; mm5 = r1 p0..p3
- pmullw mm6, mm5 ; mm6 *= p0..p3 * kernel 3 modifiers
- paddusw mm3, mm6 ; mm3 += mm6
-
- ; thresholding
- movq mm7, mm1 ; mm7 = r0 p0..p3
- psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3
- psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3
- paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
- pcmpgtw mm7, mm2
-
- movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers
- movq mm5, [rsi + 2*rax] ; mm4 = r2 p0..p7
- punpcklbw mm5, mm0 ; mm5 = r2 p0..p3
- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = r0 p0..p3
- psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3
- psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r2 p0..p3
- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
-
- neg rax
- movq mm6, [rbx ] ; kernel 0 taps
- movq mm5, [rsi+2*rax] ; mm4 = r-2 p0..p7
- punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3
- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = r0 p0..p3
- psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3
- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
- movq mm6, [rbx + 16] ; kernel 1 taps
- movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7
- punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3
- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers.
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = r0 p0..p3
- psubusw mm6, mm4 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw mm4, mm1 ; mm5 = r-1 p0..p3 - p0..p3
- paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
-
- paddusw mm3, RD ; mm3 += round value
- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
-
- pand mm1, mm7 ; mm1 select vals > thresh from source
- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result
- paddusw mm1, mm7 ; combination
-
- packuswb mm1, mm0 ; pack to bytes
-
- movd [rdi], mm1 ;
- neg rax ; pitch is positive
-
-
- add rsi, 4
- add rdi, 4
- add rdx, 4
-
- cmp edx, dword ptr arg(5) ;cols
- jl .nextcol
- ; done with the all cols, start the across filtering in place
- sub rsi, rdx
- sub rdi, rdx
-
- ; dup the first byte into the left border 8 times
- movq mm1, [rdi]
- punpcklbw mm1, mm1
- punpcklwd mm1, mm1
- punpckldq mm1, mm1
-
- mov rdx, -8
- movq [rdi+rdx], mm1
-
- ; dup the last byte into the right border
- movsxd rdx, dword arg(5)
- movq mm1, [rdi + rdx + -1]
- punpcklbw mm1, mm1
- punpcklwd mm1, mm1
- punpckldq mm1, mm1
- movq [rdi+rdx], mm1
-
-
- push rax
- xor rdx, rdx
- mov rax, [rdi-4];
-
-.acrossnextcol:
- pxor mm7, mm7 ; mm7 = 00000000
- movq mm6, [rbx + 32 ] ;
- movq mm4, [rdi+rdx] ; mm4 = p0..p7
- movq mm3, mm4 ; mm3 = p0..p7
- punpcklbw mm3, mm0 ; mm3 = p0..p3
- movq mm1, mm3 ; mm1 = p0..p3
- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers
-
- movq mm6, [rbx + 48]
- psrlq mm4, 8 ; mm4 = p1..p7
- movq mm5, mm4 ; mm5 = p1..p7
- punpcklbw mm5, mm0 ; mm5 = p1..p4
- pmullw mm6, mm5 ; mm6 *= p1..p4 * kernel 3 modifiers
- paddusw mm3, mm6 ; mm3 += mm6
-
- ; thresholding
- movq mm7, mm1 ; mm7 = p0..p3
- psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4
- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4)
- pcmpgtw mm7, mm2
-
- movq mm6, [rbx + 64 ]
- psrlq mm4, 8 ; mm4 = p2..p7
- movq mm5, mm4 ; mm5 = p2..p7
- punpcklbw mm5, mm0 ; mm5 = p2..p5
- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = p0..p3
- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4
- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
-
- movq mm6, [rbx ]
- movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5
- movq mm5, mm4 ; mm5 = p-2..p5
- punpcklbw mm5, mm0 ; mm5 = p-2..p1
- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = p0..p3
- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4
- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
- movq mm6, [rbx + 16]
- psrlq mm4, 8 ; mm4 = p-1..p5
- punpcklbw mm4, mm0 ; mm4 = p-1..p2
- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers.
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = p0..p3
- psubusw mm6, mm4 ; mm6 = p0..p3 - p1..p4
- psubusw mm4, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
- paddusw mm3, RD ; mm3 += round value
- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
-
- pand mm1, mm7 ; mm1 select vals > thresh from source
- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result
- paddusw mm1, mm7 ; combination
-
- packuswb mm1, mm0 ; pack to bytes
- mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes
- movd eax, mm1
-
- add rdx, 4
- cmp edx, dword ptr arg(5) ;cols
- jl .acrossnextcol;
-
- mov DWORD PTR [rdi+rdx-4], eax
- pop rax
-
- ; done with this rwo
- add rsi,rax ; next line
- movsxd rax, dword ptr arg(3) ;dst_pixels_per_line ; destination pitch?
- add rdi,rax ; next destination
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; destination pitch?
-
- dec rcx ; decrement count
- jnz .nextrow ; next row
- pop rbx
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef RD
-
-
;void vp8_mbpost_proc_down_mmx(unsigned char *dst,
; int pitch, int rows, int cols,int flimit)
extern sym(vp8_rv)
-global sym(vp8_mbpost_proc_down_mmx)
+global sym(vp8_mbpost_proc_down_mmx) PRIVATE
sym(vp8_mbpost_proc_down_mmx):
push rbp
mov rbp, rsp
@@ -510,7 +245,7 @@ sym(vp8_mbpost_proc_down_mmx):
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
extern sym(rand)
-global sym(vp8_plane_add_noise_mmx)
+global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 966aafd..00f84a3 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -11,146 +11,159 @@
%include "vpx_ports/x86_abi_support.asm"
-;void vp8_post_proc_down_and_across_xmm
+;macro in deblock functions
+%macro FIRST_2_ROWS 0
+ movdqa xmm4, xmm0
+ movdqa xmm6, xmm0
+ movdqa xmm5, xmm1
+ pavgb xmm5, xmm3
+
+ ;calculate absolute value
+ psubusb xmm4, xmm1
+ psubusb xmm1, xmm0
+ psubusb xmm6, xmm3
+ psubusb xmm3, xmm0
+ paddusb xmm4, xmm1
+ paddusb xmm6, xmm3
+
+ ;get threshold
+ movdqa xmm2, flimit
+ pxor xmm1, xmm1
+ movdqa xmm7, xmm2
+
+ ;get mask
+ psubusb xmm2, xmm4
+ psubusb xmm7, xmm6
+ pcmpeqb xmm2, xmm1
+ pcmpeqb xmm7, xmm1
+ por xmm7, xmm2
+%endmacro
+
+%macro SECOND_2_ROWS 0
+ movdqa xmm6, xmm0
+ movdqa xmm4, xmm0
+ movdqa xmm2, xmm1
+ pavgb xmm1, xmm3
+
+ ;calculate absolute value
+ psubusb xmm6, xmm2
+ psubusb xmm2, xmm0
+ psubusb xmm4, xmm3
+ psubusb xmm3, xmm0
+ paddusb xmm6, xmm2
+ paddusb xmm4, xmm3
+
+ pavgb xmm5, xmm1
+
+ ;get threshold
+ movdqa xmm2, flimit
+ pxor xmm1, xmm1
+ movdqa xmm3, xmm2
+
+ ;get mask
+ psubusb xmm2, xmm6
+ psubusb xmm3, xmm4
+ pcmpeqb xmm2, xmm1
+ pcmpeqb xmm3, xmm1
+
+ por xmm7, xmm2
+ por xmm7, xmm3
+
+ pavgb xmm5, xmm0
+
+    ;decide whether or not to use filtered value
+ pand xmm0, xmm7
+ pandn xmm7, xmm5
+ paddusb xmm0, xmm7
+%endmacro
+
+%macro UPDATE_FLIMIT 0
+ movdqa xmm2, XMMWORD PTR [rbx]
+ movdqa [rsp], xmm2
+ add rbx, 16
+%endmacro
+
+;void vp8_post_proc_down_and_across_mb_row_sse2
;(
; unsigned char *src_ptr,
; unsigned char *dst_ptr,
; int src_pixels_per_line,
; int dst_pixels_per_line,
-; int rows,
; int cols,
-; int flimit
+; int *flimits,
+; int size
;)
-global sym(vp8_post_proc_down_and_across_xmm)
-sym(vp8_post_proc_down_and_across_xmm):
+global sym(vp8_post_proc_down_and_across_mb_row_sse2) PRIVATE
+sym(vp8_post_proc_down_and_across_mb_row_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM 7
- GET_GOT rbx
+ push rbx
push rsi
push rdi
; end prolog
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
ALIGN_STACK 16, rax
- ; move the global rd onto the stack, since we don't have enough registers
- ; to do PIC addressing
- movdqa xmm0, [GLOBAL(rd42)]
sub rsp, 16
- movdqa [rsp], xmm0
-%define RD42 [rsp]
-%else
-%define RD42 [GLOBAL(rd42)]
-%endif
-
- movd xmm2, dword ptr arg(6) ;flimit
- punpcklwd xmm2, xmm2
- punpckldq xmm2, xmm2
- punpcklqdq xmm2, xmm2
+ ; put flimit on stack
+ mov rbx, arg(5) ;flimits ptr
+ UPDATE_FLIMIT
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;dst_ptr
+%define flimit [rsp]
- movsxd rcx, DWORD PTR arg(4) ;rows
- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch?
- pxor xmm0, xmm0 ; mm0 = 00000000
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+ movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line
+ movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock
.nextrow:
-
- xor rdx, rdx ; clear out rdx for use as loop counter
+ xor rdx, rdx ;col
.nextcol:
- movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7
- punpcklbw xmm3, xmm0 ; mm3 = p0..p3
- movdqa xmm1, xmm3 ; mm1 = p0..p3
- psllw xmm3, 2 ;
-
- movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm6
-
- ; thresholding
- movdqa xmm7, xmm1 ; mm7 = r0 p0..p3
- psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3
- paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
- pcmpgtw xmm7, xmm2
-
- movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
+ ;load current and next 2 rows
+ movdqu xmm0, XMMWORD PTR [rsi]
+ movdqu xmm1, XMMWORD PTR [rsi + rax]
+ movdqu xmm3, XMMWORD PTR [rsi + 2*rax]
+ FIRST_2_ROWS
+ ;load above 2 rows
neg rax
- movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7
- punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3
- paddusw xmm3, xmm4 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3
- paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- paddusw xmm3, RD42 ; mm3 += round value
- psraw xmm3, 3 ; mm3 /= 8
-
- pand xmm1, xmm7 ; mm1 select vals > thresh from source
- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
- paddusw xmm1, xmm7 ; combination
+ movdqu xmm1, XMMWORD PTR [rsi + 2*rax]
+ movdqu xmm3, XMMWORD PTR [rsi + rax]
- packuswb xmm1, xmm0 ; pack to bytes
- movq QWORD PTR [rdi], xmm1 ;
+ SECOND_2_ROWS
- neg rax ; pitch is positive
- add rsi, 8
- add rdi, 8
+ movdqu XMMWORD PTR [rdi], xmm0
- add rdx, 8
- cmp edx, dword arg(5) ;cols
+ neg rax ; positive stride
+ add rsi, 16
+ add rdi, 16
- jl .nextcol
+ add rdx, 16
+ cmp edx, dword arg(4) ;cols
+ jge .downdone
+ UPDATE_FLIMIT
+ jmp .nextcol
+.downdone:
    ; done with all the cols, start the across filtering in place
sub rsi, rdx
sub rdi, rdx
+ mov rbx, arg(5) ; flimits
+ UPDATE_FLIMIT
; dup the first byte into the left border 8 times
movq mm1, [rdi]
punpcklbw mm1, mm1
punpcklwd mm1, mm1
punpckldq mm1, mm1
-
mov rdx, -8
movq [rdi+rdx], mm1
; dup the last byte into the right border
- movsxd rdx, dword arg(5)
+ movsxd rdx, dword arg(4)
movq mm1, [rdi + rdx + -1]
punpcklbw mm1, mm1
punpcklwd mm1, mm1
@@ -158,118 +171,69 @@ sym(vp8_post_proc_down_and_across_xmm):
movq [rdi+rdx], mm1
xor rdx, rdx
- movq mm0, QWORD PTR [rdi-8];
+ movq mm0, QWORD PTR [rdi-16];
+ movq mm1, QWORD PTR [rdi-8];
.acrossnextcol:
- movq xmm7, QWORD PTR [rdi +rdx -2]
- movd xmm4, DWORD PTR [rdi +rdx +6]
-
- pslldq xmm4, 8
- por xmm4, xmm7
-
- movdqa xmm3, xmm4
- psrldq xmm3, 2
- punpcklbw xmm3, xmm0 ; mm3 = p0..p3
- movdqa xmm1, xmm3 ; mm1 = p0..p3
- psllw xmm3, 2
-
-
- movdqa xmm5, xmm4
- psrldq xmm5, 3
- punpcklbw xmm5, xmm0 ; mm5 = p1..p4
- paddusw xmm3, xmm5 ; mm3 += mm6
-
- ; thresholding
- movdqa xmm7, xmm1 ; mm7 = p0..p3
- psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm7, xmm2
-
- movdqa xmm5, xmm4
- psrldq xmm5, 4
- punpcklbw xmm5, xmm0 ; mm5 = p2..p5
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- movdqa xmm5, xmm4 ; mm5 = p-2..p5
- punpcklbw xmm5, xmm0 ; mm5 = p-2..p1
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- psrldq xmm4, 1 ; mm4 = p-1..p5
- punpcklbw xmm4, xmm0 ; mm4 = p-1..p2
- paddusw xmm3, xmm4 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4
- psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- paddusw xmm3, RD42 ; mm3 += round value
- psraw xmm3, 3 ; mm3 /= 8
-
- pand xmm1, xmm7 ; mm1 select vals > thresh from source
- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
- paddusw xmm1, xmm7 ; combination
-
- packuswb xmm1, xmm0 ; pack to bytes
- movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes
- movdq2q mm0, xmm1
-
- add rdx, 8
- cmp edx, dword arg(5) ;cols
- jl .acrossnextcol;
-
- ; last 8 pixels
- movq QWORD PTR [rdi+rdx-8], mm0
+ movdqu xmm0, XMMWORD PTR [rdi + rdx]
+ movdqu xmm1, XMMWORD PTR [rdi + rdx -2]
+ movdqu xmm3, XMMWORD PTR [rdi + rdx -1]
+
+ FIRST_2_ROWS
+
+ movdqu xmm1, XMMWORD PTR [rdi + rdx +1]
+ movdqu xmm3, XMMWORD PTR [rdi + rdx +2]
+
+ SECOND_2_ROWS
+
+ movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes
+ movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes
+ movdq2q mm0, xmm0
+ psrldq xmm0, 8
+ movdq2q mm1, xmm0
+
+ add rdx, 16
+ cmp edx, dword arg(4) ;cols
+ jge .acrossdone
+ UPDATE_FLIMIT
+ jmp .acrossnextcol
+.acrossdone:
+ ; last 16 pixels
+ movq QWORD PTR [rdi+rdx-16], mm0
+
+ cmp edx, dword arg(4)
+ jne .throw_last_8
+ movq QWORD PTR [rdi+rdx-8], mm1
+.throw_last_8:
    ; done with this row
- add rsi,rax ; next line
- mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch?
- add rdi,rax ; next destination
- mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch?
+ add rsi,rax ;next src line
+ mov eax, dword arg(3) ;dst_pixels_per_line
+ add rdi,rax ;next destination
+ mov eax, dword arg(2) ;src_pixels_per_line
- dec rcx ; decrement count
- jnz .nextrow ; next row
+ mov rbx, arg(5) ;flimits
+ UPDATE_FLIMIT
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- add rsp,16
+ dec rcx ;decrement count
+ jnz .nextrow ;next row
+
+ add rsp, 16
pop rsp
-%endif
; begin epilog
pop rdi
pop rsi
- RESTORE_GOT
+ pop rbx
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-%undef RD42
-
+%undef flimit
;void vp8_mbpost_proc_down_xmm(unsigned char *dst,
; int pitch, int rows, int cols,int flimit)
extern sym(vp8_rv)
-global sym(vp8_mbpost_proc_down_xmm)
+global sym(vp8_mbpost_proc_down_xmm) PRIVATE
sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
@@ -497,7 +461,7 @@ sym(vp8_mbpost_proc_down_xmm):
;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src,
; int pitch, int rows, int cols,int flimit)
-global sym(vp8_mbpost_proc_across_ip_xmm)
+global sym(vp8_mbpost_proc_across_ip_xmm) PRIVATE
sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
@@ -694,7 +658,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
extern sym(rand)
-global sym(vp8_plane_add_noise_wmt)
+global sym(vp8_plane_add_noise_wmt) PRIVATE
sym(vp8_plane_add_noise_wmt):
push rbp
mov rbp, rsp
@@ -753,7 +717,5 @@ sym(vp8_plane_add_noise_wmt):
SECTION_RODATA
align 16
-rd42:
- times 8 dw 0x04
four8s:
times 4 dd 8
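The rewritten mb-row deblocker drops the old 7-tap weighted blur in favour of a pavgb averaging cascade and takes a per-16-pixel flimits array instead of one scalar threshold. Per pixel, the mask built by FIRST_2_ROWS/SECOND_2_ROWS reduces to the scalar select below (a sketch; blurred stands in for the pavgb cascade, and the psubusb/pcmpeqb mask fires when a neighbour differs by the threshold or more):

    #include <stdlib.h>

    /* Scalar model of the masked blend ending SECOND_2_ROWS: keep the
     * source pixel if any of the four vertical (or, in the across stage,
     * horizontal) neighbours differs from it by flimit or more, otherwise
     * take the averaged value. */
    static unsigned char deblock_pixel(unsigned char m2, unsigned char m1,
                                       unsigned char p0,
                                       unsigned char p1, unsigned char p2,
                                       unsigned char blurred,
                                       unsigned char flimit)
    {
        int keep = abs(p0 - m1) >= flimit || abs(p0 - p1) >= flimit ||
                   abs(p0 - m2) >= flimit || abs(p0 - p2) >= flimit;
        return keep ? p0 : blurred;
    }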
diff --git a/vp8/common/x86/postproc_x86.c b/vp8/common/x86/postproc_x86.c
index a25921b..3ec0106 100644
--- a/vp8/common/x86/postproc_x86.c
+++ b/vp8/common/x86/postproc_x86.c
@@ -18,4 +18,7 @@ extern int rand(void)
{
return __rand();
}
+#else
+/* ISO C forbids an empty translation unit. */
+int vp8_unused;
#endif
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index 19c0faf..15e9871 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -18,7 +18,7 @@
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem8x8_mmx)
+global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
push rbp
mov rbp, rsp
@@ -81,7 +81,7 @@ sym(vp8_copy_mem8x8_mmx):
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem8x4_mmx)
+global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
push rbp
mov rbp, rsp
@@ -125,7 +125,7 @@ sym(vp8_copy_mem8x4_mmx):
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem16x16_mmx)
+global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 7b6e3cf..1434bcd 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -17,7 +17,7 @@
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem16x16_sse2)
+global sym(vp8_copy_mem16x16_sse2) PRIVATE
sym(vp8_copy_mem16x16_sse2):
push rbp
mov rbp, rsp
@@ -123,7 +123,7 @@ sym(vp8_copy_mem16x16_sse2):
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dc_mmx2)
+global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
@@ -196,7 +196,7 @@ sym(vp8_intra_pred_uv_dc_mmx2):
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dctop_mmx2)
+global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
@@ -250,7 +250,7 @@ sym(vp8_intra_pred_uv_dctop_mmx2):
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dcleft_mmx2)
+global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
@@ -317,7 +317,7 @@ sym(vp8_intra_pred_uv_dcleft_mmx2):
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dc128_mmx)
+global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
@@ -357,7 +357,7 @@ sym(vp8_intra_pred_uv_dc128_mmx):
; int left_stride,
; )
%macro vp8_intra_pred_uv_tm 1
-global sym(vp8_intra_pred_uv_tm_%1)
+global sym(vp8_intra_pred_uv_tm_%1) PRIVATE
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
@@ -437,7 +437,7 @@ vp8_intra_pred_uv_tm ssse3
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_ve_mmx)
+global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
@@ -479,7 +479,7 @@ sym(vp8_intra_pred_uv_ve_mmx):
; int left_stride
; )
%macro vp8_intra_pred_uv_ho 1
-global sym(vp8_intra_pred_uv_ho_%1)
+global sym(vp8_intra_pred_uv_ho_%1) PRIVATE
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
@@ -577,7 +577,7 @@ vp8_intra_pred_uv_ho ssse3
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dc_sse2)
+global sym(vp8_intra_pred_y_dc_sse2) PRIVATE
sym(vp8_intra_pred_y_dc_sse2):
push rbp
mov rbp, rsp
@@ -683,7 +683,7 @@ sym(vp8_intra_pred_y_dc_sse2):
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dctop_sse2)
+global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE
sym(vp8_intra_pred_y_dctop_sse2):
push rbp
mov rbp, rsp
@@ -745,7 +745,7 @@ sym(vp8_intra_pred_y_dctop_sse2):
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dcleft_sse2)
+global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE
sym(vp8_intra_pred_y_dcleft_sse2):
push rbp
mov rbp, rsp
@@ -838,7 +838,7 @@ sym(vp8_intra_pred_y_dcleft_sse2):
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dc128_sse2)
+global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE
sym(vp8_intra_pred_y_dc128_sse2):
push rbp
mov rbp, rsp
@@ -885,11 +885,12 @@ sym(vp8_intra_pred_y_dc128_sse2):
; int left_stride
; )
%macro vp8_intra_pred_y_tm 1
-global sym(vp8_intra_pred_y_tm_%1)
+global sym(vp8_intra_pred_y_tm_%1) PRIVATE
sym(vp8_intra_pred_y_tm_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
push rsi
push rdi
GET_GOT rbx
@@ -957,6 +958,7 @@ vp8_intra_pred_y_tm_%1_loop:
RESTORE_GOT
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -972,7 +974,7 @@ vp8_intra_pred_y_tm ssse3
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_ve_sse2)
+global sym(vp8_intra_pred_y_ve_sse2) PRIVATE
sym(vp8_intra_pred_y_ve_sse2):
push rbp
mov rbp, rsp
@@ -1020,7 +1022,7 @@ sym(vp8_intra_pred_y_ve_sse2):
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_y_ho_sse2)
+global sym(vp8_intra_pred_y_ho_sse2) PRIVATE
sym(vp8_intra_pred_y_ho_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/sad_mmx.asm b/vp8/common/x86/sad_mmx.asm
index 407b399..592112f 100644
--- a/vp8/common/x86/sad_mmx.asm
+++ b/vp8/common/x86/sad_mmx.asm
@@ -11,11 +11,11 @@
%include "vpx_ports/x86_abi_support.asm"
-global sym(vp8_sad16x16_mmx)
-global sym(vp8_sad8x16_mmx)
-global sym(vp8_sad8x8_mmx)
-global sym(vp8_sad4x4_mmx)
-global sym(vp8_sad16x8_mmx)
+global sym(vp8_sad16x16_mmx) PRIVATE
+global sym(vp8_sad8x16_mmx) PRIVATE
+global sym(vp8_sad8x8_mmx) PRIVATE
+global sym(vp8_sad4x4_mmx) PRIVATE
+global sym(vp8_sad16x8_mmx) PRIVATE
;unsigned int vp8_sad16x16_mmx(
; unsigned char *src_ptr,
diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm
index 0b01d7b..8d86abc 100644
--- a/vp8/common/x86/sad_sse2.asm
+++ b/vp8/common/x86/sad_sse2.asm
@@ -16,7 +16,7 @@
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad16x16_wmt)
+global sym(vp8_sad16x16_wmt) PRIVATE
sym(vp8_sad16x16_wmt):
push rbp
mov rbp, rsp
@@ -90,7 +90,7 @@ sym(vp8_sad16x16_wmt):
; unsigned char *ref_ptr,
; int ref_stride,
; int max_sad)
-global sym(vp8_sad8x16_wmt)
+global sym(vp8_sad8x16_wmt) PRIVATE
sym(vp8_sad8x16_wmt):
push rbp
mov rbp, rsp
@@ -115,7 +115,7 @@ sym(vp8_sad8x16_wmt):
movq rax, mm7
cmp eax, arg(4)
- jg .x8x16sad_wmt_early_exit
+ ja .x8x16sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm1, QWORD PTR [rdi]
@@ -153,7 +153,7 @@ sym(vp8_sad8x16_wmt):
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad8x8_wmt)
+global sym(vp8_sad8x8_wmt) PRIVATE
sym(vp8_sad8x8_wmt):
push rbp
mov rbp, rsp
@@ -176,7 +176,7 @@ sym(vp8_sad8x8_wmt):
movq rax, mm7
cmp eax, arg(4)
- jg .x8x8sad_wmt_early_exit
+ ja .x8x8sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm1, QWORD PTR [rdi]
@@ -206,7 +206,7 @@ sym(vp8_sad8x8_wmt):
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad4x4_wmt)
+global sym(vp8_sad4x4_wmt) PRIVATE
sym(vp8_sad4x4_wmt):
push rbp
mov rbp, rsp
@@ -261,7 +261,7 @@ sym(vp8_sad4x4_wmt):
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad16x8_wmt)
+global sym(vp8_sad16x8_wmt) PRIVATE
sym(vp8_sad16x8_wmt):
push rbp
mov rbp, rsp
@@ -285,7 +285,7 @@ sym(vp8_sad16x8_wmt):
movq rax, mm7
cmp eax, arg(4)
- jg .x16x8sad_wmt_early_exit
+ ja .x16x8sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm2, QWORD PTR [rsi+8]
@@ -335,7 +335,7 @@ sym(vp8_sad16x8_wmt):
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
-global sym(vp8_copy32xn_sse2)
+global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
push rbp
mov rbp, rsp
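
The `jg` to `ja` changes in the early-exit paths are a signedness fix: the running SAD in mm7 and the max_sad argument are both unsigned, so the comparison must use the unsigned jump-above rather than the signed jump-greater. A caller passing a large sentinel such as UINT_MAX for "no limit" would otherwise see it reinterpreted as -1 and take the early exit on every block. A minimal C rendering of the two comparisons (two's-complement illustration):

    #include <limits.h>

    /* jg tests the signed flags (SF/OF); ja tests the unsigned ones (CF/ZF). */
    static int early_exit_jg(unsigned int sad, unsigned int max_sad)
    {
        return (int)sad > (int)max_sad;  /* UINT_MAX reads as -1: always taken */
    }

    static int early_exit_ja(unsigned int sad, unsigned int max_sad)
    {
        return sad > max_sad;            /* matches the corrected code */
    }
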
diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm
index c2af3c8..f90a589 100644
--- a/vp8/common/x86/sad_sse3.asm
+++ b/vp8/common/x86/sad_sse3.asm
@@ -380,7 +380,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x3_sse3)
+global sym(vp8_sad16x16x3_sse3) PRIVATE
sym(vp8_sad16x16x3_sse3):
STACK_FRAME_CREATE_X3
@@ -422,7 +422,7 @@ sym(vp8_sad16x16x3_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x3_sse3)
+global sym(vp8_sad16x8x3_sse3) PRIVATE
sym(vp8_sad16x8x3_sse3):
STACK_FRAME_CREATE_X3
@@ -460,7 +460,7 @@ sym(vp8_sad16x8x3_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x16x3_sse3)
+global sym(vp8_sad8x16x3_sse3) PRIVATE
sym(vp8_sad8x16x3_sse3):
STACK_FRAME_CREATE_X3
@@ -489,7 +489,7 @@ sym(vp8_sad8x16x3_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x8x3_sse3)
+global sym(vp8_sad8x8x3_sse3) PRIVATE
sym(vp8_sad8x8x3_sse3):
STACK_FRAME_CREATE_X3
@@ -514,7 +514,7 @@ sym(vp8_sad8x8x3_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad4x4x3_sse3)
+global sym(vp8_sad4x4x3_sse3) PRIVATE
sym(vp8_sad4x4x3_sse3):
STACK_FRAME_CREATE_X3
@@ -589,7 +589,7 @@ sym(vp8_sad4x4x3_sse3):
; int ref_stride,
; int max_sad)
;%define lddqu movdqu
-global sym(vp8_sad16x16_sse3)
+global sym(vp8_sad16x16_sse3) PRIVATE
sym(vp8_sad16x16_sse3):
STACK_FRAME_CREATE_X3
@@ -642,7 +642,7 @@ sym(vp8_sad16x16_sse3):
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
-global sym(vp8_copy32xn_sse3)
+global sym(vp8_copy32xn_sse3) PRIVATE
sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3
@@ -703,7 +703,7 @@ sym(vp8_copy32xn_sse3):
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x4d_sse3)
+global sym(vp8_sad16x16x4d_sse3) PRIVATE
sym(vp8_sad16x16x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -754,7 +754,7 @@ sym(vp8_sad16x16x4d_sse3):
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x4d_sse3)
+global sym(vp8_sad16x8x4d_sse3) PRIVATE
sym(vp8_sad16x8x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -801,7 +801,7 @@ sym(vp8_sad16x8x4d_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x16x4d_sse3)
+global sym(vp8_sad8x16x4d_sse3) PRIVATE
sym(vp8_sad8x16x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -834,7 +834,7 @@ sym(vp8_sad8x16x4d_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x8x4d_sse3)
+global sym(vp8_sad8x8x4d_sse3) PRIVATE
sym(vp8_sad8x8x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -863,7 +863,7 @@ sym(vp8_sad8x8x4d_sse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad4x4x4d_sse3)
+global sym(vp8_sad4x4x4d_sse3) PRIVATE
sym(vp8_sad4x4x4d_sse3):
STACK_FRAME_CREATE_X4
diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm
index 03ecec4..f7fccd7 100644
--- a/vp8/common/x86/sad_sse4.asm
+++ b/vp8/common/x86/sad_sse4.asm
@@ -161,7 +161,7 @@
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array);
-global sym(vp8_sad16x16x8_sse4)
+global sym(vp8_sad16x16x8_sse4) PRIVATE
sym(vp8_sad16x16x8_sse4):
push rbp
mov rbp, rsp
@@ -203,7 +203,7 @@ sym(vp8_sad16x16x8_sse4):
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad16x8x8_sse4)
+global sym(vp8_sad16x8x8_sse4) PRIVATE
sym(vp8_sad16x8x8_sse4):
push rbp
mov rbp, rsp
@@ -241,7 +241,7 @@ sym(vp8_sad16x8x8_sse4):
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad8x8x8_sse4)
+global sym(vp8_sad8x8x8_sse4) PRIVATE
sym(vp8_sad8x8x8_sse4):
push rbp
mov rbp, rsp
@@ -279,7 +279,7 @@ sym(vp8_sad8x8x8_sse4):
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad8x16x8_sse4)
+global sym(vp8_sad8x16x8_sse4) PRIVATE
sym(vp8_sad8x16x8_sse4):
push rbp
mov rbp, rsp
@@ -320,7 +320,7 @@ sym(vp8_sad8x16x8_sse4):
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad4x4x8_sse4)
+global sym(vp8_sad4x4x8_sse4) PRIVATE
sym(vp8_sad4x4x8_sse4):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/sad_ssse3.asm b/vp8/common/x86/sad_ssse3.asm
index 95b6c89..278fc06 100644
--- a/vp8/common/x86/sad_ssse3.asm
+++ b/vp8/common/x86/sad_ssse3.asm
@@ -152,7 +152,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x3_ssse3)
+global sym(vp8_sad16x16x3_ssse3) PRIVATE
sym(vp8_sad16x16x3_ssse3):
push rbp
mov rbp, rsp
@@ -265,7 +265,7 @@ sym(vp8_sad16x16x3_ssse3):
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x3_ssse3)
+global sym(vp8_sad16x8x3_ssse3) PRIVATE
sym(vp8_sad16x8x3_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index 5528fd0..47dd452 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -28,7 +28,7 @@ extern sym(vp8_bilinear_filters_x86_8)
; unsigned int output_width,
; short * vp8_filter
;)
-global sym(vp8_filter_block1d_h6_mmx)
+global sym(vp8_filter_block1d_h6_mmx) PRIVATE
sym(vp8_filter_block1d_h6_mmx):
push rbp
mov rbp, rsp
@@ -125,7 +125,7 @@ sym(vp8_filter_block1d_h6_mmx):
; unsigned int output_width,
; short * vp8_filter
;)
-global sym(vp8_filter_block1dc_v6_mmx)
+global sym(vp8_filter_block1dc_v6_mmx) PRIVATE
sym(vp8_filter_block1dc_v6_mmx):
push rbp
mov rbp, rsp
@@ -213,7 +213,7 @@ sym(vp8_filter_block1dc_v6_mmx):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x8_mmx)
+global sym(vp8_bilinear_predict8x8_mmx) PRIVATE
sym(vp8_bilinear_predict8x8_mmx):
push rbp
mov rbp, rsp
@@ -370,7 +370,7 @@ sym(vp8_bilinear_predict8x8_mmx):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x4_mmx)
+global sym(vp8_bilinear_predict8x4_mmx) PRIVATE
sym(vp8_bilinear_predict8x4_mmx):
push rbp
mov rbp, rsp
@@ -525,7 +525,7 @@ sym(vp8_bilinear_predict8x4_mmx):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict4x4_mmx)
+global sym(vp8_bilinear_predict4x4_mmx) PRIVATE
sym(vp8_bilinear_predict4x4_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index cb550af..69f8d10 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -33,7 +33,7 @@ extern sym(vp8_bilinear_filters_x86_8)
; unsigned int output_width,
; short *vp8_filter
;)
-global sym(vp8_filter_block1d8_h6_sse2)
+global sym(vp8_filter_block1d8_h6_sse2) PRIVATE
sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
@@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2):
; even number. This function handles 8 pixels in the horizontal direction, calculating one
; row each iteration to take advantage of the 128-bit operations.
;*************************************************************************************/
-global sym(vp8_filter_block1d16_h6_sse2)
+global sym(vp8_filter_block1d16_h6_sse2) PRIVATE
sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
@@ -329,7 +329,7 @@ sym(vp8_filter_block1d16_h6_sse2):
; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows.
;*************************************************************************************/
-global sym(vp8_filter_block1d8_v6_sse2)
+global sym(vp8_filter_block1d8_v6_sse2) PRIVATE
sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
@@ -424,7 +424,7 @@ sym(vp8_filter_block1d8_v6_sse2):
; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows.
;*************************************************************************************/
-global sym(vp8_filter_block1d16_v6_sse2)
+global sym(vp8_filter_block1d16_v6_sse2) PRIVATE
sym(vp8_filter_block1d16_v6_sse2):
push rbp
mov rbp, rsp
@@ -534,7 +534,7 @@ sym(vp8_filter_block1d16_v6_sse2):
; const short *vp8_filter
;)
; First-pass filter only when yoffset==0
-global sym(vp8_filter_block1d8_h6_only_sse2)
+global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE
sym(vp8_filter_block1d8_h6_only_sse2):
push rbp
mov rbp, rsp
@@ -647,7 +647,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
; const short *vp8_filter
;)
; First-pass filter only when yoffset==0
-global sym(vp8_filter_block1d16_h6_only_sse2)
+global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE
sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
@@ -812,7 +812,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
; const short *vp8_filter
;)
; Second-pass filter only when xoffset==0
-global sym(vp8_filter_block1d8_v6_only_sse2)
+global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE
sym(vp8_filter_block1d8_v6_only_sse2):
push rbp
mov rbp, rsp
@@ -904,7 +904,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
; unsigned int output_height,
; unsigned int output_width
;)
-global sym(vp8_unpack_block1d16_h6_sse2)
+global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE
sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
@@ -963,7 +963,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
; int dst_pitch
;)
extern sym(vp8_bilinear_filters_x86_8)
-global sym(vp8_bilinear_predict16x16_sse2)
+global sym(vp8_bilinear_predict16x16_sse2) PRIVATE
sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
@@ -1231,7 +1231,7 @@ sym(vp8_bilinear_predict16x16_sse2):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x8_sse2)
+global sym(vp8_bilinear_predict8x8_sse2) PRIVATE
sym(vp8_bilinear_predict8x8_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 6bca82b..c06f245 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -34,7 +34,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d8_h6_ssse3)
+global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE
sym(vp8_filter_block1d8_h6_ssse3):
push rbp
mov rbp, rsp
@@ -177,7 +177,7 @@ vp8_filter_block1d8_h4_ssse3:
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d16_h6_ssse3)
+global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE
sym(vp8_filter_block1d16_h6_ssse3):
push rbp
mov rbp, rsp
@@ -284,7 +284,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d4_h6_ssse3)
+global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE
sym(vp8_filter_block1d4_h6_ssse3):
push rbp
mov rbp, rsp
@@ -352,6 +352,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -413,7 +414,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d16_v6_ssse3)
+global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE
sym(vp8_filter_block1d16_v6_ssse3):
push rbp
mov rbp, rsp
@@ -601,7 +602,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d8_v6_ssse3)
+global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE
sym(vp8_filter_block1d8_v6_ssse3):
push rbp
mov rbp, rsp
@@ -741,7 +742,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d4_v6_ssse3)
+global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE
sym(vp8_filter_block1d4_v6_ssse3):
push rbp
mov rbp, rsp
@@ -880,7 +881,7 @@ sym(vp8_filter_block1d4_v6_ssse3):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict16x16_ssse3)
+global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE
sym(vp8_bilinear_predict16x16_ssse3):
push rbp
mov rbp, rsp
@@ -1143,7 +1144,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x8_ssse3)
+global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE
sym(vp8_bilinear_predict8x8_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm
index 2be8bbe..d9120d0 100644
--- a/vp8/common/x86/variance_impl_mmx.asm
+++ b/vp8/common/x86/variance_impl_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;unsigned int vp8_get_mb_ss_mmx( short *src_ptr )
-global sym(vp8_get_mb_ss_mmx)
+global sym(vp8_get_mb_ss_mmx) PRIVATE
sym(vp8_get_mb_ss_mmx):
push rbp
mov rbp, rsp
@@ -72,7 +72,7 @@ sym(vp8_get_mb_ss_mmx):
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get8x8var_mmx)
+global sym(vp8_get8x8var_mmx) PRIVATE
sym(vp8_get8x8var_mmx):
push rbp
mov rbp, rsp
@@ -320,7 +320,7 @@ sym(vp8_get8x8var_mmx):
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get4x4var_mmx)
+global sym(vp8_get4x4var_mmx) PRIVATE
sym(vp8_get4x4var_mmx):
push rbp
mov rbp, rsp
@@ -433,7 +433,7 @@ sym(vp8_get4x4var_mmx):
; unsigned char *ref_ptr,
; int recon_stride
;)
-global sym(vp8_get4x4sse_cs_mmx)
+global sym(vp8_get4x4sse_cs_mmx) PRIVATE
sym(vp8_get4x4sse_cs_mmx):
push rbp
mov rbp, rsp
@@ -522,7 +522,7 @@ sym(vp8_get4x4sse_cs_mmx):
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_filter_block2d_bil4x4_var_mmx)
+global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil4x4_var_mmx):
push rbp
mov rbp, rsp
@@ -667,7 +667,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx):
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_filter_block2d_bil_var_mmx)
+global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil_var_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_impl_sse2.asm b/vp8/common/x86/variance_impl_sse2.asm
index 7629220..761433c 100644
--- a/vp8/common/x86/variance_impl_sse2.asm
+++ b/vp8/common/x86/variance_impl_sse2.asm
@@ -17,7 +17,7 @@
;(
; short *src_ptr
;)
-global sym(vp8_get_mb_ss_sse2)
+global sym(vp8_get_mb_ss_sse2) PRIVATE
sym(vp8_get_mb_ss_sse2):
push rbp
mov rbp, rsp
@@ -80,7 +80,7 @@ sym(vp8_get_mb_ss_sse2):
; unsigned int * SSE,
; int * Sum
;)
-global sym(vp8_get16x16var_sse2)
+global sym(vp8_get16x16var_sse2) PRIVATE
sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
@@ -224,7 +224,7 @@ sym(vp8_get16x16var_sse2):
; unsigned int * SSE,
; int * Sum
;)
-global sym(vp8_get8x8var_sse2)
+global sym(vp8_get8x8var_sse2) PRIVATE
sym(vp8_get8x8var_sse2):
push rbp
mov rbp, rsp
@@ -413,7 +413,7 @@ sym(vp8_get8x8var_sse2):
; unsigned int *sumsquared;;
;
;)
-global sym(vp8_filter_block2d_bil_var_sse2)
+global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE
sym(vp8_filter_block2d_bil_var_sse2):
push rbp
mov rbp, rsp
@@ -690,7 +690,7 @@ filter_block2d_bil_variance:
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_vert_variance8x_h_sse2)
+global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE
sym(vp8_half_horiz_vert_variance8x_h_sse2):
push rbp
mov rbp, rsp
@@ -812,7 +812,7 @@ vp8_half_horiz_vert_variance8x_h_1:
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_vert_variance16x_h_sse2)
+global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE
sym(vp8_half_horiz_vert_variance16x_h_sse2):
push rbp
mov rbp, rsp
@@ -928,7 +928,7 @@ vp8_half_horiz_vert_variance16x_h_1:
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_vert_variance8x_h_sse2)
+global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE
sym(vp8_half_vert_variance8x_h_sse2):
push rbp
mov rbp, rsp
@@ -1035,7 +1035,7 @@ vp8_half_vert_variance8x_h_1:
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_vert_variance16x_h_sse2)
+global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE
sym(vp8_half_vert_variance16x_h_sse2):
push rbp
mov rbp, rsp
@@ -1143,7 +1143,7 @@ vp8_half_vert_variance16x_h_1:
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_variance8x_h_sse2)
+global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE
sym(vp8_half_horiz_variance8x_h_sse2):
push rbp
mov rbp, rsp
@@ -1248,7 +1248,7 @@ vp8_half_horiz_variance8x_h_1:
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_variance16x_h_sse2)
+global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE
sym(vp8_half_horiz_variance16x_h_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_impl_ssse3.asm b/vp8/common/x86/variance_impl_ssse3.asm
index 97e8b0e..686b4a9 100644
--- a/vp8/common/x86/variance_impl_ssse3.asm
+++ b/vp8/common/x86/variance_impl_ssse3.asm
@@ -29,7 +29,7 @@
;)
;Note: The filter coefficient at offset=0 is 128. Since the second register
;for Pmaddubsw is signed bytes, we must calculate the zero offset separately.
-global sym(vp8_filter_block2d_bil_var_ssse3)
+global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE
sym(vp8_filter_block2d_bil_var_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c
index 2769a30..afd6429 100644
--- a/vp8/common/x86/variance_sse2.c
+++ b/vp8/common/x86/variance_sse2.c
@@ -332,8 +332,9 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
unsigned int xxsum0, xxsum1;
- // note we could avoid these if statements if the calling function
- // just called the appropriate functions inside.
+ /* note we could avoid these if statements if the calling function
+ * just called the appropriate functions inside.
+ */
if (xoffset == 4 && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c
index 1be0d92..ba2055c 100644
--- a/vp8/common/x86/variance_ssse3.c
+++ b/vp8/common/x86/variance_ssse3.c
@@ -79,8 +79,9 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
int xsum0;
unsigned int xxsum0;
- // note we could avoid these if statements if the calling function
- // just called the appropriate functions inside.
+ /* note we could avoid these if statements if the calling function
+ * just called the appropriate functions inside.
+ */
if (xoffset == 4 && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
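
Both variance files carry the same aside: the (xoffset, yoffset) tests could be hoisted out if callers dispatched directly. A hypothetical sketch of that suggestion, not part of the patch (classify and the case names are illustrative stand-ins for the _sse2 helpers above):

    /* Hypothetical: resolve the half-pel special case once instead of
     * re-testing the offsets inside every variance call. */
    enum subpix_case { HALF_H, HALF_V, HALF_HV, GENERIC };

    static enum subpix_case classify(int xoffset, int yoffset)
    {
        if (xoffset == 4 && yoffset == 0) return HALF_H;   /* half-pel horizontal */
        if (xoffset == 0 && yoffset == 4) return HALF_V;   /* half-pel vertical */
        if (xoffset == 4 && yoffset == 4) return HALF_HV;  /* both */
        return GENERIC;                                    /* arbitrary subpel */
    }
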
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 23a7fdc..3437a23 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3
{
if (yoffset)
{
- vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
- vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
+ vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, FData2,
+ 16, 21, xoffset);
+ vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
+ 16, yoffset);
}
else
{
/* First-pass only */
- vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
+ vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, xoffset);
}
}
else
{
- /* Second-pass only */
- vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
+ if (yoffset)
+ {
+ /* Second-pass only */
+ vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, yoffset);
+ }
+ else
+ {
+ /* The ssse3 second-pass-only function couldn't handle the
+ * (xoffset==0 && yoffset==0) case correctly. Add a copy function here
+ * to guarantee the six-tap path handles all possible offsets. */
+ vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
+ }
}
}
@@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3
{
if (yoffset)
{
- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, FData2,
+ 8, 13, xoffset);
+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
+ 8, yoffset);
}
else
{
- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, xoffset);
}
}
else
{
- /* Second-pass only */
- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
+ if (yoffset)
+ {
+ /* Second-pass only */
+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, yoffset);
+ }
+ else
+ {
+ /* The ssse3 second-pass-only function couldn't handle the
+ * (xoffset==0 && yoffset==0) case correctly. Add a copy function here
+ * to guarantee the six-tap path handles all possible offsets. */
+ vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
+ }
}
}
@@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3
{
if (yoffset)
{
- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, FData2,
+ 8, 9, xoffset);
+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
+ 4, yoffset);
}
else
{
/* First-pass only */
- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, xoffset);
}
}
else
{
- /* Second-pass only */
- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+ if (yoffset)
+ {
+ /* Second-pass only */
+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, yoffset);
+ }
+ else
+ {
+ /* The ssse3 second-pass-only function couldn't handle the
+ * (xoffset==0 && yoffset==0) case correctly. Add a copy function here
+ * to guarantee the six-tap path handles all possible offsets. */
+ vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
+ }
}
}
@@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3
{
if (yoffset)
{
- vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
- vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
+ vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ FData2, 4, 9, xoffset);
+ vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
+ 4, yoffset);
}
else
{
- vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+ vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, xoffset);
}
}
else
{
- vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+ if (yoffset)
+ {
+ vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, yoffset);
+ }
+ else
+ {
+ /* The ssse3 second-pass-only function couldn't handle the
+ * (xoffset==0 && yoffset==0) case correctly. Add a copy function here
+ * to guarantee the six-tap path handles all possible offsets. */
+ int r;
+
+ for (r = 0; r < 4; r++)
+ {
+ #if !(CONFIG_FAST_UNALIGNED)
+ dst_ptr[0] = src_ptr[0];
+ dst_ptr[1] = src_ptr[1];
+ dst_ptr[2] = src_ptr[2];
+ dst_ptr[3] = src_ptr[3];
+ #else
+ *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
+ #endif
+ dst_ptr += dst_pitch;
+ src_ptr += src_pixels_per_line;
+ }
+ }
}
-
}
#endif
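
Each ssse3 six-tap wrapper above now splits the dispatch on both offsets: horizontal-then-vertical when both are nonzero, a single pass when only one is, and a plain block copy when neither is, since the second-pass-only filter was never a correct identity transform for (0,0). A self-contained sketch of the 4x4 fallback (copy4x4 is illustrative; the patch itself uses either four byte stores or a single uint32_t store under CONFIG_FAST_UNALIGNED):

    #include <string.h>

    /* Illustrative 4x4 copy for the (xoffset==0 && yoffset==0) case:
     * four rows of four bytes, no filtering at all. */
    static void copy4x4(const unsigned char *src, int src_stride,
                        unsigned char *dst, int dst_stride)
    {
        int r;
        for (r = 0; r < 4; r++)
        {
            memcpy(dst, src, 4);  /* portable stand-in for the word store */
            dst += dst_stride;
            src += src_stride;
        }
    }
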
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 880c185..1a08c05 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -55,7 +55,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br);
int loop_end, x; \
size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \
\
- x = shift + CHAR_BIT - bits_left; \
+ x = (int)(shift + CHAR_BIT - bits_left); \
loop_end = 0; \
if(x >= 0) \
{ \
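
The new cast matters because bits_left is a size_t: the subtraction is carried out in unsigned arithmetic, so when shift + CHAR_BIT is smaller than bits_left the mathematically negative result wraps to a huge positive value and the x >= 0 test that follows always fires. Casting to int recovers the intended signed value on the usual two's-complement targets. A minimal illustration:

    #include <limits.h>
    #include <stddef.h>

    /* Without the cast the expression has type size_t and can never be
     * negative; the (int) conversion restores the signed distance. */
    static int bits_past_end(int shift, size_t bits_left)
    {
        return (int)(shift + CHAR_BIT - bits_left);
    }
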
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 51e2420..8027a07 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -48,11 +48,11 @@ static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p)
static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi)
{
- vp8_reader *const bc = & pbi->bc;
+ vp8_reader *const bc = & pbi->mbc[8];
const int mis = pbi->common.mode_info_stride;
mi->mbmi.ref_frame = INTRA_FRAME;
- mi->mbmi.mode = read_kf_ymode(bc, pbi->common.kf_ymode_prob);
+ mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob);
if (mi->mbmi.mode == B_PRED)
{
@@ -65,12 +65,12 @@ static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi)
const B_PREDICTION_MODE L = left_block_mode(mi, i);
mi->bmi[i].as_mode =
- read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]);
+ read_bmode(bc, vp8_kf_bmode_prob [A] [L]);
}
while (++i < 16);
}
- mi->mbmi.uv_mode = read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
+ mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob);
}
static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
@@ -150,7 +150,7 @@ static const unsigned char mbsplit_fill_offset[4][16] = {
static void mb_mode_mv_init(VP8D_COMP *pbi)
{
- vp8_reader *const bc = & pbi->bc;
+ vp8_reader *const bc = & pbi->mbc[8];
MV_CONTEXT *const mvc = pbi->common.fc.mvc;
#if CONFIG_ERROR_CONCEALMENT
@@ -159,6 +159,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi)
* outside the frame. */
pbi->mvs_corrupt_from_mb = UINT_MAX;
#endif
+ /* Read the mb_no_coeff_skip flag */
+ pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc);
+
pbi->prob_skip_false = 0;
if (pbi->common.mb_no_coeff_skip)
pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
@@ -293,26 +296,24 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi,
blockmv.as_mv.row += best_mv.as_mv.row;
blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1;
blockmv.as_mv.col += best_mv.as_mv.col;
-
- mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv,
- mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
}
}
else
{
blockmv.as_int = abovemv.as_int;
- mbmi->need_to_clamp_mvs |= above_mb->mbmi.need_to_clamp_mvs;
}
}
else
{
blockmv.as_int = leftmv.as_int;
- mbmi->need_to_clamp_mvs |= left_mb->mbmi.need_to_clamp_mvs;
}
+ mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv,
+ mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
+
{
/* Fill (uniform) modes, mvs of jth subset.
Must do it here because ensuing subsets can
@@ -337,7 +338,7 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi,
static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi)
{
- vp8_reader *const bc = & pbi->bc;
+ vp8_reader *const bc = & pbi->mbc[8];
mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra);
if (mbmi->ref_frame) /* inter MB */
{
@@ -595,14 +596,14 @@ static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi,
* By default on a key frame reset all MBs to segment 0
*/
if (pbi->mb.update_mb_segmentation_map)
- read_mb_features(&pbi->bc, &mi->mbmi, &pbi->mb);
+ read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb);
else if(pbi->common.frame_type == KEY_FRAME)
mi->mbmi.segment_id = 0;
/* Read the macroblock coeff skip flag if this feature is in use,
* else default to 0 */
if (pbi->common.mb_no_coeff_skip)
- mi->mbmi.mb_skip_coeff = vp8_read(&pbi->bc, pbi->prob_skip_false);
+ mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false);
else
mi->mbmi.mb_skip_coeff = 0;
@@ -644,7 +645,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
#if CONFIG_ERROR_CONCEALMENT
/* look for corruption. set mvs_corrupt_from_mb to the current
* mb_num if the frame is corrupt from this macroblock. */
- if (vp8dx_bool_error(&pbi->bc) && mb_num < pbi->mvs_corrupt_from_mb)
+ if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num <
+ (int)pbi->mvs_corrupt_from_mb)
{
pbi->mvs_corrupt_from_mb = mb_num;
/* no need to continue since the partition is corrupt from
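
In decode_split_mv the bounds check is hoisted out of the three branches: whether blockmv comes from a fresh read, the above neighbour or the left neighbour, need_to_clamp_mvs is now derived once from the final vector instead of being inherited from the neighbour's flag. A condensed sketch of the result (pick_submv is a hypothetical stand-in for the three-way branch):

    /* Sketch: compute the sub-block mv first, then run one bounds check
     * on whatever was chosen. */
    blockmv = pick_submv(bc, leftmv, abovemv, mvc, best_mv);  /* hypothetical */
    mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv,
                                                   mb_to_left_edge,
                                                   mb_to_right_edge,
                                                   mb_to_top_edge,
                                                   mb_to_bottom_edge);
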
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 62a068b..a4a00f6 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -177,7 +177,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
{
short *DQC = xd->dequant_y1;
int dst_stride = xd->dst.y_stride;
- unsigned char *base_dst = xd->dst.y_buffer;
/* clear out residual eob info */
if(xd->mode_info_context->mbmi.mb_skip_coeff)
@@ -188,38 +187,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
for (i = 0; i < 16; i++)
{
BLOCKD *b = &xd->block[i];
- int b_mode = xd->mode_info_context->bmi[i].as_mode;
- unsigned char *yabove;
- unsigned char *yleft;
- int left_stride;
- unsigned char top_left;
-
- yabove = base_dst + b->offset - dst_stride;
- yleft = base_dst + b->offset - 1;
- left_stride = dst_stride;
- top_left = yabove[-1];
-
- // vp8_intra4x4_predict (base_dst + b->offset, dst_stride, b_mode,
- // base_dst + b->offset, dst_stride );
- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride,
- b_mode,
- base_dst + b->offset, dst_stride,
- top_left);
+ unsigned char *dst = xd->dst.y_buffer + b->offset;
+ B_PREDICTION_MODE b_mode =
+ xd->mode_info_context->bmi[i].as_mode;
+ unsigned char *Above = dst - dst_stride;
+ unsigned char *yleft = dst - 1;
+ int left_stride = dst_stride;
+ unsigned char top_left = Above[-1];
+
+ vp8_intra4x4_predict(Above, yleft, left_stride, b_mode,
+ dst, dst_stride, top_left);
if (xd->eobs[i])
{
if (xd->eobs[i] > 1)
{
- vp8_dequant_idct_add
- (b->qcoeff, DQC,
- base_dst + b->offset, dst_stride);
+ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
}
else
{
vp8_dc_only_idct_add
(b->qcoeff[0] * DQC[0],
- base_dst + b->offset, dst_stride,
- base_dst + b->offset, dst_stride);
+ dst, dst_stride,
+ dst, dst_stride);
((int *)b->qcoeff)[0] = 0;
}
}
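
The intra4x4 path above drops the _d_c wrapper and derives the neighbour pixels directly from the destination buffer, aided by the setup_intra_recon_top_line and setup_intra_recon_left calls added further down. The pointer arithmetic, spelled out for a block at dst inside the loop:

    /* For a 4x4 block at dst with row stride dst_stride: */
    unsigned char *above    = dst - dst_stride;  /* reconstructed row above */
    unsigned char *left     = dst - 1;           /* left column, same stride */
    unsigned char  top_left = above[-1];         /* corner pixel */
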
@@ -317,48 +307,253 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
FILE *vpxlog = 0;
#endif
+static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
+{
+ int i;
+ unsigned char *src_ptr1;
+ unsigned char *dest_ptr1;
+
+ unsigned int Border;
+ int plane_stride;
+
+ /***********/
+ /* Y Plane */
+ /***********/
+ Border = ybf->border;
+ plane_stride = ybf->y_stride;
+ src_ptr1 = ybf->y_buffer - Border;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+
+ for (i = 0; i < (int)Border; i++)
+ {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ dest_ptr1 += plane_stride;
+ }
+
+
+ /***********/
+ /* U Plane */
+ /***********/
+ plane_stride = ybf->uv_stride;
+ Border /= 2;
+ src_ptr1 = ybf->u_buffer - Border;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+
+ for (i = 0; i < (int)(Border); i++)
+ {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ dest_ptr1 += plane_stride;
+ }
+
+ /***********/
+ /* V Plane */
+ /***********/
+
+ src_ptr1 = ybf->v_buffer - Border;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+
+ for (i = 0; i < (int)(Border); i++)
+ {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ dest_ptr1 += plane_stride;
+ }
+}
+
+static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
+{
+ int i;
+ unsigned char *src_ptr1, *src_ptr2;
+ unsigned char *dest_ptr2;
+
+ unsigned int Border;
+ int plane_stride;
+ int plane_height;
+
+ /***********/
+ /* Y Plane */
+ /***********/
+ Border = ybf->border;
+ plane_stride = ybf->y_stride;
+ plane_height = ybf->y_height;
+
+ src_ptr1 = ybf->y_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)Border; i++)
+ {
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr2 += plane_stride;
+ }
+
+
+ /***********/
+ /* U Plane */
+ /***********/
+ plane_stride = ybf->uv_stride;
+ plane_height = ybf->uv_height;
+ Border /= 2;
+
+ src_ptr1 = ybf->u_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)(Border); i++)
+ {
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr2 += plane_stride;
+ }
+
+ /***********/
+ /* V Plane */
+ /***********/
+
+ src_ptr1 = ybf->v_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)(Border); i++)
+ {
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr2 += plane_stride;
+ }
+}
+
+static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
+ unsigned char *y_src,
+ unsigned char *u_src,
+ unsigned char *v_src)
+{
+ int i;
+ unsigned char *src_ptr1, *src_ptr2;
+ unsigned char *dest_ptr1, *dest_ptr2;
+
+ unsigned int Border;
+ int plane_stride;
+ int plane_height;
+ int plane_width;
+
+ /***********/
+ /* Y Plane */
+ /***********/
+ Border = ybf->border;
+ plane_stride = ybf->y_stride;
+ plane_height = 16;
+ plane_width = ybf->y_width;
+
+ /* copy the left and right most columns out */
+ src_ptr1 = y_src;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++)
+ {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /***********/
+ /* U Plane */
+ /***********/
+ plane_stride = ybf->uv_stride;
+ plane_height = 8;
+ plane_width = ybf->uv_width;
+ Border /= 2;
+
+ /* copy the left and right most columns out */
+ src_ptr1 = u_src;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++)
+ {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /***********/
+ /* V Plane */
+ /***********/
+
+ /* copy the left and right most columns out */
+ src_ptr1 = v_src;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++)
+ {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+}
+
static void decode_mb_rows(VP8D_COMP *pbi)
{
VP8_COMMON *const pc = & pbi->common;
MACROBLOCKD *const xd = & pbi->mb;
+ MODE_INFO *lf_mic = xd->mode_info_context;
+
int ibc = 0;
int num_part = 1 << pc->multi_token_partition;
int recon_yoffset, recon_uvoffset;
int mb_row, mb_col;
int mb_idx = 0;
- int dst_fb_idx = pc->new_fb_idx;
- int recon_y_stride = pc->yv12_fb[dst_fb_idx].y_stride;
- int recon_uv_stride = pc->yv12_fb[dst_fb_idx].uv_stride;
+
+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
+
+ int recon_y_stride = yv12_fb_new->y_stride;
+ int recon_uv_stride = yv12_fb_new->uv_stride;
unsigned char *ref_buffer[MAX_REF_FRAMES][3];
unsigned char *dst_buffer[3];
+ unsigned char *lf_dst[3];
+ unsigned char *eb_dst[3];
int i;
- int ref_fb_index[MAX_REF_FRAMES];
int ref_fb_corrupted[MAX_REF_FRAMES];
ref_fb_corrupted[INTRA_FRAME] = 0;
- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx;
- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx;
- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx;
-
for(i = 1; i < MAX_REF_FRAMES; i++)
{
- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer;
- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer;
- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer;
+ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];
+
+ ref_buffer[i][0] = this_fb->y_buffer;
+ ref_buffer[i][1] = this_fb->u_buffer;
+ ref_buffer[i][2] = this_fb->v_buffer;
- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted;
+ ref_fb_corrupted[i] = this_fb->corrupted;
}
- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer;
- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer;
- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer;
+ /* Set up the buffer pointers */
+ eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer;
+ eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer;
+ eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer;
xd->up_available = 0;
+ /* Initialize the loop filter for this frame. */
+ if(pc->filter_level)
+ vp8_loop_filter_frame_init(pc, xd, pc->filter_level);
+
+ vp8_setup_intra_recon_top_line(yv12_fb_new);
+
/* Decode the individual macro block */
for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
{
@@ -395,10 +590,14 @@ static void decode_mb_rows(VP8D_COMP *pbi)
xd->recon_above[1] -= xd->dst.uv_stride;
xd->recon_above[2] -= xd->dst.uv_stride;
- //TODO: move to outside row loop
+ /* TODO: move to outside row loop */
xd->recon_left_stride[0] = xd->dst.y_stride;
xd->recon_left_stride[1] = xd->dst.uv_stride;
+ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
+ xd->recon_left[2], xd->dst.y_stride,
+ xd->dst.uv_stride);
+
for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
{
/* Distance of Mb to the various image edges.
@@ -460,26 +659,103 @@ static void decode_mb_rows(VP8D_COMP *pbi)
xd->recon_left[1] += 8;
xd->recon_left[2] += 8;
-
recon_yoffset += 16;
recon_uvoffset += 8;
++xd->mode_info_context; /* next mb */
xd->above_context++;
-
}
/* adjust to the next row of mbs */
- vp8_extend_mb_row(
- &pc->yv12_fb[dst_fb_idx],
- xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
- );
+ vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
++xd->mode_info_context; /* skip prediction column */
xd->up_available = 1;
+ if(pc->filter_level)
+ {
+ if(mb_row > 0)
+ {
+ if (pc->filter_type == NORMAL_LOOPFILTER)
+ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1,
+ recon_y_stride, recon_uv_stride,
+ lf_dst[0], lf_dst[1], lf_dst[2]);
+ else
+ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1,
+ recon_y_stride, recon_uv_stride,
+ lf_dst[0], lf_dst[1], lf_dst[2]);
+
+ if(mb_row > 1)
+ {
+ yv12_extend_frame_left_right_c(yv12_fb_new,
+ eb_dst[0],
+ eb_dst[1],
+ eb_dst[2]);
+
+ eb_dst[0] += recon_y_stride * 16;
+ eb_dst[1] += recon_uv_stride * 8;
+ eb_dst[2] += recon_uv_stride * 8;
+
+ if(mb_row == 2)
+ yv12_extend_frame_top_c(yv12_fb_new);
+
+ }
+
+ lf_dst[0] += recon_y_stride * 16;
+ lf_dst[1] += recon_uv_stride * 8;
+ lf_dst[2] += recon_uv_stride * 8;
+ lf_mic += pc->mb_cols;
+ lf_mic++; /* Skip border mb */
+ }
+ }
+ else
+ {
+ if(mb_row > 0)
+ {
+ /**/
+ yv12_extend_frame_left_right_c(yv12_fb_new,
+ eb_dst[0],
+ eb_dst[1],
+ eb_dst[2]);
+
+ eb_dst[0] += recon_y_stride * 16;
+ eb_dst[1] += recon_uv_stride * 8;
+ eb_dst[2] += recon_uv_stride * 8;
+
+ if(mb_row == 1)
+ yv12_extend_frame_top_c(yv12_fb_new);
+ }
+ }
+ }
+
+ if(pc->filter_level)
+ {
+ if (pc->filter_type == NORMAL_LOOPFILTER)
+ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride,
+ recon_uv_stride, lf_dst[0], lf_dst[1],
+ lf_dst[2]);
+ else
+ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride,
+ recon_uv_stride, lf_dst[0], lf_dst[1],
+ lf_dst[2]);
+
+ yv12_extend_frame_left_right_c(yv12_fb_new,
+ eb_dst[0],
+ eb_dst[1],
+ eb_dst[2]);
+ eb_dst[0] += recon_y_stride * 16;
+ eb_dst[1] += recon_uv_stride * 8;
+ eb_dst[2] += recon_uv_stride * 8;
}
+ yv12_extend_frame_left_right_c(yv12_fb_new,
+ eb_dst[0],
+ eb_dst[1],
+ eb_dst[2]);
+
+ yv12_extend_frame_bottom_c(yv12_fb_new);
+
}
static unsigned int read_partition_size(const unsigned char *cx_size)
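
decode_mb_rows now interleaves loop filtering and border extension with decoding instead of running them as whole-frame passes afterwards: the filter runs one macroblock row behind decoding, so intra prediction still sees unfiltered neighbours, and left/right edge extension runs one further row behind, because filtering the next row still modifies pixels along the shared boundary. The top border is extended once the first fully settled row exists, and the bottom after the loop drains. A simplified skeleton of the filtered path, with hypothetical helper names standing in for the inlined code:

    static void decode_row(int r)            { (void)r; }  /* stand-ins */
    static void loop_filter_row(int r)       { (void)r; }
    static void extend_row(int r)            { (void)r; }
    static void extend_top_and_bottom(void)  { }

    static void decode_and_filter_rows(int mb_rows)
    {
        int mb_row;
        for (mb_row = 0; mb_row < mb_rows; mb_row++)
        {
            decode_row(mb_row);
            if (mb_row > 0)
                loop_filter_row(mb_row - 1);  /* its lower neighbour exists */
            if (mb_row > 1)
                extend_row(mb_row - 2);       /* fully filtered rows only */
        }
        loop_filter_row(mb_rows - 1);         /* drain the pipeline */
        extend_row(mb_rows - 2);
        extend_row(mb_rows - 1);
        extend_top_and_bottom();
    }
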
@@ -519,13 +795,13 @@ static unsigned int read_available_partition_size(
if (read_is_valid(partition_size_ptr, 3, first_fragment_end))
partition_size = read_partition_size(partition_size_ptr);
else if (pbi->ec_active)
- partition_size = bytes_left;
+ partition_size = (unsigned int)bytes_left;
else
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated partition size data");
}
else
- partition_size = bytes_left;
+ partition_size = (unsigned int)bytes_left;
/* Validate the calculated partition length. If the buffer
* described by the partition can't be fully read, then restrict
@@ -534,7 +810,7 @@ static unsigned int read_available_partition_size(
if (!read_is_valid(fragment_start, partition_size, fragment_end))
{
if (pbi->ec_active)
- partition_size = bytes_left;
+ partition_size = (unsigned int)bytes_left;
else
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition "
@@ -547,24 +823,18 @@ static unsigned int read_available_partition_size(
static void setup_token_decoder(VP8D_COMP *pbi,
const unsigned char* token_part_sizes)
{
- vp8_reader *bool_decoder = &pbi->bc2;
+ vp8_reader *bool_decoder = &pbi->mbc[0];
unsigned int partition_idx;
- int fragment_idx;
- int num_token_partitions;
+ unsigned int fragment_idx;
+ unsigned int num_token_partitions;
const unsigned char *first_fragment_end = pbi->fragments[0] +
pbi->fragment_sizes[0];
TOKEN_PARTITION multi_token_partition =
- (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2);
- if (!vp8dx_bool_error(&pbi->bc))
+ (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2);
+ if (!vp8dx_bool_error(&pbi->mbc[8]))
pbi->common.multi_token_partition = multi_token_partition;
num_token_partitions = 1 << pbi->common.multi_token_partition;
- if (num_token_partitions > 1)
- {
- CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_token_partitions *
- sizeof(vp8_reader)));
- bool_decoder = pbi->mbc;
- }
/* Check for partitions within the fragments and unpack the fragments
* so that each fragment pointer points to its corresponding partition. */
@@ -580,10 +850,10 @@ static void setup_token_decoder(VP8D_COMP *pbi,
/* Size of first partition + token partition sizes element */
ptrdiff_t ext_first_part_size = token_part_sizes -
pbi->fragments[0] + 3 * (num_token_partitions - 1);
- fragment_size -= ext_first_part_size;
+ fragment_size -= (unsigned int)ext_first_part_size;
if (fragment_size > 0)
{
- pbi->fragment_sizes[0] = ext_first_part_size;
+ pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size;
/* The fragment contains an additional partition. Move to
* next. */
fragment_idx++;
@@ -602,8 +872,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
fragment_end,
fragment_idx - 1,
num_token_partitions);
- pbi->fragment_sizes[fragment_idx] = partition_size;
- fragment_size -= partition_size;
+ pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size;
+ fragment_size -= (unsigned int)partition_size;
assert(fragment_idx <= num_token_partitions);
if (fragment_size > 0)
{
@@ -637,16 +907,6 @@ static void setup_token_decoder(VP8D_COMP *pbi,
#endif
}
-static void stop_token_decoder(VP8D_COMP *pbi)
-{
- VP8_COMMON *pc = &pbi->common;
-
- if (pc->multi_token_partition != ONE_PARTITION)
- {
- vpx_free(pbi->mbc);
- pbi->mbc = NULL;
- }
-}
static void init_frame(VP8D_COMP *pbi)
{
@@ -661,7 +921,6 @@ static void init_frame(VP8D_COMP *pbi)
vp8_init_mbmode_probs(pc);
vp8_default_coef_probs(pc);
- vp8_kf_default_bmode_probs(pc->kf_bmode_prob);
/* reset the segment feature data to 0 with delta coding (Default state). */
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
@@ -685,13 +944,8 @@ static void init_frame(VP8D_COMP *pbi)
}
else
{
- if (!pc->use_bilinear_mc_filter)
- pc->mcomp_filter_type = SIXTAP;
- else
- pc->mcomp_filter_type = BILINEAR;
-
/* To enable choice of different interpolation filters */
- if (pc->mcomp_filter_type == SIXTAP)
+ if (!pc->use_bilinear_mc_filter)
{
xd->subpixel_predict = vp8_sixtap_predict4x4;
xd->subpixel_predict8x4 = vp8_sixtap_predict8x4;
@@ -725,7 +979,7 @@ static void init_frame(VP8D_COMP *pbi)
int vp8_decode_frame(VP8D_COMP *pbi)
{
- vp8_reader *const bc = & pbi->bc;
+ vp8_reader *const bc = & pbi->mbc[8];
VP8_COMMON *const pc = & pbi->common;
MACROBLOCKD *const xd = & pbi->mb;
const unsigned char *data = pbi->fragments[0];
@@ -737,9 +991,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
int corrupt_tokens = 0;
int prev_independent_partitions = pbi->independent_partitions;
+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
+
/* start with no corruption of current frame */
xd->corrupted = 0;
- pc->yv12_fb[pc->new_fb_idx].corrupted = 0;
+ yv12_fb_new->corrupted = 0;
if (data_end - data < 3)
{
@@ -774,11 +1030,9 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vp8_setup_version(pc);
+
if (pc->frame_type == KEY_FRAME)
{
- const int Width = pc->Width;
- const int Height = pc->Height;
-
/* vet via sync code */
/* When error concealment is enabled we should only check the sync
* code if we have enough bits available
@@ -803,56 +1057,21 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
data += 7;
- if (Width != pc->Width || Height != pc->Height)
- {
- int prev_mb_rows = pc->mb_rows;
-
- if (pc->Width <= 0)
- {
- pc->Width = Width;
- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
- "Invalid frame width");
- }
-
- if (pc->Height <= 0)
- {
- pc->Height = Height;
- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
- "Invalid frame height");
- }
-
- if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate frame buffers");
-
-#if CONFIG_ERROR_CONCEALMENT
- pbi->overlaps = NULL;
- if (pbi->ec_enabled)
- {
- if (vp8_alloc_overlap_lists(pbi))
- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate overlap lists "
- "for error concealment");
- }
-#endif
-
-#if CONFIG_MULTITHREAD
- if (pbi->b_multithreaded_rd)
- vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
-#endif
- }
+ }
+ else
+ {
+ vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
}
}
-
- if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) ||
- pc->Width == 0 || pc->Height == 0)
+ if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME))
{
return -1;
}
init_frame(pbi);
- if (vp8dx_start_decode(bc, data, data_end - data))
+ if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data)))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
if (pc->frame_type == KEY_FRAME) {
@@ -961,7 +1180,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
setup_token_decoder(pbi, data + first_partition_length_in_bytes);
- xd->current_bc = &pbi->bc2;
+ xd->current_bc = &pbi->mbc[0];
/* Read the default quantizers. */
{
@@ -1094,26 +1313,9 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
}
- vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
- vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
-
- /* set up frame new frame for intra coded blocks */
-#if CONFIG_MULTITHREAD
- if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
-#endif
- vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
-
- vp8_setup_block_dptrs(xd);
-
- vp8_build_block_doffsets(xd);
-
/* clear out the coeff buffer */
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
- /* Read the mb_no_coeff_skip flag */
- pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
-
-
vp8_decode_mode_mvs(pbi);
#if CONFIG_ERROR_CONCEALMENT
@@ -1132,9 +1334,9 @@ int vp8_decode_frame(VP8D_COMP *pbi)
#if CONFIG_MULTITHREAD
if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
{
- int i;
+ unsigned int i;
vp8mt_decode_mb_rows(pbi, xd);
- vp8_yv12_extend_frame_borders(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/
+ vp8_yv12_extend_frame_borders(yv12_fb_new);
for (i = 0; i < pbi->decoding_thread_count; ++i)
corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted;
}
@@ -1145,18 +1347,16 @@ int vp8_decode_frame(VP8D_COMP *pbi)
corrupt_tokens |= xd->corrupted;
}
- stop_token_decoder(pbi);
-
/* Collect information about decoder corruption. */
/* 1. Check first boolean decoder for errors. */
- pc->yv12_fb[pc->new_fb_idx].corrupted = vp8dx_bool_error(bc);
+ yv12_fb_new->corrupted = vp8dx_bool_error(bc);
/* 2. Check the macroblock information */
- pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens;
+ yv12_fb_new->corrupted |= corrupt_tokens;
if (!pbi->decoded_key_frame)
{
if (pc->frame_type == KEY_FRAME &&
- !pc->yv12_fb[pc->new_fb_idx].corrupted)
+ !yv12_fb_new->corrupted)
pbi->decoded_key_frame = 1;
else
vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME,
@@ -1165,13 +1365,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
/* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */
- /* If this was a kf or Gf note the Q used */
- if ((pc->frame_type == KEY_FRAME) ||
- pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
- {
- pc->last_kf_gf_q = pc->base_qindex;
- }
-
if (pc->refresh_entropy_probs == 0)
{
vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
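
The bc/bc2 pair and the heap-allocated mbc array are gone: the decoder now keeps a fixed vp8_reader mbc[MAX_PARTITIONS], with the token-partition readers in the leading slots and the final slot, mbc[8], dedicated to the first partition (frame header, modes, mvs). This assumes MAX_PARTITIONS is 9, i.e. the eight possible token partitions plus one; it also removes stop_token_decoder and its free. In outline:

    /* Layout after the change (MAX_PARTITIONS assumed to be 9): */
    vp8_reader mbc[MAX_PARTITIONS];
    /* mbc[0] .. mbc[(1 << multi_token_partition) - 1] : token partitions */
    /* mbc[8]                                          : header/modes/mvs */
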
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 0c39848..452ff6c 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -53,7 +53,8 @@ static const uint8_t kZigzag[16] = {
#define NUM_PROBAS 11
#define NUM_CTX 3
-typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
+/* for const-casting */
+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS];
static int GetSigned(BOOL_DECODER *br, int value_to_sign)
{
diff --git a/vp8/decoder/error_concealment.c b/vp8/decoder/error_concealment.c
index 7750728..8b2e32b 100644
--- a/vp8/decoder/error_concealment.c
+++ b/vp8/decoder/error_concealment.c
@@ -51,12 +51,13 @@ int vp8_alloc_overlap_lists(VP8D_COMP *pbi)
vpx_free(pbi->overlaps);
pbi->overlaps = NULL;
}
+
pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols,
sizeof(MB_OVERLAP));
+
if (pbi->overlaps == NULL)
return -1;
- vpx_memset(pbi->overlaps, 0,
- sizeof(MB_OVERLAP) * pbi->common.mb_rows * pbi->common.mb_cols);
+
return 0;
}
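
The dropped vpx_memset was redundant: vpx_calloc, like calloc, returns zero-filled storage. The two forms below yield identical contents, for reference:

    #include <stdlib.h>
    #include <string.h>

    /* calloc zero-fills by definition. */
    static void *alloc_zeroed(size_t count, size_t size)
    {
        return calloc(count, size);
    }

    /* Pre-patch shape: the memset duplicates work calloc already did. */
    static void *alloc_zeroed_two_step(size_t count, size_t size)
    {
        void *p = malloc(count * size);
        if (p)
            memset(p, 0, count * size);
        return p;
    }
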
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index c59ce25..8d6871b 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -80,6 +80,7 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
#if CONFIG_ERROR_CONCEALMENT
pbi->ec_enabled = oxcf->error_concealment;
+ pbi->overlaps = NULL;
#else
pbi->ec_enabled = 0;
#endif
@@ -99,6 +100,8 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
*/
pbi->independent_partitions = 0;
+ vp8_setup_block_dptrs(&pbi->mb);
+
return pbi;
}
@@ -117,21 +120,20 @@ void vp8dx_remove_decompressor(VP8D_COMP *pbi)
vp8_de_alloc_overlap_lists(pbi);
#endif
vp8_remove_common(&pbi->common);
- vpx_free(pbi->mbc);
vpx_free(pbi);
}
-vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd)
{
VP8_COMMON *cm = &pbi->common;
int ref_fb_idx;
- if (ref_frame_flag == VP8_LAST_FLAG)
+ if (ref_frame_flag == VP8_LAST_FRAME)
ref_fb_idx = cm->lst_fb_idx;
- else if (ref_frame_flag == VP8_GOLD_FLAG)
+ else if (ref_frame_flag == VP8_GOLD_FRAME)
ref_fb_idx = cm->gld_fb_idx;
- else if (ref_frame_flag == VP8_ALT_FLAG)
+ else if (ref_frame_flag == VP8_ALTR_FRAME)
ref_fb_idx = cm->alt_fb_idx;
else{
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
@@ -153,17 +155,17 @@ vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag,
}
-vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd)
{
VP8_COMMON *cm = &pbi->common;
int *ref_fb_ptr = NULL;
int free_fb;
- if (ref_frame_flag == VP8_LAST_FLAG)
+ if (ref_frame_flag == VP8_LAST_FRAME)
ref_fb_ptr = &cm->lst_fb_idx;
- else if (ref_frame_flag == VP8_GOLD_FLAG)
+ else if (ref_frame_flag == VP8_GOLD_FRAME)
ref_fb_ptr = &cm->gld_fb_idx;
- else if (ref_frame_flag == VP8_ALT_FLAG)
+ else if (ref_frame_flag == VP8_ALTR_FRAME)
ref_fb_ptr = &cm->alt_fb_idx;
else{
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
@@ -279,28 +281,22 @@ static int swap_frame_buffers (VP8_COMMON *cm)
return err;
}
-int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsigned char *source, int64_t time_stamp)
+int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
+ const uint8_t *source,
+ int64_t time_stamp)
{
#if HAVE_NEON
int64_t dx_store_reg[8];
#endif
VP8_COMMON *cm = &pbi->common;
- int retcode = 0;
-
- /*if(pbi->ready_for_new_data == 0)
- return -1;*/
-
- if (pbi == 0)
- {
- return -1;
- }
+ int retcode = -1;
pbi->common.error.error_code = VPX_CODEC_OK;
if (pbi->num_fragments == 0)
{
/* New frame, reset fragment pointers and sizes */
- vpx_memset(pbi->fragments, 0, sizeof(pbi->fragments));
+ vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments));
vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes));
}
if (pbi->input_fragments && !(source == NULL && size == 0))
@@ -381,20 +377,14 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
cm->new_fb_idx = get_free_fb (cm);
+ /* setup reference frames for vp8_decode_frame */
+ pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx];
+ pbi->dec_fb_ref[LAST_FRAME] = &cm->yv12_fb[cm->lst_fb_idx];
+ pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx];
+ pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx];
+
if (setjmp(pbi->common.error.jmp))
{
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(dx_store_reg);
- }
-#endif
- pbi->common.error.setjmp = 0;
-
- pbi->num_fragments = 0;
-
/* We do not know if the missing frame(s) was supposed to update
* any of the reference buffers, but we act conservative and
* mark only the last buffer as corrupted.
@@ -403,7 +393,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
- return -1;
+
+ goto decode_exit;
}
pbi->common.error.setjmp = 1;
@@ -412,68 +403,19 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
if (retcode < 0)
{
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(dx_store_reg);
- }
-#endif
- pbi->common.error.error_code = VPX_CODEC_ERROR;
- pbi->common.error.setjmp = 0;
- pbi->num_fragments = 0;
if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
- return retcode;
+
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ goto decode_exit;
}
-#if CONFIG_MULTITHREAD
- if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
- {
- if (swap_frame_buffers (cm))
- {
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(dx_store_reg);
- }
-#endif
- pbi->common.error.error_code = VPX_CODEC_ERROR;
- pbi->common.error.setjmp = 0;
- pbi->num_fragments = 0;
- return -1;
- }
- } else
-#endif
+ if (swap_frame_buffers (cm))
{
- if (swap_frame_buffers (cm))
- {
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(dx_store_reg);
- }
-#endif
- pbi->common.error.error_code = VPX_CODEC_ERROR;
- pbi->common.error.setjmp = 0;
- pbi->num_fragments = 0;
- return -1;
- }
-
- if(cm->filter_level)
- {
- /* Apply the loop filter if appropriate. */
- vp8_loop_filter_frame(cm, &pbi->mb);
- }
- vp8_yv12_extend_frame_borders(cm->frame_to_show);
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ goto decode_exit;
}
-
vp8_clear_system_state();
#if CONFIG_ERROR_CONCEALMENT
@@ -498,49 +440,13 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
}
#endif
- /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/
-
if (cm->show_frame)
cm->current_video_frame++;
pbi->ready_for_new_data = 0;
pbi->last_time_stamp = time_stamp;
- pbi->num_fragments = 0;
-
-#if 0
- {
- int i;
- int64_t earliest_time = pbi->dr[0].time_stamp;
- int64_t latest_time = pbi->dr[0].time_stamp;
- int64_t time_diff = 0;
- int bytes = 0;
-
- pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
- pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
-
- for (i = 0; i < 16; i++)
- {
-
- bytes += pbi->dr[i].size;
-
- if (pbi->dr[i].time_stamp < earliest_time)
- earliest_time = pbi->dr[i].time_stamp;
-
- if (pbi->dr[i].time_stamp > latest_time)
- latest_time = pbi->dr[i].time_stamp;
- }
-
- time_diff = latest_time - earliest_time;
-
- if (time_diff > 0)
- {
- pbi->common.bitrate = 80000.00 * bytes / time_diff ;
- pbi->common.framerate = 160000000.00 / time_diff ;
- }
-
- }
-#endif
+decode_exit:
#if HAVE_NEON
#if CONFIG_RUNTIME_CPU_DETECT
if (cm->cpu_caps & HAS_NEON)
@@ -549,7 +455,9 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
vp8_pop_neon(dx_store_reg);
}
#endif
+
pbi->common.error.setjmp = 0;
+ pbi->num_fragments = 0;
return retcode;
}
int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
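
vp8dx_receive_compressed_data is reworked around a single decode_exit label: the NEON register restore, the setjmp flag reset and the fragment-count reset used to be duplicated in every early return, and now appear exactly once. A self-contained sketch of the flow, with hypothetical stand-ins for the real calls:

    struct dec { int num_fragments; };

    static int  decode_frame(struct dec *d)      { (void)d; return 0; }
    static int  swap_buffers(struct dec *d)      { (void)d; return 0; }
    static void restore_cpu_state(struct dec *d) { (void)d; }

    static int receive_data(struct dec *pbi)
    {
        int retcode = decode_frame(pbi);
        if (retcode < 0)
            goto decode_exit;

        if (swap_buffers(pbi))
        {
            retcode = -1;
            goto decode_exit;
        }

    decode_exit:
        restore_cpu_state(pbi);   /* was repeated in each early return */
        pbi->num_fragments = 0;   /* likewise now reset on every path  */
        return retcode;
    }
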
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 97cf0dc..0063beb 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -31,23 +31,18 @@ typedef struct
typedef struct
{
MACROBLOCKD mbd;
- int mb_row;
} MB_ROW_DEC;
-typedef struct
-{
- int64_t time_stamp;
- int size;
-} DATARATE;
-
-
typedef struct VP8D_COMP
{
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
+ YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS];
+
DECLARE_ALIGNED(16, VP8_COMMON, common);
- vp8_reader bc, bc2;
+ /* the last partition will be used for the modes/mvs */
+ vp8_reader mbc[MAX_PARTITIONS];
VP8D_CONFIG oxcf;
@@ -62,7 +57,7 @@ typedef struct VP8D_COMP
volatile int b_multithreaded_rd;
int max_threads;
int current_mb_col_main;
- int decoding_thread_count;
+ unsigned int decoding_thread_count;
int allocated_decoding_thread_count;
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
@@ -85,12 +80,9 @@ typedef struct VP8D_COMP
/* end of threading data */
#endif
- vp8_reader *mbc;
int64_t last_time_stamp;
int ready_for_new_data;
- DATARATE dr[16];
-
vp8_prob prob_intra;
vp8_prob prob_last;
vp8_prob prob_gf;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 47a0349..88c06be 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -24,10 +24,18 @@
#include "detokenize.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/reconinter.h"
+#include "vp8/common/setupintrarecon.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
#endif
+#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
+#define CALLOC_ARRAY_ALIGNED(p, n, algn) do { \
+ CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \
+ memset((p), 0, (n) * sizeof(*(p))); \
+} while (0)
+
+
extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
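The new CALLOC_ARRAY helpers take the element size from the pointer itself via `sizeof(*(p))`, so callers only state the count, and CALLOC_ARRAY_ALIGNED pairs `vpx_memalign` with `memset` because no aligned calloc exists. A self-contained sketch of the same shape, using plain `calloc` and `abort` as stand-ins for `vpx_calloc` and the real CHECK_MEM_ERROR (note standard `calloc` takes the count first, the reverse of `vpx_calloc`):

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for libvpx's CHECK_MEM_ERROR: here a failed allocation aborts. */
#define CHECK_MEM_ERROR(p, expr) do { \
    (p) = (expr);                     \
    if ((p) == NULL) abort();         \
} while (0)

/* Same shape as the macro added above: the element size comes from the
 * pointed-to type, so the struct can change without touching call sites. */
#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), calloc((n), sizeof(*(p))))

int main(void)
{
    struct row { int col; void *bc; } *rows;

    CALLOC_ARRAY(rows, 16);        /* 16 zero-initialized rows */
    printf("%d\n", rows[3].col);   /* prints 0 */
    free(rows);
    return 0;
}
```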
@@ -47,11 +55,9 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->mode_info_stride = pc->mode_info_stride;
mbd->frame_type = pc->frame_type;
- mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
- mbd->dst = pc->yv12_fb[pc->new_fb_idx];
+ mbd->pre = xd->pre;
+ mbd->dst = xd->dst;
- vp8_setup_block_dptrs(mbd);
- vp8_build_block_doffsets(mbd);
mbd->segmentation_enabled = xd->segmentation_enabled;
mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
@@ -65,7 +71,7 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
- mbd->current_bc = &pbi->bc2;
+ mbd->current_bc = &pbi->mbc[0];
vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
@@ -73,16 +79,18 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
mbd->fullpixel_mask = 0xffffffff;
- if(pc->full_pixel)
+
+ if (pc->full_pixel)
mbd->fullpixel_mask = 0xfffffff8;
}
- for (i=0; i< pc->mb_rows; i++)
- pbi->mt_current_mb_col[i]=-1;
+ for (i = 0; i < pc->mb_rows; i++)
+ pbi->mt_current_mb_col[i] = -1;
}
-static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx)
+static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
+ unsigned int mb_idx)
{
MB_PREDICTION_MODE mode;
int i;
@@ -166,7 +174,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i
{
short *DQC = xd->dequant_y1;
int dst_stride = xd->dst.y_stride;
- unsigned char *base_dst = xd->dst.y_buffer;
/* clear out residual eob info */
if(xd->mode_info_context->mbmi.mb_skip_coeff)
@@ -177,17 +184,19 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i
for (i = 0; i < 16; i++)
{
BLOCKD *b = &xd->block[i];
- int b_mode = xd->mode_info_context->bmi[i].as_mode;
- unsigned char *yabove;
+ unsigned char *dst = xd->dst.y_buffer + b->offset;
+ B_PREDICTION_MODE b_mode =
+ xd->mode_info_context->bmi[i].as_mode;
+ unsigned char *Above;
unsigned char *yleft;
int left_stride;
unsigned char top_left;
/*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
if (i < 4 && pbi->common.filter_level)
- yabove = xd->recon_above[0] + b->offset; //i*4;
+ Above = xd->recon_above[0] + b->offset;
else
- yabove = (base_dst - dst_stride) + b->offset;
+ Above = dst - dst_stride;
if (i%4==0 && pbi->common.filter_level)
{
@@ -196,34 +205,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i
}
else
{
- yleft = (base_dst - 1) + b->offset;
+ yleft = dst - 1;
left_stride = dst_stride;
}
if ((i==4 || i==8 || i==12) && pbi->common.filter_level)
top_left = *(xd->recon_left[0] + i - 1);
else
- top_left = yabove[-1];
+ top_left = Above[-1];
- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride,
- b_mode,
- base_dst + b->offset, dst_stride,
- top_left);
+ vp8_intra4x4_predict(Above, yleft, left_stride,
+ b_mode, dst, dst_stride, top_left);
if (xd->eobs[i] )
{
if (xd->eobs[i] > 1)
{
- vp8_dequant_idct_add
- (b->qcoeff, DQC,
- base_dst + b->offset, dst_stride);
+ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
}
else
{
- vp8_dc_only_idct_add
- (b->qcoeff[0] * DQC[0],
- base_dst + b->offset, dst_stride,
- base_dst + b->offset, dst_stride);
+ vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
+ dst, dst_stride, dst, dst_stride);
((int *)b->qcoeff)[0] = 0;
}
}
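In the block above, the subblock prediction borders are chosen per position: when the loop filter is on, the destination frame already holds filtered pixels, so the predictor must read the unfiltered rows and columns saved in `recon_above`/`recon_left`; otherwise the neighbors come straight from `dst`, now addressed through the precomputed `dst = xd->dst.y_buffer + b->offset`. A condensed sketch of just the pointer selection (the `recon_left + i` addressing in the filtered branch is an assumption, since that branch body falls outside this hunk):

```c
#include <stddef.h>

/* Condensed border selection (simplified names): with the loop filter on,
 * prediction neighbors come from saved unfiltered rows/columns rather than
 * the already filtered destination frame. */
struct border {
    const unsigned char *above;   /* pixels above the 4x4 subblock */
    const unsigned char *left;    /* left column of the subblock */
    ptrdiff_t left_stride;
};

static struct border pick_borders(int i, int filter_level,
                                  const unsigned char *dst, int dst_stride,
                                  int b_offset,
                                  const unsigned char *recon_above,
                                  const unsigned char *recon_left)
{
    struct border b;

    if (i < 4 && filter_level)             /* top row of subblocks */
        b.above = recon_above + b_offset;  /* saved, unfiltered row */
    else
        b.above = dst - dst_stride;        /* row above, already decoded */

    if (i % 4 == 0 && filter_level) {      /* leftmost column of subblocks */
        b.left = recon_left + i;           /* assumed packed saved column */
        b.left_stride = 1;
    } else {
        b.left = dst - 1;
        b.left_stride = dst_stride;
    }
    return b;
}

int main(void)
{
    unsigned char dstbuf[64] = {0};
    struct border b = pick_borders(5, 0, dstbuf + 9, 8, 5, NULL, NULL);
    return b.left_stride == 8 ? 0 : 1;  /* no filter: neighbors read from dst */
}
```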
@@ -297,60 +300,44 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i
}
}
-typedef void (*init_current_bc_fn_t)(VP8D_COMP *pbi, MACROBLOCKD *xd,
- int start_mb_row, int mb_row, int num_part);
-
-static void init_current_bc(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
- int mb_row, int num_part)
+static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
{
- (void) start_mb_row;
-
- xd->current_bc = &pbi->mbc[mb_row%num_part];
-}
-
-static void init_current_bc_threads(VP8D_COMP *pbi, MACROBLOCKD *xd,
- int start_mb_row, int mb_row, int num_part)
-{
- (void) xd;
- pbi->mb_row_di[start_mb_row - 1].mb_row = mb_row;
- pbi->mb_row_di[start_mb_row - 1].mbd.current_bc = &pbi->mbc[mb_row%num_part];
-}
-
-
-static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
- init_current_bc_fn_t init_current_bc_fn)
-{
- volatile int *last_row_current_mb_col = NULL;
+ volatile const int *last_row_current_mb_col;
+ volatile int *current_mb_col;
int mb_row;
VP8_COMMON *pc = &pbi->common;
- int nsync = pbi->sync_range;
+ const int nsync = pbi->sync_range;
+ const int first_row_no_sync_above = pc->mb_cols + nsync;
int num_part = 1 << pbi->common.multi_token_partition;
+ int last_mb_row = start_mb_row;
+
+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
+ YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];
+
+ int recon_y_stride = yv12_fb_new->y_stride;
+ int recon_uv_stride = yv12_fb_new->uv_stride;
- int dst_fb_idx = pc->new_fb_idx;
unsigned char *ref_buffer[MAX_REF_FRAMES][3];
unsigned char *dst_buffer[3];
int i;
- int ref_fb_index[MAX_REF_FRAMES];
int ref_fb_corrupted[MAX_REF_FRAMES];
ref_fb_corrupted[INTRA_FRAME] = 0;
- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx;
- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx;
- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx;
-
for(i = 1; i < MAX_REF_FRAMES; i++)
{
- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer;
- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer;
- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer;
+ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];
- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted;
+ ref_buffer[i][0] = this_fb->y_buffer;
+ ref_buffer[i][1] = this_fb->u_buffer;
+ ref_buffer[i][2] = this_fb->v_buffer;
+
+ ref_fb_corrupted[i] = this_fb->corrupted;
}
- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer;
- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer;
- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer;
+ dst_buffer[0] = yv12_fb_new->y_buffer;
+ dst_buffer[1] = yv12_fb_new->u_buffer;
+ dst_buffer[2] = yv12_fb_new->v_buffer;
xd->up_available = (start_mb_row != 0);
@@ -359,18 +346,20 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
- int ref_fb_idx = pc->lst_fb_idx;
- int dst_fb_idx = pc->new_fb_idx;
- int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
- int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
-
int filter_level;
loop_filter_info_n *lfi_n = &pc->lf_info;
- init_current_bc_fn(pbi, xd, start_mb_row, mb_row, num_part);
+ /* save last row processed by this thread */
+ last_mb_row = mb_row;
+ /* select bool coder for current partition */
+ xd->current_bc = &pbi->mbc[mb_row%num_part];
if (mb_row > 0)
last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
+ else
+ last_row_current_mb_col = &first_row_no_sync_above;
+
+ current_mb_col = &pbi->mt_current_mb_col[mb_row];
recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8;
@@ -394,7 +383,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
xd->recon_left[2] = pbi->mt_vleft_col[mb_row];
- //TODO: move to outside row loop
+ /* TODO: move to outside row loop */
xd->recon_left_stride[0] = 1;
xd->recon_left_stride[1] = 1;
}
@@ -412,16 +401,22 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
xd->recon_above[1] -= xd->dst.uv_stride;
xd->recon_above[2] -= xd->dst.uv_stride;
- //TODO: move to outside row loop
+ /* TODO: move to outside row loop */
xd->recon_left_stride[0] = xd->dst.y_stride;
xd->recon_left_stride[1] = xd->dst.uv_stride;
+
+ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
+ xd->recon_left[2], xd->dst.y_stride,
+ xd->dst.uv_stride);
}
for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
{
- if ( mb_row > 0 && (mb_col & (nsync-1)) == 0)
+ *current_mb_col = mb_col - 1;
+
+ if ((mb_col & (nsync - 1)) == 0)
{
- while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
+ while (mb_col > (*last_row_current_mb_col - nsync))
{
x86_pause_hint();
thread_sleep(0);
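The loop above implements the decoder's wavefront synchronization: each row publishes its last finished column into `mt_current_mb_col`, and a row only proceeds once the row above is at least `nsync` columns ahead. Pointing row 0 at the `first_row_no_sync_above` sentinel removes the old per-column `mb_row > 0` test, since the sentinel is always large enough to satisfy the wait. A simplified model of the protocol, with `sched_yield` standing in for `thread_sleep(0)`; the rows run sequentially here, whereas in the decoder the producer row runs concurrently:

```c
#include <sched.h>

#define MB_COLS 40
#define NSYNC    4

static volatile int progress[2];                   /* last column done, per row */
static const int no_sync_above = MB_COLS + NSYNC;  /* sentinel for row 0 */

static void decode_row(int mb_row)
{
    volatile const int *above =
        (mb_row > 0) ? &progress[mb_row - 1] : &no_sync_above;
    int mb_col;

    for (mb_col = 0; mb_col < MB_COLS; mb_col++) {
        progress[mb_row] = mb_col - 1;       /* publish progress before waiting */

        if ((mb_col & (NSYNC - 1)) == 0)     /* test once per sync group */
            while (mb_col > *above - NSYNC)  /* row above must lead by NSYNC */
                sched_yield();               /* thread_sleep(0) analog */

        /* ... decode macroblock (mb_row, mb_col) ... */
    }
    progress[mb_row] = MB_COLS - 1 + NSYNC;  /* release any waiter below us */
}

int main(void)
{
    decode_row(0);   /* sequential here; in the decoder these run in parallel */
    decode_row(1);
    return 0;
}
```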
@@ -477,7 +472,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
/* propagate errors from reference frames */
xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];
- decode_macroblock(pbi, xd, 0);
+ mt_decode_macroblock(pbi, xd, 0);
xd->left_available = 1;
@@ -591,9 +586,6 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
++xd->mode_info_context; /* next mb */
xd->above_context++;
-
- /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
- pbi->mt_current_mb_col[mb_row] = mb_col;
}
/* adjust to the next row of mbs */
@@ -601,8 +593,8 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
{
if(mb_row != pc->mb_rows-1)
{
- int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
- int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
+ int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
+ int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1);
for (i = 0; i < 4; i++)
{
@@ -611,8 +603,13 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
}
}
- } else
- vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+ }
+ else
+ vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+
+ /* last MB of row is ready just after extension is done */
+ *current_mb_col = mb_col + nsync;
++xd->mode_info_context; /* skip prediction column */
xd->up_available = 1;
@@ -620,6 +617,11 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row,
/* since we have multithread */
xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
}
+
+ /* signal end of frame decoding if this thread processed the last mb_row */
+ if (last_mb_row == (pc->mb_rows - 1))
+ sem_post(&pbi->h_event_end_decoding);
+
}
@@ -635,7 +637,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
if (pbi->b_multithreaded_rd == 0)
break;
- /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
{
if (pbi->b_multithreaded_rd == 0)
@@ -643,21 +644,11 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
else
{
MACROBLOCKD *xd = &mbrd->mbd;
-
xd->left_context = &mb_row_left_context;
- decode_mb_rows(pbi, xd, ithread+1, init_current_bc_threads);
+ mt_decode_mb_rows(pbi, xd, ithread+1);
}
}
-
- /* add this to each frame */
- if ((mbrd->mb_row == pbi->common.mb_rows-1) ||
- ((mbrd->mb_row == pbi->common.mb_rows-2) &&
- (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
- {
- /*SetEvent(pbi->h_event_end_decoding);*/
- sem_post(&pbi->h_event_end_decoding);
- }
}
return 0 ;
@@ -667,7 +658,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
int core_count = 0;
- int ithread;
+ unsigned int ithread;
pbi->b_multithreaded_rd = 0;
pbi->allocated_decoding_thread_count = 0;
@@ -684,16 +675,17 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
pbi->b_multithreaded_rd = 1;
pbi->decoding_thread_count = core_count - 1;
- CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
- CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
- CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
- vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
- CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
+ CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
+ CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
+ CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
+ CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);
for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
{
sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
+ vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);
+
pbi->de_thread_data[ithread].ithread = ithread;
pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];
@@ -810,32 +802,32 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
uv_width = width >>1;
/* Allocate an int for each mb row. */
- CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
+ CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
/* Allocate memory for above_row buffers. */
- CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
- for (i=0; i< pc->mb_rows; i++)
+ CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
+ for (i = 0; i < pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1))));
- CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
- for (i=0; i< pc->mb_rows; i++)
+ CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
+ for (i = 0; i < pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
- CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
- for (i=0; i< pc->mb_rows; i++)
+ CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
+ for (i = 0; i < pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
/* Allocate memory for left_col buffers. */
- CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
- for (i=0; i< pc->mb_rows; i++)
+ CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
+ for (i = 0; i < pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
- CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
- for (i=0; i< pc->mb_rows; i++)
+ CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
+ for (i = 0; i < pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
- CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
- for (i=0; i< pc->mb_rows; i++)
+ CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
+ for (i = 0; i < pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
}
}
@@ -881,42 +873,46 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
{
VP8_COMMON *pc = &pbi->common;
- int i;
+ unsigned int i;
+ int j;
int filter_level = pc->filter_level;
+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
if (filter_level)
{
/* Set above_row buffer to 127 for decoding first MB row */
- vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
- vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
- vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
+ vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
+ vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
+ vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
- for (i=1; i<pc->mb_rows; i++)
+ for (j=1; j<pc->mb_rows; j++)
{
- vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
- vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
- vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
}
/* Set left_col to 129 initially */
- for (i=0; i<pc->mb_rows; i++)
+ for (j=0; j<pc->mb_rows; j++)
{
- vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
- vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
- vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
+ vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
+ vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
+ vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
}
/* Initialize the loop filter for this frame. */
vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
}
+ else
+ vp8_setup_intra_recon_top_line(yv12_fb_new);
setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
for (i = 0; i < pbi->decoding_thread_count; i++)
sem_post(&pbi->h_event_start_decoding[i]);
- decode_mb_rows(pbi, xd, 0, init_current_bc);
+ mt_decode_mb_rows(pbi, xd, 0);
sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
}
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 3824294..e666b6c 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -118,7 +118,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi)
update_mode(
w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree,
- Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count
+ Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count
);
}
{
@@ -127,7 +127,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi)
update_mode(
w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,
- Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->uv_mode_count
+ Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count
);
}
}
@@ -172,7 +172,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
while (p < stop)
{
const int t = p->Token;
- const vp8_token *a = vp8_coef_encodings + t;
+ vp8_token *a = vp8_coef_encodings + t;
const vp8_extra_bit_struct *b = vp8_extra_bits + t;
int i = 0;
const unsigned char *pp = p->context_tree;
@@ -397,7 +397,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
{
const TOKENEXTRA *p = cpi->tplist[mb_row].start;
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
- int tokens = stop - p;
+ int tokens = (int)(stop - p);
vp8_pack_tokens_c(w, p, tokens);
}
@@ -416,7 +416,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
{
const TOKENEXTRA *p = cpi->tplist[mb_row].start;
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
- int tokens = stop - p;
+ int tokens = (int)(stop - p);
vp8_pack_tokens_c(w, p, tokens);
}
@@ -461,7 +461,7 @@ static void write_mv
static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACROBLOCKD *x)
{
- // Encode the MB segment id.
+ /* Encode the MB segment id. */
if (x->segmentation_enabled && x->update_mb_segmentation_map)
{
switch (mi->segment_id)
@@ -483,7 +483,7 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO
vp8_write(w, 1, x->mb_segment_tree_probs[2]);
break;
- // TRAP.. This should not happen
+ /* TRAP.. This should not happen */
default:
vp8_write(w, 0, x->mb_segment_tree_probs[0]);
vp8_write(w, 0, x->mb_segment_tree_probs[1]);
@@ -493,11 +493,11 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO
}
void vp8_convert_rfct_to_prob(VP8_COMP *const cpi)
{
- const int *const rfct = cpi->count_mb_ref_frame_usage;
+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage;
const int rf_intra = rfct[INTRA_FRAME];
const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
- // Calculate the probabilities used to code the ref frame based on useage
+ /* Calculate the probabilities used to code the ref frame based on usage */
if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
cpi->prob_intra_coded = 1;
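vp8_convert_rfct_to_prob turns the per-frame reference usage counts into the 8-bit probabilities the bitstream codes with, clamping each away from zero because a zero probability cannot be signalled. A worked example with made-up counts; the split of the inter mass between LAST and GF/ARF follows the same count-ratio shape, but its exact libvpx form is not shown in this hunk, so treat that part as an assumption:

```c
#include <stdio.h>

int main(void)
{
    /* Made-up usage counts: INTRA, LAST, GOLDEN, ALTREF */
    int rfct[4] = { 30, 200, 50, 20 };
    int rf_intra = rfct[0];
    int rf_inter = rfct[1] + rfct[2] + rfct[3];
    int prob_intra_coded, prob_last_coded;

    prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter);
    if (!prob_intra_coded)
        prob_intra_coded = 1;        /* probability 0 is not codable */

    /* assumed shape of the LAST vs GF/ARF split, by count ratio */
    prob_last_coded = rf_inter ? rfct[1] * 255 / rf_inter : 128;
    if (!prob_last_coded)
        prob_last_coded = 1;

    printf("intra=%d last=%d\n", prob_intra_coded, prob_last_coded); /* 25, 188 */
    return 0;
}
```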
@@ -539,7 +539,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
{
int total_mbs = pc->mb_rows * pc->mb_cols;
- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs;
+ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs;
if (prob_skip_false <= 1)
prob_skip_false = 1;
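prob_skip_false is the estimated probability, in 1/256 units, that a macroblock is not skipped, derived from the previous frame's skip count. A quick check with made-up numbers; the upper clamp to 255 is an assumption, since only the lower clamp appears in this hunk:

```c
#include <stdio.h>

int main(void)
{
    int total_mbs = 3600;            /* e.g. a 960x960 frame: 60x60 MBs */
    int skip_true_count = 900;       /* MBs coded as skipped last frame */
    int prob_skip_false;

    prob_skip_false = (total_mbs - skip_true_count) * 256 / total_mbs;
    if (prob_skip_false <= 1)
        prob_skip_false = 1;
    if (prob_skip_false > 255)       /* assumed upper clamp to fit a byte */
        prob_skip_false = 255;

    printf("%d\n", prob_skip_false); /* 2700 * 256 / 3600 = 192 */
    return 0;
}
```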
@@ -571,8 +571,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
MACROBLOCKD *xd = &cpi->mb.e_mbd;
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+ /* Distance of Mb to the various image edges.
+ * These are specified to 1/8th pel as they are always compared to MV
+ * values that are in 1/8th pel units
+ */
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -728,7 +730,7 @@ static void write_kfmodes(VP8_COMP *cpi)
{
int total_mbs = c->mb_rows * c->mb_cols;
- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs;
+ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs;
if (prob_skip_false <= 1)
prob_skip_false = 1;
@@ -754,7 +756,7 @@ static void write_kfmodes(VP8_COMP *cpi)
if (c->mb_no_coeff_skip)
vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
- kfwrite_ymode(bc, ym, c->kf_ymode_prob);
+ kfwrite_ymode(bc, ym, vp8_kf_ymode_prob);
if (ym == B_PRED)
{
@@ -771,15 +773,15 @@ static void write_kfmodes(VP8_COMP *cpi)
++intra_mode_stats [A] [L] [bm];
#endif
- write_bmode(bc, bm, c->kf_bmode_prob [A] [L]);
+ write_bmode(bc, bm, vp8_kf_bmode_prob [A] [L]);
}
while (++i < 16);
}
- write_uv_mode(bc, (m++)->mbmi.uv_mode, c->kf_uv_mode_prob);
+ write_uv_mode(bc, (m++)->mbmi.uv_mode, vp8_kf_uv_mode_prob);
}
- m++; // skip L prediction border
+ m++; /* skip L prediction border */
}
}
@@ -849,6 +851,7 @@ static int prob_update_savings(const unsigned int *ct,
static int independent_coef_context_savings(VP8_COMP *cpi)
{
+ MACROBLOCK *const x = & cpi->mb;
int savings = 0;
int i = 0;
do
@@ -865,7 +868,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi)
*/
probs = (const unsigned int (*)[MAX_ENTROPY_TOKENS])
- cpi->coef_counts[i][j];
+ x->coef_counts[i][j];
/* Reset to default probabilities at key frames */
if (cpi->common.frame_type == KEY_FRAME)
@@ -878,9 +881,6 @@ static int independent_coef_context_savings(VP8_COMP *cpi)
/* at every context */
/* calc probs and branch cts for this frame only */
- //vp8_prob new_p [ENTROPY_NODES];
- //unsigned int branch_ct [ENTROPY_NODES] [2];
-
int t = 0; /* token/prob index */
vp8_tree_probs_from_distribution(
@@ -927,6 +927,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi)
static int default_coef_context_savings(VP8_COMP *cpi)
{
+ MACROBLOCK *const x = & cpi->mb;
int savings = 0;
int i = 0;
do
@@ -940,16 +941,13 @@ static int default_coef_context_savings(VP8_COMP *cpi)
/* at every context */
/* calc probs and branch cts for this frame only */
- //vp8_prob new_p [ENTROPY_NODES];
- //unsigned int branch_ct [ENTROPY_NODES] [2];
-
int t = 0; /* token/prob index */
vp8_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
cpi->frame_coef_probs [i][j][k],
cpi->frame_branch_ct [i][j][k],
- cpi->coef_counts [i][j][k],
+ x->coef_counts [i][j][k],
256, 1
);
@@ -998,13 +996,13 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
{
int savings = 0;
- const int *const rfct = cpi->count_mb_ref_frame_usage;
+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage;
const int rf_intra = rfct[INTRA_FRAME];
const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
int new_intra, new_last, new_garf, oldtotal, newtotal;
int ref_frame_cost[MAX_REF_FRAMES];
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
if (cpi->common.frame_type != KEY_FRAME)
{
@@ -1026,7 +1024,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME];
- // old costs
+ /* old costs */
vp8_calc_ref_frame_costs(ref_frame_cost,cpi->prob_intra_coded,
cpi->prob_last_coded,cpi->prob_gf_coded);
@@ -1078,7 +1076,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi)
#endif
int savings = 0;
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
do
{
@@ -1110,21 +1108,15 @@ void vp8_update_coef_probs(VP8_COMP *cpi)
}
do
{
- //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here.
+ /* note: use result from vp8_estimate_entropy_savings, so no
+ * need to call vp8_tree_probs_from_distribution here.
+ */
+
/* at every context */
/* calc probs and branch cts for this frame only */
- //vp8_prob new_p [ENTROPY_NODES];
- //unsigned int branch_ct [ENTROPY_NODES] [2];
-
int t = 0; /* token/prob index */
- //vp8_tree_probs_from_distribution(
- // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
- // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k],
- // 256, 1
- // );
-
do
{
const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
@@ -1295,19 +1287,16 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256;
#endif
- //vp8_kf_default_bmode_probs() is called in vp8_setup_key_frame() once for each
- //K frame before encode frame. pc->kf_bmode_prob doesn't get changed anywhere
- //else. No need to call it again here. --yw
- //vp8_kf_default_bmode_probs( pc->kf_bmode_prob);
-
- // every keyframe send startcode, width, height, scale factor, clamp and color type
+ /* every keyframe sends the startcode, width, height, scale factor, clamp
+ * and color type
+ */
if (oh.type == KEY_FRAME)
{
int v;
validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error);
- // Start / synch code
+ /* Start / synch code */
cx_data[0] = 0x9D;
cx_data[1] = 0x01;
cx_data[2] = 0x2a;
@@ -1326,7 +1315,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_start_encode(bc, cx_data, cx_data_end);
- // signal clr type
+ /* signal clr type */
vp8_write_bit(bc, pc->clr_type);
vp8_write_bit(bc, pc->clamp_type);
@@ -1335,13 +1324,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_start_encode(bc, cx_data, cx_data_end);
- // Signal whether or not Segmentation is enabled
+ /* Signal whether or not Segmentation is enabled */
vp8_write_bit(bc, xd->segmentation_enabled);
- // Indicate which features are enabled
+ /* Indicate which features are enabled */
if (xd->segmentation_enabled)
{
- // Signal whether or not the segmentation map is being updated.
+ /* Signal whether or not the segmentation map is being updated. */
vp8_write_bit(bc, xd->update_mb_segmentation_map);
vp8_write_bit(bc, xd->update_mb_segmentation_data);
@@ -1351,15 +1340,15 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_write_bit(bc, xd->mb_segement_abs_delta);
- // For each segmentation feature (Quant and loop filter level)
+ /* For each segmentation feature (Quant and loop filter level) */
for (i = 0; i < MB_LVL_MAX; i++)
{
- // For each of the segments
+ /* For each of the segments */
for (j = 0; j < MAX_MB_SEGMENTS; j++)
{
Data = xd->segment_feature_data[i][j];
- // Frame level data
+ /* Frame level data */
if (Data)
{
vp8_write_bit(bc, 1);
@@ -1384,7 +1373,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
if (xd->update_mb_segmentation_map)
{
- // Write the probs used to decode the segment id for each macro block.
+ /* Write the probs used to decode the segment id for each mb */
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
{
int Data = xd->mb_segment_tree_probs[i];
@@ -1400,17 +1389,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
}
}
- // Code to determine whether or not to update the scan order.
vp8_write_bit(bc, pc->filter_type);
vp8_write_literal(bc, pc->filter_level, 6);
vp8_write_literal(bc, pc->sharpness_level, 3);
- // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled).
+ /* Write out loop filter deltas applied at the MB level based on mode
+ * or ref frame (if they are enabled).
+ */
vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled);
if (xd->mode_ref_lf_delta_enabled)
{
- // Do the deltas need to be updated
+ /* Do the deltas need to be updated */
int send_update = xd->mode_ref_lf_delta_update
|| cpi->oxcf.error_resilient_mode;
@@ -1419,12 +1409,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
{
int Data;
- // Send update
+ /* Send update */
for (i = 0; i < MAX_REF_LF_DELTAS; i++)
{
Data = xd->ref_lf_deltas[i];
- // Frame level data
+ /* Frame level data */
if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]
|| cpi->oxcf.error_resilient_mode)
{
@@ -1434,20 +1424,20 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
if (Data > 0)
{
vp8_write_literal(bc, (Data & 0x3F), 6);
- vp8_write_bit(bc, 0); // sign
+ vp8_write_bit(bc, 0); /* sign */
}
else
{
Data = -Data;
vp8_write_literal(bc, (Data & 0x3F), 6);
- vp8_write_bit(bc, 1); // sign
+ vp8_write_bit(bc, 1); /* sign */
}
}
else
vp8_write_bit(bc, 0);
}
- // Send update
+ /* Send update */
for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
{
Data = xd->mode_lf_deltas[i];
@@ -1461,13 +1451,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
if (Data > 0)
{
vp8_write_literal(bc, (Data & 0x3F), 6);
- vp8_write_bit(bc, 0); // sign
+ vp8_write_bit(bc, 0); /* sign */
}
else
{
Data = -Data;
vp8_write_literal(bc, (Data & 0x3F), 6);
- vp8_write_bit(bc, 1); // sign
+ vp8_write_bit(bc, 1); /* sign */
}
}
else
@@ -1476,34 +1466,42 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
}
}
- //signal here is multi token partition is enabled
+ /* signal here whether multi token partition is enabled */
vp8_write_literal(bc, pc->multi_token_partition, 2);
- // Frame Qbaseline quantizer index
+ /* Frame Qbaseline quantizer index */
vp8_write_literal(bc, pc->base_qindex, 7);
- // Transmit Dc, Second order and Uv quantizer delta information
+ /* Transmit Dc, Second order and Uv quantizer delta information */
put_delta_q(bc, pc->y1dc_delta_q);
put_delta_q(bc, pc->y2dc_delta_q);
put_delta_q(bc, pc->y2ac_delta_q);
put_delta_q(bc, pc->uvdc_delta_q);
put_delta_q(bc, pc->uvac_delta_q);
- // When there is a key frame all reference buffers are updated using the new key frame
+ /* When there is a key frame all reference buffers are updated using
+ * the new key frame
+ */
if (pc->frame_type != KEY_FRAME)
{
- // Should the GF or ARF be updated using the transmitted frame or buffer
+ /* Should the GF or ARF be updated using the transmitted frame
+ * or buffer
+ */
vp8_write_bit(bc, pc->refresh_golden_frame);
vp8_write_bit(bc, pc->refresh_alt_ref_frame);
- // If not being updated from current frame should either GF or ARF be updated from another buffer
+ /* If not being updated from the current frame, should either GF or ARF
+ * be updated from another buffer
+ */
if (!pc->refresh_golden_frame)
vp8_write_literal(bc, pc->copy_buffer_to_gf, 2);
if (!pc->refresh_alt_ref_frame)
vp8_write_literal(bc, pc->copy_buffer_to_arf, 2);
- // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
+ /* Indicate reference frame sign bias for Golden and ARF frames
+ * (always 0 for last frame buffer)
+ */
vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
}
@@ -1532,14 +1530,14 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
#endif
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
pack_coef_probs(cpi);
#else
if (pc->refresh_entropy_probs == 0)
{
- // save a copy for later refresh
+ /* save a copy for later refresh */
vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
}
@@ -1550,7 +1548,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
active_section = 2;
#endif
- // Write out the mb_no_coeff_skip flag
+ /* Write out the mb_no_coeff_skip flag */
vp8_write_bit(bc, pc->mb_no_coeff_skip);
if (pc->frame_type == KEY_FRAME)
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 6165d04..a30f888 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -18,7 +18,10 @@
#include "vp8/common/entropy.h"
#include "vpx_ports/mem.h"
-// motion search site
+#define MAX_MODES 20
+#define MAX_ERROR_BINS 1024
+
+/* motion search site */
typedef struct
{
MV mv;
@@ -27,11 +30,11 @@ typedef struct
typedef struct block
{
- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
short *src_diff;
short *coeff;
- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
short *quant;
short *quant_fast;
unsigned char *quant_shift;
@@ -39,7 +42,7 @@ typedef struct block
short *zrun_zbin_boost;
short *round;
- // Zbin Over Quant value
+ /* Zbin Over Quant value */
short zbin_extra;
unsigned char **base_src;
@@ -59,12 +62,12 @@ typedef struct
typedef struct macroblock
{
- DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
- DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
+ DECLARE_ALIGNED(16, short, src_diff[400]); /* 25 blocks Y,U,V,Y2 */
+ DECLARE_ALIGNED(16, short, coeff[400]); /* 25 blocks Y,U,V,Y2 */
DECLARE_ALIGNED(16, unsigned char, thismb[256]);
unsigned char *thismb_ptr;
- // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries
+ /* 16 Y, 4 U, 4 V, 1 DC 2nd order block */
BLOCK block[25];
YV12_BUFFER_CONFIG src;
@@ -90,16 +93,18 @@ typedef struct macroblock
signed int act_zbin_adj;
signed int last_act_zbin_adj;
- int mvcosts[2][MVvals+1];
int *mvcost[2];
- int mvsadcosts[2][MVfpvals+1];
int *mvsadcost[2];
- int mbmode_cost[2][MB_MODE_COUNT];
- int intra_uv_mode_cost[2][MB_MODE_COUNT];
- unsigned int bmode_costs[10][10][10];
- unsigned int inter_bmode_costs[B_MODE_COUNT];
-
- // These define limits to motion vector components to prevent them from extending outside the UMV borders
+ int (*mbmode_cost)[MB_MODE_COUNT];
+ int (*intra_uv_mode_cost)[MB_MODE_COUNT];
+ int (*bmode_costs)[10][10];
+ int *inter_bmode_costs;
+ int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
+ [MAX_ENTROPY_TOKENS];
+
+ /* These define limits to motion vector components to prevent
+ * them from extending outside the UMV borders.
+ */
int mv_col_min;
int mv_col_max;
int mv_row_min;
@@ -107,18 +112,45 @@ typedef struct macroblock
int skip;
- int encode_breakout;
+ unsigned int encode_breakout;
- //char * gf_active_ptr;
signed char *gf_active_ptr;
unsigned char *active_ptr;
MV_CONTEXT *mvc;
- unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
int optimize;
int q_index;
+#if CONFIG_TEMPORAL_DENOISING
+ MB_PREDICTION_MODE best_sse_inter_mode;
+ int_mv best_sse_mv;
+ MV_REFERENCE_FRAME best_reference_frame;
+ MV_REFERENCE_FRAME best_zeromv_reference_frame;
+ unsigned char need_to_clamp_best_mvs;
+#endif
+
+ int skip_true_count;
+ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+ unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */
+ int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */
+ int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */
+ int64_t prediction_error;
+ int64_t intra_error;
+ int count_mb_ref_frame_usage[MAX_REF_FRAMES];
+
+ int rd_thresh_mult[MAX_MODES];
+ int rd_threshes[MAX_MODES];
+ unsigned int mbs_tested_so_far;
+ unsigned int mode_test_hit_counts[MAX_MODES];
+ int zbin_mode_boost_enabled;
+ int zbin_mode_boost;
+ int last_zbin_mode_boost;
+
+ int last_zbin_over_quant;
+ int zbin_over_quant;
+ int error_bins[MAX_ERROR_BINS];
+
void (*short_fdct4x4)(short *input, short *output, int pitch);
void (*short_fdct8x4)(short *input, short *output, int pitch);
void (*short_walsh4x4)(short *input, short *output, int pitch);
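Several cost tables in MACROBLOCK change from embedded arrays to pointers to fixed-size arrays, e.g. `int (*mbmode_cost)[MB_MODE_COUNT]`, so per-thread copies of the struct can share one table while the indexing syntax at every use site stays identical. A small sketch of the idiom with made-up costs and a reduced mode count:

```c
#include <stdio.h>

#define MB_MODE_COUNT 4   /* reduced from VP8's count for the sketch */

static int shared_mbmode_cost[2][MB_MODE_COUNT] = {
    { 10, 20, 30, 40 },   /* made-up key-frame costs */
    { 11, 21, 31, 41 },   /* made-up inter-frame costs */
};

struct macroblock_sketch {
    int (*mbmode_cost)[MB_MODE_COUNT];   /* points at rows of a shared table */
};

int main(void)
{
    struct macroblock_sketch mb = { shared_mbmode_cost };
    /* Indexing is unchanged from the embedded-array version. */
    printf("%d\n", mb.mbmode_cost[1][2]);   /* prints 31 */
    return 0;
}
```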
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index fb6cbaf..8309063 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -32,7 +32,7 @@ typedef struct
unsigned char *buffer_end;
struct vpx_internal_error_info *error;
- // Variables used to track bit costs without outputing to the bitstream
+ /* Variables used to track bit costs without outputting to the bitstream */
unsigned int measure_cost;
unsigned long bit_counter;
} BOOL_CODER;
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 09ed9dd..f3faa22 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -15,198 +15,293 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_rtcd.h"
-static const unsigned int NOISE_MOTION_THRESHOLD = 20*20;
-static const unsigned int NOISE_DIFF2_THRESHOLD = 75;
-// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
-static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20;
-static const unsigned int SSE_THRESHOLD = 16*16*40;
+static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
+/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming
+ * var(noise) ~= 100.
+ */
+static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
+static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
-static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8)
-{
- return (uint8_t)(
- (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8
- (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8
- >> 8);
-}
+/*
+ * The filter function was modified to reduce the computational complexity.
+ * Step 1:
+ * Instead of applying tap coefficients for each pixel, we calculated the
+ * pixel adjustments vs. pixel diff value ahead of time.
+ * adjustment = filtered_value - current_raw
+ * = (filter_coefficient * diff + 128) >> 8
+ * where
+ * filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
+ * filter_coefficient += filter_coefficient /
+ * (3 + motion_magnitude_adjustment);
+ * filter_coefficient is clamped to 0 ~ 255.
+ *
+ * Step 2:
+ * The adjustment vs. diff curve becomes flat very quick when diff increases.
+ * This allowed us to use only several levels to approximate the curve without
+ * changing the filtering algorithm too much.
+ * The adjustments were further corrected by checking the motion magnitude.
+ * The levels used are:
+ * diff adjustment w/o motion correction adjustment w/ motion correction
+ * [-255, -16] -6 -7
+ * [-15, -8] -4 -5
+ * [-7, -4] -3 -4
+ * [-3, 3] diff diff
+ * [4, 7] 3 4
+ * [8, 15] 4 5
+ * [16, 255] 6 7
+ */
-static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src,
- YV12_BUFFER_CONFIG* dst,
- MACROBLOCK* x,
- unsigned int best_sse,
- unsigned int zero_mv_sse,
- int recon_yoffset,
- int recon_uvoffset)
+int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
+ YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal,
+ unsigned int motion_magnitude, int y_offset,
+ int uv_offset)
{
- MACROBLOCKD filter_xd = x->e_mbd;
- int mv_col;
- int mv_row;
- int sse_diff = zero_mv_sse - best_sse;
- // Compensate the running average.
- filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset;
- filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset;
- filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset;
- // Write the compensated running average to the destination buffer.
- filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset;
- filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset;
- filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset;
- // Use the best MV for the compensation.
- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode;
- filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv;
- filter_xd.mode_info_context->mbmi.need_to_clamp_mvs =
- filter_xd.need_to_clamp_best_mvs;
- mv_col = filter_xd.best_sse_mv.as_mv.col;
- mv_row = filter_xd.best_sse_mv.as_mv.row;
- if (filter_xd.mode_info_context->mbmi.mode <= B_PRED ||
- (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD &&
- sse_diff < SSE_DIFF_THRESHOLD))
- {
- // Handle intra blocks as referring to last frame with zero motion and
- // let the absolute pixel difference affect the filter factor.
- // Also consider small amount of motion as being random walk due to noise,
- // if it doesn't mean that we get a much bigger error.
- // Note that any changes to the mode info only affects the denoising.
- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- filter_xd.mode_info_context->mbmi.mode = ZEROMV;
- filter_xd.mode_info_context->mbmi.mv.as_int = 0;
- x->e_mbd.best_sse_inter_mode = ZEROMV;
- x->e_mbd.best_sse_mv.as_int = 0;
- best_sse = zero_mv_sse;
- }
- if (!x->skip)
- {
- vp8_build_inter_predictors_mb(&filter_xd);
- }
- else
- {
- vp8_build_inter16x16_predictors_mb(&filter_xd,
- filter_xd.dst.y_buffer,
- filter_xd.dst.u_buffer,
- filter_xd.dst.v_buffer,
- filter_xd.dst.y_stride,
- filter_xd.dst.uv_stride);
- }
- return best_sse;
-}
+ unsigned char *sig = signal->thismb;
+ int sig_stride = 16;
+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
+ int mc_avg_y_stride = mc_running_avg->y_stride;
+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
+ int avg_y_stride = running_avg->y_stride;
+ int r, c, i;
+ int sum_diff = 0;
+ int adj_val[3] = {3, 4, 6};
-static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg,
- YV12_BUFFER_CONFIG* running_avg,
- MACROBLOCK* signal,
- unsigned int motion_magnitude2,
- int y_offset,
- int uv_offset)
-{
- unsigned char* sig = signal->thismb;
- int sig_stride = 16;
- unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
- int mc_avg_y_stride = mc_running_avg->y_stride;
- unsigned char* running_avg_y = running_avg->y_buffer + y_offset;
- int avg_y_stride = running_avg->y_stride;
- int r, c;
- for (r = 0; r < 16; r++)
- {
- for (c = 0; c < 16; c++)
+ /* If motion_magnitude is small, make the denoiser more aggressive by
+ * increasing the adjustment for each level. */
+ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
+ {
+ for (i = 0; i < 3; i++)
+ adj_val[i] += 1;
+ }
+
+ for (r = 0; r < 16; ++r)
{
- int diff;
- int absdiff = 0;
- unsigned int filter_coefficient;
- absdiff = sig[c] - mc_running_avg_y[c];
- absdiff = absdiff > 0 ? absdiff : -absdiff;
- assert(absdiff >= 0 && absdiff < 256);
- filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
- // Allow some additional filtering of static blocks, or blocks with very
- // small motion vectors.
- filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3));
- filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient;
-
- running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient);
- diff = sig[c] - running_avg_y[c];
-
- if (diff * diff < NOISE_DIFF2_THRESHOLD)
- {
- // Replace with mean to suppress the noise.
- sig[c] = running_avg_y[c];
- }
- else
- {
- // Replace the filter state with the signal since the change in this
- // pixel isn't classified as noise.
- running_avg_y[c] = sig[c];
- }
+ for (c = 0; c < 16; ++c)
+ {
+ int diff = 0;
+ int adjustment = 0;
+ int absdiff = 0;
+
+ diff = mc_running_avg_y[c] - sig[c];
+ absdiff = abs(diff);
+
+ /* When |diff| < 4, use pixel value from last denoised raw. */
+ if (absdiff <= 3)
+ {
+ running_avg_y[c] = mc_running_avg_y[c];
+ sum_diff += diff;
+ }
+ else
+ {
+ if (absdiff >= 4 && absdiff <= 7)
+ adjustment = adj_val[0];
+ else if (absdiff >= 8 && absdiff <= 15)
+ adjustment = adj_val[1];
+ else
+ adjustment = adj_val[2];
+
+ if (diff > 0)
+ {
+ if ((sig[c] + adjustment) > 255)
+ running_avg_y[c] = 255;
+ else
+ running_avg_y[c] = sig[c] + adjustment;
+
+ sum_diff += adjustment;
+ }
+ else
+ {
+ if ((sig[c] - adjustment) < 0)
+ running_avg_y[c] = 0;
+ else
+ running_avg_y[c] = sig[c] - adjustment;
+
+ sum_diff -= adjustment;
+ }
+ }
+ }
+
+ /* Update pointers for next iteration. */
+ sig += sig_stride;
+ mc_running_avg_y += mc_avg_y_stride;
+ running_avg_y += avg_y_stride;
}
- sig += sig_stride;
- mc_running_avg_y += mc_avg_y_stride;
- running_avg_y += avg_y_stride;
- }
+
+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+ return COPY_BLOCK;
+
+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride,
+ signal->thismb, sig_stride);
+ return FILTER_BLOCK;
}
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
{
- assert(denoiser);
- denoiser->yv12_running_avg.flags = 0;
- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width,
- height, VP8BORDERINPIXELS) < 0)
- {
- vp8_denoiser_free(denoiser);
- return 1;
- }
- denoiser->yv12_mc_running_avg.flags = 0;
- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
- height, VP8BORDERINPIXELS) < 0)
- {
- vp8_denoiser_free(denoiser);
- return 1;
- }
- vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0,
- denoiser->yv12_running_avg.frame_size);
- vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
- denoiser->yv12_mc_running_avg.frame_size);
- return 0;
+ int i;
+ assert(denoiser);
+
+ for (i = 0; i < MAX_REF_FRAMES; i++)
+ {
+ denoiser->yv12_running_avg[i].flags = 0;
+
+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width,
+ height, VP8BORDERINPIXELS)
+ < 0)
+ {
+ vp8_denoiser_free(denoiser);
+ return 1;
+ }
+ vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
+ denoiser->yv12_running_avg[i].frame_size);
+
+ }
+ denoiser->yv12_mc_running_avg.flags = 0;
+
+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
+ height, VP8BORDERINPIXELS) < 0)
+ {
+ vp8_denoiser_free(denoiser);
+ return 1;
+ }
+
+ vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
+ denoiser->yv12_mc_running_avg.frame_size);
+ return 0;
}
void vp8_denoiser_free(VP8_DENOISER *denoiser)
{
- assert(denoiser);
- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg);
- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
+ int i;
+ assert(denoiser);
+
+ for (i = 0; i < MAX_REF_FRAMES ; i++)
+ {
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);
+ }
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
}
+
void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
MACROBLOCK *x,
unsigned int best_sse,
unsigned int zero_mv_sse,
int recon_yoffset,
- int recon_uvoffset) {
- int mv_row;
- int mv_col;
- unsigned int motion_magnitude2;
- // Motion compensate the running average.
- best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg,
- &denoiser->yv12_mc_running_avg,
- x,
- best_sse,
- zero_mv_sse,
- recon_yoffset,
- recon_uvoffset);
-
- mv_row = x->e_mbd.best_sse_mv.as_mv.row;
- mv_col = x->e_mbd.best_sse_mv.as_mv.col;
- motion_magnitude2 = mv_row*mv_row + mv_col*mv_col;
- if (best_sse > SSE_THRESHOLD ||
- motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD)
- {
- // No filtering of this block since it differs too much from the predictor,
- // or the motion vector magnitude is considered too big.
- vp8_copy_mem16x16(x->thismb, 16,
- denoiser->yv12_running_avg.y_buffer + recon_yoffset,
- denoiser->yv12_running_avg.y_stride);
- return;
- }
- // Filter.
- denoiser_filter(&denoiser->yv12_mc_running_avg,
- &denoiser->yv12_running_avg,
- x,
- motion_magnitude2,
- recon_yoffset,
- recon_uvoffset);
+ int recon_uvoffset)
+{
+ int mv_row;
+ int mv_col;
+ unsigned int motion_magnitude2;
+
+ MV_REFERENCE_FRAME frame = x->best_reference_frame;
+ MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
+
+ enum vp8_denoiser_decision decision = FILTER_BLOCK;
+
+ if (zero_frame)
+ {
+ YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame];
+ YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg;
+ YV12_BUFFER_CONFIG saved_pre,saved_dst;
+ MB_MODE_INFO saved_mbmi;
+ MACROBLOCKD *filter_xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
+ int mv_col;
+ int mv_row;
+ int sse_diff = zero_mv_sse - best_sse;
+
+ saved_mbmi = *mbmi;
+
+ /* Use the best MV for the compensation. */
+ mbmi->ref_frame = x->best_reference_frame;
+ mbmi->mode = x->best_sse_inter_mode;
+ mbmi->mv = x->best_sse_mv;
+ mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs;
+ mv_col = x->best_sse_mv.as_mv.col;
+ mv_row = x->best_sse_mv.as_mv.row;
+
+ if (frame == INTRA_FRAME ||
+ ((unsigned int)(mv_row *mv_row + mv_col *mv_col)
+ <= NOISE_MOTION_THRESHOLD &&
+ sse_diff < (int)SSE_DIFF_THRESHOLD))
+ {
+ /*
+ * Handle intra blocks as referring to last frame with zero motion
+ * and let the absolute pixel difference affect the filter factor.
+ * Also consider small amount of motion as being random walk due
+ * to noise, if it doesn't mean that we get a much bigger error.
+ * Note that any changes to the mode info only affects the
+ * denoising.
+ */
+ mbmi->ref_frame =
+ x->best_zeromv_reference_frame;
+
+ src = &denoiser->yv12_running_avg[zero_frame];
+
+ mbmi->mode = ZEROMV;
+ mbmi->mv.as_int = 0;
+ x->best_sse_inter_mode = ZEROMV;
+ x->best_sse_mv.as_int = 0;
+ best_sse = zero_mv_sse;
+ }
+
+ saved_pre = filter_xd->pre;
+ saved_dst = filter_xd->dst;
+
+ /* Compensate the running average. */
+ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset;
+ filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset;
+ filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset;
+ /* Write the compensated running average to the destination buffer. */
+ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset;
+ filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset;
+ filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset;
+
+ if (!x->skip)
+ {
+ vp8_build_inter_predictors_mb(filter_xd);
+ }
+ else
+ {
+ vp8_build_inter16x16_predictors_mb(filter_xd,
+ filter_xd->dst.y_buffer,
+ filter_xd->dst.u_buffer,
+ filter_xd->dst.v_buffer,
+ filter_xd->dst.y_stride,
+ filter_xd->dst.uv_stride);
+ }
+ filter_xd->pre = saved_pre;
+ filter_xd->dst = saved_dst;
+ *mbmi = saved_mbmi;
+
+ }
+
+ mv_row = x->best_sse_mv.as_mv.row;
+ mv_col = x->best_sse_mv.as_mv.col;
+ motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
+ if (best_sse > SSE_THRESHOLD || motion_magnitude2
+ > 8 * NOISE_MOTION_THRESHOLD)
+ {
+ decision = COPY_BLOCK;
+ }
+
+ if (decision == FILTER_BLOCK)
+ {
+ /* Filter. */
+ decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
+ &denoiser->yv12_running_avg[INTRA_FRAME],
+ x,
+ motion_magnitude2,
+ recon_yoffset, recon_uvoffset);
+ }
+ if (decision == COPY_BLOCK)
+ {
+ /* No filtering of this block; it differs too much from the predictor,
+ * or the motion vector magnitude is considered too big.
+ */
+ vp8_copy_mem16x16(
+ x->thismb, 16,
+ denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
+ denoiser->yv12_running_avg[INTRA_FRAME].y_stride);
+ }
}
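The rewritten vp8_denoiser_filter_c replaces the per-pixel divide of the old filter_coefficient formula with the staircase described in the comment block: |diff| <= 3 copies the motion-compensated average, larger diffs nudge the raw pixel toward it by a step from adj_val, and low motion bumps each step by one. A distilled per-pixel sketch; denoise_pixel is a hypothetical helper, not a libvpx function, and the sum_diff bailout is omitted:

```c
#include <stdio.h>
#include <stdlib.h>

/* Per-pixel core of the staircase above: small diffs take the running
 * average verbatim; larger diffs move the raw pixel by a bounded step. */
static unsigned char denoise_pixel(unsigned char avg, unsigned char raw,
                                   int low_motion)
{
    int adj_val[3] = { 3, 4, 6 };
    int diff = avg - raw;
    int absdiff = abs(diff);
    int adjustment;

    if (absdiff <= 3)
        return avg;                       /* treat the change as pure noise */

    if (absdiff <= 7)       adjustment = adj_val[0];
    else if (absdiff <= 15) adjustment = adj_val[1];
    else                    adjustment = adj_val[2];
    if (low_motion)
        adjustment += 1;                  /* mirrors the adj_val[i] += 1 bump */

    if (diff > 0)
        return (raw + adjustment > 255) ? 255 : (unsigned char)(raw + adjustment);
    return (raw - adjustment < 0) ? 0 : (unsigned char)(raw - adjustment);
}

int main(void)
{
    printf("%d\n", denoise_pixel(100, 98, 0));   /* |diff|=2  -> 100 */
    printf("%d\n", denoise_pixel(120, 100, 0));  /* |diff|=20 -> 106 */
    printf("%d\n", denoise_pixel(120, 100, 1));  /* low motion -> 107 */
    return 0;
}
```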
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index 343531b..b025f5c 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -13,10 +13,19 @@
#include "block.h"
+#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
+#define MOTION_MAGNITUDE_THRESHOLD (8*3)
+
+enum vp8_denoiser_decision
+{
+ COPY_BLOCK,
+ FILTER_BLOCK
+};
+
typedef struct vp8_denoiser
{
- YV12_BUFFER_CONFIG yv12_running_avg;
- YV12_BUFFER_CONFIG yv12_mc_running_avg;
+ YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES];
+ YV12_BUFFER_CONFIG yv12_mc_running_avg;
} VP8_DENOISER;
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);
@@ -30,4 +39,4 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
int recon_yoffset,
int recon_uvoffset);
-#endif // VP8_ENCODER_DENOISING_H_
+#endif /* VP8_ENCODER_DENOISING_H_ */
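The denoiser now keeps one running-average buffer per reference frame type, so vp8_denoiser_allocate walks the whole array and must unwind everything on a partial failure (the real code does this by calling vp8_denoiser_free). A lifecycle sketch with stand-in buffer types and allocators in place of the vpx_scale frame-buffer API:

```c
#include <stdlib.h>

#define MAX_REF_FRAMES 4   /* INTRA, LAST, GOLDEN, ALTREF */

struct buf { unsigned char *data; };   /* stand-in for YV12_BUFFER_CONFIG */

static void buf_free(struct buf *b)
{
    free(b->data);
    b->data = NULL;
}

static int denoiser_allocate(struct buf avg[MAX_REF_FRAMES], size_t size)
{
    for (int i = 0; i < MAX_REF_FRAMES; i++) {
        avg[i].data = calloc(1, size);   /* running averages start at zero */
        if (avg[i].data == NULL) {
            for (int j = 0; j < i; j++)  /* unwind the partial allocation */
                buf_free(&avg[j]);
            return 1;                    /* 1-on-failure, as above */
        }
    }
    return 0;
}

int main(void)
{
    struct buf avg[MAX_REF_FRAMES] = { { 0 } };
    if (denoiser_allocate(avg, 1280 * 720 * 3 / 2))   /* YV12 frame size */
        return 1;
    for (int i = 0; i < MAX_REF_FRAMES; i++)
        buf_free(&avg[i]);
    return 0;
}
```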
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 8233873..d1b647b 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -33,7 +33,7 @@
#endif
#include "encodeframe.h"
-extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
+extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ;
extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,
int prob_intra,
int prob_last,
@@ -45,7 +45,6 @@ extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
MACROBLOCK *x,
MB_ROW_COMP *mbr_ei,
- int mb_row,
int count);
static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );
@@ -77,7 +76,7 @@ static const unsigned char VP8_VAR_OFFS[16]=
};
-// Original activity measure from Tim T's code.
+/* Original activity measure from Tim T's code. */
static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
{
unsigned int act;
@@ -100,7 +99,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
return act;
}
-// Stub for alternative experimental activity measures.
+/* Stub for alternative experimental activity measures. */
static unsigned int alt_activity_measure( VP8_COMP *cpi,
MACROBLOCK *x, int use_dc_pred )
{
@@ -108,8 +107,9 @@ static unsigned int alt_activity_measure( VP8_COMP *cpi,
}
-// Measure the activity of the current macroblock
-// What we measure here is TBD so abstracted to this function
+/* Measure the activity of the current macroblock
+ * What we measure here is TBD so abstracted to this function
+ */
#define ALT_ACT_MEASURE 1
static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x,
int mb_row, int mb_col)
@@ -120,12 +120,12 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x,
{
int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
- // Or use and alternative.
+ /* Or use an alternative. */
mb_activity = alt_activity_measure( cpi, x, use_dc_pred );
}
else
{
- // Original activity measure from Tim T's code.
+ /* Original activity measure from Tim T's code. */
mb_activity = tt_activity_measure( cpi, x );
}
@@ -135,36 +135,36 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x,
return mb_activity;
}
-// Calculate an "average" mb activity value for the frame
+/* Calculate an "average" mb activity value for the frame */
#define ACT_MEDIAN 0
static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
{
#if ACT_MEDIAN
- // Find median: Simple n^2 algorithm for experimentation
+ /* Find median: Simple n^2 algorithm for experimentation */
{
unsigned int median;
unsigned int i,j;
unsigned int * sortlist;
unsigned int tmp;
- // Create a list to sort to
+ /* Create a list to sort to */
CHECK_MEM_ERROR(sortlist,
vpx_calloc(sizeof(unsigned int),
cpi->common.MBs));
- // Copy map to sort list
+ /* Copy map to sort list */
vpx_memcpy( sortlist, cpi->mb_activity_map,
sizeof(unsigned int) * cpi->common.MBs );
- // Ripple each value down to its correct position
+ /* Ripple each value down to its correct position */
for ( i = 1; i < cpi->common.MBs; i ++ )
{
for ( j = i; j > 0; j -- )
{
if ( sortlist[j] < sortlist[j-1] )
{
- // Swap values
+ /* Swap values */
tmp = sortlist[j-1];
sortlist[j-1] = sortlist[j];
sortlist[j] = tmp;
@@ -174,7 +174,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
}
}
- // Even number MBs so estimate median as mean of two either side.
+ /* Even number of MBs, so estimate the median as the mean of the two values either side. */
median = ( 1 + sortlist[cpi->common.MBs >> 1] +
sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1;
@@ -183,14 +183,14 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
vpx_free(sortlist);
}
#else
- // Simple mean for now
+ /* Simple mean for now */
cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs);
#endif
if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
- // Experimental code: return fixed value normalized for several clips
+ /* Experimental code: return fixed value normalized for several clips */
if ( ALT_ACT_MEASURE )
cpi->activity_avg = 100000;
}
@@ -199,7 +199,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
#define OUTPUT_NORM_ACT_STATS 0
#if USE_ACT_INDEX
-// Calculate and activity index for each mb
+/* Calculate an activity index for each mb */
static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
{
VP8_COMMON *const cm = & cpi->common;
@@ -214,19 +214,19 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
fprintf(f, "\n%12d\n", cpi->activity_avg );
#endif
- // Reset pointers to start of activity map
+ /* Reset pointers to start of activity map */
x->mb_activity_ptr = cpi->mb_activity_map;
- // Calculate normalized mb activity number.
+ /* Calculate normalized mb activity number. */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
- // for each macroblock col in image
+ /* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- // Read activity from the map
+ /* Read activity from the map */
act = *(x->mb_activity_ptr);
- // Calculate a normalized activity number
+ /* Calculate a normalized activity number */
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
@@ -238,7 +238,7 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
#if OUTPUT_NORM_ACT_STATS
fprintf(f, " %6d", *(x->mb_activity_ptr));
#endif
- // Increment activity map pointers
+ /* Increment activity map pointers */
x->mb_activity_ptr++;
}
@@ -255,8 +255,9 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
}
#endif
-// Loop through all MBs. Note activity of each, average activity and
-// calculate a normalized activity for each
+/* Loop through all MBs. Note activity of each, average activity and
+ * calculate a normalized activity for each
+ */
static void build_activity_map( VP8_COMP *cpi )
{
MACROBLOCK *const x = & cpi->mb;
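
For reference, the a/b blend used by calc_activity_index above (a = act + 4*avg, b = 4*act + avg) caps how far one macroblock can move the index away from neutral. A small self-contained illustration of the ratio; the real code works in integers with its own rounding, so treat this as a sketch:

    #include <stdio.h>

    /* b/a tends to 4 for very busy blocks and to 1/4 for very flat ones,
     * so the per-MB adjustment is bounded to a 16x range around neutral. */
    static double activity_ratio(double act, double avg)
    {
        double a = act + 4.0 * avg;
        double b = 4.0 * act + avg;
        return b / a;
    }

    int main(void)
    {
        double avg = 100000.0; /* the fixed frame average used above */
        printf("flat: %.3f\n", activity_ratio(1e3, avg));
        printf("avg : %.3f\n", activity_ratio(avg, avg));
        printf("busy: %.3f\n", activity_ratio(1e7, avg));
        return 0;
    }
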
@@ -273,15 +274,15 @@ static void build_activity_map( VP8_COMP *cpi )
unsigned int mb_activity;
int64_t activity_sum = 0;
- // for each macroblock row in image
+ /* for each macroblock row in image */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
#if ALT_ACT_MEASURE
- // reset above block coeffs
+ /* reset above block coeffs */
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
#endif
- // for each macroblock col in image
+ /* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
#if ALT_ACT_MEASURE
@@ -289,48 +290,48 @@ static void build_activity_map( VP8_COMP *cpi )
xd->left_available = (mb_col != 0);
recon_yoffset += 16;
#endif
- //Copy current mb to a buffer
+ /* Copy current mb to a buffer */
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
- // measure activity
+ /* measure activity */
mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col );
- // Keep frame sum
+ /* Keep frame sum */
activity_sum += mb_activity;
- // Store MB level activity details.
+ /* Store MB level activity details. */
*x->mb_activity_ptr = mb_activity;
- // Increment activity map pointer
+ /* Increment activity map pointer */
x->mb_activity_ptr++;
- // adjust to the next column of source macroblocks
+ /* adjust to the next column of source macroblocks */
x->src.y_buffer += 16;
}
- // adjust to the next row of mbs
+ /* adjust to the next row of mbs */
x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
#if ALT_ACT_MEASURE
- //extend the recon for intra prediction
+ /* extend the recon for intra prediction */
vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
#endif
}
- // Calculate an "average" MB activity
+ /* Calculate an "average" MB activity */
calc_av_activity(cpi, activity_sum);
#if USE_ACT_INDEX
- // Calculate an activity index number of each mb
+ /* Calculate an activity index number of each mb */
calc_activity_index( cpi, x );
#endif
}
-// Macroblock activity masking
+/* Macroblock activity masking */
void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
#if USE_ACT_INDEX
@@ -342,7 +343,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
int64_t b;
int64_t act = *(x->mb_activity_ptr);
- // Apply the masking to the RD multiplier.
+ /* Apply the masking to the RD multiplier. */
a = act + (2*cpi->activity_avg);
b = (2*act) + cpi->activity_avg;
@@ -351,7 +352,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
x->errorperbit += (x->errorperbit==0);
#endif
- // Activity based Zbin adjustment
+ /* Activity based Zbin adjustment */
adjust_act_zbin(cpi, x);
}
@@ -398,7 +399,7 @@ void encode_mb_row(VP8_COMP *cpi,
w = &cpi->bc[1];
#endif
- // reset above block coeffs
+ /* reset above block coeffs */
xd->above_context = cm->above_context;
xd->up_available = (mb_row != 0);
@@ -406,37 +407,41 @@ void encode_mb_row(VP8_COMP *cpi,
recon_uvoffset = (mb_row * recon_uv_stride * 8);
cpi->tplist[mb_row].start = *tp;
- //printf("Main mb_row = %d\n", mb_row);
+ /* printf("Main mb_row = %d\n", mb_row); */
- // Distance of Mb to the top & bottom edges, specified in 1/8th pel
- // units as they are always compared to values that are in 1/8th pel units
+ /* Distance of Mb to the top & bottom edges, specified in 1/8th pel
+ * units as they are always compared to values that are in 1/8th pel units
+ */
xd->mb_to_top_edge = -((mb_row * 16) << 3);
xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
- // Set up limit values for vertical motion vector components
- // to prevent them extending beyond the UMV borders
+ /* Set up limit values for vertical motion vector components
+ * to prevent them extending beyond the UMV borders
+ */
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ (VP8BORDERINPIXELS - 16);
- // Set the mb activity pointer to the start of the row.
+ /* Set the mb activity pointer to the start of the row. */
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
- // for each macroblock col in image
+ /* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
*tp = cpi->tok;
#endif
- // Distance of Mb to the left & right edges, specified in
- // 1/8th pel units as they are always compared to values
- // that are in 1/8th pel units
+ /* Distance of Mb to the left & right edges, specified in
+ * 1/8th pel units as they are always compared to values
+ * that are in 1/8th pel units
+ */
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
- // Set up limit values for horizontal motion vector components
- // to prevent them extending beyond the UMV borders
+ /* Set up limit values for horizontal motion vector components
+ * to prevent them extending beyond the UMV borders
+ */
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+ (VP8BORDERINPIXELS - 16);
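
A compilable restatement of the vertical limits set above, assuming libvpx's VP8BORDERINPIXELS of 32; edge distances are shifted into 1/8th-pel units while the MV clamps stay unshifted, exactly as in the hunk:

    #include <stdio.h>

    #define VP8BORDERINPIXELS 32

    static void row_limits(int mb_row, int mb_rows)
    {
        int mb_to_top_edge    = -((mb_row * 16) << 3);
        int mb_to_bottom_edge = ((mb_rows - 1 - mb_row) * 16) << 3;
        int mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
        int mv_row_max = ((mb_rows - 1 - mb_row) * 16)
                         + (VP8BORDERINPIXELS - 16);
        printf("row %2d: edges [%7d, %7d]  mv range [%5d, %5d]\n",
               mb_row, mb_to_top_edge, mb_to_bottom_edge,
               mv_row_min, mv_row_max);
    }

    int main(void)
    {
        int mb_rows = 30; /* e.g. 480 / 16 */
        row_limits(0, mb_rows);
        row_limits(mb_rows / 2, mb_rows);
        row_limits(mb_rows - 1, mb_rows);
        return 0;
    }
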
@@ -449,13 +454,13 @@ void encode_mb_row(VP8_COMP *cpi,
x->rddiv = cpi->RDDIV;
x->rdmult = cpi->RDMULT;
- //Copy current mb to a buffer
+ /* Copy current mb to a buffer */
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
{
- *current_mb_col = mb_col - 1; // set previous MB done
+ *current_mb_col = mb_col - 1; /* set previous MB done */
if ((mb_col & (nsync - 1)) == 0)
{
@@ -471,11 +476,13 @@ void encode_mb_row(VP8_COMP *cpi,
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp8_activity_masking(cpi, x);
- // Is segmentation enabled
- // MB level adjustment to quantizer
+ /* Is segmentation enabled */
+ /* MB level adjustment to quantizer */
if (xd->segmentation_enabled)
{
- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
+ /* Code to set segment id in xd->mbmi.segment_id for current MB
+ * (with range checking)
+ */
if (cpi->segmentation_map[map_index+mb_col] <= 3)
xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col];
else
@@ -484,7 +491,8 @@ void encode_mb_row(VP8_COMP *cpi,
vp8cx_mb_init_quantizer(cpi, x, 1);
}
else
- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
+ /* Set to Segment 0 by default */
+ xd->mode_info_context->mbmi.segment_id = 0;
x->active_ptr = cpi->active_map + map_index + mb_col;
@@ -514,21 +522,25 @@ void encode_mb_row(VP8_COMP *cpi,
#endif
- // Count of last ref frame 0,0 usage
- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
- cpi->inter_zz_count ++;
-
- // Special case code for cyclic refresh
- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
+ /* Special case code for cyclic refresh
+ * If cyclic update enabled then copy xd->mbmi.segment_id; (which
+ * may have been updated based on mode during
+ * vp8cx_encode_inter_macroblock()) back into the global
+ * segmentation map
+ */
if ((cpi->current_layer == 0) &&
- (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled))
+ (cpi->cyclic_refresh_mode_enabled &&
+ xd->segmentation_enabled))
{
cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
- // else mark it as dirty (1).
+ /* If the block has been refreshed mark it as clean (the
+ * magnitude of the -ve influences how long it will be before
+ * we consider another refresh):
+ * Else if it was coded (last frame 0,0) and has not already
+ * been refreshed then mark it as a candidate for cleanup
+ * next time (marked 0) else mark it as dirty (1).
+ */
if (xd->mode_info_context->mbmi.segment_id)
cpi->cyclic_refresh_map[map_index+mb_col] = -1;
else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
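
The refresh-map policy in the comment above boils down to a three-way classification per macroblock. A toy restatement with the inputs reduced to plain flags (classify is a hypothetical helper, not a libvpx function):

    #include <stdio.h>

    /* -1: just refreshed, stays clean for a while; 0: coded LAST/ZEROMV,
     * a candidate for refresh next time; 1: dirty. */
    static signed char classify(int segment_id, int is_zeromv_last)
    {
        if (segment_id)
            return -1;
        if (is_zeromv_last)
            return 0;
        return 1;
    }

    int main(void)
    {
        printf("refreshed -> %d\n", classify(1, 0));
        printf("static    -> %d\n", classify(0, 1));
        printf("moving    -> %d\n", classify(0, 0));
        return 0;
    }
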
@@ -551,13 +563,13 @@ void encode_mb_row(VP8_COMP *cpi,
pack_tokens(w, tp_start, tok_count);
}
#endif
- // Increment pointer into gf usage flags structure.
+ /* Increment pointer into gf usage flags structure. */
x->gf_active_ptr++;
- // Increment the activity mask pointers.
+ /* Increment the activity mask pointers. */
x->mb_activity_ptr++;
- // adjust to the next column of macroblocks
+ /* adjust to the next column of macroblocks */
x->src.y_buffer += 16;
x->src.u_buffer += 8;
x->src.v_buffer += 8;
@@ -565,16 +577,16 @@ void encode_mb_row(VP8_COMP *cpi,
recon_yoffset += 16;
recon_uvoffset += 8;
- // Keep track of segment usage
+ /* Keep track of segment usage */
segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
- // skip to next mb
+ /* skip to next mb */
xd->mode_info_context++;
x->partition_info++;
xd->above_context++;
}
- //extend the recon for intra prediction
+ /* extend the recon for intra prediction */
vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
@@ -585,7 +597,7 @@ void encode_mb_row(VP8_COMP *cpi,
*current_mb_col = rightmost_col;
#endif
- // this is to account for the border
+ /* this is to account for the border */
xd->mode_info_context++;
x->partition_info++;
}
@@ -596,10 +608,10 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi)
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
- // GF active flags data structure
+ /* GF active flags data structure */
x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
- // Activity map pointer
+ /* Activity map pointer */
x->mb_activity_ptr = cpi->mb_activity_map;
x->act_zbin_adj = 0;
@@ -611,48 +623,42 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi)
xd->frame_type = cm->frame_type;
- // reset intra mode contexts
+ /* reset intra mode contexts */
if (cm->frame_type == KEY_FRAME)
vp8_init_mbmode_probs(cm);
- // Copy data over into macro block data structures.
+ /* Copy data over into macro block data structures. */
x->src = * cpi->Source;
xd->pre = cm->yv12_fb[cm->lst_fb_idx];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
- // set up frame for intra coded blocks
+ /* set up frame for intra coded blocks */
vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
vp8_build_block_offsets(x);
- vp8_setup_block_dptrs(&x->e_mbd);
-
- vp8_setup_block_ptrs(x);
-
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
xd->left_context = &cm->left_context;
- vp8_zero(cpi->count_mb_ref_frame_usage)
- vp8_zero(cpi->ymode_count)
- vp8_zero(cpi->uv_mode_count)
-
x->mvc = cm->fc.mvc;
vpx_memset(cm->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
- // Special case treatment when GF and ARF are not sensible options for reference
- if (cpi->ref_frame_flags == VP8_LAST_FLAG)
+ /* Special case treatment when GF and ARF are not sensible options
+ * for reference
+ */
+ if (cpi->ref_frame_flags == VP8_LAST_FRAME)
vp8_calc_ref_frame_costs(x->ref_frame_cost,
cpi->prob_intra_coded,255,128);
else if ((cpi->oxcf.number_of_layers > 1) &&
- (cpi->ref_frame_flags == VP8_GOLD_FLAG))
+ (cpi->ref_frame_flags == VP8_GOLD_FRAME))
vp8_calc_ref_frame_costs(x->ref_frame_cost,
cpi->prob_intra_coded,1,255);
else if ((cpi->oxcf.number_of_layers > 1) &&
- (cpi->ref_frame_flags == VP8_ALT_FLAG))
+ (cpi->ref_frame_flags == VP8_ALTR_FRAME))
vp8_calc_ref_frame_costs(x->ref_frame_cost,
cpi->prob_intra_coded,1,1);
else
@@ -664,6 +670,43 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi)
xd->fullpixel_mask = 0xffffffff;
if(cm->full_pixel)
xd->fullpixel_mask = 0xfffffff8;
+
+ vp8_zero(x->coef_counts);
+ vp8_zero(x->ymode_count);
+ vp8_zero(x->uv_mode_count);
+ x->prediction_error = 0;
+ x->intra_error = 0;
+ vp8_zero(x->count_mb_ref_frame_usage);
+}
+
+static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread)
+{
+ int i = 0;
+ do
+ {
+ int j = 0;
+ do
+ {
+ int k = 0;
+ do
+ {
+ /* at every context */
+
+ /* calc probs and branch cts for this frame only */
+ int t = 0; /* token/prob index */
+
+ do
+ {
+ x->coef_counts [i][j][k][t] +=
+ x_thread->coef_counts [i][j][k][t];
+ }
+ while (++t < ENTROPY_NODES);
+ }
+ while (++k < PREV_COEF_CONTEXTS);
+ }
+ while (++j < COEF_BANDS);
+ }
+ while (++i < BLOCK_TYPES);
}
void vp8_encode_frame(VP8_COMP *cpi)
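
sum_coef_counts folds each worker's token statistics into the main MACROBLOCK after the threads have finished, so no locking is needed while rows are being encoded. The same reduction with plain for loops, using vp8's table dimensions (4 block types, 8 coefficient bands, 3 contexts, 11 entropy nodes):

    #include <stdio.h>

    enum { BLOCK_TYPES = 4, COEF_BANDS = 8, PREV_COEF_CONTEXTS = 3,
           ENTROPY_NODES = 11 };

    typedef unsigned int coef_counts[BLOCK_TYPES][COEF_BANDS]
                                    [PREV_COEF_CONTEXTS][ENTROPY_NODES];

    static void sum_counts(coef_counts dst, coef_counts src)
    {
        int i, j, k, t;
        for (i = 0; i < BLOCK_TYPES; i++)
            for (j = 0; j < COEF_BANDS; j++)
                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
                    for (t = 0; t < ENTROPY_NODES; t++)
                        dst[i][j][k][t] += src[i][j][k][t];
    }

    int main(void)
    {
        static coef_counts main_counts, thread_counts;
        thread_counts[0][0][0][0] = 7;
        sum_counts(main_counts, thread_counts);
        printf("%u\n", main_counts[0][0][0][0]);
        return 0;
    }
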
@@ -676,7 +719,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
- BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition
+ BOOL_CODER * bc = &cpi->bc[1]; /* bc[0] is for control partition */
const int num_part = (1 << cm->multi_token_partition);
#endif
@@ -691,8 +734,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
vp8_auto_select_speed(cpi);
}
- // Functions setup for all frame types so we can use MC in AltRef
- if (cm->mcomp_filter_type == SIXTAP)
+ /* Functions setup for all frame types so we can use MC in AltRef */
+ if(!cm->use_bilinear_mc_filter)
{
xd->subpixel_predict = vp8_sixtap_predict4x4;
xd->subpixel_predict8x4 = vp8_sixtap_predict8x4;
@@ -707,43 +750,36 @@ void vp8_encode_frame(VP8_COMP *cpi)
xd->subpixel_predict16x16 = vp8_bilinear_predict16x16;
}
- // Reset frame count of inter 0,0 motion vector usage.
- cpi->inter_zz_count = 0;
-
- cpi->prediction_error = 0;
- cpi->intra_error = 0;
- cpi->skip_true_count = 0;
+ cpi->mb.skip_true_count = 0;
cpi->tok_count = 0;
#if 0
- // Experimental code
+ /* Experimental code */
cpi->frame_distortion = 0;
cpi->last_mb_distortion = 0;
#endif
xd->mode_info_context = cm->mi;
- vp8_zero(cpi->MVcount);
-
- vp8_zero(cpi->coef_counts);
+ vp8_zero(cpi->mb.MVcount);
vp8cx_frame_init_quantizer(cpi);
- vp8_initialize_rd_consts(cpi,
+ vp8_initialize_rd_consts(cpi, x,
vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
vp8cx_initialize_me_consts(cpi, cm->base_qindex);
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
- // Initialize encode frame context.
+ /* Initialize encode frame context. */
init_encode_frame_mb_context(cpi);
- // Build a frame level activity map
+ /* Build a frame level activity map */
build_activity_map(cpi);
}
- // re-init encode frame context.
+ /* re-init encode frame context. */
init_encode_frame_mb_context(cpi);
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -768,7 +804,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
{
int i;
- vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
+ vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei,
+ cpi->encoding_thread_count);
for (i = 0; i < cm->mb_rows; i++)
cpi->mt_current_mb_col[i] = -1;
@@ -790,7 +827,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
- // adjust to the next row of mbs
+ /* adjust to the next row of mbs */
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -809,7 +846,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
{
- cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
+ cpi->tok_count += (unsigned int)
+ (cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start);
}
if (xd->segmentation_enabled)
@@ -829,14 +867,50 @@ void vp8_encode_frame(VP8_COMP *cpi)
for (i = 0; i < cpi->encoding_thread_count; i++)
{
+ int mode_count;
+ int c_idx;
totalrate += cpi->mb_row_ei[i].totalrate;
+
+ cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count;
+
+ for(mode_count = 0; mode_count < VP8_YMODES; mode_count++)
+ cpi->mb.ymode_count[mode_count] +=
+ cpi->mb_row_ei[i].mb.ymode_count[mode_count];
+
+ for(mode_count = 0; mode_count < VP8_UV_MODES; mode_count++)
+ cpi->mb.uv_mode_count[mode_count] +=
+ cpi->mb_row_ei[i].mb.uv_mode_count[mode_count];
+
+ for(c_idx = 0; c_idx < MVvals; c_idx++)
+ {
+ cpi->mb.MVcount[0][c_idx] +=
+ cpi->mb_row_ei[i].mb.MVcount[0][c_idx];
+ cpi->mb.MVcount[1][c_idx] +=
+ cpi->mb_row_ei[i].mb.MVcount[1][c_idx];
+ }
+
+ cpi->mb.prediction_error +=
+ cpi->mb_row_ei[i].mb.prediction_error;
+ cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error;
+
+ for(c_idx = 0; c_idx < MAX_REF_FRAMES; c_idx++)
+ cpi->mb.count_mb_ref_frame_usage[c_idx] +=
+ cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx];
+
+ for(c_idx = 0; c_idx < MAX_ERROR_BINS; c_idx++)
+ cpi->mb.error_bins[c_idx] +=
+ cpi->mb_row_ei[i].mb.error_bins[c_idx];
+
+ /* add up counts for each thread */
+ sum_coef_counts(x, &cpi->mb_row_ei[i].mb);
}
}
else
#endif
{
- // for each macroblock row in image
+
+ /* for each macroblock row in image */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
vp8_zero(cm->left_context)
@@ -847,13 +921,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
- // adjust to the next row of mbs
+ /* adjust to the next row of mbs */
x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
}
- cpi->tok_count = tp - cpi->tok;
+ cpi->tok_count = (unsigned int)(tp - cpi->tok);
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -873,12 +947,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
// Work out the segment probabilities if segmentation is enabled
- if (xd->segmentation_enabled)
+ // and needs to be updated
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map)
{
int tot_count;
int i;
- // Set to defaults
+ /* Set to defaults */
vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
@@ -899,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
if (tot_count > 0)
xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
- // Zero probabilities not allowed
+ /* Zero probabilities not allowed */
for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
{
if (xd->mb_segment_tree_probs[i] == 0)
@@ -908,10 +983,10 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
}
- // 256 rate units to the bit
- cpi->projected_frame_size = totalrate >> 8; // projected_frame_size in units of BYTES
+ /* projected_frame_size in units of BYTES */
+ cpi->projected_frame_size = totalrate >> 8;
- // Make a note of the percentage MBs coded Intra.
+ /* Make a note of the percentage MBs coded Intra. */
if (cm->frame_type == KEY_FRAME)
{
cpi->this_frame_percent_intra = 100;
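
Skipping the probability update when the segment map is not being transmitted avoids wasted work; when it does run, the three tree probabilities come straight from the per-segment counts. A sketch of that computation (rounding follows the hunks above; the zero-to-one remap exists because the bool coder cannot code a zero probability):

    #include <stdio.h>

    /* segment tree: node 0 splits {0,1} vs {2,3}, node 1 splits 0 vs 1,
     * node 2 splits 2 vs 3; probabilities are 8-bit and never zero. */
    static void segment_tree_probs(const int c[4], unsigned char p[3])
    {
        int tot = c[0] + c[1] + c[2] + c[3];
        int i;
        p[0] = p[1] = p[2] = 255;
        if (tot)         p[0] = (unsigned char)(((c[0] + c[1]) * 255) / tot);
        if (c[0] + c[1]) p[1] = (unsigned char)((c[0] * 255) / (c[0] + c[1]));
        if (c[2] + c[3]) p[2] = (unsigned char)((c[2] * 255) / (c[2] + c[3]));
        for (i = 0; i < 3; i++)
            if (p[i] == 0)
                p[i] = 1;
    }

    int main(void)
    {
        int counts[4] = { 100, 20, 5, 0 };
        unsigned char probs[3];
        segment_tree_probs(counts, probs);
        printf("%u %u %u\n", probs[0], probs[1], probs[2]);
        return 0;
    }
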
@@ -920,50 +995,23 @@ void vp8_encode_frame(VP8_COMP *cpi)
{
int tot_modes;
- tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
- + cpi->count_mb_ref_frame_usage[LAST_FRAME]
- + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
- + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
+ tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]
+ + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME]
+ + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME]
+ + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME];
if (tot_modes)
- cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
-
- }
-
-#if 0
- {
- int cnt = 0;
- int flag[2] = {0, 0};
-
- for (cnt = 0; cnt < MVPcount; cnt++)
- {
- if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
- {
- flag[0] = 1;
- vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
- break;
- }
- }
-
- for (cnt = 0; cnt < MVPcount; cnt++)
- {
- if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
- {
- flag[1] = 1;
- vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
- break;
- }
- }
+ cpi->this_frame_percent_intra =
+ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
- if (flag[0] || flag[1])
- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
}
-#endif
#if ! CONFIG_REALTIME_ONLY
- // Adjust the projected reference frame usage probability numbers to reflect
- // what we have just seen. This may be useful when we make multiple iterations
- // of the recode loop rather than continuing to use values from the previous frame.
+ /* Adjust the projected reference frame usage probability numbers to
+ * reflect what we have just seen. This may be useful when we make
+ * multiple iterations of the recode loop rather than continuing to use
+ * values from the previous frame.
+ */
if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) ||
(!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)))
{
@@ -1017,16 +1065,13 @@ void vp8_build_block_offsets(MACROBLOCK *x)
vp8_build_block_doffsets(&x->e_mbd);
- // y blocks
+ /* y blocks */
x->thismb_ptr = &x->thismb[0];
for (br = 0; br < 4; br++)
{
for (bc = 0; bc < 4; bc++)
{
BLOCK *this_block = &x->block[block];
- //this_block->base_src = &x->src.y_buffer;
- //this_block->src_stride = x->src.y_stride;
- //this_block->src = 4 * br * this_block->src_stride + 4 * bc;
this_block->base_src = &x->thismb_ptr;
this_block->src_stride = 16;
this_block->src = 4 * br * 16 + 4 * bc;
@@ -1034,7 +1079,7 @@ void vp8_build_block_offsets(MACROBLOCK *x)
}
}
- // u blocks
+ /* u blocks */
for (br = 0; br < 2; br++)
{
for (bc = 0; bc < 2; bc++)
@@ -1047,7 +1092,7 @@ void vp8_build_block_offsets(MACROBLOCK *x)
}
}
- // v blocks
+ /* v blocks */
for (br = 0; br < 2; br++)
{
for (bc = 0; bc < 2; bc++)
@@ -1087,13 +1132,14 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
#endif
- ++cpi->ymode_count[m];
- ++cpi->uv_mode_count[uvm];
+ ++x->ymode_count[m];
+ ++x->uv_mode_count[uvm];
}
-// Experimental stub function to create a per MB zbin adjustment based on
-// some previously calculated measure of MB activity.
+/* Experimental stub function to create a per MB zbin adjustment based on
+ * some previously calculated measure of MB activity.
+ */
static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
{
#if USE_ACT_INDEX
@@ -1103,7 +1149,7 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
int64_t b;
int64_t act = *(x->mb_activity_ptr);
- // Apply the masking to the RD multiplier.
+ /* Apply the masking to the RD multiplier. */
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
@@ -1114,15 +1160,16 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
#endif
}
-int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
+int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
+ TOKENEXTRA **t)
{
MACROBLOCKD *xd = &x->e_mbd;
int rate;
if (cpi->sf.RD && cpi->compressor_speed != 2)
- vp8_rd_pick_intra_mode(cpi, x, &rate);
+ vp8_rd_pick_intra_mode(x, &rate);
else
- vp8_pick_intra_mode(cpi, x, &rate);
+ vp8_pick_intra_mode(x, &rate);
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
@@ -1139,7 +1186,7 @@ int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
sum_intra_stats(cpi, x);
- vp8_tokenize_mb(cpi, &x->e_mbd, t);
+ vp8_tokenize_mb(cpi, x, t);
if (xd->mode_info_context->mbmi.mode != B_PRED)
vp8_inverse_transform_mby(xd);
@@ -1176,25 +1223,27 @@ int vp8cx_encode_inter_macroblock
x->encode_breakout = cpi->oxcf.encode_breakout;
#if CONFIG_TEMPORAL_DENOISING
- // Reset the best sse mode/mv for each macroblock.
- x->e_mbd.best_sse_inter_mode = 0;
- x->e_mbd.best_sse_mv.as_int = 0;
- x->e_mbd.need_to_clamp_best_mvs = 0;
+ /* Reset the best sse mode/mv for each macroblock. */
+ x->best_reference_frame = INTRA_FRAME;
+ x->best_zeromv_reference_frame = INTRA_FRAME;
+ x->best_sse_inter_mode = 0;
+ x->best_sse_mv.as_int = 0;
+ x->need_to_clamp_best_mvs = 0;
#endif
if (cpi->sf.RD)
{
- int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
+ int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
/* Are we using the fast quantizer for the mode selection? */
if(cpi->sf.use_fastquant_for_pick)
{
- cpi->mb.quantize_b = vp8_fast_quantize_b;
- cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair;
+ x->quantize_b = vp8_fast_quantize_b;
+ x->quantize_b_pair = vp8_fast_quantize_b_pair;
/* the fast quantizer does not use zbin_extra, so
* do not recalculate */
- cpi->zbin_mode_boost_enabled = 0;
+ x->zbin_mode_boost_enabled = 0;
}
vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
&distortion, &intra_error);
@@ -1202,12 +1251,12 @@ int vp8cx_encode_inter_macroblock
/* switch back to the regular quantizer for the encode */
if (cpi->sf.improved_quant)
{
- cpi->mb.quantize_b = vp8_regular_quantize_b;
- cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;
+ x->quantize_b = vp8_regular_quantize_b;
+ x->quantize_b_pair = vp8_regular_quantize_b_pair;
}
/* restore cpi->zbin_mode_boost_enabled */
- cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
+ x->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
}
else
@@ -1216,28 +1265,28 @@ int vp8cx_encode_inter_macroblock
&distortion, &intra_error, mb_row, mb_col);
}
- cpi->prediction_error += distortion;
- cpi->intra_error += intra_error;
+ x->prediction_error += distortion;
+ x->intra_error += intra_error;
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
- // Adjust the zbin based on this MB rate.
+ /* Adjust the zbin based on this MB rate. */
adjust_act_zbin( cpi, x );
}
#if 0
- // Experimental RD code
+ /* Experimental RD code */
cpi->frame_distortion += distortion;
cpi->last_mb_distortion = distortion;
#endif
- // MB level adjutment to quantizer setup
+ /* MB level adjustment to quantizer setup */
if (xd->segmentation_enabled)
{
- // If cyclic update enabled
+ /* If cyclic update enabled */
if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled)
{
- // Clear segment_id back to 0 if not coded (last frame 0,0)
+ /* Clear segment_id back to 0 if not coded (last frame 0,0) */
if ((xd->mode_info_context->mbmi.segment_id == 1) &&
((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
{
@@ -1250,24 +1299,25 @@ int vp8cx_encode_inter_macroblock
}
{
- // Experimental code. Special case for gf and arf zeromv modes.
- // Increase zbin size to supress noise
- cpi->zbin_mode_boost = 0;
- if (cpi->zbin_mode_boost_enabled)
+ /* Experimental code. Special case for gf and arf zeromv modes.
+ * Increase zbin size to suppress noise
+ */
+ x->zbin_mode_boost = 0;
+ if (x->zbin_mode_boost_enabled)
{
if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME )
{
if (xd->mode_info_context->mbmi.mode == ZEROMV)
{
if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
else
- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
}
else if (xd->mode_info_context->mbmi.mode == SPLITMV)
- cpi->zbin_mode_boost = 0;
+ x->zbin_mode_boost = 0;
else
- cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ x->zbin_mode_boost = MV_ZBIN_BOOST;
}
}
@@ -1277,7 +1327,7 @@ int vp8cx_encode_inter_macroblock
vp8_update_zbin_extra(cpi, x);
}
- cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
+ x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
@@ -1322,7 +1372,7 @@ int vp8cx_encode_inter_macroblock
if (!x->skip)
{
- vp8_tokenize_mb(cpi, xd, t);
+ vp8_tokenize_mb(cpi, x, t);
if (xd->mode_info_context->mbmi.mode != B_PRED)
vp8_inverse_transform_mby(xd);
@@ -1339,12 +1389,12 @@ int vp8cx_encode_inter_macroblock
if (cpi->common.mb_no_coeff_skip)
{
- cpi->skip_true_count ++;
+ x->skip_true_count ++;
vp8_fix_contexts(xd);
}
else
{
- vp8_stuff_mb(cpi, xd, t);
+ vp8_stuff_mb(cpi, x, t);
}
}
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 1f445b7..340dd63 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -54,10 +54,13 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib)
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
int dst_stride = x->e_mbd.dst.y_stride;
- unsigned char *base_dst = x->e_mbd.dst.y_buffer;
+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
+ unsigned char *Above = dst - dst_stride;
+ unsigned char *yleft = dst - 1;
+ unsigned char top_left = Above[-1];
- vp8_intra4x4_predict(base_dst + b->offset, dst_stride,
- b->bmi.as_mode, b->predictor, 16);
+ vp8_intra4x4_predict(Above, yleft, dst_stride, b->bmi.as_mode,
+ b->predictor, 16, top_left);
vp8_subtract_b(be, b, 16);
@@ -67,14 +70,11 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib)
if (*b->eob > 1)
{
- vp8_short_idct4x4llm(b->dqcoeff,
- b->predictor, 16, base_dst + b->offset, dst_stride);
+ vp8_short_idct4x4llm(b->dqcoeff, b->predictor, 16, dst, dst_stride);
}
else
{
- vp8_dc_only_idct_add
- (b->dqcoeff[0], b->predictor, 16, base_dst + b->offset,
- dst_stride);
+ vp8_dc_only_idct_add(b->dqcoeff[0], b->predictor, 16, dst, dst_stride);
}
}
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index f89e4f7..7d494f2 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -137,10 +137,10 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
&x->block[i].coeff[0], 32);
}
- // build dc block from 16 y dc values
+ /* build dc block from 16 y dc values */
build_dcblock(x);
- // do 2nd order transform on the dc block
+ /* do 2nd order transform on the dc block */
x->short_walsh4x4(&x->block[24].src_diff[0],
&x->block[24].coeff[0], 8);
@@ -157,7 +157,7 @@ static void transform_mb(MACROBLOCK *x)
&x->block[i].coeff[0], 32);
}
- // build dc block from 16 y dc values
+ /* build dc block from 16 y dc values */
if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
build_dcblock(x);
@@ -167,7 +167,7 @@ static void transform_mb(MACROBLOCK *x)
&x->block[i].coeff[0], 16);
}
- // do 2nd order transform on the dc block
+ /* do 2nd order transform on the dc block */
if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
x->short_walsh4x4(&x->block[24].src_diff[0],
&x->block[24].coeff[0], 8);
@@ -185,7 +185,7 @@ static void transform_mby(MACROBLOCK *x)
&x->block[i].coeff[0], 32);
}
- // build dc block from 16 y dc values
+ /* build dc block from 16 y dc values */
if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
{
build_dcblock(x);
@@ -208,7 +208,7 @@ struct vp8_token_state{
short qc;
};
-// TODO: experiments to find optimal multiple numbers
+/* TODO: experiments to find optimal multiple numbers */
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 16
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index 0145f6d..0c43d06 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -29,15 +29,15 @@ static void encode_mvcomponent(
const vp8_prob *p = mvc->prob;
const int x = v < 0 ? -v : v;
- if (x < mvnum_short) // Small
+ if (x < mvnum_short) /* Small */
{
vp8_write(w, 0, p [mvpis_short]);
vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3);
if (!x)
- return; // no sign bit
+ return; /* no sign bit */
}
- else // Large
+ else /* Large */
{
int i = 0;
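
encode_mvcomponent splits each motion-vector component by magnitude: values under mvnum_short (8 in VP8) go through a small tree, larger ones are sent as mvlong_width raw bits, and the sign comes last, omitted for zero. A toy classifier for the branch taken; the real coder additionally skips bit 3 of a long vector in one case, which this sketch ignores:

    #include <stdio.h>
    #include <stdlib.h>

    enum { mvnum_short = 8, mvlong_width = 10 }; /* VP8 values */

    static void describe(int v)
    {
        int x = abs(v);
        if (x < mvnum_short)
            printf("%4d: short, 3-bit tree%s\n", v, x ? ", sign bit" : "");
        else
            printf("%4d: long, %d magnitude bits, sign bit\n",
                   v, mvlong_width);
    }

    int main(void)
    {
        describe(0);
        describe(-5);
        describe(42);
        return 0;
    }
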
@@ -100,7 +100,7 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc
static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
{
const vp8_prob *p = mvc->prob;
- const int x = v; //v<0? -v:v;
+ const int x = v;
unsigned int cost;
if (x < mvnum_short)
@@ -132,12 +132,12 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1);
}
- return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
+ return cost; /* + vp8_cost_bit( p [MVPsign], v < 0); */
}
void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2])
{
- int i = 1; //-mv_max;
+ int i = 1;
unsigned int cost0 = 0;
unsigned int cost1 = 0;
@@ -151,7 +151,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m
do
{
- //mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]);
cost0 = cost_mvcomponent(i, &mvc[0]);
mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]);
@@ -168,7 +167,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m
do
{
- //mvcost [1] [i] = cost_mvcomponent( i, mvc[1]);
cost1 = cost_mvcomponent(i, &mvc[1]);
mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]);
@@ -179,10 +177,10 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m
}
-// Motion vector probability table update depends on benefit.
-// Small correction allows for the fact that an update to an MV probability
-// may have benefit in subsequent frames as well as the current one.
-
+/* Motion vector probability table update depends on benefit.
+ * Small correction allows for the fact that an update to an MV probability
+ * may have benefit in subsequent frames as well as the current one.
+ */
#define MV_PROB_UPDATE_CORRECTION -1
@@ -254,22 +252,22 @@ static void write_component_probs(
vp8_zero(short_bct)
- //j=0
+ /* j=0 */
{
const int c = events [mv_max];
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
+ is_short_ct [0] += c; /* Short vector */
+ short_ct [0] += c; /* Magnitude distribution */
}
- //j: 1 ~ mv_max (1023)
+ /* j: 1 ~ mv_max (1023) */
{
int j = 1;
do
{
- const int c1 = events [mv_max + j]; //positive
- const int c2 = events [mv_max - j]; //negative
+ const int c1 = events [mv_max + j]; /* positive */
+ const int c2 = events [mv_max - j]; /* negative */
const int c = c1 + c2;
int a = j;
@@ -278,13 +276,13 @@ static void write_component_probs(
if (a < mvnum_short)
{
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
+ is_short_ct [0] += c; /* Short vector */
+ short_ct [a] += c; /* Magnitude distribution */
}
else
{
int k = mvlong_width - 1;
- is_short_ct [1] += c; // Long vector
+ is_short_ct [1] += c; /* Long vector */
/* bit 3 not always encoded. */
do
@@ -296,43 +294,6 @@ static void write_component_probs(
while (++j <= mv_max);
}
- /*
- {
- int j = -mv_max;
- do
- {
-
- const int c = events [mv_max + j];
- int a = j;
-
- if( j < 0)
- {
- sign_ct [1] += c;
- a = -j;
- }
- else if( j)
- sign_ct [0] += c;
-
- if( a < mvnum_short)
- {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- }
- else
- {
- int k = mvlong_width - 1;
- is_short_ct [1] += c; // Long vector
-
- // bit 3 not always encoded.
-
- do
- bit_ct [k] [(a >> k) & 1] += c;
- while( --k >= 0);
- }
- } while( ++j <= mv_max);
- }
- */
-
calc_prob(Pnew + mvpis_short, is_short_ct);
calc_prob(Pnew + MVPsign, sign_ct);
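
calc_prob turns a two-bin event count into the 8-bit probability the bool coder consumes. An approximate restatement; the exact rounding and the no-evidence default of 128 are assumptions, and the clamp to [1, 255] matches the "zero probabilities not allowed" rule seen earlier:

    #include <stdio.h>

    static unsigned char calc_prob(unsigned int ct0, unsigned int ct1)
    {
        unsigned int tot = ct0 + ct1;
        unsigned int p;
        if (!tot)
            return 128;                 /* no events observed: assumption */
        p = (ct0 * 255 + tot / 2) / tot;
        if (p < 1)   p = 1;             /* zero probability is not codable */
        if (p > 255) p = 255;
        return (unsigned char)p;
    }

    int main(void)
    {
        printf("%u %u %u\n", calc_prob(0, 0), calc_prob(900, 100),
               calc_prob(1, 999));
        return 0;
    }
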
@@ -402,10 +363,12 @@ void vp8_write_mvprobs(VP8_COMP *cpi)
active_section = 4;
#endif
write_component_probs(
- w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->MVcount[0], 0, &flags[0]
+ w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0],
+ cpi->mb.MVcount[0], 0, &flags[0]
);
write_component_probs(
- w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->MVcount[1], 1, &flags[1]
+ w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1],
+ cpi->mb.MVcount[1], 1, &flags[1]
);
if (flags[0] || flags[1])
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 2a2cb2f..d4b17ce 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -17,12 +17,6 @@
#if CONFIG_MULTITHREAD
-extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
- TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
- int mb_row, int mb_col);
-extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
- TOKENEXTRA **t);
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);
extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
@@ -39,7 +33,7 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data)
if (sem_wait(&cpi->h_event_start_lpf) == 0)
{
- if (cpi->b_multi_threaded == 0) // we're shutting down
+ if (cpi->b_multi_threaded == 0) /* we're shutting down */
break;
vp8_loopfilter_frame(cpi, cm);
@@ -59,17 +53,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
ENTROPY_CONTEXT_PLANES mb_row_left_context;
- const int nsync = cpi->mt_sync_range;
- //printf("Started thread %d\n", ithread);
-
while (1)
{
if (cpi->b_multi_threaded == 0)
break;
- //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0)
if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0)
{
+ const int nsync = cpi->mt_sync_range;
VP8_COMMON *cm = &cpi->common;
int mb_row;
MACROBLOCK *x = &mbri->mb;
@@ -83,7 +74,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int *segment_counts = mbri->segment_counts;
int *totalrate = &mbri->totalrate;
- if (cpi->b_multi_threaded == 0) // we're shutting down
+ if (cpi->b_multi_threaded == 0) /* we're shutting down */
break;
for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
@@ -108,7 +99,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
- // reset above block coeffs
+ /* reset above block coeffs */
xd->above_context = cm->above_context;
xd->left_context = &mb_row_left_context;
@@ -118,10 +109,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);
- // Set the mb activity pointer to the start of the row.
+ /* Set the mb activity pointer to the start of the row. */
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
- // for each macroblock col in image
+ /* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
*current_mb_col = mb_col - 1;
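
Workers publish their progress through mt_current_mb_col, and each row waits until the row above is nsync columns ahead, which preserves the above-context dependency without per-MB locks. A sketch of that wait restated with C11 atomics; the 1.2.0 code itself polls a plain shared int:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Block until the row above is nsync columns ahead, or has finished. */
    static void sync_with_row_above(const atomic_int *above_col, int mb_col,
                                    int nsync, int rightmost_col)
    {
        if ((mb_col & (nsync - 1)) == 0) {   /* only check every nsync MBs */
            while (atomic_load(above_col) < mb_col + nsync &&
                   atomic_load(above_col) != rightmost_col) {
                /* spin; a production version would yield or pause here */
            }
        }
    }

    int main(void)
    {
        atomic_int above = 39;                 /* row above already done */
        sync_with_row_above(&above, 4, 4, 39); /* returns without waiting */
        printf("row synced\n");
        return 0;
    }
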
@@ -139,14 +130,18 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
tp = tp_start;
#endif
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ /* Distance of Mb to the various image edges.
+ * These specified to 8th pel as they are always compared
+ * to values that are in 1/8th pel units
+ */
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
xd->mb_to_top_edge = -((mb_row * 16) << 3);
xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
- // Set up limit values for motion vectors used to prevent them extending outside the UMV borders
+ /* Set up limit values for motion vectors used to prevent
+ * them extending outside the UMV borders
+ */
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
@@ -160,17 +155,19 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
x->rddiv = cpi->RDDIV;
x->rdmult = cpi->RDMULT;
- //Copy current mb to a buffer
+ /* Copy current mb to a buffer */
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp8_activity_masking(cpi, x);
- // Is segmentation enabled
- // MB level adjustment to quantizer
+ /* Is segmentation enabled */
+ /* MB level adjustment to quantizer */
if (xd->segmentation_enabled)
{
- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
+ /* Code to set segment id in xd->mbmi.segment_id for
+ * current MB (with range checking)
+ */
if (cpi->segmentation_map[map_index + mb_col] <= 3)
xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col];
else
@@ -179,7 +176,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
vp8cx_mb_init_quantizer(cpi, x, 1);
}
else
- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
+ /* Set to Segment 0 by default */
+ xd->mode_info_context->mbmi.segment_id = 0;
x->active_ptr = cpi->active_map + map_index + mb_col;
@@ -209,21 +207,28 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
#endif
- // Count of last ref frame 0,0 usage
- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
- cpi->inter_zz_count++;
-
- // Special case code for cyclic refresh
- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
- if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
+ /* Special case code for cyclic refresh
+ * If cyclic update enabled then copy
+ * xd->mbmi.segment_id; (which may have been updated
+ * based on mode during
+ * vp8cx_encode_inter_macroblock()) back into the
+ * global segmentation map
+ */
+ if ((cpi->current_layer == 0) &&
+ (cpi->cyclic_refresh_mode_enabled &&
+ xd->segmentation_enabled))
{
const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id;
- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
- // else mark it as dirty (1).
+ /* If the block has been refreshed mark it as clean
+ * (the magnitude of the -ve influences how long it
+ * will be before we consider another refresh):
+ * Else if it was coded (last frame 0,0) and has
+ * not already been refreshed then mark it as a
+ * candidate for cleanup next time (marked 0) else
+ * mark it as dirty (1).
+ */
if (mbmi->segment_id)
cpi->cyclic_refresh_map[map_index + mb_col] = -1;
else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
@@ -246,13 +251,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
#else
cpi->tplist[mb_row].stop = tp;
#endif
- // Increment pointer into gf usage flags structure.
+ /* Increment pointer into gf usage flags structure. */
x->gf_active_ptr++;
- // Increment the activity mask pointers.
+ /* Increment the activity mask pointers. */
x->mb_activity_ptr++;
- // adjust to the next column of macroblocks
+ /* adjust to the next column of macroblocks */
x->src.y_buffer += 16;
x->src.u_buffer += 8;
x->src.v_buffer += 8;
@@ -260,10 +265,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_yoffset += 16;
recon_uvoffset += 8;
- // Keep track of segment usage
+ /* Keep track of segment usage */
segment_counts[xd->mode_info_context->mbmi.segment_id]++;
- // skip to next mb
+ /* skip to next mb */
xd->mode_info_context++;
x->partition_info++;
xd->above_context++;
@@ -276,7 +281,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
*current_mb_col = mb_col + nsync;
- // this is to account for the border
+ /* this is to account for the border */
xd->mode_info_context++;
x->partition_info++;
@@ -296,7 +301,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
}
}
- //printf("exit thread %d\n", ithread);
+ /* printf("exit thread %d\n", ithread); */
return 0;
}
@@ -336,21 +341,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->src.v_buffer = x->src.v_buffer;
*/
+ z->mvcost[0] = x->mvcost[0];
+ z->mvcost[1] = x->mvcost[1];
+ z->mvsadcost[0] = x->mvsadcost[0];
+ z->mvsadcost[1] = x->mvsadcost[1];
- vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
- z->mvcost[0] = &z->mvcosts[0][mv_max+1];
- z->mvcost[1] = &z->mvcosts[1][mv_max+1];
- z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1];
- z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1];
-
-
- vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs));
- vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs));
- //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
- //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost));
- vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost));
- vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost));
- vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs));
+ z->token_costs = x->token_costs;
+ z->inter_bmode_costs = x->inter_bmode_costs;
+ z->mbmode_cost = x->mbmode_cost;
+ z->intra_uv_mode_cost = x->intra_uv_mode_cost;
+ z->bmode_costs = x->bmode_costs;
for (i = 0; i < 25; i++)
{
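
setup_mbby_copy now aliases the main thread's cost tables into each worker instead of copying them, which is safe because those tables are read-only while a frame is encoded; truly per-thread state such as error_bins is reset instead. A toy version of the split (the types are stand-ins, not the real MACROBLOCK):

    #include <stdio.h>
    #include <string.h>

    typedef struct {
        const int *token_costs;  /* shared, read-only during the frame */
        int error_bins[16];      /* genuinely per-thread, reset instead */
    } toy_mb;

    static void setup_worker(toy_mb *z, const toy_mb *x)
    {
        z->token_costs = x->token_costs;                 /* share by pointer */
        memset(z->error_bins, 0, sizeof(z->error_bins)); /* private state */
    }

    int main(void)
    {
        static const int costs[4] = { 1, 2, 3, 4 };
        toy_mb main_mb = { costs, { 0 } }, worker;
        setup_worker(&worker, &main_mb);
        printf("shared: %s\n",
               worker.token_costs == main_mb.token_costs ? "yes" : "no");
        return 0;
    }
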
@@ -358,17 +358,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->block[i].quant_fast = x->block[i].quant_fast;
z->block[i].quant_shift = x->block[i].quant_shift;
z->block[i].zbin = x->block[i].zbin;
- z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
+ z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].round = x->block[i].round;
- z->q_index = x->q_index;
- z->act_zbin_adj = x->act_zbin_adj;
- z->last_act_zbin_adj = x->last_act_zbin_adj;
- /*
- z->block[i].src = x->block[i].src;
- */
- z->block[i].src_stride = x->block[i].src_stride;
+ z->block[i].src_stride = x->block[i].src_stride;
}
+ z->q_index = x->q_index;
+ z->act_zbin_adj = x->act_zbin_adj;
+ z->last_act_zbin_adj = x->last_act_zbin_adj;
+
{
MACROBLOCKD *xd = &x->e_mbd;
MACROBLOCKD *zd = &z->e_mbd;
@@ -400,9 +398,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
zd->segmentation_enabled = xd->segmentation_enabled;
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
- vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
+ vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
+ sizeof(xd->segment_feature_data));
- vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
+ vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
+ sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
@@ -418,13 +418,23 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->block[i].dequant = zd->dequant_uv;
zd->block[24].dequant = zd->dequant_y2;
#endif
+
+
+ vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
+ vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult,
+ sizeof(x->rd_thresh_mult));
+
+ z->zbin_over_quant = x->zbin_over_quant;
+ z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
+ z->zbin_mode_boost = x->zbin_mode_boost;
+
+ vpx_memset(z->error_bins, 0, sizeof(z->error_bins));
}
}
void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
MACROBLOCK *x,
MB_ROW_COMP *mbr_ei,
- int mb_row,
int count
)
{
@@ -432,7 +442,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
int i;
- (void) mb_row;
for (i = 0; i < count; i++)
{
@@ -465,10 +474,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
vp8_build_block_offsets(mb);
- vp8_setup_block_dptrs(mbd);
-
- vp8_setup_block_ptrs(mb);
-
mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc;
@@ -477,10 +482,19 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mbd->fullpixel_mask = 0xffffffff;
if(cm->full_pixel)
mbd->fullpixel_mask = 0xfffffff8;
+
+ vp8_zero(mb->coef_counts);
+ vp8_zero(mb->ymode_count);
+ mb->skip_true_count = 0;
+ vp8_zero(mb->MVcount);
+ mb->prediction_error = 0;
+ mb->intra_error = 0;
+ vp8_zero(mb->count_mb_ref_frame_usage);
+ mb->mbs_tested_so_far = 0;
}
}
-void vp8cx_create_encoder_threads(VP8_COMP *cpi)
+int vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
const VP8_COMMON * cm = &cpi->common;
@@ -492,6 +506,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
int ithread;
int th_count = cpi->oxcf.multi_threaded - 1;
+ int rc = 0;
/* don't allocate more threads than cores available */
if (cpi->oxcf.multi_threaded > cm->processor_core_count)
@@ -505,16 +520,17 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
}
if(th_count == 0)
- return;
-
- CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count));
- CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count));
- CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
+ return 0;
+
+ CHECK_MEM_ERROR(cpi->h_encoding_thread,
+ vpx_malloc(sizeof(pthread_t) * th_count));
+ CHECK_MEM_ERROR(cpi->h_event_start_encoding,
+ vpx_malloc(sizeof(sem_t) * th_count));
+ CHECK_MEM_ERROR(cpi->mb_row_ei,
+ vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
CHECK_MEM_ERROR(cpi->en_thread_data,
vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
- CHECK_MEM_ERROR(cpi->mt_current_mb_col,
- vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
sem_init(&cpi->h_event_end_encoding, 0, 0);
@@ -528,16 +544,45 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
for (ithread = 0; ithread < th_count; ithread++)
{
- ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread];
+ ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];
+
+ /* Setup block ptrs and offsets */
+ vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb);
+ vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd);
sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
+
ethd->ithread = ithread;
ethd->ptr1 = (void *)cpi;
ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];
- pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
+ rc = pthread_create(&cpi->h_encoding_thread[ithread], 0,
+ thread_encoding_proc, ethd);
+ if(rc)
+ break;
}
+ if(rc)
+ {
+ /* shutdown other threads */
+ cpi->b_multi_threaded = 0;
+ for(--ithread; ithread >= 0; ithread--)
+ {
+ pthread_join(cpi->h_encoding_thread[ithread], 0);
+ sem_destroy(&cpi->h_event_start_encoding[ithread]);
+ }
+ sem_destroy(&cpi->h_event_end_encoding);
+
+ /* free thread related resources */
+ vpx_free(cpi->h_event_start_encoding);
+ vpx_free(cpi->h_encoding_thread);
+ vpx_free(cpi->mb_row_ei);
+ vpx_free(cpi->en_thread_data);
+
+ return -1;
+ }
+
+
{
LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;
@@ -545,24 +590,47 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
sem_init(&cpi->h_event_end_lpf, 0, 0);
lpfthd->ptr1 = (void *)cpi;
- pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd);
+ rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter,
+ lpfthd);
+
+ if(rc)
+ {
+ /* shutdown other threads */
+ cpi->b_multi_threaded = 0;
+ for(--ithread; ithread >= 0; ithread--)
+ {
+ sem_post(&cpi->h_event_start_encoding[ithread]);
+ pthread_join(cpi->h_encoding_thread[ithread], 0);
+ sem_destroy(&cpi->h_event_start_encoding[ithread]);
+ }
+ sem_destroy(&cpi->h_event_end_encoding);
+ sem_destroy(&cpi->h_event_end_lpf);
+ sem_destroy(&cpi->h_event_start_lpf);
+
+ /* free thread related resources */
+ vpx_free(cpi->h_event_start_encoding);
+ vpx_free(cpi->h_encoding_thread);
+ vpx_free(cpi->mb_row_ei);
+ vpx_free(cpi->en_thread_data);
+
+ return -2;
+ }
}
}
-
+ return 0;
}
void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
{
if (cpi->b_multi_threaded)
{
- //shutdown other threads
+ /* shutdown other threads */
cpi->b_multi_threaded = 0;
{
int i;
for (i = 0; i < cpi->encoding_thread_count; i++)
{
- //SetEvent(cpi->h_event_mbrencoding[i]);
sem_post(&cpi->h_event_start_encoding[i]);
pthread_join(cpi->h_encoding_thread[i], 0);
@@ -577,12 +645,11 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
sem_destroy(&cpi->h_event_end_lpf);
sem_destroy(&cpi->h_event_start_lpf);
- //free thread related resources
+ /* free thread related resources */
vpx_free(cpi->h_event_start_encoding);
vpx_free(cpi->h_encoding_thread);
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
- vpx_free(cpi->mt_current_mb_col);
}
}
#endif
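
vp8cx_create_encoder_threads now reports pthread_create failures and unwinds whatever it already started instead of continuing with a half-built pool. The pattern in isolation (toy worker; build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    static void *worker(void *arg)
    {
        (void)arg;
        return 0;
    }

    /* Start count threads; on the first failure, join the ones already
     * running and report an error rather than leaking a partial pool. */
    static int create_pool(pthread_t *tid, int count)
    {
        int i, rc = 0;
        for (i = 0; i < count; i++) {
            rc = pthread_create(&tid[i], 0, worker, 0);
            if (rc)
                break;
        }
        if (rc) {
            while (--i >= 0)
                pthread_join(tid[i], 0);
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        pthread_t tid[4];
        int i;
        if (create_pool(tid, 4))
            return 1;
        for (i = 0; i < 4; i++)  /* workers exit immediately in this toy */
            pthread_join(tid[i], 0);
        printf("pool created and joined\n");
        return 0;
    }
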
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 8de1a6a..30bf8a6 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -30,14 +30,12 @@
#include "encodemv.h"
#include "encodeframe.h"
-//#define OUTPUT_FPF 1
+/* #define OUTPUT_FPF 1 */
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
extern void vp8_alloc_compressor_data(VP8_COMP *cpi);
-//#define GFQ_ADJUSTMENT (40 + ((15*Q)/10))
-//#define GFQ_ADJUSTMENT (80 + ((15*Q)/10))
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
@@ -77,7 +75,9 @@ static const int cq_level[QINDEX_RANGE] =
static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
-// Resets the first pass file to the given position using a relative seek from the current position
+/* Resets the first pass file to the given position using a relative seek
+ * from the current position
+ */
static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position)
{
cpi->twopass.stats_in = Position;
@@ -92,14 +92,14 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
return 1;
}
-// Read frame stats at an offset from the current position
+/* Read frame stats at an offset from the current position */
static int read_frame_stats( VP8_COMP *cpi,
FIRSTPASS_STATS *frame_stats,
int offset )
{
FIRSTPASS_STATS * fps_ptr = cpi->twopass.stats_in;
- // Check legality of offset
+ /* Check legality of offset */
if ( offset >= 0 )
{
if ( &fps_ptr[offset] >= cpi->twopass.stats_in_end )
@@ -136,7 +136,7 @@ static void output_stats(const VP8_COMP *cpi,
pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
vpx_codec_pkt_list_add(pktlist, &pkt);
-// TEMP debug code
+/* TEMP debug code */
#if OUTPUT_FPF
{
@@ -257,7 +257,9 @@ static void avg_stats(FIRSTPASS_STATS *section)
section->duration /= section->count;
}
-// Calculate a modified Error used in distributing bits between easier and harder frames
+/* Calculate a modified Error used in distributing bits between easier
+ * and harder frames
+ */
static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
{
double av_err = ( cpi->twopass.total_stats.ssim_weighted_pred_err /
@@ -315,7 +317,9 @@ static double simple_weight(YV12_BUFFER_CONFIG *source)
unsigned char *src = source->y_buffer;
double sum_weights = 0.0;
- // Loop throught the Y plane raw examining levels and creating a weight for the image
+    /* Loop through the raw Y plane examining levels and creating a weight
+ * for the image
+ */
i = source->y_height;
do
{
@@ -335,41 +339,52 @@ static double simple_weight(YV12_BUFFER_CONFIG *source)
}
-// This function returns the current per frame maximum bitrate target
+/* This function returns the current per frame maximum bitrate target */
static int frame_max_bits(VP8_COMP *cpi)
{
- // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left
+ /* Max allocation for a single frame based on the max section guidelines
+ * passed in and how many bits are left
+ */
int max_bits;
- // For CBR we need to also consider buffer fullness.
- // If we are running below the optimal level then we need to gradually tighten up on max_bits.
+ /* For CBR we need to also consider buffer fullness.
+ * If we are running below the optimal level then we need to gradually
+ * tighten up on max_bits.
+ */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
- // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user
+ /* For CBR base this on the target average bits per frame plus the
+         * maximum section rate passed in by the user
+ */
max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
- // If our buffer is below the optimum level
+ /* If our buffer is below the optimum level */
if (buffer_fullness_ratio < 1.0)
{
- // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4.
+ /* The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. */
int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2;
max_bits = (int)(max_bits * buffer_fullness_ratio);
+ /* Lowest value we will set ... which should allow the buffer to
+ * refill.
+ */
if (max_bits < min_max_bits)
- max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil.
+ max_bits = min_max_bits;
}
}
- // VBR
+ /* VBR */
else
{
- // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
+ /* For VBR base this on the bits and frames left plus the
+ * two_pass_vbrmax_section rate passed in by the user
+ */
max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
}
- // Trap case where we are out of bits
+ /* Trap case where we are out of bits */
if (max_bits < 0)
max_bits = 0;
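
To make the CBR branch concrete, a self-contained sketch with invented numbers that mirrors the arithmetic above (none of these values come from the diff):

    #include <stdio.h>

    int main(void)
    {
        int av_per_frame_bandwidth = 40000;      /* assumed target bits/frame */
        double two_pass_vbrmax_section = 400.0;  /* assumed user limit, in % */
        double buffer_fullness_ratio = 0.5;      /* buffer at half its optimal level */

        int max_bits = (int)(av_per_frame_bandwidth *
                             (two_pass_vbrmax_section / 100.0));  /* 160000 */

        if (buffer_fullness_ratio < 1.0)
        {
            int min_max_bits = ((av_per_frame_bandwidth >> 2) < (max_bits >> 2))
                                   ? av_per_frame_bandwidth >> 2 : max_bits >> 2;

            max_bits = (int)(max_bits * buffer_fullness_ratio);   /* 80000 */
            if (max_bits < min_max_bits)  /* floor so the buffer can refill */
                max_bits = min_max_bits;
        }

        printf("per-frame ceiling: %d bits\n", max_bits);  /* prints 80000 */
        return 0;
    }
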
@@ -403,13 +418,13 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
unsigned char *ref_ptr;
int ref_stride = x->e_mbd.pre.y_stride;
- // Set up pointers for this macro block raw buffer
+ /* Set up pointers for this macro block raw buffer */
raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
+ d->offset);
vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
(unsigned int *)(raw_motion_err));
- // Set up pointers for this macro block recon buffer
+ /* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
@@ -430,19 +445,19 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
int_mv ref_mv_full;
int tmp_err;
- int step_param = 3; //3; // Dont search over full range for first pass
- int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;
+    int step_param = 3; /* Don't search over full range for first pass */
+ int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
int n;
vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
int new_mv_mode_penalty = 256;
- // override the default variance function to use MSE
+ /* override the default variance function to use MSE */
v_fn_ptr.vf = vp8_mse16x16;
- // Set up pointers for this macro block recon buffer
+ /* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
- // Initial step/diamond search centred on best mv
+ /* Initial step/diamond search centred on best mv */
tmp_mv.as_int = 0;
ref_mv_full.as_mv.col = ref_mv->as_mv.col>>3;
ref_mv_full.as_mv.row = ref_mv->as_mv.row>>3;
@@ -459,7 +474,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
best_mv->col = tmp_mv.as_mv.col;
}
- // Further step/diamond searches as necessary
+ /* Further step/diamond searches as necessary */
n = num00;
num00 = 0;
@@ -520,7 +535,7 @@ void vp8_first_pass(VP8_COMP *cpi)
zero_ref_mv.as_int = 0;
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
x->src = * cpi->Source;
xd->pre = *lst_yv12;
@@ -530,44 +545,55 @@ void vp8_first_pass(VP8_COMP *cpi)
xd->mode_info_context = cm->mi;
- vp8_build_block_offsets(x);
-
- vp8_setup_block_dptrs(&x->e_mbd);
+ if(!cm->use_bilinear_mc_filter)
+ {
+ xd->subpixel_predict = vp8_sixtap_predict4x4;
+ xd->subpixel_predict8x4 = vp8_sixtap_predict8x4;
+ xd->subpixel_predict8x8 = vp8_sixtap_predict8x8;
+ xd->subpixel_predict16x16 = vp8_sixtap_predict16x16;
+ }
+ else
+ {
+ xd->subpixel_predict = vp8_bilinear_predict4x4;
+ xd->subpixel_predict8x4 = vp8_bilinear_predict8x4;
+ xd->subpixel_predict8x8 = vp8_bilinear_predict8x8;
+ xd->subpixel_predict16x16 = vp8_bilinear_predict16x16;
+ }
- vp8_setup_block_ptrs(x);
+ vp8_build_block_offsets(x);
- // set up frame new frame for intra coded blocks
+    /* set up the new frame for intra coded blocks */
vp8_setup_intra_recon(new_yv12);
vp8cx_frame_init_quantizer(cpi);
- // Initialise the MV cost table to the defaults
- //if( cm->current_video_frame == 0)
- //if ( 0 )
+ /* Initialise the MV cost table to the defaults */
{
int flag[2] = {1, 1};
- vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
+ vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
}
- // for each macroblock row in image
+ /* for each macroblock row in image */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
int_mv best_ref_mv;
best_ref_mv.as_int = 0;
- // reset above block coeffs
+ /* reset above block coeffs */
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);
- // Set up limit values for motion vectors to prevent them extending outside the UMV borders
+ /* Set up limit values for motion vectors to prevent them extending
+ * outside the UMV borders
+ */
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
- // for each macroblock col in image
+ /* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int this_error;
@@ -579,26 +605,33 @@ void vp8_first_pass(VP8_COMP *cpi)
xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
- //Copy current mb to a buffer
+ /* Copy current mb to a buffer */
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
- // do intra 16x16 prediction
+ /* do intra 16x16 prediction */
this_error = vp8_encode_intra(cpi, x, use_dc_pred);
- // "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame)
- // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv.
- // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames.
- // This penalty adds a cost matching that of a 0,0 mv to the intra case.
+ /* "intrapenalty" below deals with situations where the intra
+ * and inter error scores are very low (eg a plain black frame)
+ * We do not have special cases in first pass for 0,0 and
+ * nearest etc so all inter modes carry an overhead cost
+             * estimate for the mv. When the error score is very low this
+ * causes us to pick all or lots of INTRA modes and throw lots
+ * of key frames. This penalty adds a cost matching that of a
+ * 0,0 mv to the intra case.
+ */
this_error += intrapenalty;
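
Invented numbers make the effect clear: on a flat black frame the intra score might be 3 and the inter score 15 (the inter side already carries the estimated mv overhead). Without the adjustment intra wins nearly every macroblock and the first pass throws key frames; after adding an intrapenalty comparable to a 0,0 mv cost, say 256, intra scores 259 and inter is chosen instead.
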
- // Cumulative intra error total
+ /* Cumulative intra error total */
intra_error += (int64_t)this_error;
- // Set up limit values for motion vectors to prevent them extending outside the UMV borders
+ /* Set up limit values for motion vectors to prevent them
+ * extending outside the UMV borders
+ */
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
- // Other than for the first frame do a motion search
+ /* Other than for the first frame do a motion search */
if (cm->current_video_frame > 0)
{
BLOCKD *d = &x->e_mbd.block[0];
@@ -607,7 +640,7 @@ void vp8_first_pass(VP8_COMP *cpi)
int motion_error = INT_MAX;
int raw_motion_error = INT_MAX;
- // Simple 0,0 motion with no mv overhead
+ /* Simple 0,0 motion with no mv overhead */
zz_motion_search( cpi, x, cpi->last_frame_unscaled_source,
&raw_motion_error, lst_yv12, &motion_error,
recon_yoffset );
@@ -617,13 +650,16 @@ void vp8_first_pass(VP8_COMP *cpi)
if (raw_motion_error < cpi->oxcf.encode_breakout)
goto skip_motion_search;
- // Test last reference frame using the previous best mv as the
- // starting point (best reference) for the search
+ /* Test last reference frame using the previous best mv as the
+ * starting point (best reference) for the search
+ */
first_pass_motion_search(cpi, x, &best_ref_mv,
&d->bmi.mv.as_mv, lst_yv12,
&motion_error, recon_yoffset);
- // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
+ /* If the current best reference mv is not centred on 0,0
+ * then do a 0,0 based search as well
+ */
if (best_ref_mv.as_int)
{
tmp_err = INT_MAX;
@@ -638,7 +674,9 @@ void vp8_first_pass(VP8_COMP *cpi)
}
}
- // Experimental search in a second reference frame ((0,0) based only)
+ /* Experimental search in a second reference frame ((0,0)
+ * based only)
+ */
if (cm->current_video_frame > 1)
{
first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
@@ -646,19 +684,9 @@ void vp8_first_pass(VP8_COMP *cpi)
if ((gf_motion_error < motion_error) && (gf_motion_error < this_error))
{
second_ref_count++;
- //motion_error = gf_motion_error;
- //d->bmi.mv.as_mv.row = tmp_mv.row;
- //d->bmi.mv.as_mv.col = tmp_mv.col;
}
- /*else
- {
- xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset;
- }*/
-
- // Reset to last frame as reference buffer
+ /* Reset to last frame as reference buffer */
xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
@@ -670,10 +698,11 @@ skip_motion_search:
if (motion_error <= this_error)
{
- // Keep a count of cases where the inter and intra were
- // very close and very low. This helps with scene cut
- // detection for example in cropped clips with black bars
- // at the sides or top and bottom.
+ /* Keep a count of cases where the inter and intra were
+ * very close and very low. This helps with scene cut
+ * detection for example in cropped clips with black bars
+ * at the sides or top and bottom.
+ */
if( (((this_error-intrapenalty) * 9) <=
(motion_error*10)) &&
(this_error < (2*intrapenalty)) )
@@ -696,17 +725,17 @@ skip_motion_search:
best_ref_mv.as_int = d->bmi.mv.as_int;
- // Was the vector non-zero
+ /* Was the vector non-zero */
if (d->bmi.mv.as_int)
{
mvcount++;
- // Was it different from the last non zero vector
+ /* Was it different from the last non zero vector */
if ( d->bmi.mv.as_int != lastmv_as_int )
new_mv_count++;
lastmv_as_int = d->bmi.mv.as_int;
- // Does the Row vector point inwards or outwards
+ /* Does the Row vector point inwards or outwards */
if (mb_row < cm->mb_rows / 2)
{
if (d->bmi.mv.as_mv.row > 0)
@@ -722,7 +751,7 @@ skip_motion_search:
sum_in_vectors--;
}
- // Does the Row vector point inwards or outwards
+                        /* Does the Col vector point inwards or outwards */
if (mb_col < cm->mb_cols / 2)
{
if (d->bmi.mv.as_mv.col > 0)
@@ -743,7 +772,7 @@ skip_motion_search:
coded_error += (int64_t)this_error;
- // adjust to the next column of macroblocks
+ /* adjust to the next column of macroblocks */
x->src.y_buffer += 16;
x->src.u_buffer += 8;
x->src.v_buffer += 8;
@@ -752,25 +781,25 @@ skip_motion_search:
recon_uvoffset += 8;
}
- // adjust to the next row of mbs
+ /* adjust to the next row of mbs */
x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
- //extend the recon for intra prediction
+ /* extend the recon for intra prediction */
vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
}
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
{
double weight = 0.0;
FIRSTPASS_STATS fps;
fps.frame = cm->current_video_frame ;
- fps.intra_error = intra_error >> 8;
- fps.coded_error = coded_error >> 8;
+ fps.intra_error = (double)(intra_error >> 8);
+ fps.coded_error = (double)(coded_error >> 8);
weight = simple_weight(cpi->Source);
@@ -809,12 +838,13 @@ skip_motion_search:
fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs;
}
- // TODO: handle the case when duration is set to 0, or something less
- // than the full time between subsequent cpi->source_time_stamp s .
- fps.duration = cpi->source->ts_end
- - cpi->source->ts_start;
+ /* TODO: handle the case when duration is set to 0, or something less
+ * than the full time between subsequent cpi->source_time_stamps
+ */
+ fps.duration = (double)(cpi->source->ts_end
+ - cpi->source->ts_start);
- // don't want to do output stats with a stack variable!
+ /* don't want to do output stats with a stack variable! */
memcpy(&cpi->twopass.this_frame_stats,
&fps,
sizeof(FIRSTPASS_STATS));
@@ -822,7 +852,9 @@ skip_motion_search:
accumulate_stats(&cpi->twopass.total_stats, &fps);
}
- // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
+ /* Copy the previous Last Frame into the GF buffer if specific
+ * conditions for doing so are met
+ */
if ((cm->current_video_frame > 0) &&
(cpi->twopass.this_frame_stats.pcnt_inter > 0.20) &&
((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0))
@@ -830,18 +862,22 @@ skip_motion_search:
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
}
- // swap frame pointers so last frame refers to the frame we just compressed
+ /* swap frame pointers so last frame refers to the frame we just
+ * compressed
+ */
vp8_swap_yv12_buffer(lst_yv12, new_yv12);
vp8_yv12_extend_frame_borders(lst_yv12);
- // Special case for the first frame. Copy into the GF buffer as a second reference.
+ /* Special case for the first frame. Copy into the GF buffer as a
+ * second reference.
+ */
if (cm->current_video_frame == 0)
{
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
}
- // use this to see what the first pass reconstruction looks like
+ /* use this to see what the first pass reconstruction looks like */
if (0)
{
char filename[512];
@@ -853,7 +889,8 @@ skip_motion_search:
else
recon_file = fopen(filename, "ab");
- if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file));
+ (void) fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1,
+ recon_file);
fclose(recon_file);
}
@@ -862,11 +899,10 @@ skip_motion_search:
}
extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
-// Estimate a cost per mb attributable to overheads such as the coding of
-// modes and motion vectors.
-// Currently simplistic in its assumptions for testing.
-//
-
+/* Estimate a cost per mb attributable to overheads such as the coding of
+ * modes and motion vectors.
+ * Currently simplistic in its assumptions for testing.
+ */
static double bitcost( double prob )
{
@@ -890,12 +926,14 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi,
motion_cost = bitcost(av_pct_motion);
intra_cost = bitcost(av_intra);
- // Estimate of extra bits per mv overhead for mbs
- // << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb
+ /* Estimate of extra bits per mv overhead for mbs
+ * << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb
+ */
mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9;
- // Crude estimate of overhead cost from modes
- // << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
+ /* Crude estimate of overhead cost from modes
+ * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
+ */
mode_cost =
(int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) +
(av_pct_motion * motion_cost) +
@@ -914,17 +952,17 @@ static double calc_correction_factor( double err_per_mb,
double error_term = err_per_mb / err_devisor;
double correction_factor;
- // Adjustment based on Q to power term.
+ /* Adjustment based on Q to power term. */
power_term = pt_low + (Q * 0.01);
power_term = (power_term > pt_high) ? pt_high : power_term;
- // Adjustments to error term
- // TBD
+ /* Adjustments to error term */
+ /* TBD */
- // Calculate correction factor
+ /* Calculate correction factor */
correction_factor = pow(error_term, power_term);
- // Clip range
+ /* Clip range */
correction_factor =
(correction_factor < 0.05)
? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor;
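
The shape of this correction is easiest to see with numbers; pt_low/pt_high below follow the 0.40/0.90 arguments visible at the call sites in this file, everything else is invented. A standalone sketch:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        double err_per_mb = 300.0, err_divisor = 150.0;  /* assumed inputs */
        double pt_low = 0.40, pt_high = 0.90;
        int Q = 60;

        double error_term = err_per_mb / err_divisor;  /* 2.0 */
        double power_term = pt_low + Q * 0.01;         /* 1.00 */
        if (power_term > pt_high)
            power_term = pt_high;                      /* clamped to 0.90 */

        double cf = pow(error_term, power_term);       /* ~1.87 */
        if (cf < 0.05)
            cf = 0.05;
        else if (cf > 5.0)
            cf = 5.0;

        printf("correction_factor = %.2f\n", cf);
        return 0;
    }
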
@@ -948,15 +986,16 @@ static int estimate_max_q(VP8_COMP *cpi,
int overhead_bits_per_mb;
if (section_target_bandwitdh <= 0)
- return cpi->twopass.maxq_max_limit; // Highest value allowed
+ return cpi->twopass.maxq_max_limit; /* Highest value allowed */
target_norm_bits_per_mb =
(section_target_bandwitdh < (1 << 20))
? (512 * section_target_bandwitdh) / num_mbs
: 512 * (section_target_bandwitdh / num_mbs);
- // Calculate a corrective factor based on a rolling ratio of bits spent
- // vs target bits
+ /* Calculate a corrective factor based on a rolling ratio of bits spent
+ * vs target bits
+ */
if ((cpi->rolling_target_bits > 0) &&
(cpi->active_worst_quality < cpi->worst_quality))
{
@@ -977,8 +1016,9 @@ static int estimate_max_q(VP8_COMP *cpi,
? 10.0 : cpi->twopass.est_max_qcorrection_factor;
}
- // Corrections for higher compression speed settings
- // (reduced compression expected)
+ /* Corrections for higher compression speed settings
+ * (reduced compression expected)
+ */
if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
{
if (cpi->oxcf.cpu_used <= 5)
@@ -987,18 +1027,20 @@ static int estimate_max_q(VP8_COMP *cpi,
speed_correction = 1.25;
}
- // Estimate of overhead bits per mb
- // Correction to overhead bits for min allowed Q.
+ /* Estimate of overhead bits per mb */
+ /* Correction to overhead bits for min allowed Q. */
overhead_bits_per_mb = overhead_bits / num_mbs;
- overhead_bits_per_mb *= pow( 0.98, (double)cpi->twopass.maxq_min_limit );
+ overhead_bits_per_mb = (int)(overhead_bits_per_mb *
+ pow( 0.98, (double)cpi->twopass.maxq_min_limit ));
- // Try and pick a max Q that will be high enough to encode the
- // content at the given rate.
+ /* Try and pick a max Q that will be high enough to encode the
+ * content at the given rate.
+ */
for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++)
{
int bits_per_mb_at_this_q;
- // Error per MB based correction factor
+ /* Error per MB based correction factor */
err_correction_factor =
calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q);
@@ -1010,27 +1052,29 @@ static int estimate_max_q(VP8_COMP *cpi,
* cpi->twopass.section_max_qfactor
* (double)bits_per_mb_at_this_q);
- // Mode and motion overhead
- // As Q rises in real encode loop rd code will force overhead down
- // We make a crude adjustment for this here as *.98 per Q step.
+ /* Mode and motion overhead */
+ /* As Q rises in real encode loop rd code will force overhead down
+ * We make a crude adjustment for this here as *.98 per Q step.
+ */
overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
}
- // Restriction on active max q for constrained quality mode.
+ /* Restriction on active max q for constrained quality mode. */
if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
(Q < cpi->cq_target_quality) )
{
Q = cpi->cq_target_quality;
}
- // Adjust maxq_min_limit and maxq_max_limit limits based on
- // averaga q observed in clip for non kf/gf.arf frames
- // Give average a chance to settle though.
+ /* Adjust maxq_min_limit and maxq_max_limit limits based on
+     * average q observed in clip for non kf/gf/arf frames
+ * Give average a chance to settle though.
+ */
if ( (cpi->ni_frames >
- ((unsigned int)cpi->twopass.total_stats.count >> 8)) &&
+ ((int)cpi->twopass.total_stats.count >> 8)) &&
(cpi->ni_frames > 150) )
{
cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality)
@@ -1042,8 +1086,9 @@ static int estimate_max_q(VP8_COMP *cpi,
return Q;
}
-// For cq mode estimate a cq level that matches the observed
-// complexity and data rate.
+/* For cq mode estimate a cq level that matches the observed
+ * complexity and data rate.
+ */
static int estimate_cq( VP8_COMP *cpi,
FIRSTPASS_STATS * fpstats,
int section_target_bandwitdh,
@@ -1072,11 +1117,12 @@ static int estimate_cq( VP8_COMP *cpi,
? (512 * section_target_bandwitdh) / num_mbs
: 512 * (section_target_bandwitdh / num_mbs);
- // Estimate of overhead bits per mb
+ /* Estimate of overhead bits per mb */
overhead_bits_per_mb = overhead_bits / num_mbs;
- // Corrections for higher compression speed settings
- // (reduced compression expected)
+ /* Corrections for higher compression speed settings
+ * (reduced compression expected)
+ */
if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
{
if (cpi->oxcf.cpu_used <= 5)
@@ -1085,19 +1131,19 @@ static int estimate_cq( VP8_COMP *cpi,
speed_correction = 1.25;
}
- // II ratio correction factor for clip as a whole
+ /* II ratio correction factor for clip as a whole */
clip_iiratio = cpi->twopass.total_stats.intra_error /
DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error);
clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
if (clip_iifactor < 0.80)
clip_iifactor = 0.80;
- // Try and pick a Q that can encode the content at the given rate.
+ /* Try and pick a Q that can encode the content at the given rate. */
for (Q = 0; Q < MAXQ; Q++)
{
int bits_per_mb_at_this_q;
- // Error per MB based correction factor
+ /* Error per MB based correction factor */
err_correction_factor =
calc_correction_factor(err_per_mb, 100.0, 0.40, 0.90, Q);
@@ -1110,16 +1156,17 @@ static int estimate_cq( VP8_COMP *cpi,
clip_iifactor *
(double)bits_per_mb_at_this_q);
- // Mode and motion overhead
- // As Q rises in real encode loop rd code will force overhead down
- // We make a crude adjustment for this here as *.98 per Q step.
+ /* Mode and motion overhead */
+ /* As Q rises in real encode loop rd code will force overhead down
+ * We make a crude adjustment for this here as *.98 per Q step.
+ */
overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
}
- // Clip value to range "best allowed to (worst allowed - 1)"
+ /* Clip value to range "best allowed to (worst allowed - 1)" */
Q = cq_level[Q];
if ( Q >= cpi->worst_quality )
Q = cpi->worst_quality - 1;
@@ -1141,7 +1188,9 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs);
- // Corrections for higher compression speed settings (reduced compression expected)
+ /* Corrections for higher compression speed settings
+ * (reduced compression expected)
+ */
if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
{
if (cpi->oxcf.cpu_used <= 5)
@@ -1150,12 +1199,12 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
speed_correction = 1.25;
}
- // Try and pick a Q that can encode the content at the given rate.
+ /* Try and pick a Q that can encode the content at the given rate. */
for (Q = 0; Q < MAXQ; Q++)
{
int bits_per_mb_at_this_q;
- // Error per MB based correction factor
+ /* Error per MB based correction factor */
err_correction_factor =
calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q);
@@ -1172,7 +1221,7 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
return Q;
}
-// Estimate a worst case Q for a KF group
+/* Estimate a worst case Q for a KF group */
static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio)
{
int Q;
@@ -1192,12 +1241,14 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
double combined_correction_factor;
- // Trap special case where the target is <= 0
+ /* Trap special case where the target is <= 0 */
if (target_norm_bits_per_mb <= 0)
return MAXQ * 2;
- // Calculate a corrective factor based on a rolling ratio of bits spent vs target bits
- // This is clamped to the range 0.1 to 10.0
+ /* Calculate a corrective factor based on a rolling ratio of bits spent
+ * vs target bits
+ * This is clamped to the range 0.1 to 10.0
+ */
if (cpi->long_rolling_target_bits <= 0)
current_spend_ratio = 10.0;
else
@@ -1206,14 +1257,19 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
current_spend_ratio = (current_spend_ratio > 10.0) ? 10.0 : (current_spend_ratio < 0.1) ? 0.1 : current_spend_ratio;
}
- // Calculate a correction factor based on the quality of prediction in the sequence as indicated by intra_inter error score ratio (IIRatio)
- // The idea here is to favour subsampling in the hardest sections vs the easyest.
+ /* Calculate a correction factor based on the quality of prediction in
+ * the sequence as indicated by intra_inter error score ratio (IIRatio)
+ * The idea here is to favour subsampling in the hardest sections vs
+     * the easiest.
+ */
iiratio_correction_factor = 1.0 - ((group_iiratio - 6.0) * 0.1);
if (iiratio_correction_factor < 0.5)
iiratio_correction_factor = 0.5;
- // Corrections for higher compression speed settings (reduced compression expected)
+ /* Corrections for higher compression speed settings
+ * (reduced compression expected)
+ */
if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
{
if (cpi->oxcf.cpu_used <= 5)
@@ -1222,13 +1278,15 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
speed_correction = 1.25;
}
- // Combine the various factors calculated above
+ /* Combine the various factors calculated above */
combined_correction_factor = speed_correction * iiratio_correction_factor * current_spend_ratio;
- // Try and pick a Q that should be high enough to encode the content at the given rate.
+ /* Try and pick a Q that should be high enough to encode the content at
+ * the given rate.
+ */
for (Q = 0; Q < MAXQ; Q++)
{
- // Error per MB based correction factor
+ /* Error per MB based correction factor */
err_correction_factor =
calc_correction_factor(err_per_mb, 150.0, pow_lowq, pow_highq, Q);
@@ -1241,7 +1299,9 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
break;
}
- // If we could not hit the target even at Max Q then estimate what Q would have bee required
+ /* If we could not hit the target even at Max Q then estimate what Q
+ * would have been required
+ */
while ((bits_per_mb_at_this_q > target_norm_bits_per_mb) && (Q < (MAXQ * 2)))
{
@@ -1280,30 +1340,34 @@ void vp8_init_second_pass(VP8_COMP *cpi)
cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
cpi->twopass.total_left_stats = cpi->twopass.total_stats;
- // each frame can have a different duration, as the frame rate in the source
- // isn't guaranteed to be constant. The frame rate prior to the first frame
- // encoded in the second pass is a guess. However the sum duration is not.
- // Its calculated based on the actual durations of all frames from the first
- // pass.
+ /* each frame can have a different duration, as the frame rate in the
+ * source isn't guaranteed to be constant. The frame rate prior to
+ * the first frame encoded in the second pass is a guess. However the
+     * sum duration is not. It's calculated based on the actual durations of
+ * all frames from the first pass.
+ */
vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
cpi->output_frame_rate = cpi->frame_rate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
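
These expressions put timestamps in units of 1/10,000,000 second. As a worked example with invented numbers: 300 first-pass frames whose durations sum to 100,000,000 ticks (10 s) give vp8_new_frame_rate() a rate of 10000000.0 * 300 / 100000000.0 = 30 fps, and a 400,000 bps target makes the starting bits_left 10 * 400,000 = 4,000,000 bits before the minimum-rate deduction.
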
- // Calculate a minimum intra value to be used in determining the IIratio
- // scores used in the second pass. We have this minimum to make sure
- // that clips that are static but "low complexity" in the intra domain
- // are still boosted appropriately for KF/GF/ARF
+ /* Calculate a minimum intra value to be used in determining the IIratio
+ * scores used in the second pass. We have this minimum to make sure
+ * that clips that are static but "low complexity" in the intra domain
+ * are still boosted appropriately for KF/GF/ARF
+ */
cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
- // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
+ /* Scan the first pass file and calculate an average Intra / Inter error
+ * score ratio for the sequence
+ */
{
double sum_iiratio = 0.0;
double IIRatio;
- start_pos = cpi->twopass.stats_in; // Note starting "file" position
+ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */
while (input_stats(cpi, &this_frame) != EOF)
{
@@ -1314,14 +1378,15 @@ void vp8_init_second_pass(VP8_COMP *cpi)
cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count);
- // Reset file position
+ /* Reset file position */
reset_fpf_position(cpi, start_pos);
}
- // Scan the first pass file and calculate a modified total error based upon the bias/power function
- // used to allocate bits
+ /* Scan the first pass file and calculate a modified total error based
+ * upon the bias/power function used to allocate bits
+ */
{
- start_pos = cpi->twopass.stats_in; // Note starting "file" position
+ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */
cpi->twopass.modified_error_total = 0.0;
cpi->twopass.modified_error_used = 0.0;
@@ -1332,7 +1397,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
}
cpi->twopass.modified_error_left = cpi->twopass.modified_error_total;
- reset_fpf_position(cpi, start_pos); // Reset file position
+ reset_fpf_position(cpi, start_pos); /* Reset file position */
}
}
@@ -1341,23 +1406,24 @@ void vp8_end_second_pass(VP8_COMP *cpi)
{
}
-// This function gives and estimate of how badly we believe
-// the prediction quality is decaying from frame to frame.
+/* This function gives an estimate of how badly we believe the prediction
+ * quality is decaying from frame to frame.
+ */
static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
{
double prediction_decay_rate;
double motion_decay;
double motion_pct = next_frame->pcnt_motion;
- // Initial basis is the % mbs inter coded
+ /* Initial basis is the % mbs inter coded */
prediction_decay_rate = next_frame->pcnt_inter;
- // High % motion -> somewhat higher decay rate
+ /* High % motion -> somewhat higher decay rate */
motion_decay = (1.0 - (motion_pct / 20.0));
if (motion_decay < prediction_decay_rate)
prediction_decay_rate = motion_decay;
- // Adjustment to decay rate based on speed of motion
+ /* Adjustment to decay rate based on speed of motion */
{
double this_mv_rabs;
double this_mv_cabs;
@@ -1377,9 +1443,10 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra
return prediction_decay_rate;
}
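
A standalone sketch of the first two terms with invented stats (the mvr/mvc speed adjustment from the block above is omitted):

    #include <stdio.h>

    int main(void)
    {
        double pcnt_inter = 0.90;   /* assumed: 90% of mbs inter coded */
        double pcnt_motion = 0.40;  /* assumed: 40% of mbs with non-zero mv */

        double rate = pcnt_inter;                          /* initial basis */
        double motion_decay = 1.0 - (pcnt_motion / 20.0);  /* 0.98 */
        if (motion_decay < rate)
            rate = motion_decay;

        /* prints 0.90: the inter percentage is the binding term here */
        printf("decay rate before speed adjustment = %.2f\n", rate);
        return 0;
    }
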
-// Function to test for a condition where a complex transition is followed
-// by a static section. For example in slide shows where there is a fade
-// between slides. This is to help with more optimal kf and gf positioning.
+/* Function to test for a condition where a complex transition is followed
+ * by a static section. For example in slide shows where there is a fade
+ * between slides. This is to help with more optimal kf and gf positioning.
+ */
static int detect_transition_to_still(
VP8_COMP *cpi,
int frame_interval,
@@ -1389,9 +1456,10 @@ static int detect_transition_to_still(
{
int trans_to_still = 0;
- // Break clause to detect very still sections after motion
- // For example a static image after a fade or other transition
- // instead of a clean scene cut.
+ /* Break clause to detect very still sections after motion
+ * For example a static image after a fade or other transition
+ * instead of a clean scene cut.
+ */
if ( (frame_interval > MIN_GF_INTERVAL) &&
(loop_decay_rate >= 0.999) &&
(decay_accumulator < 0.9) )
@@ -1401,8 +1469,7 @@ static int detect_transition_to_still(
FIRSTPASS_STATS tmp_next_frame;
double decay_rate;
- // Look ahead a few frames to see if static condition
- // persists...
+ /* Look ahead a few frames to see if static condition persists... */
for ( j = 0; j < still_interval; j++ )
{
if (EOF == input_stats(cpi, &tmp_next_frame))
@@ -1412,10 +1479,10 @@ static int detect_transition_to_still(
if ( decay_rate < 0.999 )
break;
}
- // Reset file position
+ /* Reset file position */
reset_fpf_position(cpi, position);
- // Only if it does do we signal a transition to still
+ /* Only if it does do we signal a transition to still */
if ( j == still_interval )
trans_to_still = 1;
}
@@ -1423,24 +1490,26 @@ static int detect_transition_to_still(
return trans_to_still;
}
-// This function detects a flash through the high relative pcnt_second_ref
-// score in the frame following a flash frame. The offset passed in should
-// reflect this
+/* This function detects a flash through the high relative pcnt_second_ref
+ * score in the frame following a flash frame. The offset passed in should
+ * reflect this
+ */
static int detect_flash( VP8_COMP *cpi, int offset )
{
FIRSTPASS_STATS next_frame;
int flash_detected = 0;
- // Read the frame data.
- // The return is 0 (no flash detected) if not a valid frame
+ /* Read the frame data. */
+ /* The return is 0 (no flash detected) if not a valid frame */
if ( read_frame_stats(cpi, &next_frame, offset) != EOF )
{
- // What we are looking for here is a situation where there is a
- // brief break in prediction (such as a flash) but subsequent frames
- // are reasonably well predicted by an earlier (pre flash) frame.
- // The recovery after a flash is indicated by a high pcnt_second_ref
- // comapred to pcnt_inter.
+ /* What we are looking for here is a situation where there is a
+ * brief break in prediction (such as a flash) but subsequent frames
+ * are reasonably well predicted by an earlier (pre flash) frame.
+ * The recovery after a flash is indicated by a high pcnt_second_ref
+     * compared to pcnt_inter.
+ */
if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
(next_frame.pcnt_second_ref >= 0.5 ) )
{
@@ -1461,7 +1530,7 @@ static int detect_flash( VP8_COMP *cpi, int offset )
return flash_detected;
}
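
For instance (numbers invented), the frame after a flash might show pcnt_inter = 0.30, because the flash frame predicts it poorly, yet pcnt_second_ref = 0.70, because the pre-flash second reference still predicts it well; 0.70 > 0.30 and 0.70 >= 0.5, so the flash is flagged.
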
-// Update the motion related elements to the GF arf boost calculation
+/* Update the motion related elements to the GF arf boost calculation */
static void accumulate_frame_motion_stats(
VP8_COMP *cpi,
FIRSTPASS_STATS * this_frame,
@@ -1470,22 +1539,22 @@ static void accumulate_frame_motion_stats(
double * abs_mv_in_out_accumulator,
double * mv_ratio_accumulator )
{
- //double this_frame_mv_in_out;
double this_frame_mvr_ratio;
double this_frame_mvc_ratio;
double motion_pct;
- // Accumulate motion stats.
+ /* Accumulate motion stats. */
motion_pct = this_frame->pcnt_motion;
- // Accumulate Motion In/Out of frame stats
+ /* Accumulate Motion In/Out of frame stats */
*this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
*mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
*abs_mv_in_out_accumulator +=
fabs(this_frame->mv_in_out_count * motion_pct);
- // Accumulate a measure of how uniform (or conversely how random)
- // the motion field is. (A ratio of absmv / mv)
+ /* Accumulate a measure of how uniform (or conversely how random)
+ * the motion field is. (A ratio of absmv / mv)
+ */
if (motion_pct > 0.05)
{
this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
@@ -1507,7 +1576,7 @@ static void accumulate_frame_motion_stats(
}
}
-// Calculate a baseline boost number for the current frame.
+/* Calculate a baseline boost number for the current frame. */
static double calc_frame_boost(
VP8_COMP *cpi,
FIRSTPASS_STATS * this_frame,
@@ -1515,7 +1584,7 @@ static double calc_frame_boost(
{
double frame_boost;
- // Underlying boost factor is based on inter intra error ratio
+ /* Underlying boost factor is based on inter intra error ratio */
if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
frame_boost = (IIFACTOR * this_frame->intra_error /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
@@ -1523,17 +1592,18 @@ static double calc_frame_boost(
frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
- // Increase boost for frames where new data coming into frame
- // (eg zoom out). Slightly reduce boost if there is a net balance
- // of motion out of the frame (zoom in).
- // The range for this_frame_mv_in_out is -1.0 to +1.0
+ /* Increase boost for frames where new data coming into frame
+ * (eg zoom out). Slightly reduce boost if there is a net balance
+ * of motion out of the frame (zoom in).
+ * The range for this_frame_mv_in_out is -1.0 to +1.0
+ */
if (this_frame_mv_in_out > 0.0)
frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
- // In extreme case boost is halved
+ /* In extreme case boost is halved */
else
frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
- // Clip to maximum
+ /* Clip to maximum */
if (frame_boost > GF_RMAX)
frame_boost = GF_RMAX;
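
Worked numbers, all invented: with intra_error = 2000, coded_error = 400 and IIFACTOR taken as 12.5 purely for this arithmetic, the base boost is 12.5 * 2000 / 400 = 62.5. A zoom out with this_frame_mv_in_out = +0.25 scales it by (1 + 0.25 * 2.0) to 93.75, while a zoom in at -0.25 scales it by (1 - 0.25 / 2.0) to about 54.7, before the GF_RMAX clip.
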
@@ -1561,26 +1631,27 @@ static int calc_arf_boost(
double r;
int flash_detected = 0;
- // Search forward from the proposed arf/next gf position
+ /* Search forward from the proposed arf/next gf position */
for ( i = 0; i < f_frames; i++ )
{
if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF )
break;
- // Update the motion related elements to the boost calculation
+ /* Update the motion related elements to the boost calculation */
accumulate_frame_motion_stats( cpi, &this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator );
- // Calculate the baseline boost number for this frame
+ /* Calculate the baseline boost number for this frame */
r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out );
- // We want to discount the the flash frame itself and the recovery
- // frame that follows as both will have poor scores.
+            /* We want to discount the flash frame itself and the recovery
+ * frame that follows as both will have poor scores.
+ */
flash_detected = detect_flash(cpi, (i+offset)) ||
detect_flash(cpi, (i+offset+1));
- // Cumulative effect of prediction quality decay
+ /* Cumulative effect of prediction quality decay */
if ( !flash_detected )
{
decay_accumulator =
@@ -1591,7 +1662,7 @@ static int calc_arf_boost(
}
boost_score += (decay_accumulator * r);
- // Break out conditions.
+ /* Break out conditions. */
if ( (!flash_detected) &&
((mv_ratio_accumulator > 100.0) ||
(abs_mv_in_out_accumulator > 3.0) ||
@@ -1603,7 +1674,7 @@ static int calc_arf_boost(
*f_boost = (int)(boost_score * 100.0) >> 4;
- // Reset for backward looking loop
+ /* Reset for backward looking loop */
boost_score = 0.0;
mv_ratio_accumulator = 0.0;
decay_accumulator = 1.0;
@@ -1611,26 +1682,27 @@ static int calc_arf_boost(
mv_in_out_accumulator = 0.0;
abs_mv_in_out_accumulator = 0.0;
- // Search forward from the proposed arf/next gf position
+    /* Search backward from the proposed arf/next gf position */
for ( i = -1; i >= -b_frames; i-- )
{
if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF )
break;
- // Update the motion related elements to the boost calculation
+ /* Update the motion related elements to the boost calculation */
accumulate_frame_motion_stats( cpi, &this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator );
- // Calculate the baseline boost number for this frame
+ /* Calculate the baseline boost number for this frame */
r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out );
- // We want to discount the the flash frame itself and the recovery
- // frame that follows as both will have poor scores.
+            /* We want to discount the flash frame itself and the recovery
+ * frame that follows as both will have poor scores.
+ */
flash_detected = detect_flash(cpi, (i+offset)) ||
detect_flash(cpi, (i+offset+1));
- // Cumulative effect of prediction quality decay
+ /* Cumulative effect of prediction quality decay */
if ( !flash_detected )
{
decay_accumulator =
@@ -1642,7 +1714,7 @@ static int calc_arf_boost(
boost_score += (decay_accumulator * r);
- // Break out conditions.
+ /* Break out conditions. */
if ( (!flash_detected) &&
((mv_ratio_accumulator > 100.0) ||
(abs_mv_in_out_accumulator > 3.0) ||
@@ -1657,7 +1729,7 @@ static int calc_arf_boost(
}
#endif
-// Analyse and define a gf/arf group .
+/* Analyse and define a gf/arf group. */
static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
{
FIRSTPASS_STATS next_frame;
@@ -1673,14 +1745,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double mv_ratio_accumulator = 0.0;
double decay_accumulator = 1.0;
- double loop_decay_rate = 1.00; // Starting decay rate
+ double loop_decay_rate = 1.00; /* Starting decay rate */
double this_frame_mv_in_out = 0.0;
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mod_err_per_mb_accumulator = 0.0;
- int max_bits = frame_max_bits(cpi); // Max for a single frame
+ int max_bits = frame_max_bits(cpi); /* Max for a single frame */
unsigned int allow_alt_ref =
cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
@@ -1693,37 +1765,40 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.gf_group_bits = 0;
cpi->twopass.gf_decay_rate = 0;
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
start_pos = cpi->twopass.stats_in;
- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+ vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
- // Load stats for the current frame.
+ /* Load stats for the current frame. */
mod_frame_err = calculate_modified_err(cpi, this_frame);
- // Note the error of the frame at the start of the group (this will be
- // the GF frame error if we code a normal gf
+ /* Note the error of the frame at the start of the group (this will be
+     * the GF frame error if we code a normal gf)
+ */
gf_first_frame_err = mod_frame_err;
- // Special treatment if the current frame is a key frame (which is also
- // a gf). If it is then its error score (and hence bit allocation) need
- // to be subtracted out from the calculation for the GF group
+ /* Special treatment if the current frame is a key frame (which is also
+     * a gf). If it is then its error score (and hence bit allocation) needs
+ * to be subtracted out from the calculation for the GF group
+ */
if (cpi->common.frame_type == KEY_FRAME)
gf_group_err -= gf_first_frame_err;
- // Scan forward to try and work out how many frames the next gf group
- // should contain and what level of boost is appropriate for the GF
- // or ARF that will be coded with the group
+ /* Scan forward to try and work out how many frames the next gf group
+ * should contain and what level of boost is appropriate for the GF
+ * or ARF that will be coded with the group
+ */
i = 0;
while (((i < cpi->twopass.static_scene_max_gf_interval) ||
((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) &&
(i < cpi->twopass.frames_to_key))
{
- i++; // Increment the loop counter
+ i++;
- // Accumulate error score of frames in this gf group
+ /* Accumulate error score of frames in this gf group */
mod_frame_err = calculate_modified_err(cpi, this_frame);
gf_group_err += mod_frame_err;
@@ -1734,19 +1809,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (EOF == input_stats(cpi, &next_frame))
break;
- // Test for the case where there is a brief flash but the prediction
- // quality back to an earlier frame is then restored.
+ /* Test for the case where there is a brief flash but the prediction
+ * quality back to an earlier frame is then restored.
+ */
flash_detected = detect_flash(cpi, 0);
- // Update the motion related elements to the boost calculation
+ /* Update the motion related elements to the boost calculation */
accumulate_frame_motion_stats( cpi, &next_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator );
- // Calculate a baseline boost number for this frame
+ /* Calculate a baseline boost number for this frame */
r = calc_frame_boost( cpi, &next_frame, this_frame_mv_in_out );
- // Cumulative effect of prediction quality decay
+ /* Cumulative effect of prediction quality decay */
if ( !flash_detected )
{
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
@@ -1756,8 +1832,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
boost_score += (decay_accumulator * r);
- // Break clause to detect very still sections after motion
- // For example a staic image after a fade or other transition.
+ /* Break clause to detect very still sections after motion
+         * For example a static image after a fade or other transition.
+ */
if ( detect_transition_to_still( cpi, i, 5,
loop_decay_rate,
decay_accumulator ) )
@@ -1767,14 +1844,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
break;
}
- // Break out conditions.
+ /* Break out conditions. */
if (
- // Break at cpi->max_gf_interval unless almost totally static
+ /* Break at cpi->max_gf_interval unless almost totally static */
(i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) ||
(
- // Dont break out with a very short interval
+            /* Don't break out with a very short interval */
(i > MIN_GF_INTERVAL) &&
- // Dont break out very close to a key frame
+            /* Don't break out very close to a key frame */
((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
(!flash_detected) &&
@@ -1796,15 +1873,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.gf_decay_rate =
(i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0;
- // When using CBR apply additional buffer related upper limits
+ /* When using CBR apply additional buffer related upper limits */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
double max_boost;
- // For cbr apply buffer related limits
+ /* For cbr apply buffer related limits */
if (cpi->drop_frames_allowed)
{
- int df_buffer_level = cpi->oxcf.drop_frames_water_mark *
+ int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark *
(cpi->oxcf.optimal_buffer_level / 100);
if (cpi->buffer_level > df_buffer_level)
@@ -1825,7 +1902,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
boost_score = max_boost;
}
- // Dont allow conventional gf too near the next kf
+    /* Don't allow conventional gf too near the next kf */
if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)
{
while (i < cpi->twopass.frames_to_key)
@@ -1846,14 +1923,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->gfu_boost = (int)(boost_score * 100.0) >> 4;
#if NEW_BOOST
- // Alterrnative boost calculation for alt ref
+    /* Alternative boost calculation for alt ref */
alt_boost = calc_arf_boost( cpi, 0, (i-1), (i-1), &f_boost, &b_boost );
#endif
- // Should we use the alternate refernce frame
+    /* Should we use the alternate reference frame */
if (allow_alt_ref &&
(i >= MIN_GF_INTERVAL) &&
- // dont use ARF very near next kf
+        /* don't use ARF very near next kf */
(i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) &&
#if NEW_BOOST
((next_frame.pcnt_inter > 0.75) ||
@@ -1883,7 +1960,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->gfu_boost = alt_boost;
#endif
- // Estimate the bits to be allocated to the group as a whole
+ /* Estimate the bits to be allocated to the group as a whole */
if ((cpi->twopass.kf_group_bits > 0) &&
(cpi->twopass.kf_group_error_left > 0))
{
@@ -1893,7 +1970,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
else
group_bits = 0;
- // Boost for arf frame
+ /* Boost for arf frame */
#if NEW_BOOST
Boost = (alt_boost * GFQ_ADJUSTMENT) / 100;
#else
@@ -1901,7 +1978,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
#endif
Boost += (i * 50);
- // Set max and minimum boost and hence minimum allocation
+ /* Set max and minimum boost and hence minimum allocation */
if (Boost > ((cpi->baseline_gf_interval + 1) * 200))
Boost = ((cpi->baseline_gf_interval + 1) * 200);
else if (Boost < 125)
@@ -1909,24 +1986,27 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
allocation_chunks = (i * 100) + Boost;
- // Normalize Altboost and allocations chunck down to prevent overflow
+    /* Normalize Altboost and allocation chunks down to prevent overflow */
while (Boost > 1000)
{
Boost /= 2;
allocation_chunks /= 2;
}
- // Calculate the number of bits to be spent on the arf based on the
- // boost number
+ /* Calculate the number of bits to be spent on the arf based on the
+ * boost number
+ */
arf_frame_bits = (int)((double)Boost * (group_bits /
(double)allocation_chunks));
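
For example (values invented): with i = 12 frames and Boost = 1000, allocation_chunks = 12 * 100 + 1000 = 2200, so the arf is assigned 1000/2200, about 45%, of group_bits while each ordinary frame's base share is 100/2200. The halving loop above keeps that ratio while shrinking both numbers, so a huge boost cannot overflow the integer math.
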
- // Estimate if there are enough bits available to make worthwhile use
- // of an arf.
+ /* Estimate if there are enough bits available to make worthwhile use
+ * of an arf.
+ */
tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits);
- // Only use an arf if it is likely we will be able to code
- // it at a lower Q than the surrounding frames.
+ /* Only use an arf if it is likely we will be able to code
+ * it at a lower Q than the surrounding frames.
+ */
if (tmp_q < cpi->worst_quality)
{
int half_gf_int;
@@ -1936,42 +2016,46 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->source_alt_ref_pending = 1;
- // For alt ref frames the error score for the end frame of the
- // group (the alt ref frame) should not contribute to the group
- // total and hence the number of bit allocated to the group.
- // Rather it forms part of the next group (it is the GF at the
- // start of the next group)
- // gf_group_err -= mod_frame_err;
-
- // For alt ref frames alt ref frame is technically part of the
- // GF frame for the next group but we always base the error
- // calculation and bit allocation on the current group of frames.
-
- // Set the interval till the next gf or arf.
- // For ARFs this is the number of frames to be coded before the
- // future frame that is coded as an ARF.
- // The future frame itself is part of the next group
+ /*
+ * For alt ref frames the error score for the end frame of the
+ * group (the alt ref frame) should not contribute to the group
+             * total and hence the number of bits allocated to the group.
+ * Rather it forms part of the next group (it is the GF at the
+ * start of the next group)
+ * gf_group_err -= mod_frame_err;
+ *
+             * For alt ref frames the alt ref frame is technically part of the
+ * GF frame for the next group but we always base the error
+ * calculation and bit allocation on the current group of frames.
+ *
+ * Set the interval till the next gf or arf.
+ * For ARFs this is the number of frames to be coded before the
+ * future frame that is coded as an ARF.
+ * The future frame itself is part of the next group
+ */
cpi->baseline_gf_interval = i;
- // Define the arnr filter width for this group of frames:
- // We only filter frames that lie within a distance of half
- // the GF interval from the ARF frame. We also have to trap
- // cases where the filter extends beyond the end of clip.
- // Note: this_frame->frame has been updated in the loop
- // so it now points at the ARF frame.
+ /*
+ * Define the arnr filter width for this group of frames:
+ * We only filter frames that lie within a distance of half
+ * the GF interval from the ARF frame. We also have to trap
+ * cases where the filter extends beyond the end of clip.
+ * Note: this_frame->frame has been updated in the loop
+ * so it now points at the ARF frame.
+ */
half_gf_int = cpi->baseline_gf_interval >> 1;
- frames_after_arf = cpi->twopass.total_stats.count -
- this_frame->frame - 1;
+ frames_after_arf = (int)(cpi->twopass.total_stats.count -
+ this_frame->frame - 1);
switch (cpi->oxcf.arnr_type)
{
- case 1: // Backward filter
+ case 1: /* Backward filter */
frames_fwd = 0;
if (frames_bwd > half_gf_int)
frames_bwd = half_gf_int;
break;
- case 2: // Forward filter
+ case 2: /* Forward filter */
if (frames_fwd > half_gf_int)
frames_fwd = half_gf_int;
if (frames_fwd > frames_after_arf)
@@ -1979,7 +2063,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
frames_bwd = 0;
break;
- case 3: // Centered filter
+ case 3: /* Centered filter */
default:
frames_fwd >>= 1;
if (frames_fwd > frames_after_arf)
@@ -1989,8 +2073,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
frames_bwd = frames_fwd;
- // For even length filter there is one more frame backward
- // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+ /* For even length filter there is one more frame backward
+ * than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+ */
if (frames_bwd < half_gf_int)
frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1;
break;
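
Concretely, the bbbAff/bbbAfff example in the comment corresponds to 2 forward plus 3 backward frames for an even requested length of 6, and 3 plus 3 for an odd length of 7: the (cpi->oxcf.arnr_max_frames + 1) & 0x1 term contributes the extra backward frame exactly when the requested length is even.
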
@@ -2010,12 +2095,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->baseline_gf_interval = i;
}
- // Now decide how many bits should be allocated to the GF group as a
- // proportion of those remaining in the kf group.
- // The final key frame group in the clip is treated as a special case
- // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
- // This is also important for short clips where there may only be one
- // key frame.
+ /*
+ * Now decide how many bits should be allocated to the GF group as a
+ * proportion of those remaining in the kf group.
+ * The final key frame group in the clip is treated as a special case
+ * where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
+ * This is also important for short clips where there may only be one
+ * key frame.
+ */
if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count -
cpi->common.current_video_frame))
{
@@ -2023,7 +2110,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
(cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;
}
- // Calculate the bits to be allocated to the group as a whole
+ /* Calculate the bits to be allocated to the group as a whole */
if ((cpi->twopass.kf_group_bits > 0) &&
(cpi->twopass.kf_group_error_left > 0))
{
@@ -2034,31 +2121,32 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
else
cpi->twopass.gf_group_bits = 0;
- cpi->twopass.gf_group_bits =
+ cpi->twopass.gf_group_bits = (int)(
(cpi->twopass.gf_group_bits < 0)
? 0
: (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
+ ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits);
- // Clip cpi->twopass.gf_group_bits based on user supplied data rate
- // variability limit (cpi->oxcf.two_pass_vbrmax_section)
+ /* Clip cpi->twopass.gf_group_bits based on user supplied data rate
+ * variability limit (cpi->oxcf.two_pass_vbrmax_section)
+ */
if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
- // Reset the file position
+ /* Reset the file position */
reset_fpf_position(cpi, start_pos);
- // Update the record of error used so far (only done once per gf group)
+ /* Update the record of error used so far (only done once per gf group) */
cpi->twopass.modified_error_used += gf_group_err;
- // Assign bits to the arf or gf.
+ /* Assign bits to the arf or gf. */
for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) {
int Boost;
int allocation_chunks;
int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
int gf_bits;
- // For ARF frames
+ /* For ARF frames */
if (cpi->source_alt_ref_pending && i == 0)
{
#if NEW_BOOST
@@ -2068,7 +2156,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
#endif
Boost += (cpi->baseline_gf_interval * 50);
- // Set max and minimum boost and hence minimum allocation
+ /* Set max and minimum boost and hence minimum allocation */
if (Boost > ((cpi->baseline_gf_interval + 1) * 200))
Boost = ((cpi->baseline_gf_interval + 1) * 200);
else if (Boost < 125)
@@ -2077,13 +2165,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
allocation_chunks =
((cpi->baseline_gf_interval + 1) * 100) + Boost;
}
- // Else for standard golden frames
+ /* Else for standard golden frames */
else
{
- // boost based on inter / intra ratio of subsequent frames
+ /* boost based on inter / intra ratio of subsequent frames */
Boost = (cpi->gfu_boost * GFQ_ADJUSTMENT) / 100;
- // Set max and minimum boost and hence minimum allocation
+ /* Set max and minimum boost and hence minimum allocation */
if (Boost > (cpi->baseline_gf_interval * 150))
Boost = (cpi->baseline_gf_interval * 150);
else if (Boost < 125)
@@ -2093,22 +2181,24 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
(cpi->baseline_gf_interval * 100) + (Boost - 100);
}
- // Normalize Altboost and allocations chunck down to prevent overflow
+        /* Normalize Boost and allocation chunks down to prevent overflow */
while (Boost > 1000)
{
Boost /= 2;
allocation_chunks /= 2;
}
- // Calculate the number of bits to be spent on the gf or arf based on
- // the boost number
+ /* Calculate the number of bits to be spent on the gf or arf based on
+ * the boost number
+ */
gf_bits = (int)((double)Boost *
(cpi->twopass.gf_group_bits /
(double)allocation_chunks));
- // If the frame that is to be boosted is simpler than the average for
- // the gf/arf group then use an alternative calculation
- // based on the error score of the frame itself
+ /* If the frame that is to be boosted is simpler than the average for
+ * the gf/arf group then use an alternative calculation
+ * based on the error score of the frame itself
+ */
if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval)
{
double alt_gf_grp_bits;
@@ -2127,9 +2217,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
gf_bits = alt_gf_bits;
}
}
- // Else if it is harder than other frames in the group make sure it at
- // least receives an allocation in keeping with its relative error
- // score, otherwise it may be worse off than an "un-boosted" frame
+ /* Else if it is harder than other frames in the group make sure it at
+ * least receives an allocation in keeping with its relative error
+ * score, otherwise it may be worse off than an "un-boosted" frame
+ */
else
{
int alt_gf_bits =
@@ -2143,18 +2234,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
}
- // Apply an additional limit for CBR
+ /* Apply an additional limit for CBR */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1))
- cpi->twopass.gf_bits = cpi->buffer_level >> 1;
+ if (cpi->twopass.gf_bits > (int)(cpi->buffer_level >> 1))
+ cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1);
}
- // Dont allow a negative value for gf_bits
+        /* Don't allow a negative value for gf_bits */
if (gf_bits < 0)
gf_bits = 0;
- gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
+ /* Add in minimum for a frame */
+ gf_bits += cpi->min_frame_bandwidth;
if (i == 0)
{
@@ -2162,33 +2254,39 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)))
{
- cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame
+ /* Per frame bit target for this frame */
+ cpi->per_frame_bandwidth = gf_bits;
}
}
{
- // Adjust KF group bits and error remainin
- cpi->twopass.kf_group_error_left -= gf_group_err;
+        /* Adjust KF group bits and error remaining */
+ cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;
cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
if (cpi->twopass.kf_group_bits < 0)
cpi->twopass.kf_group_bits = 0;
- // Note the error score left in the remaining frames of the group.
- // For normal GFs we want to remove the error score for the first frame of the group (except in Key frame case where this has already happened)
+ /* Note the error score left in the remaining frames of the group.
+ * For normal GFs we want to remove the error score for the first
+ * frame of the group (except in Key frame case where this has
+ * already happened)
+ */
if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME)
- cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err;
+ cpi->twopass.gf_group_error_left = (int)(gf_group_err -
+ gf_first_frame_err);
else
- cpi->twopass.gf_group_error_left = gf_group_err;
+ cpi->twopass.gf_group_error_left = (int) gf_group_err;
cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;
if (cpi->twopass.gf_group_bits < 0)
cpi->twopass.gf_group_bits = 0;
- // This condition could fail if there are two kfs very close together
- // despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the
- // calculation of cpi->twopass.alt_extra_bits.
+ /* This condition could fail if there are two kfs very close together
+         * despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the
+ * calculation of cpi->twopass.alt_extra_bits.
+ */
if ( cpi->baseline_gf_interval >= 3 )
{
#if NEW_BOOST
@@ -2217,7 +2315,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.alt_extra_bits = 0;
}
- // Adjustments based on a measure of complexity of the section
+ /* Adjustments based on a measure of complexity of the section */
if (cpi->common.frame_type != KEY_FRAME)
{
FIRSTPASS_STATS sectionstats;
@@ -2234,47 +2332,45 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
avg_stats(&sectionstats);
- cpi->twopass.section_intra_rating =
- sectionstats.intra_error /
- DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+ cpi->twopass.section_intra_rating = (unsigned int)
+ (sectionstats.intra_error /
+ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
- //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
- //{
cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025);
if (cpi->twopass.section_max_qfactor < 0.80)
cpi->twopass.section_max_qfactor = 0.80;
- //}
- //else
- // cpi->twopass.section_max_qfactor = 1.0;
-
reset_fpf_position(cpi, start_pos);
}
}
-// Allocate bits to a normal frame that is neither a gf an arf or a key frame.
+/* Allocate bits to a normal frame that is neither a gf, an arf, nor a key frame. */
static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
{
- int target_frame_size; // gf_group_error_left
+ int target_frame_size;
double modified_err;
- double err_fraction; // What portion of the remaining GF group error is used by this frame
+ double err_fraction;
- int max_bits = frame_max_bits(cpi); // Max for a single frame
+ int max_bits = frame_max_bits(cpi); /* Max for a single frame */
- // Calculate modified prediction error used in bit allocation
+ /* Calculate modified prediction error used in bit allocation */
modified_err = calculate_modified_err(cpi, this_frame);
+ /* What portion of the remaining GF group error is used by this frame */
if (cpi->twopass.gf_group_error_left > 0)
- err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame
+ err_fraction = modified_err / cpi->twopass.gf_group_error_left;
else
err_fraction = 0.0;
- target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it?
+ /* How many of those bits available for allocation should we give it? */
+ target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction);
- // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end.
+    /* Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits)
+ * at the top end.
+ */
if (target_frame_size < 0)
target_frame_size = 0;
else
@@ -2286,22 +2382,25 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
target_frame_size = cpi->twopass.gf_group_bits;
}
- cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining
- cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining
+ /* Adjust error and bits remaining */
+ cpi->twopass.gf_group_error_left -= (int)modified_err;
+ cpi->twopass.gf_group_bits -= target_frame_size;
if (cpi->twopass.gf_group_bits < 0)
cpi->twopass.gf_group_bits = 0;
- target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame.
+ /* Add in the minimum number of bits that is set aside for every frame. */
+ target_frame_size += cpi->min_frame_bandwidth;
- // Every other frame gets a few extra bits
+ /* Every other frame gets a few extra bits */
if ( (cpi->common.frames_since_golden & 0x01) &&
(cpi->frames_till_gf_update_due > 0) )
{
target_frame_size += cpi->twopass.alt_extra_bits;
}
- cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame
+ /* Per frame bit target for this frame */
+ cpi->per_frame_bandwidth = target_frame_size;
}
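
The allocation rule the function implements is proportional: each ordinary frame receives a share of the remaining GF group bits equal to its share of the remaining modified error, clipped and topped up with the per-frame minimum. A simplified standalone sketch of that rule (parameter names are illustrative, and the top-end clip is condensed):

    static int std_frame_target(double modified_err, double group_error_left,
                                int group_bits, int max_bits,
                                int min_frame_bandwidth)
    {
        double err_fraction = 0.0;
        int target;

        /* portion of the remaining GF group error used by this frame */
        if (group_error_left > 0)
            err_fraction = modified_err / group_error_left;

        target = (int)((double)group_bits * err_fraction);

        /* clip to 0 - max_bits (or group_bits at the top end) */
        if (target < 0)
            target = 0;
        else if (target > max_bits && max_bits < group_bits)
            target = max_bits;

        return target + min_frame_bandwidth;
    }
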
void vp8_second_pass(VP8_COMP *cpi)
@@ -2330,20 +2429,25 @@ void vp8_second_pass(VP8_COMP *cpi)
this_frame_intra_error = this_frame.intra_error;
this_frame_coded_error = this_frame.coded_error;
- // keyframe and section processing !
+ /* keyframe and section processing ! */
if (cpi->twopass.frames_to_key == 0)
{
- // Define next KF group and assign bits to it
+ /* Define next KF group and assign bits to it */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
find_next_key_frame(cpi, &this_frame_copy);
- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop
- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups.
- // This is temporary code till we decide what should really happen in this case.
+        /* Special case: error_resilient_mode does not make much sense
+         * for two pass with its current meaning, but this code is
+         * designed to stop outlandish behaviour if someone does set it
+         * when using two pass. It effectively disables GF groups. This
+         * is temporary code till we decide what should really happen
+         * in this case.
+         */
if (cpi->oxcf.error_resilient_mode)
{
- cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits;
- cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left;
+ cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits;
+ cpi->twopass.gf_group_error_left =
+ (int)cpi->twopass.kf_group_error_left;
cpi->baseline_gf_interval = cpi->twopass.frames_to_key;
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
cpi->source_alt_ref_pending = 0;
@@ -2351,19 +2455,25 @@ void vp8_second_pass(VP8_COMP *cpi)
}
- // Is this a GF / ARF (Note that a KF is always also a GF)
+ /* Is this a GF / ARF (Note that a KF is always also a GF) */
if (cpi->frames_till_gf_update_due == 0)
{
- // Define next gf group and assign bits to it
+ /* Define next gf group and assign bits to it */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
define_gf_group(cpi, &this_frame_copy);
- // If we are going to code an altref frame at the end of the group and the current frame is not a key frame....
- // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits
- // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well
+        /* If we are going to code an altref frame at the end of the group
+         * and the current frame is not a key frame: if the previous group
+         * used an arf, this frame has already benefited from that arf
+         * boost and should not be given extra bits; if the previous group
+         * was NOT coded using an arf we may want to apply some boost to
+         * this GF as well
+         */
if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))
{
- // Assign a standard frames worth of bits from those allocated to the GF group
+ /* Assign a standard frames worth of bits from those allocated
+ * to the GF group
+ */
int bak = cpi->per_frame_bandwidth;
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
@@ -2371,59 +2481,64 @@ void vp8_second_pass(VP8_COMP *cpi)
}
}
- // Otherwise this is an ordinary frame
+ /* Otherwise this is an ordinary frame */
else
{
- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop
- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups.
- // This is temporary code till we decide what should really happen in this case.
+        /* Special case: error_resilient_mode does not make much sense
+         * for two pass with its current meaning, but this code is
+         * designed to stop outlandish behaviour if someone does set it
+         * when using two pass. It effectively disables GF groups. This
+         * is temporary code till we decide what should really happen
+         * in this case.
+         */
if (cpi->oxcf.error_resilient_mode)
{
cpi->frames_till_gf_update_due = cpi->twopass.frames_to_key;
if (cpi->common.frame_type != KEY_FRAME)
{
- // Assign bits from those allocated to the GF group
+ /* Assign bits from those allocated to the GF group */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
}
}
else
{
- // Assign bits from those allocated to the GF group
+ /* Assign bits from those allocated to the GF group */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
}
}
- // Keep a globally available copy of this and the next frame's iiratio.
- cpi->twopass.this_iiratio = this_frame_intra_error /
- DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
+ /* Keep a globally available copy of this and the next frame's iiratio. */
+ cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
{
FIRSTPASS_STATS next_frame;
if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
{
- cpi->twopass.next_iiratio = next_frame.intra_error /
- DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+ cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error /
+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
}
}
- // Set nominal per second bandwidth for this frame
- cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;
+ /* Set nominal per second bandwidth for this frame */
+ cpi->target_bandwidth = (int)
+ (cpi->per_frame_bandwidth * cpi->output_frame_rate);
if (cpi->target_bandwidth < 0)
cpi->target_bandwidth = 0;
- // Account for mv, mode and other overheads.
- overhead_bits = estimate_modemvcost(
+ /* Account for mv, mode and other overheads. */
+ overhead_bits = (int)estimate_modemvcost(
cpi, &cpi->twopass.total_left_stats );
- // Special case code for first frame.
+ /* Special case code for first frame. */
if (cpi->common.current_video_frame == 0)
{
cpi->twopass.est_max_qcorrection_factor = 1.0;
- // Set a cq_level in constrained quality mode.
+ /* Set a cq_level in constrained quality mode. */
if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY )
{
int est_cq;
@@ -2439,7 +2554,7 @@ void vp8_second_pass(VP8_COMP *cpi)
cpi->cq_target_quality = est_cq;
}
- // guess at maxq needed in 2nd pass
+ /* guess at maxq needed in 2nd pass */
cpi->twopass.maxq_max_limit = cpi->worst_quality;
cpi->twopass.maxq_min_limit = cpi->best_quality;
@@ -2449,11 +2564,12 @@ void vp8_second_pass(VP8_COMP *cpi)
(int)(cpi->twopass.bits_left / frames_left),
overhead_bits );
- // Limit the maxq value returned subsequently.
- // This increases the risk of overspend or underspend if the initial
- // estimate for the clip is bad, but helps prevent excessive
- // variation in Q, especially near the end of a clip
- // where for example a small overspend may cause Q to crash
+ /* Limit the maxq value returned subsequently.
+ * This increases the risk of overspend or underspend if the initial
+ * estimate for the clip is bad, but helps prevent excessive
+ * variation in Q, especially near the end of a clip
+ * where for example a small overspend may cause Q to crash
+ */
cpi->twopass.maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality)
? (tmp_q + 32) : cpi->worst_quality;
cpi->twopass.maxq_min_limit = ((tmp_q - 32) > cpi->best_quality)
@@ -2463,10 +2579,11 @@ void vp8_second_pass(VP8_COMP *cpi)
cpi->ni_av_qi = tmp_q;
}
- // The last few frames of a clip almost always have to few or too many
- // bits and for the sake of over exact rate control we dont want to make
- // radical adjustments to the allowed quantizer range just to use up a
- // few surplus bits or get beneath the target rate.
+    /* The last few frames of a clip almost always have too few or too
+     * many bits, and for the sake of overly exact rate control we don't
+     * want to make radical adjustments to the allowed quantizer range
+     * just to use up a few surplus bits or get beneath the target rate.
+     */
else if ( (cpi->common.current_video_frame <
(((unsigned int)cpi->twopass.total_stats.count * 255)>>8)) &&
((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
@@ -2481,7 +2598,7 @@ void vp8_second_pass(VP8_COMP *cpi)
(int)(cpi->twopass.bits_left / frames_left),
overhead_bits );
- // Move active_worst_quality but in a damped way
+ /* Move active_worst_quality but in a damped way */
if (tmp_q > cpi->active_worst_quality)
cpi->active_worst_quality ++;
else if (tmp_q < cpi->active_worst_quality)
@@ -2493,7 +2610,7 @@ void vp8_second_pass(VP8_COMP *cpi)
cpi->twopass.frames_to_key --;
- // Update the total stats remaining sturcture
+    /* Update the total stats remaining structure */
subtract_stats(&cpi->twopass.total_left_stats, &this_frame );
}
@@ -2502,8 +2619,9 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
{
int is_viable_kf = 0;
- // Does the frame satisfy the primary criteria of a key frame
- // If so, then examine how well it predicts subsequent frames
+ /* Does the frame satisfy the primary criteria of a key frame
+ * If so, then examine how well it predicts subsequent frames
+ */
if ((this_frame->pcnt_second_ref < 0.10) &&
(next_frame->pcnt_second_ref < 0.10) &&
((this_frame->pcnt_inter < 0.05) ||
@@ -2530,10 +2648,10 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
- // Note the starting file position so we can reset to it
+ /* Note the starting file position so we can reset to it */
start_pos = cpi->twopass.stats_in;
- // Examine how well the key frame predicts subsequent frames
+ /* Examine how well the key frame predicts subsequent frames */
for (i = 0 ; i < 16; i++)
{
next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)) ;
@@ -2541,18 +2659,16 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
if (next_iiratio > RMAX)
next_iiratio = RMAX;
- // Cumulative effect of decay in prediction quality
+ /* Cumulative effect of decay in prediction quality */
if (local_next_frame.pcnt_inter > 0.85)
decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
else
decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0);
- //decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
-
- // Keep a running total
+ /* Keep a running total */
boost_score += (decay_accumulator * next_iiratio);
- // Test various breakout clauses
+ /* Test various breakout clauses */
if ((local_next_frame.pcnt_inter < 0.05) ||
(next_iiratio < 1.5) ||
(((local_next_frame.pcnt_inter -
@@ -2567,17 +2683,19 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
old_boost_score = boost_score;
- // Get the next frame details
+ /* Get the next frame details */
if (EOF == input_stats(cpi, &local_next_frame))
break;
}
- // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on
+        /* If there is tolerable prediction for at least the next 3 frames
+         * then break out, else discard this potential key frame and move on
+         */
if (boost_score > 5.0 && (i > 3))
is_viable_kf = 1;
else
{
- // Reset the file position
+ /* Reset the file position */
reset_fpf_position(cpi, start_pos);
is_viable_kf = 0;
@@ -2605,65 +2723,71 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double kf_group_coded_err = 0.0;
double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+ vpx_memset(&next_frame, 0, sizeof(next_frame));
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
start_position = cpi->twopass.stats_in;
cpi->common.frame_type = KEY_FRAME;
- // is this a forced key frame by interval
+ /* is this a forced key frame by interval */
cpi->this_key_frame_forced = cpi->next_key_frame_forced;
- // Clear the alt ref active flag as this can never be active on a key frame
+ /* Clear the alt ref active flag as this can never be active on a key
+ * frame
+ */
cpi->source_alt_ref_active = 0;
- // Kf is always a gf so clear frames till next gf counter
+ /* Kf is always a gf so clear frames till next gf counter */
cpi->frames_till_gf_update_due = 0;
cpi->twopass.frames_to_key = 1;
- // Take a copy of the initial frame details
+ /* Take a copy of the initial frame details */
vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
- cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group
- cpi->twopass.kf_group_error_left = 0; // Group modified error score.
+ cpi->twopass.kf_group_bits = 0;
+ cpi->twopass.kf_group_error_left = 0;
kf_mod_err = calculate_modified_err(cpi, this_frame);
- // find the next keyframe
+ /* find the next keyframe */
i = 0;
while (cpi->twopass.stats_in < cpi->twopass.stats_in_end)
{
- // Accumulate kf group error
+ /* Accumulate kf group error */
kf_group_err += calculate_modified_err(cpi, this_frame);
- // These figures keep intra and coded error counts for all frames including key frames in the group.
- // The effect of the key frame itself can be subtracted out using the first_frame data collected above
+ /* These figures keep intra and coded error counts for all frames
+ * including key frames in the group. The effect of the key frame
+ * itself can be subtracted out using the first_frame data
+ * collected above
+ */
kf_group_intra_err += this_frame->intra_error;
kf_group_coded_err += this_frame->coded_error;
- // load a the next frame's stats
+        /* load the next frame's stats */
vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
input_stats(cpi, this_frame);
- // Provided that we are not at the end of the file...
+ /* Provided that we are not at the end of the file... */
if (cpi->oxcf.auto_key
&& lookup_next_frame_stats(cpi, &next_frame) != EOF)
{
- // Normal scene cut check
+ /* Normal scene cut check */
if ( ( i >= MIN_GF_INTERVAL ) &&
test_candidate_kf(cpi, &last_frame, this_frame, &next_frame) )
{
break;
}
- // How fast is prediction quality decaying
+ /* How fast is prediction quality decaying */
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
- // We want to know something about the recent past... rather than
- // as used elsewhere where we are concened with decay in prediction
- // quality since the last GF or KF.
+ /* We want to know something about the recent past... rather than
+             * as used elsewhere where we are concerned with decay in prediction
+ * quality since the last GF or KF.
+ */
recent_loop_decay[i%8] = loop_decay_rate;
decay_accumulator = 1.0;
for (j = 0; j < 8; j++)
@@ -2671,8 +2795,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
decay_accumulator = decay_accumulator * recent_loop_decay[j];
}
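
The eight-entry array acts as a ring buffer: each new per-frame decay rate overwrites the oldest slot, and the product of all eight measures decay over the recent past only. A standalone sketch of that mechanism:

    /* product of the last (up to) 8 decay rates, oldest overwritten first */
    static double recent_decay(const double *rates, int count)
    {
        double recent[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
        double acc = 1.0;
        int i;

        for (i = 0; i < count; i++)
            recent[i % 8] = rates[i];

        for (i = 0; i < 8; i++)
            acc *= recent[i];

        return acc;
    }
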
- // Special check for transition or high motion followed by a
- // to a static scene.
+ /* Special check for transition or high motion followed by a
+ * static scene.
+ */
if ( detect_transition_to_still( cpi, i,
(cpi->key_frame_frequency-i),
loop_decay_rate,
@@ -2682,11 +2807,12 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
- // Step on to the next frame
+ /* Step on to the next frame */
cpi->twopass.frames_to_key ++;
- // If we don't have a real key frame within the next two
- // forcekeyframeevery intervals then break out of the loop.
+ /* If we don't have a real key frame within the next two
+ * forcekeyframeevery intervals then break out of the loop.
+ */
if (cpi->twopass.frames_to_key >= 2 *(int)cpi->key_frame_frequency)
break;
} else
@@ -2695,10 +2821,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
i++;
}
- // If there is a max kf interval set by the user we must obey it.
- // We already breakout of the loop above at 2x max.
- // This code centers the extra kf if the actual natural
- // interval is between 1x and 2x
+ /* If there is a max kf interval set by the user we must obey it.
+     * We already break out of the loop above at 2x max.
+ * This code centers the extra kf if the actual natural
+ * interval is between 1x and 2x
+ */
if (cpi->oxcf.auto_key
&& cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency )
{
@@ -2707,29 +2834,29 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.frames_to_key /= 2;
- // Copy first frame details
+ /* Copy first frame details */
vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
- // Reset to the start of the group
+ /* Reset to the start of the group */
reset_fpf_position(cpi, start_position);
kf_group_err = 0;
kf_group_intra_err = 0;
kf_group_coded_err = 0;
- // Rescan to get the correct error data for the forced kf group
+ /* Rescan to get the correct error data for the forced kf group */
for( i = 0; i < cpi->twopass.frames_to_key; i++ )
{
- // Accumulate kf group errors
+ /* Accumulate kf group errors */
kf_group_err += calculate_modified_err(cpi, &tmp_frame);
kf_group_intra_err += tmp_frame.intra_error;
kf_group_coded_err += tmp_frame.coded_error;
- // Load a the next frame's stats
+        /* Load the next frame's stats */
input_stats(cpi, &tmp_frame);
}
- // Reset to the start of the group
+ /* Reset to the start of the group */
reset_fpf_position(cpi, current_pos);
cpi->next_key_frame_forced = 1;
@@ -2737,58 +2864,63 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
else
cpi->next_key_frame_forced = 0;
- // Special case for the last frame of the file
+ /* Special case for the last frame of the file */
if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end)
{
- // Accumulate kf group error
+ /* Accumulate kf group error */
kf_group_err += calculate_modified_err(cpi, this_frame);
- // These figures keep intra and coded error counts for all frames including key frames in the group.
- // The effect of the key frame itself can be subtracted out using the first_frame data collected above
+ /* These figures keep intra and coded error counts for all frames
+ * including key frames in the group. The effect of the key frame
+ * itself can be subtracted out using the first_frame data
+ * collected above
+ */
kf_group_intra_err += this_frame->intra_error;
kf_group_coded_err += this_frame->coded_error;
}
- // Calculate the number of bits that should be assigned to the kf group.
+ /* Calculate the number of bits that should be assigned to the kf group. */
if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0))
{
- // Max for a single normal frame (not key frame)
+ /* Max for a single normal frame (not key frame) */
int max_bits = frame_max_bits(cpi);
- // Maximum bits for the kf group
+ /* Maximum bits for the kf group */
int64_t max_grp_bits;
- // Default allocation based on bits left and relative
- // complexity of the section
+ /* Default allocation based on bits left and relative
+ * complexity of the section
+ */
cpi->twopass.kf_group_bits = (int64_t)( cpi->twopass.bits_left *
( kf_group_err /
cpi->twopass.modified_error_left ));
- // Clip based on maximum per frame rate defined by the user.
+ /* Clip based on maximum per frame rate defined by the user. */
max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key;
if (cpi->twopass.kf_group_bits > max_grp_bits)
cpi->twopass.kf_group_bits = max_grp_bits;
- // Additional special case for CBR if buffer is getting full.
+ /* Additional special case for CBR if buffer is getting full. */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
- int buffer_lvl = cpi->buffer_level;
+ int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
+ int64_t buffer_lvl = cpi->buffer_level;
- // If the buffer is near or above the optimal and this kf group is
- // not being allocated much then increase the allocation a bit.
+ /* If the buffer is near or above the optimal and this kf group is
+ * not being allocated much then increase the allocation a bit.
+ */
if (buffer_lvl >= opt_buffer_lvl)
{
- int high_water_mark = (opt_buffer_lvl +
+ int64_t high_water_mark = (opt_buffer_lvl +
cpi->oxcf.maximum_buffer_size) >> 1;
int64_t av_group_bits;
- // Av bits per frame * number of frames
+ /* Av bits per frame * number of frames */
av_group_bits = (int64_t)cpi->av_per_frame_bandwidth *
(int64_t)cpi->twopass.frames_to_key;
- // We are at or above the maximum.
+ /* We are at or above the maximum. */
if (cpi->buffer_level >= high_water_mark)
{
int64_t min_group_bits;
@@ -2800,7 +2932,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (cpi->twopass.kf_group_bits < min_group_bits)
cpi->twopass.kf_group_bits = min_group_bits;
}
- // We are above optimal but below the maximum
+ /* We are above optimal but below the maximum */
else if (cpi->twopass.kf_group_bits < av_group_bits)
{
int64_t bits_below_av = av_group_bits -
@@ -2817,13 +2949,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
else
cpi->twopass.kf_group_bits = 0;
- // Reset the first pass file position
+ /* Reset the first pass file position */
reset_fpf_position(cpi, start_position);
- // determine how big to make this keyframe based on how well the subsequent frames use inter blocks
+ /* determine how big to make this keyframe based on how well the
+ * subsequent frames use inter blocks
+ */
decay_accumulator = 1.0;
boost_score = 0.0;
- loop_decay_rate = 1.00; // Starting decay rate
+ loop_decay_rate = 1.00; /* Starting decay rate */
for (i = 0 ; i < cpi->twopass.frames_to_key ; i++)
{
@@ -2842,7 +2976,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (r > RMAX)
r = RMAX;
- // How fast is prediction quality decaying
+ /* How fast is prediction quality decaying */
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
decay_accumulator = decay_accumulator * loop_decay_rate;
@@ -2875,31 +3009,26 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
avg_stats(&sectionstats);
- cpi->twopass.section_intra_rating =
- sectionstats.intra_error
- / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+ cpi->twopass.section_intra_rating = (unsigned int)
+ (sectionstats.intra_error
+ / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
- // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
- //{
cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025);
if (cpi->twopass.section_max_qfactor < 0.80)
cpi->twopass.section_max_qfactor = 0.80;
-
- //}
- //else
- // cpi->twopass.section_max_qfactor = 1.0;
}
- // When using CBR apply additional buffer fullness related upper limits
+ /* When using CBR apply additional buffer fullness related upper limits */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
double max_boost;
if (cpi->drop_frames_allowed)
{
- int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100);
+ int df_buffer_level = (int)(cpi->oxcf.drop_frames_water_mark
+ * (cpi->oxcf.optimal_buffer_level / 100));
if (cpi->buffer_level > df_buffer_level)
max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth);
@@ -2919,18 +3048,18 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
boost_score = max_boost;
}
- // Reset the first pass file position
+ /* Reset the first pass file position */
reset_fpf_position(cpi, start_position);
- // Work out how many bits to allocate for the key frame itself
+ /* Work out how many bits to allocate for the key frame itself */
if (1)
{
- int kf_boost = boost_score;
+ int kf_boost = (int)boost_score;
int allocation_chunks;
int Counter = cpi->twopass.frames_to_key;
int alt_kf_bits;
YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
- // Min boost based on kf interval
+ /* Min boost based on kf interval */
#if 0
while ((kf_boost < 48) && (Counter > 0))
@@ -2948,32 +3077,33 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (kf_boost > 48) kf_boost = 48;
}
- // bigger frame sizes need larger kf boosts, smaller frames smaller boosts...
+ /* bigger frame sizes need larger kf boosts, smaller frames smaller
+ * boosts...
+ */
if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240))
kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240);
else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240))
kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height);
- kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100
-
- // Adjustment to boost based on recent average q
- //kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100;
-
- if (kf_boost < 250) // Min KF boost
+ /* Min KF boost */
+ kf_boost = (int)((double)kf_boost * 100.0) >> 4; /* Scale 16 to 100 */
+ if (kf_boost < 250)
kf_boost = 250;
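
Worked example of the rescale above: kf_boost accumulates on a 16-point-per-frame scale, so (kf_boost * 100) >> 4 multiplies by 100/16. A raw boost of 48 becomes 4800 >> 4 = 300 and clears the floor, while a raw boost of 32 becomes 200 and is clamped up to the 250 minimum.
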
- // We do three calculations for kf size.
- // The first is based on the error score for the whole kf group.
- // The second (optionaly) on the key frames own error if this is
- // smaller than the average for the group.
- // The final one insures that the frame receives at least the
- // allocation it would have received based on its own error score vs
- // the error score remaining
- // Special case if the sequence appears almost totaly static
- // as measured by the decay accumulator. In this case we want to
- // spend almost all of the bits on the key frame.
- // cpi->twopass.frames_to_key-1 because key frame itself is taken
- // care of by kf_boost.
+        /*
+         * We do three calculations for kf size.
+         * The first is based on the error score for the whole kf group.
+         * The second (optionally) on the key frame's own error if this is
+         * smaller than the average for the group.
+         * The final one ensures that the frame receives at least the
+         * allocation it would have received based on its own error score
+         * vs the error score remaining.
+         * Special case if the sequence appears almost totally static
+         * as measured by the decay accumulator. In this case we want to
+         * spend almost all of the bits on the key frame.
+         * cpi->twopass.frames_to_key-1 because the key frame itself is
+         * taken care of by kf_boost.
+         */
if ( decay_accumulator >= 0.99 )
{
allocation_chunks =
@@ -2985,7 +3115,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
((cpi->twopass.frames_to_key - 1) * 100) + kf_boost;
}
- // Normalize Altboost and allocations chunck down to prevent overflow
+        /* Normalize kf_boost and allocation chunks down to prevent overflow */
while (kf_boost > 1000)
{
kf_boost /= 2;
@@ -2994,20 +3124,21 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits;
- // Calculate the number of bits to be spent on the key frame
+ /* Calculate the number of bits to be spent on the key frame */
cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks));
- // Apply an additional limit for CBR
+ /* Apply an additional limit for CBR */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- if (cpi->twopass.kf_bits > ((3 * cpi->buffer_level) >> 2))
- cpi->twopass.kf_bits = (3 * cpi->buffer_level) >> 2;
+ if (cpi->twopass.kf_bits > (int)((3 * cpi->buffer_level) >> 2))
+ cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2);
}
- // If the key frame is actually easier than the average for the
- // kf group (which does sometimes happen... eg a blank intro frame)
- // Then use an alternate calculation based on the kf error score
- // which should give a smaller key frame.
+        /* If the key frame is actually easier than the average for the
+         * kf group (which does sometimes happen, e.g. a blank intro frame),
+         * then use an alternate calculation based on the kf error score,
+         * which should give a smaller key frame.
+         */
if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key)
{
double alt_kf_grp_bits =
@@ -3023,9 +3154,10 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.kf_bits = alt_kf_bits;
}
}
- // Else if it is much harder than other frames in the group make sure
- // it at least receives an allocation in keeping with its relative
- // error score
+ /* Else if it is much harder than other frames in the group make sure
+ * it at least receives an allocation in keeping with its relative
+ * error score
+ */
else
{
alt_kf_bits =
@@ -3040,17 +3172,23 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits;
- cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance
+ /* Add in the minimum frame allowance */
+ cpi->twopass.kf_bits += cpi->min_frame_bandwidth;
+
+        /* Per frame bit target for this frame */
+ cpi->per_frame_bandwidth = cpi->twopass.kf_bits;
- cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame
- cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate; // Convert to a per second bitrate
+ /* Convert to a per second bitrate */
+ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
+ cpi->output_frame_rate);
}
- // Note the total error score of the kf group minus the key frame itself
+ /* Note the total error score of the kf group minus the key frame itself */
cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err);
- // Adjust the count of total modified error left.
- // The count of bits left is adjusted elsewhere based on real coded frame sizes
+ /* Adjust the count of total modified error left. The count of bits left
+ * is adjusted elsewhere based on real coded frame sizes
+ */
cpi->twopass.modified_error_left -= kf_group_err;
if (cpi->oxcf.allow_spatial_resampling)
@@ -3063,7 +3201,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int new_width = cpi->oxcf.Width;
int new_height = cpi->oxcf.Height;
- int projected_buffer_level = cpi->buffer_level;
+ int projected_buffer_level = (int)cpi->buffer_level;
int tmp_q;
double projected_bits_perframe;
@@ -3076,40 +3214,47 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height))
last_kf_resampled = 1;
- // Set back to unscaled by defaults
+ /* Set back to unscaled by defaults */
cpi->common.horiz_scale = NORMAL;
cpi->common.vert_scale = NORMAL;
- // Calculate Average bits per frame.
- //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
+ /* Calculate Average bits per frame. */
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
- //if ( av_bits_per_frame < 0.0 )
- // av_bits_per_frame = 0.0
- // CBR... Use the clip average as the target for deciding resample
+ /* CBR... Use the clip average as the target for deciding resample */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
bits_per_frame = av_bits_per_frame;
}
- // In VBR we want to avoid downsampling in easy section unless we are under extreme pressure
- // So use the larger of target bitrate for this sectoion or average bitrate for sequence
+            /* In VBR we want to avoid downsampling in easy sections unless
+             * we are under extreme pressure, so use the larger of the target
+             * bitrate for this section or the average bitrate for the
+             * sequence
+             */
else
{
- bits_per_frame = cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key; // This accounts for how hard the section is...
+ /* This accounts for how hard the section is... */
+ bits_per_frame = (double)
+ (cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key);
- if (bits_per_frame < av_bits_per_frame) // Dont turn to resampling in easy sections just because they have been assigned a small number of bits
+                /* Don't turn to resampling in easy sections just because
+                 * they have been assigned a small number of bits
+                 */
+ if (bits_per_frame < av_bits_per_frame)
bits_per_frame = av_bits_per_frame;
}
- // bits_per_frame should comply with our minimum
+ /* bits_per_frame should comply with our minimum */
if (bits_per_frame < (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100))
bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
- // Work out if spatial resampling is necessary
- kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio);
+ /* Work out if spatial resampling is necessary */
+ kf_q = estimate_kf_group_q(cpi, err_per_frame,
+ (int)bits_per_frame, group_iiratio);
- // If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section
+ /* If we project a required Q higher than the maximum allowed Q then
+ * make a guess at the actual size of frames in this section
+ */
projected_bits_perframe = bits_per_frame;
tmp_q = kf_q;
@@ -3119,8 +3264,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
tmp_q--;
}
- // Guess at buffer level at the end of the section
- projected_buffer_level = cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key);
+ /* Guess at buffer level at the end of the section */
+ projected_buffer_level = (int)
+ (cpi->buffer_level - (int)
+ ((projected_bits_perframe - av_bits_per_frame) *
+ cpi->twopass.frames_to_key));
if (0)
{
@@ -3129,15 +3277,17 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
fclose(f);
}
- // The trigger for spatial resampling depends on the various parameters such as whether we are streaming (CBR) or VBR.
+ /* The trigger for spatial resampling depends on the various
+ * parameters such as whether we are streaming (CBR) or VBR.
+ */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- // Trigger resample if we are projected to fall below down sample level or
- // resampled last time and are projected to remain below the up sample level
+                /* Trigger resample if we are projected to fall below the
+                 * down sample level, or we resampled last time and are
+                 * projected to remain below the up sample level
+                 */
if ((projected_buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) ||
(last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))))
- //( ((cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))) &&
- // ((projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))) ))
resample_trigger = 1;
else
resample_trigger = 0;
@@ -3147,9 +3297,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
- if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced
- ((kf_q > cpi->worst_quality) && // Projected Q higher than allowed and ...
- (over_spend > clip_bits / 20))) // ... Overspend > 5% of total bits
+                /* Trigger if the projected Q is higher than allowed and
+                 * the overspend exceeds 5% of the total bits, or, if we
+                 * resampled last time, on the Q test alone (the threshold
+                 * for triggering again is reduced).
+                 */
+ if ((last_kf_resampled && (kf_q > cpi->worst_quality)) ||
+ ((kf_q > cpi->worst_quality) &&
+ (over_spend > clip_bits / 20)))
resample_trigger = 1;
else
resample_trigger = 0;
@@ -3171,13 +3327,19 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs;
new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs;
- // Reducing the area to 1/4 does not reduce the complexity (err_per_frame) to 1/4...
- // effective_sizeratio attempts to provide a crude correction for this
+                    /* Reducing the area to 1/4 does not reduce the complexity
+                     * (err_per_frame) to 1/4... effective_size_ratio attempts
+                     * to provide a crude correction for this
+                     */
effective_size_ratio = (double)(new_width * new_height) / (double)(cpi->oxcf.Width * cpi->oxcf.Height);
effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0;
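
Worked example: halving both dimensions gives a raw area ratio of 0.25, which the correction maps to (1.0 + 3 * 0.25) / 4 = 0.4375, reflecting that per-frame error shrinks more slowly than the pixel count.
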
- // Now try again and see what Q we get with the smaller image size
- kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio);
+ /* Now try again and see what Q we get with the smaller
+ * image size
+ */
+ kf_q = estimate_kf_group_q(cpi,
+ err_per_frame * effective_size_ratio,
+ (int)bits_per_frame, group_iiratio);
if (0)
{
diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c
index 4c92281..ce2ce08 100644
--- a/vp8/encoder/lookahead.c
+++ b/vp8/encoder/lookahead.c
@@ -118,10 +118,11 @@ vp8_lookahead_push(struct lookahead_ctx *ctx,
ctx->sz++;
buf = pop(ctx, &ctx->write_idx);
- // Only do this partial copy if the following conditions are all met:
- // 1. Lookahead queue has has size of 1.
- // 2. Active map is provided.
- // 3. This is not a key frame, golden nor altref frame.
+    /* Only do this partial copy if the following conditions are all met:
+     * 1. Lookahead queue has a size of 1.
+     * 2. Active map is provided.
+     * 3. This is not a key frame, golden frame, nor altref frame.
+     */
if (ctx->max_sz == 1 && active_map && !flags)
{
for (row = 0; row < mb_rows; ++row)
@@ -130,18 +131,18 @@ vp8_lookahead_push(struct lookahead_ctx *ctx,
while (1)
{
- // Find the first active macroblock in this row.
+ /* Find the first active macroblock in this row. */
for (; col < mb_cols; ++col)
{
if (active_map[col])
break;
}
- // No more active macroblock in this row.
+ /* No more active macroblock in this row. */
if (col == mb_cols)
break;
- // Find the end of active region in this row.
+ /* Find the end of active region in this row. */
active_end = col;
for (; active_end < mb_cols; ++active_end)
@@ -150,13 +151,13 @@ vp8_lookahead_push(struct lookahead_ctx *ctx,
break;
}
- // Only copy this active region.
+ /* Only copy this active region. */
vp8_copy_and_extend_frame_with_rect(src, &buf->img,
row << 4,
col << 4, 16,
(active_end - col) << 4);
- // Start again from the end of this active region.
+ /* Start again from the end of this active region. */
col = active_end;
}
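
The row scan above is plain run detection over the active map. A standalone sketch that prints the regions the encoder would copy (map contents are illustrative):

    #include <stdio.h>

    int main(void)
    {
        unsigned char active_map[8] = {0, 1, 1, 0, 0, 1, 1, 1};
        int mb_cols = 8;
        int col = 0;

        while (1)
        {
            int active_end;

            /* find the first active macroblock in this row */
            while (col < mb_cols && !active_map[col])
                ++col;
            if (col == mb_cols)
                break;

            /* find the end of this active region */
            active_end = col;
            while (active_end < mb_cols && active_map[active_end])
                ++active_end;

            /* the encoder copies only [col, active_end) of this row */
            printf("copy cols %d..%d\n", col, active_end - 1);
            col = active_end;
        }
        return 0;
    }
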
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 67e4f7e..b08c7a5 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -25,26 +25,35 @@ static int mv_mode_cts [4] [2];
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
- // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
- // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
- // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
- // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
+    /* MV costing is based on the distribution of vectors in the previous
+     * frame and as such will tend to overstate the cost of vectors. In
+     * addition, coding a new vector can have a knock-on effect on the
+     * cost of subsequent vectors and the quality of prediction from NEAR
+     * and NEAREST for subsequent blocks. The "Weight" parameter allows,
+     * to a limited extent, for some account to be taken of these factors.
+     */
return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
}
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
- * error_per_bit + 128) >> 8;
+ /* Ignore mv costing if mvcost is NULL */
+ if (mvcost)
+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
+ mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
+ * error_per_bit + 128) >> 8;
+ return 0;
}
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
/* Calculate sad error cost on full pixel basis. */
- return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
- mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
- * error_per_bit + 128) >> 8;
+ /* Ignore mv costing if mvsadcost is NULL */
+ if (mvsadcost)
+ return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
+ mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
+ * error_per_bit + 128) >> 8;
+ return 0;
}
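
Both helpers share the same fixed-point formula: table lookups on the row and column deltas, weighted by error_per_bit, with +128 for rounding before the >> 8. A standalone sketch of the guarded form of mv_err_cost introduced here (table contents come from the caller; names are illustrative):

    static int sketch_mv_err_cost(int row_delta, int col_delta,
                                  int *cost_tbl[2], int error_per_bit)
    {
        /* the v1.2.0 change: with no cost tables, mv costing is skipped */
        if (!cost_tbl)
            return 0;

        /* quarter-pel deltas indexed at half-pel resolution, Q8 rounding */
        return ((cost_tbl[0][row_delta >> 1] + cost_tbl[1][col_delta >> 1])
                * error_per_bit + 128) >> 8;
    }
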
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
@@ -53,7 +62,7 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
int search_site_count = 0;
- // Generate offsets for 4 search sites per step.
+ /* Generate offsets for 4 search sites per step. */
Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
@@ -63,31 +72,31 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
while (Len > 0)
{
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = -Len;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = Len;
search_site_count++;
- // Contract.
+ /* Contract. */
Len /= 2;
}
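
Each pass of the loop above appends four compass offsets (up, down, left, right) at the current radius, then halves the radius. A standalone sketch printing the resulting diamond pattern (stride and starting step are illustrative):

    #include <stdio.h>

    int main(void)
    {
        int stride = 32;       /* illustrative buffer stride */
        int Len = 16;          /* stands in for MAX_FIRST_STEP */

        while (Len > 0)
        {
            /* up, down, left, right of the current center */
            printf("step %2d: offsets %d %d %d %d\n", Len,
                   -Len * stride, Len * stride, -Len, Len);
            Len /= 2;
        }
        return 0;
    }
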
@@ -100,7 +109,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
int Len;
int search_site_count = 0;
- // Generate offsets for 8 search sites per step.
+ /* Generate offsets for 8 search sites per step. */
Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
@@ -110,56 +119,56 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
while (Len > 0)
{
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = -Len;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = Len;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride - Len;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride + Len;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride - Len;
search_site_count++;
- // Compute offsets for search sites.
+ /* Compute offsets for search sites. */
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride + Len;
search_site_count++;
- // Contract.
+ /* Contract. */
Len /= 2;
}
@@ -176,13 +185,20 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
* 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
* could reduce the area.
*/
-#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
-#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
+
+/* estimated cost of a motion vector (r,c) */
+#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
+/* pointer to predictor base of a motionvector */
+#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
+/* convert motion vector component to offset for svf calc */
+#define SP(x) (((x)&3)<<1)
+/* returns the subpixel variance error at (r,c). */
+#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
-#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+/* returns distortion + motion vector cost */
+#define ERR(r,c) (MVC(r,c)+DIST(r,c))
+/* checks if (r,c) has better score than previous best */
+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
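
Expanded, a single CHECK_BETTER probe is a bounds check followed by a cost evaluation and a conditional best-update; out-of-range probes report UINT_MAX so they can never win. A standalone sketch of that shape (dist() stands in for the DIST()+MVC() evaluation; names are illustrative):

    #include <limits.h>

    static unsigned int check_better(int r, int c,
                                     int minr, int maxr, int minc, int maxc,
                                     unsigned int (*dist)(int r, int c),
                                     unsigned int *besterr, int *br, int *bc)
    {
        unsigned int v = UINT_MAX;   /* out-of-range probes never win */

        if (c >= minc && c <= maxc && r >= minr && r <= maxr)
        {
            v = dist(r, c);          /* distortion plus mv cost */
            if (v < *besterr)
            {
                *besterr = v;        /* new best position */
                *br = r;
                *bc = c;
            }
        }
        return v;
    }
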
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
@@ -196,7 +212,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
int tr = br, tc = bc;
- unsigned int besterr = INT_MAX;
+ unsigned int besterr;
unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
@@ -221,7 +237,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
unsigned char *y;
int buf_r1, buf_r2, buf_c1, buf_c2;
- // Clamping to avoid out-of-range data access
+ /* Clamping to avoid out-of-range data access */
buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
@@ -238,19 +254,21 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
- // central mv
+ /* central mv */
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
- // calculate central point error
+ /* calculate central point error */
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
+ /* TODO: Each subsequent iteration checks at least one point in common
+ * with the last iteration; it could be 2 (if diag selected).
+ */
while (--halfiters)
{
- // 1/2 pel
+ /* 1/2 pel */
CHECK_BETTER(left, tr, tc - 2);
CHECK_BETTER(right, tr, tc + 2);
CHECK_BETTER(up, tr - 2, tc);
@@ -274,7 +292,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
break;
}
- // no reason to check the same one again.
+ /* no reason to check the same one again. */
if (tr == br && tc == bc)
break;
@@ -282,8 +300,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
tc = bc;
}
- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
- // 1/4 pel
+ /* TODO: Each subsequent iteration checks at least one point in common
+ * with the last iteration; it could be 2 (if diag selected).
+ */
+
+ /* 1/4 pel */
while (--quarteriters)
{
CHECK_BETTER(left, tr, tc - 1);
@@ -309,7 +330,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
break;
}
- // no reason to check the same one again.
+ /* no reason to check the same one again. */
if (tr == br && tc == bc)
break;
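
Both refinement loops follow the same shape: probe the four axis-aligned neighbours of the current centre, then one diagonal picked from the best horizontal/vertical pair (whichdir), move the centre if anything improved, and stop early when it did not; the second loop repeats this at quarter-pel step. A stand-alone skeleton of that loop, diagonal omitted and names hypothetical:

    typedef unsigned int (*err_fn)(int r, int c, void *ctx);

    /* Iterative pattern refinement: 4 neighbours per pass, early exit
     * when the centre stops moving. */
    static void refine(int *br, int *bc, int step, int iters,
                       err_fn err, void *ctx)
    {
        unsigned int best = err(*br, *bc, ctx);
        while (iters--) {
            int tr = *br, tc = *bc, i;
            int dr[4], dc[4];
            dr[0] = 0;     dc[0] = -step;
            dr[1] = 0;     dc[1] =  step;
            dr[2] = -step; dc[2] = 0;
            dr[3] =  step; dc[3] = 0;
            for (i = 0; i < 4; i++) {
                unsigned int e = err(tr + dr[i], tc + dc[i], ctx);
                if (e < best) { best = e; *br = tr + dr[i]; *bc = tc + dc[i]; }
            }
            if (*br == tr && *bc == tc)
                break; /* no reason to check the same centre again */
        }
    }
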
@@ -367,17 +388,17 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
y_stride = pre_stride;
#endif
- // central mv
+ /* central mv */
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
startmv = *bestmv;
- // calculate central point error
+ /* calculate central point error */
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
- // go left then right and check error
+ /* go left then right and check error */
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
@@ -403,7 +424,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
- // go up then down and check error
+ /* go up then down and check error */
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
@@ -430,10 +451,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
- // now check 1 more diagonal
+ /* now check 1 more diagonal */
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- //for(whichdir =0;whichdir<4;whichdir++)
- //{
this_mv = startmv;
switch (whichdir)
@@ -471,10 +490,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
-// }
-
- // time to check quarter pels.
+ /* time to check quarter pels. */
if (bestmv->as_mv.row < startmv.as_mv.row)
y -= y_stride;
@@ -485,7 +502,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
- // go left then right and check error
+ /* go left then right and check error */
this_mv.as_mv.row = startmv.as_mv.row;
if (startmv.as_mv.col & 7)
@@ -521,7 +538,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
- // go up then down and check error
+ /* go up then down and check error */
this_mv.as_mv.col = startmv.as_mv.col;
if (startmv.as_mv.row & 7)
@@ -558,11 +575,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
- // now check 1 more diagonal
+ /* now check 1 more diagonal */
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-// for(whichdir=0;whichdir<4;whichdir++)
-// {
this_mv = startmv;
switch (whichdir)
@@ -684,17 +699,17 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
y_stride = pre_stride;
#endif
- // central mv
+ /* central mv */
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
startmv = *bestmv;
- // calculate central point error
+ /* calculate central point error */
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
- // go left then right and check error
+ /* go left then right and check error */
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
@@ -720,7 +735,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
- // go up then down and check error
+ /* go up then down and check error */
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
@@ -746,7 +761,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
- // now check 1 more diagonal -
+ /* now check 1 more diagonal - */
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
this_mv = startmv;
@@ -855,7 +870,7 @@ int vp8_hex_search
int in_what_stride = pre_stride;
int br, bc;
int_mv this_mv;
- unsigned int bestsad = 0x7fffffff;
+ unsigned int bestsad;
unsigned int thissad;
unsigned char *base_offset;
unsigned char *this_offset;
@@ -869,18 +884,17 @@ int vp8_hex_search
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- // adjust ref_mv to make sure it is within MV range
+ /* adjust ref_mv to make sure it is within MV range */
vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
br = ref_mv->as_mv.row;
bc = ref_mv->as_mv.col;
- // Work out the start point for the search
+ /* Work out the start point for the search */
base_offset = (unsigned char *)(base_pre + d->offset);
this_offset = base_offset + (br * (pre_stride)) + bc;
this_mv.as_mv.row = br;
this_mv.as_mv.col = bc;
- bestsad = vfp->sdf( what, what_stride, this_offset,
- in_what_stride, 0x7fffffff)
+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
+ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
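
The 0x7fffffff to UINT_MAX changes throughout this file are not cosmetic: the sdf function pointer takes the best SAD so far as an early-termination threshold, and keeping that threshold in the unsigned domain the SADs actually live in lets the very first call impose no effective limit. A scalar illustration of the convention; real sdf implementations are SIMD, and the signature here is only indicative:

    /* Early-exit SAD: abandon a candidate once it can no longer win. */
    static unsigned int sad16x16_sketch(const unsigned char *src, int src_stride,
                                        const unsigned char *ref, int ref_stride,
                                        unsigned int best_sad)
    {
        unsigned int sad = 0;
        int r, c;
        for (r = 0; r < 16; r++) {
            for (c = 0; c < 16; c++) {
                int d = src[c] - ref[c];
                sad += (unsigned int)(d < 0 ? -d : d);
            }
            if (sad >= best_sad)
                return sad; /* already worse than the current best */
            src += src_stride;
            ref += ref_stride;
        }
        return sad;
    }
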
#if CONFIG_MULTI_RES_ENCODING
@@ -895,8 +909,7 @@ int vp8_hex_search
dia_range = 8;
#endif
- // hex search
- //j=0
+ /* hex search */
CHECK_BOUNDS(2)
if(all_in)
@@ -906,7 +919,7 @@ int vp8_hex_search
this_mv.as_mv.row = br + hex[i].row;
this_mv.as_mv.col = bc + hex[i].col;
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
CHECK_BETTER
}
}else
@@ -917,7 +930,7 @@ int vp8_hex_search
this_mv.as_mv.col = bc + hex[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
CHECK_BETTER
}
}
@@ -943,7 +956,7 @@ int vp8_hex_search
this_mv.as_mv.row = br + next_chkpts[k][i].row;
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
CHECK_BETTER
}
}else
@@ -954,7 +967,7 @@ int vp8_hex_search
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
CHECK_BETTER
}
}
@@ -971,7 +984,7 @@ int vp8_hex_search
}
}
- // check 4 1-away neighbors
+ /* check 4 1-away neighbors */
cal_neighbors:
for (j = 0; j < dia_range; j++)
{
@@ -985,7 +998,7 @@ cal_neighbors:
this_mv.as_mv.row = br + neighbors[i].row;
this_mv.as_mv.col = bc + neighbors[i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
CHECK_BETTER
}
}else
@@ -996,7 +1009,7 @@ cal_neighbors:
this_mv.as_mv.col = bc + neighbors[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
CHECK_BETTER
}
}
@@ -1047,7 +1060,8 @@ int vp8_diamond_search_sad_c
int tot_steps;
int_mv this_mv;
- int bestsad = INT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
int best_site = 0;
int last_site = 0;
@@ -1058,10 +1072,12 @@ int vp8_diamond_search_sad_c
search_site *ss;
unsigned char *check_here;
- int thissad;
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
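
Moving the mvsadcost initializers out of the declaration recurs in every search function below; the likely motivation is C89 portability, since C89 requires the initializer list of an automatic aggregate to consist of constant expressions, which x->mvsadcost[0] is not. A minimal illustration of the rule:

    /* C89: automatic aggregate initializers must be constant expressions. */
    static void c89_example(int *a, int *b)
    {
        /* int *p[2] = { a, b };    not valid C89 (a, b are runtime values) */
        int *p[2];
        p[0] = a; /* plain assignments are fine everywhere */
        p[1] = b;
        (void)p;
    }
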
@@ -1072,17 +1088,18 @@ int vp8_diamond_search_sad_c
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
- // Work out the start point for the search
+ /* Work out the start point for the search */
in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
best_address = in_what;
- // Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ /* Check the starting position */
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
- // search_param determines the length of the initial step and hence the number of iterations
- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
+ /* search_param determines the length of the initial step and hence
+ * the number of iterations: 0 = initial step (MAX_FIRST_STEP) pel,
+ * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
+ */
ss = &x->ss[search_param * x->searches_per_step];
tot_steps = (x->ss_count / x->searches_per_step) - search_param;
@@ -1092,7 +1109,7 @@ int vp8_diamond_search_sad_c
{
for (j = 0 ; j < x->searches_per_step ; j++)
{
- // Trap illegal vectors
+ /* Trap illegal vectors */
this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
@@ -1101,14 +1118,14 @@ int vp8_diamond_search_sad_c
{
check_here = ss[i].offset + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
if (thissad < bestsad)
{
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ mvsadcost, sad_per_bit);
if (thissad < bestsad)
{
@@ -1135,11 +1152,8 @@ int vp8_diamond_search_sad_c
this_mv.as_mv.row = best_mv->as_mv.row << 3;
this_mv.as_mv.col = best_mv->as_mv.col << 3;
- if (bestsad == INT_MAX)
- return INT_MAX;
-
- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
int vp8_diamond_search_sadx4
@@ -1170,7 +1184,8 @@ int vp8_diamond_search_sadx4
int tot_steps;
int_mv this_mv;
- unsigned int bestsad = UINT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
int best_site = 0;
int last_site = 0;
@@ -1181,10 +1196,12 @@ int vp8_diamond_search_sadx4
search_site *ss;
unsigned char *check_here;
- unsigned int thissad;
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1195,17 +1212,18 @@ int vp8_diamond_search_sadx4
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
- // Work out the start point for the search
+ /* Work out the start point for the search */
in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
best_address = in_what;
- // Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride,
- in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ /* Check the starting position */
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
- // search_param determines the length of the initial step and hence the number of iterations
- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
+ /* search_param determines the length of the initial step and hence the
+ * number of iterations: 0 = initial step (MAX_FIRST_STEP) pel, 1 =
+ * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
+ */
ss = &x->ss[search_param * x->searches_per_step];
tot_steps = (x->ss_count / x->searches_per_step) - search_param;
@@ -1215,8 +1233,10 @@ int vp8_diamond_search_sadx4
{
int all_in = 1, t;
- // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
- // checking 4 bounds for each points.
+ /* To know whether all neighbor points are within the bounds, checking
+ * these 4 bounds is enough instead of checking 4 bounds for each
+ * point.
+ */
all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
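
The all_in shortcut rests on the symmetry of each diamond stage: if the extreme row and column displacements among the stage's sites stay inside the motion-vector range, every site does, so one four-way test replaces four per-candidate clamps and unlocks the batched sdx4f path below. Schematically, with a hypothetical helper:

    /* If the extremes of a symmetric pattern are in range, all points are. */
    static int pattern_all_in(int row, int col, int len,
                              int rmin, int rmax, int cmin, int cmax)
    {
        return (row - len) > rmin && (row + len) < rmax &&
               (col - len) > cmin && (col + len) < cmax;
    }
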
@@ -1228,7 +1248,7 @@ int vp8_diamond_search_sadx4
for (j = 0 ; j < x->searches_per_step ; j += 4)
{
- unsigned char *block_offset[4];
+ const unsigned char *block_offset[4];
for (t = 0; t < 4; t++)
block_offset[t] = ss[i+t].offset + best_address;
@@ -1257,7 +1277,7 @@ int vp8_diamond_search_sadx4
{
for (j = 0 ; j < x->searches_per_step ; j++)
{
- // Trap illegal vectors
+ /* Trap illegal vectors */
this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
@@ -1265,14 +1285,14 @@ int vp8_diamond_search_sadx4
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = ss[i].offset + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
if (thissad < bestsad)
{
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ mvsadcost, sad_per_bit);
if (thissad < bestsad)
{
@@ -1299,11 +1319,8 @@ int vp8_diamond_search_sadx4
this_mv.as_mv.row = best_mv->as_mv.row << 3;
this_mv.as_mv.col = best_mv->as_mv.col << 3;
- if (bestsad == INT_MAX)
- return INT_MAX;
-
- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
@@ -1321,11 +1338,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned char *bestaddress;
int_mv *best_mv = &d->bmi.mv;
int_mv this_mv;
- int bestsad = INT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
int r, c;
unsigned char *check_here;
- int thissad;
int ref_row = ref_mv->as_mv.row;
int ref_col = ref_mv->as_mv.col;
@@ -1335,24 +1352,29 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance;
int col_max = ref_col + distance;
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- // Work out the mid point for the search
+ /* Work out the mid point for the search */
in_what = base_pre + d->offset;
bestaddress = in_what + (ref_row * pre_stride) + ref_col;
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
- // Baseline value at the centre
+ /* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ in_what_stride, UINT_MAX)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
+ /* Apply further limits to prevent us from using vectors that
+ * stretch beyond the UMV border
+ */
if (col_min < x->mv_col_min)
col_min = x->mv_col_min;
@@ -1372,11 +1394,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
for (c = col_min; c < col_max; c++)
{
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvsadcost, sad_per_bit);
if (thissad < bestsad)
{
@@ -1393,11 +1415,8 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
this_mv.as_mv.row = best_mv->as_mv.row << 3;
this_mv.as_mv.col = best_mv->as_mv.col << 3;
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
- else
- return INT_MAX;
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
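
The deleted INT_MAX guards were dead code once bestsad is seeded from a real SAD at the search centre: the loop can only lower it, so a valid best always exists on exit and the return becomes unconditional; the (unsigned int *) cast also disappears because thissad is now declared unsigned. The same simplification is applied uniformly to the search functions in this file. In miniature:

    /* Seeding 'best' from a real candidate removes the sentinel check. */
    static unsigned int best_of(const unsigned int *err, int n, int *best_i)
    {
        unsigned int best = err[0]; /* centre value, not UINT_MAX */
        int i;
        *best_i = 0;
        for (i = 1; i < n; i++)
            if (err[i] < best) { best = err[i]; *best_i = i; }
        return best; /* always a real error value */
    }
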
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
@@ -1415,11 +1434,11 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned char *bestaddress;
int_mv *best_mv = &d->bmi.mv;
int_mv this_mv;
- unsigned int bestsad = UINT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
int r, c;
unsigned char *check_here;
- unsigned int thissad;
int ref_row = ref_mv->as_mv.row;
int ref_col = ref_mv->as_mv.col;
@@ -1431,24 +1450,29 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned int sad_array[3];
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- // Work out the mid point for the search
+ /* Work out the mid point for the search */
in_what = base_pre + d->offset;
bestaddress = in_what + (ref_row * pre_stride) + ref_col;
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
- // Baseline value at the centre
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ /* Baseline value at the centre */
+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
+ in_what_stride, UINT_MAX)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
+ /* Apply further limits to prevent us looking using vectors that stretch
+ * beyond the UMV border
+ */
if (col_min < x->mv_col_min)
col_min = x->mv_col_min;
@@ -1471,7 +1495,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
{
int i;
- fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
+ fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
for (i = 0; i < 3; i++)
{
@@ -1480,8 +1504,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
if (thissad < bestsad)
{
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvsadcost, sad_per_bit);
if (thissad < bestsad)
{
@@ -1499,13 +1523,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
while (c < col_max)
{
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
if (thissad < bestsad)
{
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvsadcost, sad_per_bit);
if (thissad < bestsad)
{
@@ -1525,11 +1549,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
this_mv.as_mv.row = best_mv->as_mv.row << 3;
this_mv.as_mv.col = best_mv->as_mv.col << 3;
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
- else
- return INT_MAX;
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
@@ -1547,11 +1568,11 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned char *bestaddress;
int_mv *best_mv = &d->bmi.mv;
int_mv this_mv;
- unsigned int bestsad = UINT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
int r, c;
unsigned char *check_here;
- unsigned int thissad;
int ref_row = ref_mv->as_mv.row;
int ref_col = ref_mv->as_mv.col;
@@ -1564,24 +1585,29 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
unsigned int sad_array[3];
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- // Work out the mid point for the search
+ /* Work out the mid point for the search */
in_what = base_pre + d->offset;
bestaddress = in_what + (ref_row * pre_stride) + ref_col;
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
- // Baseline value at the centre
+ /* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ bestaddress, in_what_stride, UINT_MAX)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
+ /* Apply further limits to prevent us looking using vectors that stretch
+ * beyond the UMV border
+ */
if (col_min < x->mv_col_min)
col_min = x->mv_col_min;
@@ -1604,17 +1630,17 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
{
int i;
- fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
+ fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
for (i = 0; i < 8; i++)
{
- thissad = (unsigned int)sad_array8[i];
+ thissad = sad_array8[i];
if (thissad < bestsad)
{
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvsadcost, sad_per_bit);
if (thissad < bestsad)
{
@@ -1687,11 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
this_mv.as_mv.row = best_mv->as_mv.row << 3;
this_mv.as_mv.col = best_mv->as_mv.col << 3;
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
- else
- return INT_MAX;
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
@@ -1711,17 +1734,21 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
(ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
unsigned char *check_here;
- unsigned int thissad;
int_mv this_mv;
- unsigned int bestsad = INT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, UINT_MAX)
+ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
{
@@ -1766,11 +1793,8 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
this_mv.as_mv.row = ref_mv->as_mv.row << 3;
this_mv.as_mv.col = ref_mv->as_mv.col << 3;
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
- else
- return INT_MAX;
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
@@ -1790,17 +1814,21 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
(ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
unsigned char *check_here;
- unsigned int thissad;
int_mv this_mv;
- unsigned int bestsad = INT_MAX;
+ unsigned int bestsad;
+ unsigned int thissad;
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+ int *mvsadcost[2];
int_mv fcenter_mv;
+ mvsadcost[0] = x->mvsadcost[0];
+ mvsadcost[1] = x->mvsadcost[1];
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, UINT_MAX)
+ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
{
@@ -1815,7 +1843,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if(all_in)
{
unsigned int sad_array[4];
- unsigned char *block_offset[4];
+ const unsigned char *block_offset[4];
block_offset[0] = best_address - in_what_stride;
block_offset[1] = best_address - 1;
block_offset[2] = best_address + 1;
@@ -1881,11 +1909,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
this_mv.as_mv.row = ref_mv->as_mv.row << 3;
this_mv.as_mv.col = ref_mv->as_mv.col << 3;
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
- else
- return INT_MAX;
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
#ifdef ENTROPY_STATS
@@ -1900,16 +1925,16 @@ void print_mode_context(void)
for (j = 0; j < 6; j++)
{
- fprintf(f, " { // %d \n", j);
+ fprintf(f, " { /* %d */\n", j);
fprintf(f, " ");
for (i = 0; i < 4; i++)
{
int overal_prob;
int this_prob;
- int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
+ int count;
- // Overall probs
+ /* Overall probs */
count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
if (count)
@@ -1920,7 +1945,7 @@ void print_mode_context(void)
if (overal_prob == 0)
overal_prob = 1;
- // context probs
+ /* context probs */
count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
if (count)
@@ -1932,8 +1957,6 @@ void print_mode_context(void)
this_prob = 1;
fprintf(f, "%5d, ", this_prob);
- //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
- //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
}
fprintf(f, " },\n");
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index cdb0cb6..890113f 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -21,9 +21,16 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
#endif
-#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units
-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
+/* The maximum number of steps in a step search given the largest allowed
+ * initial step
+ */
+#define MAX_MVSEARCH_STEPS 8
+
+/* Max full pel mv specified in 1 pel units */
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
+
+/* Maximum size of the first step in full pel units */
+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
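
For the default MAX_MVSEARCH_STEPS of 8, these expand to MAX_FULL_PEL_VAL = (1 << 8) - 1 = 255 and MAX_FIRST_STEP = 1 << 7 = 128; halving the step once per stage from 128 yields exactly the 8 stages of the step search. A quick stand-alone check:

    #include <assert.h>

    #define MAX_MVSEARCH_STEPS 8
    #define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
    #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))

    int main(void)
    {
        assert(MAX_FULL_PEL_VAL == 255); /* max full-pel MV, 1-pel units */
        assert(MAX_FIRST_STEP == 128);   /* first step of the step search */
        return 0;
    }
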
extern void print_mode_context(void);
extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight);
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index c636c48..c61563c 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -18,6 +18,8 @@
void vp8_init_mode_costs(VP8_COMP *c)
{
VP8_COMMON *x = &c->common;
+ struct rd_costs_struct *rd_costs = &c->rd_costs;
+
{
const vp8_tree_p T = vp8_bmode_tree;
@@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c)
do
{
- vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], x->kf_bmode_prob[i][j], T);
+ vp8_cost_tokens(rd_costs->bmode_costs[i][j],
+ vp8_kf_bmode_prob[i][j], T);
}
while (++j < VP8_BINTRAMODES);
}
while (++i < VP8_BINTRAMODES);
- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T);
+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T);
}
- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree);
+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob,
+ vp8_sub_mv_ref_tree);
- vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
- vp8_cost_tokens(c->mb.mbmode_cost[0], x->kf_ymode_prob, vp8_kf_ymode_tree);
+ vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
+ vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob,
+ vp8_kf_ymode_tree);
- vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree);
- vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob, vp8_uv_mode_tree);
+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob,
+ vp8_uv_mode_tree);
+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob,
+ vp8_uv_mode_tree);
}
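
vp8_cost_tokens walks a coding tree and converts each branch probability into a table of per-symbol bit costs, so mode decisions can add a rate term without touching the entropy coder; the change above routes those tables through the new cpi->rd_costs struct and the global vp8_kf_* key-frame tables instead of fields on MACROBLOCK and VP8_COMMON. A back-of-envelope analogue of the probability-to-cost mapping; the library uses a precomputed table rather than a runtime log, and the 1/256-bit unit and the prob256 range of 1..255 are assumptions here:

    #include <math.h>

    /* Approximate cost, in ~1/256-bit units, of a branch whose probability
     * of being 0 is prob256/256. Illustrative only. */
    static int cost_bit_sketch(int prob256, int taken)
    {
        double p = (taken ? 256 - prob256 : prob256) / 256.0;
        return (int)(-log2(p) * 256.0 + 0.5);
    }
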
diff --git a/vp8/encoder/mr_dissim.c b/vp8/encoder/mr_dissim.c
index 7a62a06..71218cc 100644
--- a/vp8/encoder/mr_dissim.c
+++ b/vp8/encoder/mr_dissim.c
@@ -53,6 +53,7 @@ if(x->mbmi.ref_frame !=INTRA_FRAME) \
void vp8_cal_dissimilarity(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
+ int i;
/* Note: The first row & first column in mip are outside the frame, which
* were initialized to all 0.(ref_frame, mode, mv...)
@@ -65,14 +66,25 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi)
/* Store info for show/no-show frames for supporting alt_ref.
* If parent frame is alt_ref, child has one too.
*/
+ LOWER_RES_FRAME_INFO* store_info
+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
+
+ store_info->frame_type = cm->frame_type;
+
+ if(cm->frame_type != KEY_FRAME)
+ {
+ store_info->is_frame_dropped = 0;
+ for (i = 1; i < MAX_REF_FRAMES; i++)
+ store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i];
+ }
+
if(cm->frame_type != KEY_FRAME)
{
int mb_row;
int mb_col;
/* Point to beginning of allocated MODE_INFO arrays. */
MODE_INFO *tmp = cm->mip + cm->mode_info_stride;
- LOWER_RES_INFO* store_mode_info
- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info;
+ LOWER_RES_MB_INFO* store_mode_info = store_info->mb_info;
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
{
@@ -199,3 +211,26 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi)
}
}
}
+
+/* This function is called only when the frame is dropped at the current
+ resolution level. */
+void vp8_store_drop_frame_info(VP8_COMP *cpi)
+{
+ /* If the frame is dropped in lower-resolution encoding, this information
+ is passed to the higher resolution level so that the encoder knows there
+ is no mode & motion info available.
+ */
+ if (cpi->oxcf.mr_total_resolutions > 1
+ && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1))
+ {
+ /* Store info for show/no-show frames for supporting alt_ref.
+ * If parent frame is alt_ref, child has one too.
+ */
+ LOWER_RES_FRAME_INFO* store_info
+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
+
+ /* Set frame_type to be INTER_FRAME since we won't drop key frame. */
+ store_info->frame_type = INTER_FRAME;
+ store_info->is_frame_dropped = 1;
+ }
+}
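
The flag written here is what lets a multi-resolution pipeline stay consistent when a lower resolution drops a frame: the next level up sees is_frame_dropped and knows the stored mode and motion info is stale. A hypothetical consumer on the higher-resolution side, with the struct abridged and names local to this sketch:

    /* Sketch of the higher-resolution side (fields abridged). */
    struct lower_res_frame_info_sketch {
        int frame_type;
        int is_frame_dropped;
        /* ... low_res_ref_frames[], mb_info[] ... */
    };

    static int can_use_lower_res_hints(const struct lower_res_frame_info_sketch *info)
    {
        /* A dropped lower-res frame carries no usable mode/motion hints. */
        return info != 0 && !info->is_frame_dropped;
    }
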
diff --git a/vp8/encoder/mr_dissim.h b/vp8/encoder/mr_dissim.h
index 3d2c203..f8cb135 100644
--- a/vp8/encoder/mr_dissim.h
+++ b/vp8/encoder/mr_dissim.h
@@ -15,5 +15,6 @@
extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi);
extern void vp8_cal_dissimilarity(VP8_COMP *cpi);
+extern void vp8_store_drop_frame_info(VP8_COMP *cpi);
#endif
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index cee62fa..4680f39 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8/common/onyxc_int.h"
+#include "vp8/common/blockd.h"
#include "onyx_int.h"
#include "vp8/common/systemdependent.h"
#include "quantize.h"
@@ -55,12 +56,8 @@ extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *po
extern void print_parms(VP8_CONFIG *ocf, char *filenam);
extern unsigned int vp8_get_processor_freq();
extern void print_tree_update_probs();
-extern void vp8cx_create_encoder_threads(VP8_COMP *cpi);
+extern int vp8cx_create_encoder_threads(VP8_COMP *cpi);
extern void vp8cx_remove_encoder_threads(VP8_COMP *cpi);
-#if HAVE_NEON
-extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
-extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
-#endif
int vp8_estimate_entropy_savings(VP8_COMP *cpi);
@@ -143,7 +140,7 @@ extern const int qzbin_factors[129];
extern void vp8cx_init_quantizer(VP8_COMP *cpi);
extern const int vp8cx_base_skip_false_prob[128];
-// Tables relating active max Q to active min Q
+/* Tables relating active max Q to active min Q */
static const unsigned char kf_low_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -219,9 +216,8 @@ static void save_layer_context(VP8_COMP *cpi)
{
LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer];
- // Save layer dependent coding state
+ /* Save layer dependent coding state */
lc->target_bandwidth = cpi->target_bandwidth;
- //lc->target_bandwidth = cpi->oxcf.target_bandwidth;
lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
@@ -242,7 +238,7 @@ static void save_layer_context(VP8_COMP *cpi)
lc->rate_correction_factor = cpi->rate_correction_factor;
lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor;
lc->gf_rate_correction_factor = cpi->gf_rate_correction_factor;
- lc->zbin_over_quant = cpi->zbin_over_quant;
+ lc->zbin_over_quant = cpi->mb.zbin_over_quant;
lc->inter_frame_target = cpi->inter_frame_target;
lc->total_byte_count = cpi->total_byte_count;
lc->filter_level = cpi->common.filter_level;
@@ -250,15 +246,15 @@ static void save_layer_context(VP8_COMP *cpi)
lc->last_frame_percent_intra = cpi->last_frame_percent_intra;
memcpy (lc->count_mb_ref_frame_usage,
- cpi->count_mb_ref_frame_usage,
- sizeof(cpi->count_mb_ref_frame_usage));
+ cpi->mb.count_mb_ref_frame_usage,
+ sizeof(cpi->mb.count_mb_ref_frame_usage));
}
static void restore_layer_context(VP8_COMP *cpi, const int layer)
{
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
- // Restore layer dependent coding state
+ /* Restore layer dependent coding state */
cpi->current_layer = layer;
cpi->target_bandwidth = lc->target_bandwidth;
cpi->oxcf.target_bandwidth = lc->target_bandwidth;
@@ -271,9 +267,7 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer)
cpi->buffer_level = lc->buffer_level;
cpi->bits_off_target = lc->bits_off_target;
cpi->total_actual_bits = lc->total_actual_bits;
- //cpi->worst_quality = lc->worst_quality;
cpi->active_worst_quality = lc->active_worst_quality;
- //cpi->best_quality = lc->best_quality;
cpi->active_best_quality = lc->active_best_quality;
cpi->ni_av_qi = lc->ni_av_qi;
cpi->ni_tot_qi = lc->ni_tot_qi;
@@ -282,26 +276,31 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer)
cpi->rate_correction_factor = lc->rate_correction_factor;
cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor;
cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor;
- cpi->zbin_over_quant = lc->zbin_over_quant;
+ cpi->mb.zbin_over_quant = lc->zbin_over_quant;
cpi->inter_frame_target = lc->inter_frame_target;
cpi->total_byte_count = lc->total_byte_count;
cpi->common.filter_level = lc->filter_level;
cpi->last_frame_percent_intra = lc->last_frame_percent_intra;
- memcpy (cpi->count_mb_ref_frame_usage,
+ memcpy (cpi->mb.count_mb_ref_frame_usage,
lc->count_mb_ref_frame_usage,
- sizeof(cpi->count_mb_ref_frame_usage));
+ sizeof(cpi->mb.count_mb_ref_frame_usage));
}
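
These two functions implement the context swap that temporal scalability needs: each layer keeps its own rate-control state (buffer levels, correction factors, Q bounds), which is copied into the encoder before a layer's frame is coded and copied back out afterwards; the diff also reflects fields such as zbin_over_quant and count_mb_ref_frame_usage migrating into cpi->mb. A runnable miniature of the pairing, with field names abridged:

    #include <string.h>

    struct rc_state { int buffer_level; int active_worst_quality; /* ... */ };

    struct enc_sketch {
        struct rc_state live;      /* what the encode loop reads and writes */
        struct rc_state layer[4];  /* one saved context per temporal layer */
    };

    static void restore_layer(struct enc_sketch *e, int i)
    {
        memcpy(&e->live, &e->layer[i], sizeof(e->live)); /* before encoding */
    }

    static void save_layer(struct enc_sketch *e, int i)
    {
        memcpy(&e->layer[i], &e->live, sizeof(e->live)); /* after encoding */
    }
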
static void setup_features(VP8_COMP *cpi)
{
- // Set up default state for MB feature flags
- cpi->mb.e_mbd.segmentation_enabled = 0;
- cpi->mb.e_mbd.update_mb_segmentation_map = 0;
- cpi->mb.e_mbd.update_mb_segmentation_data = 0;
- vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs));
- vpx_memset(cpi->mb.e_mbd.segment_feature_data, 0, sizeof(cpi->mb.e_mbd.segment_feature_data));
+ /* If segmentation is enabled, set the update flags */
+ if ( cpi->mb.e_mbd.segmentation_enabled )
+ {
+ cpi->mb.e_mbd.update_mb_segmentation_map = 1;
+ cpi->mb.e_mbd.update_mb_segmentation_data = 1;
+ }
+ else
+ {
+ cpi->mb.e_mbd.update_mb_segmentation_map = 0;
+ cpi->mb.e_mbd.update_mb_segmentation_data = 0;
+ }
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0;
cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
@@ -323,7 +322,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi)
vpx_free(cpi->tplist);
cpi->tplist = NULL;
- // Delete last frame MV storage buffers
+ /* Delete last frame MV storage buffers */
vpx_free(cpi->lfmv);
cpi->lfmv = 0;
@@ -333,7 +332,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi)
vpx_free(cpi->lf_ref_frame);
cpi->lf_ref_frame = 0;
- // Delete sementation map
+ /* Delete segmentation map */
vpx_free(cpi->segmentation_map);
cpi->segmentation_map = 0;
@@ -349,53 +348,61 @@ static void dealloc_compressor_data(VP8_COMP *cpi)
vpx_free(cpi->tok);
cpi->tok = 0;
- // Structure used to monitor GF usage
+ /* Structure used to monitor GF usage */
vpx_free(cpi->gf_active_flags);
cpi->gf_active_flags = 0;
- // Activity mask based per mb zbin adjustments
+ /* Activity mask based per mb zbin adjustments */
vpx_free(cpi->mb_activity_map);
cpi->mb_activity_map = 0;
- vpx_free(cpi->mb_norm_activity_map);
- cpi->mb_norm_activity_map = 0;
vpx_free(cpi->mb.pip);
cpi->mb.pip = 0;
+
+#if CONFIG_MULTITHREAD
+ vpx_free(cpi->mt_current_mb_col);
+ cpi->mt_current_mb_col = NULL;
+#endif
}
static void enable_segmentation(VP8_COMP *cpi)
{
- // Set the appropriate feature bit
+ /* Set the appropriate feature bit */
cpi->mb.e_mbd.segmentation_enabled = 1;
cpi->mb.e_mbd.update_mb_segmentation_map = 1;
cpi->mb.e_mbd.update_mb_segmentation_data = 1;
}
static void disable_segmentation(VP8_COMP *cpi)
{
- // Clear the appropriate feature bit
+ /* Clear the appropriate feature bit */
cpi->mb.e_mbd.segmentation_enabled = 0;
}
-// Valid values for a segment are 0 to 3
-// Segmentation map is arrange as [Rows][Columns]
+/* Valid values for a segment are 0 to 3
+ * Segmentation map is arranged as [Rows][Columns]
+ */
static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
{
- // Copy in the new segmentation map
+ /* Copy in the new segmentation map */
vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
- // Signal that the map should be updated.
+ /* Signal that the map should be updated. */
cpi->mb.e_mbd.update_mb_segmentation_map = 1;
cpi->mb.e_mbd.update_mb_segmentation_data = 1;
}
-// The values given for each segment can be either deltas (from the default value chosen for the frame) or absolute values.
-//
-// Valid range for abs values is (0-127 for MB_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF)
-// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF)
-//
-// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given).
-//
-//
+/* The values given for each segment can be either deltas (from the default
+ * value chosen for the frame) or absolute values.
+ *
+ * Valid range for abs values is:
+ * (0-127 for MB_LVL_ALT_Q), (0-63 for SEGMENT_ALT_LF)
+ * Valid range for delta values are:
+ * (+/-127 for MB_LVL_ALT_Q), (+/-63 for SEGMENT_ALT_LF)
+ *
+ * abs_delta = SEGMENT_DELTADATA (deltas)
+ * abs_delta = SEGMENT_ABSDATA (use the absolute values given).
+ *
+ */
static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta)
{
cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta;
@@ -411,26 +418,6 @@ static void segmentation_test_function(VP8_COMP *cpi)
// Create a temporary map for segmentation data.
CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
- // MB loop to set local segmentation map
- /*for ( i = 0; i < cpi->common.mb_rows; i++ )
- {
- for ( j = 0; j < cpi->common.mb_cols; j++ )
- {
- //seg_map[(i*cpi->common.mb_cols) + j] = (j % 2) + ((i%2)* 2);
- //if ( j < cpi->common.mb_cols/2 )
-
- // Segment 1 around the edge else 0
- if ( (i == 0) || (j == 0) || (i == (cpi->common.mb_rows-1)) || (j == (cpi->common.mb_cols-1)) )
- seg_map[(i*cpi->common.mb_cols) + j] = 1;
- //else if ( (i < 2) || (j < 2) || (i > (cpi->common.mb_rows-3)) || (j > (cpi->common.mb_cols-3)) )
- // seg_map[(i*cpi->common.mb_cols) + j] = 2;
- //else if ( (i < 5) || (j < 5) || (i > (cpi->common.mb_rows-6)) || (j > (cpi->common.mb_cols-6)) )
- // seg_map[(i*cpi->common.mb_cols) + j] = 3;
- else
- seg_map[(i*cpi->common.mb_cols) + j] = 0;
- }
- }*/
-
// Set the segmentation Map
set_segmentation_map(cpi, seg_map);
@@ -453,103 +440,78 @@ static void segmentation_test_function(VP8_COMP *cpi)
set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
// Delete sementation map
- vpx_free(seg_map);
+ vpx_free(seg_map);
seg_map = 0;
-
}
-// A simple function to cyclically refresh the background at a lower Q
+/* A simple function to cyclically refresh the background at a lower Q */
static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
{
- unsigned char *seg_map;
+ unsigned char *seg_map = cpi->segmentation_map;
signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
int i;
int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe;
int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols;
- // Create a temporary map for segmentation data.
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
+ cpi->cyclic_refresh_q = Q / 2;
- cpi->cyclic_refresh_q = Q;
+ /* Set every macroblock to be eligible for update.
+ * For a key frame this will reset the seg map to 0.
+ */
+ vpx_memset(cpi->segmentation_map, 0, mbs_in_frame);
- for (i = Q; i > 0; i--)
- {
- if (vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*(Q + 128)) / 64))
- //if ( vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*((2*Q)+96))/64) )
- {
- break;
- }
- }
-
- cpi->cyclic_refresh_q = i;
-
- // Only update for inter frames
if (cpi->common.frame_type != KEY_FRAME)
{
- // Cycle through the macro_block rows
- // MB loop to set local segmentation map
- for (i = cpi->cyclic_refresh_mode_index; i < mbs_in_frame; i++)
+ /* Cycle through the macro_block rows */
+ /* MB loop to set local segmentation map */
+ i = cpi->cyclic_refresh_mode_index;
+ assert(i < mbs_in_frame);
+ do
{
- // If the MB is as a candidate for clean up then mark it for possible boost/refresh (segment 1)
- // The segment id may get reset to 0 later if the MB gets coded anything other than last frame 0,0
- // as only (last frame 0,0) MBs are eligable for refresh : that is to say Mbs likely to be background blocks.
- if (cpi->cyclic_refresh_map[i] == 0)
- {
- seg_map[i] = 1;
- }
- else
- {
- seg_map[i] = 0;
-
- // Skip blocks that have been refreshed recently anyway.
- if (cpi->cyclic_refresh_map[i] < 0)
- //cpi->cyclic_refresh_map[i] = cpi->cyclic_refresh_map[i] / 16;
- cpi->cyclic_refresh_map[i]++;
- }
-
-
- if (block_count > 0)
- block_count--;
- else
- break;
+ /* If the MB is a candidate for clean up then mark it for
+ * possible boost/refresh (segment 1). The segment id may get
+ * reset to 0 later if the MB gets coded as anything other than
+ * last frame (0,0), as only last-frame (0,0) MBs are eligible for
+ * refresh: that is to say, MBs likely to be background blocks.
+ */
+ if (cpi->cyclic_refresh_map[i] == 0)
+ {
+ seg_map[i] = 1;
+ block_count --;
+ }
+ else if (cpi->cyclic_refresh_map[i] < 0)
+ cpi->cyclic_refresh_map[i]++;
+
+ i++;
+ if (i == mbs_in_frame)
+ i = 0;
}
+ while(block_count && i != cpi->cyclic_refresh_mode_index);
- // If we have gone through the frame reset to the start
cpi->cyclic_refresh_mode_index = i;
-
- if (cpi->cyclic_refresh_mode_index >= mbs_in_frame)
- cpi->cyclic_refresh_mode_index = 0;
}
- // Set the segmentation Map
- set_segmentation_map(cpi, seg_map);
-
- // Activate segmentation.
+ /* Activate segmentation. */
+ cpi->mb.e_mbd.update_mb_segmentation_map = 1;
+ cpi->mb.e_mbd.update_mb_segmentation_data = 1;
enable_segmentation(cpi);
- // Set up the quant segment data
+ /* Set up the quant segment data */
feature_data[MB_LVL_ALT_Q][0] = 0;
feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q);
feature_data[MB_LVL_ALT_Q][2] = 0;
feature_data[MB_LVL_ALT_Q][3] = 0;
- // Set up the loop segment data
+ /* Set up the loop segment data */
feature_data[MB_LVL_ALT_LF][0] = 0;
feature_data[MB_LVL_ALT_LF][1] = lf_adjustment;
feature_data[MB_LVL_ALT_LF][2] = 0;
feature_data[MB_LVL_ALT_LF][3] = 0;
- // Initialise the feature data structure
- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1
+ /* Initialise the feature data structure */
set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
- // Delete sementation map
- vpx_free(seg_map);
-
- seg_map = 0;
-
}
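
The rewritten refresh walks the map as a ring: starting from the index saved on the previous frame, it marks up to block_count eligible macroblocks (map value 0) for the segment-1 Q and loop-filter boost, ages recently refreshed entries, wraps at the frame end, and stops after spending the budget or completing one full lap; seg_map is pre-zeroed by the memset above. A stand-alone version of the walk:

    /* Ring walk over the cyclic refresh map; *cursor persists across
     * frames so each frame continues where the last one stopped. */
    static void refresh_ring(signed char *map, unsigned char *seg,
                             int n, int budget, int *cursor)
    {
        int i = *cursor % n;
        int start = i;
        do {
            if (map[i] == 0) {
                seg[i] = 1;  /* candidate for boost/refresh */
                budget--;
            } else if (map[i] < 0) {
                map[i]++;    /* refreshed recently: skip and age */
            }
            i++;
            if (i == n)
                i = 0;
        } while (budget && i != start);
        *cursor = i;
    }
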
static void set_default_lf_deltas(VP8_COMP *cpi)
@@ -560,16 +522,21 @@ static void set_default_lf_deltas(VP8_COMP *cpi)
vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
- // Test of ref frame deltas
+ /* Test of ref frame deltas */
cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2;
cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0;
cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2;
cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2;
- cpi->mb.e_mbd.mode_lf_deltas[0] = 4; // BPRED
- cpi->mb.e_mbd.mode_lf_deltas[1] = -2; // Zero
- cpi->mb.e_mbd.mode_lf_deltas[2] = 2; // New mv
- cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv
+ cpi->mb.e_mbd.mode_lf_deltas[0] = 4; /* BPRED */
+
+ if(cpi->oxcf.Mode == MODE_REALTIME)
+ cpi->mb.e_mbd.mode_lf_deltas[1] = -12; /* Zero */
+ else
+ cpi->mb.e_mbd.mode_lf_deltas[1] = -2; /* Zero */
+
+ cpi->mb.e_mbd.mode_lf_deltas[2] = 2; /* New mv */
+ cpi->mb.e_mbd.mode_lf_deltas[3] = 4; /* Split mv */
}
/* Convenience macros for mapping speed and mode into a continuous
@@ -669,17 +636,16 @@ void vp8_set_speed_features(VP8_COMP *cpi)
int last_improved_quant = sf->improved_quant;
int ref_frames;
- // Initialise default mode frequency sampling variables
+ /* Initialise default mode frequency sampling variables */
for (i = 0; i < MAX_MODES; i ++)
{
cpi->mode_check_freq[i] = 0;
- cpi->mode_test_hit_counts[i] = 0;
cpi->mode_chosen_counts[i] = 0;
}
- cpi->mbs_tested_so_far = 0;
+ cpi->mb.mbs_tested_so_far = 0;
- // best quality defaults
+ /* best quality defaults */
sf->RD = 1;
sf->search_method = NSTEP;
sf->improved_quant = 1;
@@ -697,17 +663,17 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
sf->improved_mv_pred = 1;
- // default thresholds to 0
+ /* default thresholds to 0 */
for (i = 0; i < MAX_MODES; i++)
sf->thresh_mult[i] = 0;
/* Count enabled references */
ref_frames = 1;
- if (cpi->ref_frame_flags & VP8_LAST_FLAG)
+ if (cpi->ref_frame_flags & VP8_LAST_FRAME)
ref_frames++;
- if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
ref_frames++;
- if (cpi->ref_frame_flags & VP8_ALT_FLAG)
+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
ref_frames++;
/* Convert speed to continuous range, with clamping */
@@ -779,7 +745,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
switch (Mode)
{
#if !(CONFIG_REALTIME_ONLY)
- case 0: // best quality mode
+ case 0: /* best quality mode */
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
break;
@@ -800,8 +766,9 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->improved_quant = 0;
sf->improved_dct = 0;
- // Only do recode loop on key frames, golden frames and
- // alt ref frames
+ /* Only do recode loop on key frames, golden frames and
+ * alt ref frames
+ */
sf->recode_loop = 2;
}
@@ -809,14 +776,14 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (Speed > 3)
{
sf->auto_filter = 1;
- sf->recode_loop = 0; // recode loop off
- sf->RD = 0; // Turn rd off
+ sf->recode_loop = 0; /* recode loop off */
+ sf->RD = 0; /* Turn rd off */
}
if (Speed > 4)
{
- sf->auto_filter = 0; // Faster selection of loop filter
+ sf->auto_filter = 0; /* Faster selection of loop filter */
}
break;
@@ -839,7 +806,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
}
if (Speed > 2)
- sf->auto_filter = 0; // Faster selection of loop filter
+ sf->auto_filter = 0; /* Faster selection of loop filter */
if (Speed > 3)
{
@@ -849,7 +816,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (Speed > 4)
{
- sf->auto_filter = 0; // Faster selection of loop filter
+ sf->auto_filter = 0; /* Faster selection of loop filter */
sf->search_method = HEX;
sf->iterative_sub_pixel = 0;
}
@@ -870,16 +837,16 @@ void vp8_set_speed_features(VP8_COMP *cpi)
for (i = 0; i < min; i++)
{
- sum += cpi->error_bins[i];
+ sum += cpi->mb.error_bins[i];
}
total_skip = sum;
sum = 0;
- // i starts from 2 to make sure thresh started from 2048
+ /* i starts from 2 to make sure thresh started from 2048 */
for (; i < 1024; i++)
{
- sum += cpi->error_bins[i];
+ sum += cpi->mb.error_bins[i];
if (10 * sum >= (unsigned int)(cpi->Speed - 6)*(total_mbs - total_skip))
break;
@@ -930,16 +897,17 @@ void vp8_set_speed_features(VP8_COMP *cpi)
cm->filter_type = SIMPLE_LOOPFILTER;
}
- // This has a big hit on quality. Last resort
+ /* This has a big hit on quality. Last resort */
if (Speed >= 15)
sf->half_pixel_search = 0;
- vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins));
+ vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));
}; /* switch */
- // Slow quant, dct and trellis not worthwhile for first pass
- // so make sure they are always turned off.
+ /* Slow quant, dct and trellis not worthwhile for first pass
+ * so make sure they are always turned off.
+ */
if ( cpi->pass == 1 )
{
sf->improved_quant = 0;
@@ -1107,27 +1075,46 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
}
- // Data used for real time vc mode to see if gf needs refreshing
- cpi->inter_zz_count = 0;
- cpi->gf_bad_count = 0;
- cpi->gf_update_recommended = 0;
+ /* Data used for real time vc mode to see if gf needs refreshing */
+ cpi->zeromv_count = 0;
- // Structures used to minitor GF usage
+ /* Structures used to monitor GF usage */
vpx_free(cpi->gf_active_flags);
CHECK_MEM_ERROR(cpi->gf_active_flags,
- vpx_calloc(1, cm->mb_rows * cm->mb_cols));
+ vpx_calloc(sizeof(*cpi->gf_active_flags),
+ cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
vpx_free(cpi->mb_activity_map);
CHECK_MEM_ERROR(cpi->mb_activity_map,
- vpx_calloc(sizeof(unsigned int),
+ vpx_calloc(sizeof(*cpi->mb_activity_map),
cm->mb_rows * cm->mb_cols));
- vpx_free(cpi->mb_norm_activity_map);
- CHECK_MEM_ERROR(cpi->mb_norm_activity_map,
- vpx_calloc(sizeof(unsigned int),
- cm->mb_rows * cm->mb_cols));
+ /* allocate memory for storing last frame's MVs for MV prediction. */
+ vpx_free(cpi->lfmv);
+ CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),
+ sizeof(*cpi->lfmv)));
+ vpx_free(cpi->lf_ref_frame_sign_bias);
+ CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias,
+ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),
+ sizeof(*cpi->lf_ref_frame_sign_bias)));
+ vpx_free(cpi->lf_ref_frame);
+ CHECK_MEM_ERROR(cpi->lf_ref_frame,
+ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),
+ sizeof(*cpi->lf_ref_frame)));
+
+ /* Create the encoder segmentation map and set all entries to 0 */
+ vpx_free(cpi->segmentation_map);
+ CHECK_MEM_ERROR(cpi->segmentation_map,
+ vpx_calloc(cm->mb_rows * cm->mb_cols,
+ sizeof(*cpi->segmentation_map)));
+ cpi->cyclic_refresh_mode_index = 0;
+ vpx_free(cpi->active_map);
+ CHECK_MEM_ERROR(cpi->active_map,
+ vpx_calloc(cm->mb_rows * cm->mb_cols,
+ sizeof(*cpi->active_map)));
+ vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
#if CONFIG_MULTITHREAD
if (width < 640)
@@ -1138,15 +1125,22 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
cpi->mt_sync_range = 8;
else
cpi->mt_sync_range = 16;
+
+ if (cpi->oxcf.multi_threaded > 1)
+ {
+ vpx_free(cpi->mt_current_mb_col);
+ CHECK_MEM_ERROR(cpi->mt_current_mb_col,
+ vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
+ }
+
#endif
vpx_free(cpi->tplist);
-
- CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
+ CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows));
}
-// Quant MOD
+/* Quant MOD */
static const int q_trans[] =
{
0, 1, 2, 3, 4, 5, 7, 8,
@@ -1168,7 +1162,7 @@ int vp8_reverse_trans(int x)
return i;
return 63;
-};
+}
void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
{
if(framerate < .1)
@@ -1182,16 +1176,16 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
cpi->oxcf.two_pass_vbrmin_section / 100);
- // Set Maximum gf/arf interval
+ /* Set Maximum gf/arf interval */
cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
if(cpi->max_gf_interval < 12)
cpi->max_gf_interval = 12;
- // Extended interval for genuinely static scenes
+ /* Extended interval for genuinely static scenes */
cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
- // Special conditions when altr ref frame enabled in lagged compress mode
+ /* Special conditions when altr ref frame enabled in lagged compress mode */
if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames)
{
if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
@@ -1213,7 +1207,7 @@ rescale(int val, int num, int denom)
int64_t llden = denom;
int64_t llval = val;
- return llval * llnum / llden;
+ return (int)(llval * llnum / llden);
}
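
rescale() promotes its operands to int64_t so the intermediate val * num cannot overflow a 32-bit int before the division; the (int) cast added here only makes the narrowing on return explicit. A standalone sketch with numbers where the widening actually matters:

#include <stdint.h>
#include <stdio.h>

static int rescale(int val, int num, int denom)
{
    int64_t llnum = num;
    int64_t llden = denom;
    int64_t llval = val;

    return (int)(llval * llnum / llden);
}

int main(void)
{
    /* A 5000 ms buffer level at 2 Mbit/s: 5000 * 2000000 overflows
     * 32-bit int, but the 64-bit intermediate holds it fine. */
    printf("%d bits\n", rescale(5000, 2000000, 1000));
    return 0;
}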
@@ -1225,7 +1219,6 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->auto_gold = 1;
cpi->auto_adjust_gold_quantizer = 1;
- cpi->goldfreq = 7;
cm->version = oxcf->Version;
vp8_setup_version(cm);
@@ -1244,15 +1237,15 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->ref_frame_rate = cpi->frame_rate;
- // change includes all joint functionality
+ /* change includes all joint functionality */
vp8_change_config(cpi, oxcf);
- // Initialize active best and worst q and average q values.
+ /* Initialize active best and worst q and average q values. */
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
- // Initialise the starting buffer levels
+ /* Initialise the starting buffer levels */
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
@@ -1264,7 +1257,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->total_actual_bits = 0;
cpi->total_target_vs_actual = 0;
- // Temporal scalabilty
+ /* Temporal scalability */
if (cpi->oxcf.number_of_layers > 1)
{
unsigned int i;
@@ -1274,7 +1267,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- // Layer configuration
+ /* Layer configuration */
lc->frame_rate =
cpi->output_frame_rate / cpi->oxcf.rate_decimator[i];
lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000;
@@ -1284,28 +1277,29 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size;
lc->starting_buffer_level =
- rescale(oxcf->starting_buffer_level,
+ rescale((int)(oxcf->starting_buffer_level),
lc->target_bandwidth, 1000);
if (oxcf->optimal_buffer_level == 0)
lc->optimal_buffer_level = lc->target_bandwidth / 8;
else
lc->optimal_buffer_level =
- rescale(oxcf->optimal_buffer_level,
+ rescale((int)(oxcf->optimal_buffer_level),
lc->target_bandwidth, 1000);
if (oxcf->maximum_buffer_size == 0)
lc->maximum_buffer_size = lc->target_bandwidth / 8;
else
lc->maximum_buffer_size =
- rescale(oxcf->maximum_buffer_size,
+ rescale((int)oxcf->maximum_buffer_size,
lc->target_bandwidth, 1000);
- // Work out the average size of a frame within this layer
+ /* Work out the average size of a frame within this layer */
if (i > 0)
- lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] -
- cpi->oxcf.target_bitrate[i-1]) * 1000 /
- (lc->frame_rate - prev_layer_frame_rate);
+ lc->avg_frame_size_for_layer =
+ (int)((cpi->oxcf.target_bitrate[i] -
+ cpi->oxcf.target_bitrate[i-1]) * 1000 /
+ (lc->frame_rate - prev_layer_frame_rate));
lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
lc->active_best_quality = cpi->oxcf.best_allowed_q;
@@ -1321,7 +1315,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
lc->rate_correction_factor = 1.0;
lc->key_frame_rate_correction_factor = 1.0;
lc->gf_rate_correction_factor = 1.0;
- lc->inter_frame_target = 0.0;
+ lc->inter_frame_target = 0;
prev_layer_frame_rate = lc->frame_rate;
}
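
For temporal layers, each layer's average frame size is the incremental bitrate over the incremental frame rate relative to the layer below, since a higher layer only pays for the frames it adds. A worked sketch with hypothetical decimators and cumulative bitrates (the real code computes this only for i > 0; layer 0 is shown here as plain bitrate over frame rate for illustration):

#include <stdio.h>

int main(void)
{
    const double output_frame_rate = 30.0;
    const int rate_decimator[3] = { 4, 2, 1 };        /* hypothetical */
    const int target_bitrate[3] = { 100, 200, 400 };  /* kbit/s, cumulative */

    double prev_rate = 0.0;
    int prev_kbps = 0;
    int i;

    for (i = 0; i < 3; i++)
    {
        double frame_rate = output_frame_rate / rate_decimator[i];
        int avg_frame_size = (int)((target_bitrate[i] - prev_kbps) * 1000 /
                                   (frame_rate - prev_rate));

        printf("layer %d: %4.1f fps, avg frame %d bits\n",
               i, frame_rate, avg_frame_size);
        prev_rate = frame_rate;
        prev_kbps = target_bitrate[i];
    }
    return 0;
}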
@@ -1358,32 +1352,29 @@ static void update_layer_contexts (VP8_COMP *cpi)
lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
lc->starting_buffer_level = rescale(
- oxcf->starting_buffer_level_in_ms,
+ (int)oxcf->starting_buffer_level_in_ms,
lc->target_bandwidth, 1000);
if (oxcf->optimal_buffer_level == 0)
lc->optimal_buffer_level = lc->target_bandwidth / 8;
else
lc->optimal_buffer_level = rescale(
- oxcf->optimal_buffer_level_in_ms,
+ (int)oxcf->optimal_buffer_level_in_ms,
lc->target_bandwidth, 1000);
if (oxcf->maximum_buffer_size == 0)
lc->maximum_buffer_size = lc->target_bandwidth / 8;
else
lc->maximum_buffer_size = rescale(
- oxcf->maximum_buffer_size_in_ms,
+ (int)oxcf->maximum_buffer_size_in_ms,
lc->target_bandwidth, 1000);
- // Work out the average size of a frame within this layer
+ /* Work out the average size of a frame within this layer */
if (i > 0)
- lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] -
- oxcf->target_bitrate[i-1]) * 1000 /
- (lc->frame_rate - prev_layer_frame_rate);
-
- lc->active_worst_quality = oxcf->worst_allowed_q;
- lc->active_best_quality = oxcf->best_allowed_q;
- lc->avg_frame_qindex = oxcf->worst_allowed_q;
+ lc->avg_frame_size_for_layer =
+ (int)((oxcf->target_bitrate[i] -
+ oxcf->target_bitrate[i-1]) * 1000 /
+ (lc->frame_rate - prev_layer_frame_rate));
prev_layer_frame_rate = lc->frame_rate;
}
@@ -1514,10 +1505,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->baseline_gf_interval =
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
+ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME;
- //cpi->use_golden_frame_only = 0;
- //cpi->use_last_frame_only = 0;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 1;
cm->refresh_entropy_probs = 1;
@@ -1539,11 +1528,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
}
- // At the moment the first order values may not be > MAXQ
+ /* At the moment the first order values may not be > MAXQ */
if (cpi->oxcf.fixed_q > MAXQ)
cpi->oxcf.fixed_q = MAXQ;
- // local file playback mode == really big buffer
+ /* local file playback mode == really big buffer */
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
{
cpi->oxcf.starting_buffer_level = 60000;
@@ -1554,41 +1543,41 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->oxcf.maximum_buffer_size_in_ms = 240000;
}
- // Convert target bandwidth from Kbit/s to Bit/s
+ /* Convert target bandwidth from Kbit/s to Bit/s */
cpi->oxcf.target_bandwidth *= 1000;
cpi->oxcf.starting_buffer_level =
- rescale(cpi->oxcf.starting_buffer_level,
+ rescale((int)cpi->oxcf.starting_buffer_level,
cpi->oxcf.target_bandwidth, 1000);
- // Set or reset optimal and maximum buffer levels.
+ /* Set or reset optimal and maximum buffer levels. */
if (cpi->oxcf.optimal_buffer_level == 0)
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
else
cpi->oxcf.optimal_buffer_level =
- rescale(cpi->oxcf.optimal_buffer_level,
+ rescale((int)cpi->oxcf.optimal_buffer_level,
cpi->oxcf.target_bandwidth, 1000);
if (cpi->oxcf.maximum_buffer_size == 0)
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
else
cpi->oxcf.maximum_buffer_size =
- rescale(cpi->oxcf.maximum_buffer_size,
+ rescale((int)cpi->oxcf.maximum_buffer_size,
cpi->oxcf.target_bandwidth, 1000);
- // Set up frame rate and related parameters rate control values.
+ /* Set up frame rate and related parameters rate control values. */
vp8_new_frame_rate(cpi, cpi->frame_rate);
- // Set absolute upper and lower quality limits
+ /* Set absolute upper and lower quality limits */
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
cpi->best_quality = cpi->oxcf.best_allowed_q;
- // active values should only be modified if out of new range
+ /* active values should only be modified if out of new range */
if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q)
{
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
}
- // less likely
+ /* less likely */
else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q)
{
cpi->active_worst_quality = cpi->oxcf.best_allowed_q;
@@ -1597,7 +1586,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
}
- // less likely
+ /* less likely */
else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q)
{
cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
@@ -1607,14 +1596,9 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->cq_target_quality = cpi->oxcf.cq_level;
- // Only allow dropped frames in buffered mode
+ /* Only allow dropped frames in buffered mode */
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
- if (!cm->use_bilinear_mc_filter)
- cm->mcomp_filter_type = SIXTAP;
- else
- cm->mcomp_filter_type = BILINEAR;
-
cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
@@ -1627,7 +1611,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
* correct.
*/
- // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
+ /* VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) */
if (cpi->oxcf.Sharpness > 7)
cpi->oxcf.Sharpness = 7;
@@ -1641,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
Scale2Ratio(cm->horiz_scale, &hr, &hs);
Scale2Ratio(cm->vert_scale, &vr, &vs);
- // always go to the next whole number
+ /* always go to the next whole number */
cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
}
@@ -1655,6 +1639,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cm->yv12_fb[cm->lst_fb_idx].y_height ||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
{
+ dealloc_raw_frame_buffers(cpi);
alloc_raw_frame_buffers(cpi);
vp8_alloc_compressor_data(cpi);
}
@@ -1667,16 +1652,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->Speed = cpi->oxcf.cpu_used;
- // force to allowlag to 0 if lag_in_frames is 0;
+ /* force to allowlag to 0 if lag_in_frames is 0; */
if (cpi->oxcf.lag_in_frames == 0)
{
cpi->oxcf.allow_lag = 0;
}
- // Limit on lag buffers as these are not currently dynamically allocated
+ /* Limit on lag buffers as these are not currently dynamically allocated */
else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
- // YX Temp
+ /* YX Temp */
cpi->alt_ref_source = NULL;
cpi->is_src_frame_alt_ref = 0;
@@ -1693,7 +1678,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
#endif
#if 0
- // Experimental RD Code
+ /* Experimental RD Code */
cpi->frame_distortion = 0;
cpi->last_frame_distortion = 0;
#endif
@@ -1728,7 +1713,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
VP8_COMMON *cm;
cpi = vpx_memalign(32, sizeof(VP8_COMP));
- // Check that the CPI instance is valid
+ /* Check that the CPI instance is valid */
if (!cpi)
return 0;
@@ -1762,14 +1747,15 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->prob_gf_coded = 128;
cpi->prob_intra_coded = 63;
- // Prime the recent reference frame usage counters.
- // Hereafter they will be maintained as a sort of moving average
+ /* Prime the recent reference frame usage counters.
+ * Hereafter they will be maintained as a sort of moving average
+ */
cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1;
cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1;
- // Set reference frame sign bias for ALTREF frame to 1 (for now)
+ /* Set reference frame sign bias for ALTREF frame to 1 (for now) */
cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1;
cpi->twopass.gf_decay_rate = 0;
@@ -1779,21 +1765,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->alt_is_last = 0 ;
cpi->gold_is_alt = 0 ;
- // allocate memory for storing last frame's MVs for MV prediction.
- CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv)));
- CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
- CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
-
- // Create the encoder segmentation map and set all entries to 0
- CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
- CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
- vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
cpi->active_map_enabled = 0;
#if 0
- // Experimental code for lagged and one pass
- // Initialise one_pass GF frames stats
- // Update stats used for GF selection
+ /* Experimental code for lagged and one pass */
+ /* Initialise one_pass GF frames stats */
+ /* Update stats used for GF selection */
if (cpi->pass == 0)
{
cpi->one_pass_frame_index = 0;
@@ -1813,10 +1790,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
}
#endif
- // Should we use the cyclic refresh method.
- // Currently this is tied to error resilliant mode
+ /* Should we use the cyclic refresh method.
+ * Currently this is tied to error resilient mode
+ */
cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode;
- cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 40;
+ cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5;
cpi->cyclic_refresh_mode_index = 0;
cpi->cyclic_refresh_q = 32;
@@ -1827,9 +1805,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
else
cpi->cyclic_refresh_map = (signed char *) NULL;
- // Test function for segmentation
- //segmentation_test_function( cpi);
-
#ifdef ENTROPY_STATS
init_context_counters();
#endif
@@ -1837,7 +1812,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90<<12;
- cpi->frames_since_key = 8; // Give a sensible default for the first frame.
+ /* Give a sensible default for the first frame. */
+ cpi->frames_since_key = 8;
cpi->key_frame_frequency = cpi->oxcf.key_freq;
cpi->this_key_frame_forced = 0;
cpi->next_key_frame_forced = 0;
@@ -1880,10 +1856,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
#endif
-#ifndef LLONG_MAX
-#define LLONG_MAX 9223372036854775807LL
-#endif
- cpi->first_time_stamp_ever = LLONG_MAX;
+ cpi->first_time_stamp_ever = 0x7FFFFFFF;
cpi->frames_till_gf_update_due = 0;
cpi->key_frame_count = 1;
@@ -1894,22 +1867,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->total_byte_count = 0;
cpi->drop_frame = 0;
- cpi->drop_count = 0;
- cpi->max_drop_count = 0;
- cpi->max_consec_dropped_frames = 4;
cpi->rate_correction_factor = 1.0;
cpi->key_frame_rate_correction_factor = 1.0;
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1];
- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1];
- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1];
- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1];
-
- cal_mvsadcosts(cpi->mb.mvsadcost);
-
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
@@ -1935,7 +1898,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
else if (cpi->pass == 2)
{
size_t packet_sz = sizeof(FIRSTPASS_STATS);
- int packets = oxcf->two_pass_stats_in.sz / packet_sz;
+ int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
cpi->twopass.stats_in = cpi->twopass.stats_in_start;
@@ -1948,17 +1911,16 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
if (cpi->compressor_speed == 2)
{
- cpi->cpu_freq = 0; //vp8_get_processor_freq();
cpi->avg_encode_time = 0;
cpi->avg_pick_mode_time = 0;
}
vp8_set_speed_features(cpi);
- // Set starting values of RD threshold multipliers (128 = *1)
+ /* Set starting values of RD threshold multipliers (128 = *1) */
for (i = 0; i < MAX_MODES; i++)
{
- cpi->rd_thresh_mult[i] = 128;
+ cpi->mb.rd_thresh_mult[i] = 128;
}
#ifdef ENTROPY_STATS
@@ -1966,7 +1928,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
#endif
#if CONFIG_MULTITHREAD
- vp8cx_create_encoder_threads(cpi);
+ if(vp8cx_create_encoder_threads(cpi))
+ {
+ vp8_remove_compressor(&cpi);
+ return 0;
+ }
#endif
cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16;
@@ -2031,11 +1997,14 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->diamond_search_sad = vp8_diamond_search_sad;
cpi->refining_search_sad = vp8_refining_search_sad;
- // make sure frame 1 is okay
- cpi->error_bins[0] = cpi->common.MBs;
+ /* make sure frame 1 is okay */
+ cpi->mb.error_bins[0] = cpi->common.MBs;
- //vp8cx_init_quantizer() is first called here. Add check in vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only called later
- //when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame.
+ /* vp8cx_init_quantizer() is first called here. Add check in
+ * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only
+ * called later when needed. This will avoid unnecessary calls of
+ * vp8cx_init_quantizer() for every frame.
+ */
vp8cx_init_quantizer(cpi);
vp8_loop_filter_init(cm);
@@ -2043,13 +2012,33 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->common.error.setjmp = 0;
#if CONFIG_MULTI_RES_ENCODING
+
/* Calculate # of MBs in a row in lower-resolution level image. */
if (cpi->oxcf.mr_encoder_id > 0)
vp8_cal_low_res_mb_cols(cpi);
+
#endif
- return cpi;
+ /* setup RD costs to MACROBLOCK struct */
+
+ cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1];
+ cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1];
+ cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1];
+ cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1];
+ cal_mvsadcosts(cpi->mb.mvsadcost);
+
+ cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost;
+ cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost;
+ cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs;
+ cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs;
+ cpi->mb.token_costs = cpi->rd_costs.token_costs;
+
+ /* setup block ptrs & offsets */
+ vp8_setup_block_ptrs(&cpi->mb);
+ vp8_setup_block_dptrs(&cpi->mb.e_mbd);
+
+ return cpi;
}
@@ -2099,7 +2088,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
"GLPsnrP\tVPXSSIM\t\n");
- for (i=0; i<cpi->oxcf.number_of_layers; i++)
+ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
{
double dr = (double)cpi->bytes_in_layer[i] *
8.0 / 1000.0 / time_encoded;
@@ -2150,7 +2139,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
"Time(us)\n");
- for (i=0; i<cpi->oxcf.number_of_layers; i++)
+ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
{
double dr = (double)cpi->bytes_in_layer[i] *
8.0 / 1000.0 / time_encoded;
@@ -2204,7 +2193,6 @@ void vp8_remove_compressor(VP8_COMP **ptr)
fprintf(f, "%5d", frames_at_speed[i]);
fprintf(f, "\n");
- //fprintf(f, "%10d PM %10d %10d %10d EF %10d %10d %10d\n", cpi->Speed, cpi->avg_pick_mode_time, (tot_pm/cnt_pm), cnt_pm, cpi->avg_encode_time, 0, 0);
fclose(f);
}
@@ -2266,7 +2254,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
for (i = 0; i < 10; i++)
{
- fprintf(fmode, " { //Above Mode : %d\n", i);
+ fprintf(fmode, " { /* Above Mode : %d */\n", i);
for (j = 0; j < 10; j++)
{
@@ -2281,7 +2269,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]);
}
- fprintf(fmode, "}, // left_mode %d\n", j);
+ fprintf(fmode, "}, /* left_mode %d */\n", j);
}
@@ -2459,7 +2447,7 @@ static void generate_psnr_packet(VP8_COMP *cpi)
for (i = 0; i < 4; i++)
pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0,
- pkt.data.psnr.sse[i]);
+ (double)(pkt.data.psnr.sse[i]));
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
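
vp8_mse2psnr turns an accumulated sum of squared errors over `samples` pixels into PSNR against a peak of 255. A hedged re-derivation of that conversion (the 60 dB cap for near-zero error mirrors libvpx's behaviour, but treat the exact cap as an assumption):

#include <math.h>
#include <stdio.h>

static double mse2psnr(double samples, double peak, double sse)
{
    double psnr;

    if (sse > 0.0)
        psnr = 10.0 * log10(peak * peak * samples / sse);
    else
        psnr = 60.0;                 /* assumed cap for a perfect match */

    return psnr > 60.0 ? 60.0 : psnr;
}

int main(void)
{
    /* 640x480 luma plane with an average squared error of 4 per pixel */
    double samples = 640.0 * 480.0;

    printf("%.2f dB\n", mse2psnr(samples, 255.0, 4.0 * samples));
    return 0;
}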
@@ -2482,28 +2470,28 @@ int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags)
cpi->common.refresh_alt_ref_frame = 0;
cpi->common.refresh_last_frame = 0;
- if (ref_frame_flags & VP8_LAST_FLAG)
+ if (ref_frame_flags & VP8_LAST_FRAME)
cpi->common.refresh_last_frame = 1;
- if (ref_frame_flags & VP8_GOLD_FLAG)
+ if (ref_frame_flags & VP8_GOLD_FRAME)
cpi->common.refresh_golden_frame = 1;
- if (ref_frame_flags & VP8_ALT_FLAG)
+ if (ref_frame_flags & VP8_ALTR_FRAME)
cpi->common.refresh_alt_ref_frame = 1;
return 0;
}
-int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+int vp8_get_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd)
{
VP8_COMMON *cm = &cpi->common;
int ref_fb_idx;
- if (ref_frame_flag == VP8_LAST_FLAG)
+ if (ref_frame_flag == VP8_LAST_FRAME)
ref_fb_idx = cm->lst_fb_idx;
- else if (ref_frame_flag == VP8_GOLD_FLAG)
+ else if (ref_frame_flag == VP8_GOLD_FRAME)
ref_fb_idx = cm->gld_fb_idx;
- else if (ref_frame_flag == VP8_ALT_FLAG)
+ else if (ref_frame_flag == VP8_ALTR_FRAME)
ref_fb_idx = cm->alt_fb_idx;
else
return -1;
@@ -2512,17 +2500,17 @@ int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CO
return 0;
}
-int vp8_set_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+int vp8_set_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd)
{
VP8_COMMON *cm = &cpi->common;
int ref_fb_idx;
- if (ref_frame_flag == VP8_LAST_FLAG)
+ if (ref_frame_flag == VP8_LAST_FRAME)
ref_fb_idx = cm->lst_fb_idx;
- else if (ref_frame_flag == VP8_GOLD_FLAG)
+ else if (ref_frame_flag == VP8_GOLD_FRAME)
ref_fb_idx = cm->gld_fb_idx;
- else if (ref_frame_flag == VP8_ALT_FLAG)
+ else if (ref_frame_flag == VP8_ALTR_FRAME)
ref_fb_idx = cm->alt_fb_idx;
else
return -1;
@@ -2583,7 +2571,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
- // are we resizing the image
+ /* are we resizing the image */
if (cm->horiz_scale != 0 || cm->vert_scale != 0)
{
#if CONFIG_SPATIAL_RESAMPLING
@@ -2611,51 +2599,57 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
}
-static void resize_key_frame(VP8_COMP *cpi)
+static int resize_key_frame(VP8_COMP *cpi)
{
#if CONFIG_SPATIAL_RESAMPLING
VP8_COMMON *cm = &cpi->common;
- // Do we need to apply resampling for one pass cbr.
- // In one pass this is more limited than in two pass cbr
- // The test and any change is only made one per key frame sequence
+ /* Do we need to apply resampling for one pass cbr.
+ * In one pass this is more limited than in two pass cbr
+ * The test and any change is only made once per key frame sequence
+ */
if (cpi->oxcf.allow_spatial_resampling && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
{
int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
int new_width, new_height;
- // If we are below the resample DOWN watermark then scale down a notch.
+ /* If we are below the resample DOWN watermark then scale down a
+ * notch.
+ */
if (cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))
{
cm->horiz_scale = (cm->horiz_scale < ONETWO) ? cm->horiz_scale + 1 : ONETWO;
cm->vert_scale = (cm->vert_scale < ONETWO) ? cm->vert_scale + 1 : ONETWO;
}
- // Should we now start scaling back up
+ /* Should we now start scaling back up */
else if (cpi->buffer_level > (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))
{
cm->horiz_scale = (cm->horiz_scale > NORMAL) ? cm->horiz_scale - 1 : NORMAL;
cm->vert_scale = (cm->vert_scale > NORMAL) ? cm->vert_scale - 1 : NORMAL;
}
- // Get the new hieght and width
+ /* Get the new height and width */
Scale2Ratio(cm->horiz_scale, &hr, &hs);
Scale2Ratio(cm->vert_scale, &vr, &vs);
new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs;
new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs;
- // If the image size has changed we need to reallocate the buffers
- // and resample the source image
+ /* If the image size has changed we need to reallocate the buffers
+ * and resample the source image
+ */
if ((cm->Width != new_width) || (cm->Height != new_height))
{
cm->Width = new_width;
cm->Height = new_height;
vp8_alloc_compressor_data(cpi);
scale_and_extend_source(cpi->un_scaled_source, cpi);
+ return 1;
}
}
#endif
+ return 0;
}
@@ -2663,34 +2657,35 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
- // Select an interval before next GF or altref
+ /* Select an interval before next GF or altref */
if (!cpi->auto_gold)
- cpi->frames_till_gf_update_due = cpi->goldfreq;
+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL;
if ((cpi->pass != 2) && cpi->frames_till_gf_update_due)
{
cpi->current_gf_interval = cpi->frames_till_gf_update_due;
- // Set the bits per frame that we should try and recover in subsequent inter frames
- // to account for the extra GF spend... note that his does not apply for GF updates
- // that occur coincident with a key frame as the extra cost of key frames is dealt
- // with elsewhere.
-
+ /* Set the bits per frame that we should try and recover in
+ * subsequent inter frames to account for the extra GF spend...
+ * note that this does not apply for GF updates that occur
+ * coincident with a key frame as the extra cost of key frames is
+ * dealt with elsewhere.
+ */
cpi->gf_overspend_bits += cpi->projected_frame_size;
cpi->non_gf_bitrate_adjustment = cpi->gf_overspend_bits / cpi->frames_till_gf_update_due;
}
- // Update data structure that monitors level of reference to last GF
+ /* Update data structure that monitors level of reference to last GF */
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
- // this frame refreshes means next frames don't unless specified by user
+ /* a refresh on this frame means following frames don't refresh unless specified by user */
cpi->common.frames_since_golden = 0;
- // Clear the alternate reference update pending flag.
+ /* Clear the alternate reference update pending flag. */
cpi->source_alt_ref_pending = 0;
- // Set the alternate refernce frame active flag
+ /* Set the alternate reference frame active flag */
cpi->source_alt_ref_active = 1;
@@ -2699,25 +2694,29 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
- // Update the Golden frame usage counts.
+ /* Update the Golden frame usage counts. */
if (cm->refresh_golden_frame)
{
- // Select an interval before next GF
+ /* Select an interval before next GF */
if (!cpi->auto_gold)
- cpi->frames_till_gf_update_due = cpi->goldfreq;
+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL;
if ((cpi->pass != 2) && (cpi->frames_till_gf_update_due > 0))
{
cpi->current_gf_interval = cpi->frames_till_gf_update_due;
- // Set the bits per frame that we should try and recover in subsequent inter frames
- // to account for the extra GF spend... note that his does not apply for GF updates
- // that occur coincident with a key frame as the extra cost of key frames is dealt
- // with elsewhere.
+ /* Set the bits per frame that we should try and recover in
+ * subsequent inter frames to account for the extra GF spend...
+ * note that this does not apply for GF updates that occur
+ * coincident with a key frame as the extra cost of key frames
+ * is dealt with elsewhere.
+ */
if ((cm->frame_type != KEY_FRAME) && !cpi->source_alt_ref_active)
{
- // Calcluate GF bits to be recovered
- // Projected size - av frame bits available for inter frames for clip as a whole
+ /* Calculate GF bits to be recovered
+ * Projected size - av frame bits available for inter
+ * frames for clip as a whole
+ */
cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->inter_frame_target);
}
@@ -2725,32 +2724,25 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
}
- // Update data structure that monitors level of reference to last GF
+ /* Update data structure that monitors level of reference to last GF */
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
- // this frame refreshes means next frames don't unless specified by user
+ /* a refresh on this frame means following frames don't refresh
+ * unless specified by user
+ */
cm->refresh_golden_frame = 0;
cpi->common.frames_since_golden = 0;
- //if ( cm->frame_type == KEY_FRAME )
- //{
cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1;
cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1;
- //}
- //else
- //{
- // // Carry a potrtion of count over to begining of next gf sequence
- // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5;
- // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5;
- // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5;
- // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5;
- //}
-
- // ******** Fixed Q test code only ************
- // If we are going to use the ALT reference for the next group of frames set a flag to say so.
+
+ /* ******** Fixed Q test code only ************ */
+ /* If we are going to use the ALT reference for the next group of
+ * frames set a flag to say so.
+ */
if (cpi->oxcf.fixed_q >= 0 &&
cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame)
{
@@ -2761,14 +2753,14 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
if (!cpi->source_alt_ref_pending)
cpi->source_alt_ref_active = 0;
- // Decrement count down till next gf
+ /* Decrement count down till next gf */
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
}
else if (!cpi->common.refresh_alt_ref_frame)
{
- // Decrement count down till next gf
+ /* Decrement count down till next gf */
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
@@ -2779,21 +2771,26 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
if (cpi->common.frames_since_golden > 1)
{
- cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME];
- cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME];
- cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME];
- cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
+ cpi->recent_ref_frame_usage[INTRA_FRAME] +=
+ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
+ cpi->recent_ref_frame_usage[LAST_FRAME] +=
+ cpi->mb.count_mb_ref_frame_usage[LAST_FRAME];
+ cpi->recent_ref_frame_usage[GOLDEN_FRAME] +=
+ cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME];
+ cpi->recent_ref_frame_usage[ALTREF_FRAME] +=
+ cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME];
}
}
}
-// This function updates the reference frame probability estimates that
-// will be used during mode selection
+/* This function updates the reference frame probability estimates that
+ * will be used during mode selection
+ */
static void update_rd_ref_frame_probs(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
- const int *const rfct = cpi->count_mb_ref_frame_usage;
+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage;
const int rf_intra = rfct[INTRA_FRAME];
const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
@@ -2810,7 +2807,9 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
cpi->prob_gf_coded = 128;
}
- // update reference frame costs since we can do better than what we got last frame.
+ /* update reference frame costs since we can do better than what we got
+ * last frame.
+ */
if (cpi->oxcf.number_of_layers == 1)
{
if (cpi->common.refresh_alt_ref_frame)
@@ -2841,7 +2840,7 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
}
-// 1 = key, 0 = inter
+/* 1 = key, 0 = inter */
static int decide_key_frame(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
@@ -2853,43 +2852,22 @@ static int decide_key_frame(VP8_COMP *cpi)
if (cpi->Speed > 11)
return 0;
- // Clear down mmx registers
- vp8_clear_system_state(); //__asm emms;
+ /* Clear down mmx registers */
+ vp8_clear_system_state();
if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0))
{
- double change = 1.0 * abs((int)(cpi->intra_error - cpi->last_intra_error)) / (1 + cpi->last_intra_error);
- double change2 = 1.0 * abs((int)(cpi->prediction_error - cpi->last_prediction_error)) / (1 + cpi->last_prediction_error);
+ double change = 1.0 * abs((int)(cpi->mb.intra_error -
+ cpi->last_intra_error)) / (1 + cpi->last_intra_error);
+ double change2 = 1.0 * abs((int)(cpi->mb.prediction_error -
+ cpi->last_prediction_error)) / (1 + cpi->last_prediction_error);
double minerror = cm->MBs * 256;
-#if 0
-
- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15
- && cpi->prediction_error > minerror
- && (change > .25 || change2 > .25))
- {
- FILE *f = fopen("intra_inter.stt", "a");
-
- if (cpi->prediction_error <= 0)
- cpi->prediction_error = 1;
-
- fprintf(f, "%d %d %d %d %14.4f\n",
- cm->current_video_frame,
- (int) cpi->prediction_error,
- (int) cpi->intra_error,
- (int)((10 * cpi->intra_error) / cpi->prediction_error),
- change);
-
- fclose(f);
- }
-
-#endif
-
- cpi->last_intra_error = cpi->intra_error;
- cpi->last_prediction_error = cpi->prediction_error;
+ cpi->last_intra_error = cpi->mb.intra_error;
+ cpi->last_prediction_error = cpi->mb.prediction_error;
- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15
- && cpi->prediction_error > minerror
+ if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15
+ && cpi->mb.prediction_error > minerror
&& (change > .25 || change2 > .25))
{
/*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/
@@ -2900,7 +2878,7 @@ static int decide_key_frame(VP8_COMP *cpi)
}
- // If the following are true we might as well code a key frame
+ /* If the following are true we might as well code a key frame */
if (((cpi->this_frame_percent_intra == 100) &&
(cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra + 2))) ||
((cpi->this_frame_percent_intra > 95) &&
@@ -2908,9 +2886,12 @@ static int decide_key_frame(VP8_COMP *cpi)
{
code_key_frame = 1;
}
- // in addition if the following are true and this is not a golden frame then code a key frame
- // Note that on golden frames there often seems to be a pop in intra useage anyway hence this
- // restriction is designed to prevent spurious key frames. The Intra pop needs to be investigated.
+ /* In addition, if the following are true and this is not a golden frame
+ * then code a key frame. Note that on golden frames there often seems
+ * to be a pop in intra usage anyway, hence this restriction is
+ * designed to prevent spurious key frames. The intra pop needs to be
+ * investigated.
+ */
else if (((cpi->this_frame_percent_intra > 60) &&
(cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra * 2))) ||
((cpi->this_frame_percent_intra > 75) &&
@@ -2942,7 +2923,7 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
{
- // write the frame
+ /* write the frame */
FILE *yframe;
int i;
char filename[255];
@@ -2970,10 +2951,11 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
fclose(yframe);
}
#endif
-// return of 0 means drop frame
+/* return of 0 means drop frame */
-// Function to test for conditions that indeicate we should loop
-// back and recode a frame.
+/* Function to test for conditions that indicate we should loop
+ * back and recode a frame.
+ */
static int recode_loop_test( VP8_COMP *cpi,
int high_limit, int low_limit,
int q, int maxq, int minq )
@@ -2981,32 +2963,33 @@ static int recode_loop_test( VP8_COMP *cpi,
int force_recode = 0;
VP8_COMMON *cm = &cpi->common;
- // Is frame recode allowed at all
- // Yes if either recode mode 1 is selected or mode two is selcted
- // and the frame is a key frame. golden frame or alt_ref_frame
+ /* Is frame recode allowed at all
+ * Yes if either recode mode 1 is selected or mode 2 is selected
+ * and the frame is a key frame, golden frame or alt_ref_frame
+ */
if ( (cpi->sf.recode_loop == 1) ||
( (cpi->sf.recode_loop == 2) &&
( (cm->frame_type == KEY_FRAME) ||
cm->refresh_golden_frame ||
cm->refresh_alt_ref_frame ) ) )
{
- // General over and under shoot tests
+ /* General over and under shoot tests */
if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
((cpi->projected_frame_size < low_limit) && (q > minq)) )
{
force_recode = 1;
}
- // Special Constrained quality tests
+ /* Special Constrained quality tests */
else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
{
- // Undershoot and below auto cq level
+ /* Undershoot and below auto cq level */
if ( (q > cpi->cq_target_quality) &&
(cpi->projected_frame_size <
((cpi->this_frame_target * 7) >> 3)))
{
force_recode = 1;
}
- // Severe undershoot and between auto and user cq level
+ /* Severe undershoot and between auto and user cq level */
else if ( (q > cpi->oxcf.cq_level) &&
(cpi->projected_frame_size < cpi->min_frame_bandwidth) &&
(cpi->active_best_quality > cpi->oxcf.cq_level))
@@ -3020,21 +3003,28 @@ static int recode_loop_test( VP8_COMP *cpi,
return force_recode;
}
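
recode_loop_test boils down to a gate (recoding allowed always in mode 1, only on key/golden/alt-ref frames in mode 2) followed by over/undershoot checks within the available q range. The same predicate pulled out standalone (the two constrained-quality special cases are omitted for brevity; field names mirror the cpi members used above):

#include <stdio.h>

static int should_recode(int recode_loop, int is_kf_gf_arf,
                         int projected_size, int high_limit, int low_limit,
                         int q, int maxq, int minq)
{
    /* recode allowed always in mode 1, only on kf/gf/arf in mode 2 */
    if (!(recode_loop == 1 || (recode_loop == 2 && is_kf_gf_arf)))
        return 0;

    /* general over- and under-shoot tests */
    return (projected_size > high_limit && q < maxq) ||
           (projected_size < low_limit && q > minq);
}

int main(void)
{
    /* overshoot on an inter frame with recode mode 2: no recode */
    printf("%d\n", should_recode(2, 0, 9000, 8000, 4000, 30, 63, 4));
    /* same overshoot on a key frame: recode */
    printf("%d\n", should_recode(2, 1, 9000, 8000, 4000, 30, 63, 4));
    return 0;
}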
-static void update_reference_frames(VP8_COMMON *cm)
+static void update_reference_frames(VP8_COMP *cpi)
{
+ VP8_COMMON *cm = &cpi->common;
YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb;
- // At this point the new frame has been encoded.
- // If any buffer copy / swapping is signaled it should be done here.
+ /* At this point the new frame has been encoded.
+ * If any buffer copy / swapping is signaled it should be done here.
+ */
if (cm->frame_type == KEY_FRAME)
{
- yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG | VP8_ALT_FLAG ;
+ yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME | VP8_ALTR_FRAME ;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG;
+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME;
+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME;
cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame;
+ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame;
+#endif
}
else /* For non key frames */
{
@@ -3042,9 +3032,13 @@ static void update_reference_frames(VP8_COMMON *cm)
{
assert(!cm->copy_buffer_to_arf);
- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALT_FLAG;
- cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG;
+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALTR_FRAME;
+ cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME;
cm->alt_fb_idx = cm->new_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame;
+#endif
}
else if (cm->copy_buffer_to_arf)
{
@@ -3054,18 +3048,28 @@ static void update_reference_frames(VP8_COMMON *cm)
{
if(cm->alt_fb_idx != cm->lst_fb_idx)
{
- yv12_fb[cm->lst_fb_idx].flags |= VP8_ALT_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG;
+ yv12_fb[cm->lst_fb_idx].flags |= VP8_ALTR_FRAME;
+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME;
cm->alt_fb_idx = cm->lst_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[ALTREF_FRAME] =
+ cpi->current_ref_frames[LAST_FRAME];
+#endif
}
}
else /* if (cm->copy_buffer_to_arf == 2) */
{
if(cm->alt_fb_idx != cm->gld_fb_idx)
{
- yv12_fb[cm->gld_fb_idx].flags |= VP8_ALT_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG;
+ yv12_fb[cm->gld_fb_idx].flags |= VP8_ALTR_FRAME;
+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME;
cm->alt_fb_idx = cm->gld_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[ALTREF_FRAME] =
+ cpi->current_ref_frames[GOLDEN_FRAME];
+#endif
}
}
}
@@ -3074,9 +3078,13 @@ static void update_reference_frames(VP8_COMMON *cm)
{
assert(!cm->copy_buffer_to_gf);
- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG;
- cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG;
+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME;
+ cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME;
cm->gld_fb_idx = cm->new_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame;
+#endif
}
else if (cm->copy_buffer_to_gf)
{
@@ -3086,18 +3094,28 @@ static void update_reference_frames(VP8_COMMON *cm)
{
if(cm->gld_fb_idx != cm->lst_fb_idx)
{
- yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FLAG;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG;
+ yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FRAME;
+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME;
cm->gld_fb_idx = cm->lst_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[GOLDEN_FRAME] =
+ cpi->current_ref_frames[LAST_FRAME];
+#endif
}
}
else /* if (cm->copy_buffer_to_gf == 2) */
{
if(cm->alt_fb_idx != cm->gld_fb_idx)
{
- yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FLAG;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG;
+ yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FRAME;
+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME;
cm->gld_fb_idx = cm->alt_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[GOLDEN_FRAME] =
+ cpi->current_ref_frames[ALTREF_FRAME];
+#endif
}
}
}
@@ -3105,14 +3123,71 @@ static void update_reference_frames(VP8_COMMON *cm)
if (cm->refresh_last_frame)
{
- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FLAG;
- cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FLAG;
+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FRAME;
+ cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FRAME;
cm->lst_fb_idx = cm->new_fb_idx;
+
+#if CONFIG_MULTI_RES_ENCODING
+ cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame;
+#endif
}
+
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
+ {
+ /* we shouldn't have to keep multiple copies as we know in advance which
+ * buffer we should start from - for now, to get something up and running,
+ * I've chosen to copy the buffers
+ */
+ if (cm->frame_type == KEY_FRAME)
+ {
+ int i;
+ vp8_yv12_copy_frame(
+ cpi->Source,
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]);
+
+ vp8_yv12_extend_frame_borders(
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]);
+
+ for (i = 2; i < MAX_REF_FRAMES - 1; i++)
+ vp8_yv12_copy_frame(
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME],
+ &cpi->denoiser.yv12_running_avg[i]);
+ }
+ else /* For non key frames */
+ {
+ vp8_yv12_extend_frame_borders(
+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME]);
+
+ if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf)
+ {
+ vp8_yv12_copy_frame(
+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME],
+ &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]);
+ }
+ if (cm->refresh_golden_frame || cm->copy_buffer_to_gf)
+ {
+ vp8_yv12_copy_frame(
+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME],
+ &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]);
+ }
+ if(cm->refresh_last_frame)
+ {
+ vp8_yv12_copy_frame(
+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME],
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]);
+ }
+ }
+
+ }
+#endif
+
}
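
The reference bookkeeping above is all bitmask moves: on a key frame the GOLD and ALTR flags migrate from the old golden/alt-ref buffers to the newly coded one and both indices follow. A toy restatement of just that key-frame branch (the flag constants are illustrative stand-ins, not the real vpx enums):

#include <stdio.h>

#define FLAG_LAST (1u << 0)   /* illustrative stand-ins */
#define FLAG_GOLD (1u << 1)
#define FLAG_ALTR (1u << 2)

int main(void)
{
    unsigned int flags[4] = { FLAG_LAST, FLAG_GOLD, FLAG_ALTR, 0 };
    int lst = 0, gld = 1, alt = 2, new_fb = 3;

    /* Key frame: the new buffer becomes golden and alt-ref at once. */
    flags[new_fb] |= FLAG_GOLD | FLAG_ALTR;
    flags[gld] &= ~FLAG_GOLD;
    flags[alt] &= ~FLAG_ALTR;
    alt = gld = new_fb;

    printf("lst=%d gld=%d alt=%d\n", lst, gld, alt);
    return 0;
}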
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
{
+ const FRAME_TYPE frame_type = cm->frame_type;
+
if (cm->no_lpf)
{
cm->filter_level = 0;
@@ -3130,6 +3205,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
else
vp8cx_pick_filter_level(cpi->Source, cpi);
+ if (cm->filter_level > 0)
+ {
+ vp8cx_set_alt_lf_level(cpi, cm->filter_level);
+ }
+
vpx_usec_timer_mark(&timer);
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
}
@@ -3141,17 +3221,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
if (cm->filter_level > 0)
{
- vp8cx_set_alt_lf_level(cpi, cm->filter_level);
- vp8_loop_filter_frame(cm, &cpi->mb.e_mbd);
+ vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type);
}
vp8_yv12_extend_frame_borders(cm->frame_to_show);
-#if CONFIG_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity)
- {
- vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg);
- }
-#endif
+
}
static void encode_frame_to_data_rate
@@ -3184,13 +3258,14 @@ static void encode_frame_to_data_rate
int undershoot_seen = 0;
#endif
- int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100;
+ int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark *
+ cpi->oxcf.optimal_buffer_level / 100);
int drop_mark75 = drop_mark * 2 / 3;
int drop_mark50 = drop_mark / 4;
int drop_mark25 = drop_mark / 8;
- // Clear down mmx registers to allow floating point in what follows
+ /* Clear down mmx registers to allow floating point in what follows */
vp8_clear_system_state();
#if CONFIG_MULTITHREAD
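
The drop watermarks just above are successive fractions of drop_mark; note that despite the 75/50/25 names the actual divisors are 2/3, 1/4 and 1/8. A tiny sketch with hypothetical config values:

#include <stdio.h>

int main(void)
{
    /* hypothetical: drop_frames_water_mark = 30 (%), optimal level = 6000 */
    int drop_mark = 30 * 6000 / 100;

    printf("drop_mark   = %d\n", drop_mark);
    printf("drop_mark75 = %d\n", drop_mark * 2 / 3);
    printf("drop_mark50 = %d\n", drop_mark / 4);
    printf("drop_mark25 = %d\n", drop_mark / 8);
    return 0;
}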
@@ -3202,108 +3277,125 @@ static void encode_frame_to_data_rate
}
#endif
- // Test code for segmentation of gf/arf (0,0)
- //segmentation_test_function( cpi);
-
if(cpi->force_next_frame_intra)
{
cm->frame_type = KEY_FRAME; /* delayed intra frame */
cpi->force_next_frame_intra = 0;
}
- // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth
+ /* For an alt ref frame in 2 pass we skip the call to the second pass
+ * function that sets the target bandwidth
+ */
#if !(CONFIG_REALTIME_ONLY)
if (cpi->pass == 2)
{
if (cpi->common.refresh_alt_ref_frame)
{
- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame
- cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate
+ /* Per frame bit target for the alt ref frame */
+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
+ /* per second target bitrate */
+ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
+ cpi->output_frame_rate);
}
}
else
#endif
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate);
- // Default turn off buffer to buffer copying
+ /* Default turn off buffer to buffer copying */
cm->copy_buffer_to_gf = 0;
cm->copy_buffer_to_arf = 0;
- // Clear zbin over-quant value and mode boost values.
- cpi->zbin_over_quant = 0;
- cpi->zbin_mode_boost = 0;
+ /* Clear zbin over-quant value and mode boost values. */
+ cpi->mb.zbin_over_quant = 0;
+ cpi->mb.zbin_mode_boost = 0;
- // Enable or disable mode based tweaking of the zbin
- // For 2 Pass Only used where GF/ARF prediction quality
- // is above a threshold
- cpi->zbin_mode_boost_enabled = 1;
+ /* Enable or disable mode based tweaking of the zbin
+ * For 2 Pass Only used where GF/ARF prediction quality
+ * is above a threshold
+ */
+ cpi->mb.zbin_mode_boost_enabled = 1;
if (cpi->pass == 2)
{
if ( cpi->gfu_boost <= 400 )
{
- cpi->zbin_mode_boost_enabled = 0;
+ cpi->mb.zbin_mode_boost_enabled = 0;
}
}
- // Current default encoder behaviour for the altref sign bias
+ /* Current default encoder behaviour for the altref sign bias */
if (cpi->source_alt_ref_active)
cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1;
else
cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0;
- // Check to see if a key frame is signalled
- // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass.
+ /* Check to see if a key frame is signalled
+ * For two pass with auto key frame enabled cm->frame_type may already
+ * be set, but not for one pass.
+ */
if ((cm->current_video_frame == 0) ||
(cm->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0)))
{
- // Key frame from VFW/auto-keyframe/first frame
+ /* Key frame from VFW/auto-keyframe/first frame */
cm->frame_type = KEY_FRAME;
}
- // Set default state for segment and mode based loop filter update flags
- cpi->mb.e_mbd.update_mb_segmentation_map = 0;
- cpi->mb.e_mbd.update_mb_segmentation_data = 0;
- cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
+#if CONFIG_MULTI_RES_ENCODING
+ /* In multi-resolution encoding, frame_type is decided by lowest-resolution
+ * encoder. The same frame_type is adopted while encoding at other resolutions.
+ */
+ if (cpi->oxcf.mr_encoder_id)
+ {
+ LOWER_RES_FRAME_INFO* low_res_frame_info
+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
+
+ cm->frame_type = low_res_frame_info->frame_type;
- // Set various flags etc to special state if it is a key frame
+ if(cm->frame_type != KEY_FRAME)
+ {
+ cpi->mr_low_res_mv_avail = 1;
+ cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped);
+
+ if (cpi->ref_frame_flags & VP8_LAST_FRAME)
+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME]
+ == low_res_frame_info->low_res_ref_frames[LAST_FRAME]);
+
+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME]
+ == low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]);
+
+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME]
+ == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
+ }
+ }
+#endif
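
The low-resolution motion vectors are only trusted when the low-res encoder actually coded this frame and every reference the current encoder may search shows the same source frame as the low-res encoder's corresponding reference. A compact restatement of that availability check (field names are stand-ins for the ones used in the hunk above):

#include <stdio.h>

/* Hedged sketch: 1 if low-res MVs can seed this resolution's search. */
static int low_res_mv_available(int frame_dropped,
                                unsigned int ref_flags,
                                const unsigned int cur_refs[3],
                                const unsigned int low_refs[3])
{
    int avail = !frame_dropped;
    int i;

    /* bit i of ref_flags enables reference i (LAST/GOLDEN/ALTREF) */
    for (i = 0; i < 3; i++)
        if (ref_flags & (1u << i))
            avail &= (cur_refs[i] == low_refs[i]);

    return avail;
}

int main(void)
{
    unsigned int cur[3] = { 10, 7, 7 }, low[3] = { 10, 7, 4 };

    /* ALTREF (bit 2) is out of sync, so enabling it kills availability */
    printf("%d %d\n",
           low_res_mv_available(0, 0x3u, cur, low),
           low_res_mv_available(0, 0x7u, cur, low));
    return 0;
}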
+
+ /* Set various flags etc to special state if it is a key frame */
if (cm->frame_type == KEY_FRAME)
{
int i;
- // Reset the loop filter deltas and segmentation map
+ /* Set the loop filter deltas and segmentation map update */
setup_features(cpi);
- // If segmentation is enabled force a map update for key frames
- if (cpi->mb.e_mbd.segmentation_enabled)
- {
- cpi->mb.e_mbd.update_mb_segmentation_map = 1;
- cpi->mb.e_mbd.update_mb_segmentation_data = 1;
- }
-
- // The alternate reference frame cannot be active for a key frame
+ /* The alternate reference frame cannot be active for a key frame */
cpi->source_alt_ref_active = 0;
- // Reset the RD threshold multipliers to default of * 1 (128)
+ /* Reset the RD threshold multipliers to default of * 1 (128) */
for (i = 0; i < MAX_MODES; i++)
{
- cpi->rd_thresh_mult[i] = 128;
+ cpi->mb.rd_thresh_mult[i] = 128;
}
}
- // Test code for segmentation
- //if ( (cm->frame_type == KEY_FRAME) || ((cm->current_video_frame % 2) == 0))
- //if ( (cm->current_video_frame % 2) == 0 )
- // enable_segmentation(cpi);
- //else
- // disable_segmentation(cpi);
-
#if 0
- // Experimental code for lagged compress and one pass
- // Initialise one_pass GF frames stats
- // Update stats used for GF selection
- //if ( cpi->pass == 0 )
+ /* Experimental code for lagged compress and one pass
+ * Initialise one_pass GF frames stats
+ * Update stats used for GF selection
+ */
{
cpi->one_pass_frame_index = cm->current_video_frame % MAX_LAG_BUFFERS;
@@ -3323,8 +3415,9 @@ static void encode_frame_to_data_rate
if (cpi->drop_frames_allowed)
{
- // The reset to decimation 0 is only done here for one pass.
- // Once it is set two pass leaves decimation on till the next kf.
+ /* The reset to decimation 0 is only done here for one pass.
+ * Once it is set two pass leaves decimation on till the next kf.
+ */
if ((cpi->buffer_level > drop_mark) && (cpi->decimation_factor > 0))
cpi->decimation_factor --;
@@ -3343,14 +3436,17 @@ static void encode_frame_to_data_rate
{
cpi->decimation_factor = 1;
}
- //vpx_log("Encoder: Decimation Factor: %d \n",cpi->decimation_factor);
}
- // The following decimates the frame rate according to a regular pattern (i.e. to 1/2 or 2/3 frame rate)
- // This can be used to help prevent buffer under-run in CBR mode. Alternatively it might be desirable in
- // some situations to drop frame rate but throw more bits at each frame.
- //
- // Note that dropping a key frame can be problematic if spatial resampling is also active
+ /* The following decimates the frame rate according to a regular
+ * pattern (i.e. to 1/2 or 2/3 frame rate). This can be used to help
+ * prevent buffer under-run in CBR mode. Alternatively it might be
+ * desirable in some situations to drop frame rate but throw more bits
+ * at each frame.
+ *
+ * Note that dropping a key frame can be problematic if spatial
+ * resampling is also active
+ */
if (cpi->decimation_factor > 0)
{
switch (cpi->decimation_factor)
@@ -3366,8 +3462,10 @@ static void encode_frame_to_data_rate
break;
}
- // Note that we should not throw out a key frame (especially when spatial resampling is enabled).
- if ((cm->frame_type == KEY_FRAME)) // && cpi->oxcf.allow_spatial_resampling )
+ /* Note that we should not throw out a key frame (especially when
+ * spatial resampling is enabled).
+ */
+ if ((cm->frame_type == KEY_FRAME))
{
cpi->decimation_count = cpi->decimation_factor;
}
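
The decimation logic is a refill counter: coding a frame reloads decimation_count and the intervening frames are dropped. A simplified driver for the factor-2 case (the real factor-3 pattern codes frames at a 2/3-style rate and reloads differently, per the switch above):

#include <stdio.h>

int main(void)
{
    int decimation_factor = 2;
    int decimation_count = 0;
    int frame;

    for (frame = 0; frame < 10; frame++)
    {
        if (decimation_count > 0)
        {
            decimation_count--;                    /* drop this frame */
            continue;
        }
        decimation_count = decimation_factor - 1;  /* sketch: code 1 of N */
        printf("coded frame %d\n", frame);
    }
    return 0;
}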
@@ -3379,6 +3477,10 @@ static void encode_frame_to_data_rate
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
+#if CONFIG_MULTI_RES_ENCODING
+ vp8_store_drop_frame_info(cpi);
+#endif
+
cm->current_video_frame++;
cpi->frames_since_key++;
@@ -3392,7 +3494,9 @@ static void encode_frame_to_data_rate
{
unsigned int i;
- // Propagate bits saved by dropping the frame to higher layers
+ /* Propagate bits saved by dropping the frame to higher
+ * layers
+ */
for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
@@ -3408,24 +3512,32 @@ static void encode_frame_to_data_rate
else
cpi->decimation_count = cpi->decimation_factor;
}
+ else
+ cpi->decimation_count = 0;
- // Decide how big to make the frame
+ /* Decide how big to make the frame */
if (!vp8_pick_frame_size(cpi))
{
+ /*TODO: 2 drop_frame and return code could be put together. */
+#if CONFIG_MULTI_RES_ENCODING
+ vp8_store_drop_frame_info(cpi);
+#endif
cm->current_video_frame++;
cpi->frames_since_key++;
return;
}
- // Reduce active_worst_allowed_q for CBR if our buffer is getting too full.
- // This has a knock on effect on active best quality as well.
- // For CBR if the buffer reaches its maximum level then we can no longer
- // save up bits for later frames so we might as well use them up
- // on the current frame.
+ /* Reduce active_worst_allowed_q for CBR if our buffer is getting too full.
+ * This has a knock on effect on active best quality as well.
+ * For CBR if the buffer reaches its maximum level then we can no longer
+ * save up bits for later frames so we might as well use them up
+ * on the current frame.
+ */
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
(cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode)
{
- int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4
+ /* Max adjustment is 1/4 */
+ int Adjustment = cpi->active_worst_quality / 4;
if (Adjustment)
{
@@ -3433,10 +3545,16 @@ static void encode_frame_to_data_rate
if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size)
{
- buff_lvl_step = (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment;
+ buff_lvl_step = (int)
+ ((cpi->oxcf.maximum_buffer_size -
+ cpi->oxcf.optimal_buffer_level) /
+ Adjustment);
if (buff_lvl_step)
- Adjustment = (cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step;
+ Adjustment = (int)
+ ((cpi->buffer_level -
+ cpi->oxcf.optimal_buffer_level) /
+ buff_lvl_step);
else
Adjustment = 0;
}
@@ -3448,8 +3566,9 @@ static void encode_frame_to_data_rate
}
}
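
The CBR relaxation above scales the cut to active_worst_quality by how far the buffer has climbed from its optimal level toward its maximum; the full 1/4 cut applies only at a completely full buffer. The same arithmetic with worked numbers:

#include <stdio.h>

int main(void)
{
    int active_worst_quality = 48;
    long buffer_level = 9000, optimal = 6000, maximum = 12000;

    int adjustment = active_worst_quality / 4;      /* max adjustment 1/4 */

    if (adjustment && buffer_level < maximum)
    {
        long step = (maximum - optimal) / adjustment;
        adjustment = step ? (int)((buffer_level - optimal) / step) : 0;
    }

    active_worst_quality -= adjustment;             /* 48 - 6 = 42 here */
    printf("active_worst_quality -> %d\n", active_worst_quality);
    return 0;
}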
- // Set an active best quality and if necessary active worst quality
- // There is some odd behavior for one pass here that needs attention.
+ /* Set an active best quality and, if necessary, active worst quality.
+ * There is some odd behavior for one pass here that needs attention.
+ */
if ( (cpi->pass == 2) || (cpi->ni_frames > 150))
{
vp8_clear_system_state();
@@ -3465,9 +3584,10 @@ static void encode_frame_to_data_rate
else
cpi->active_best_quality = kf_high_motion_minq[Q];
- // Special case for key frames forced because we have reached
- // the maximum key frame interval. Here force the Q to a range
- // based on the ambient Q to reduce the risk of popping
+ /* Special case for key frames forced because we have reached
+ * the maximum key frame interval. Here force the Q to a range
+ * based on the ambient Q to reduce the risk of popping
+ */
if ( cpi->this_key_frame_forced )
{
if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8)
@@ -3476,7 +3596,7 @@ static void encode_frame_to_data_rate
cpi->active_best_quality = cpi->avg_frame_qindex >> 2;
}
}
- // One pass more conservative
+ /* One pass more conservative */
else
cpi->active_best_quality = kf_high_motion_minq[Q];
}
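
For a key frame forced only by hitting the maximum interval, the active best quality is clamped into a band around the ambient average Q (between one quarter and seven eighths of it) to reduce the risk of a visible pop. The clamp in isolation:

#include <stdio.h>

int main(void)
{
    int avg_frame_qindex = 40;
    int active_best_quality = 50;   /* hypothetical value from the table */

    if (active_best_quality > avg_frame_qindex * 7 / 8)
        active_best_quality = avg_frame_qindex * 7 / 8;
    else if (active_best_quality < avg_frame_qindex >> 2)
        active_best_quality = avg_frame_qindex >> 2;

    printf("forced-KF active_best_quality = %d\n", active_best_quality);
    return 0;
}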
@@ -3484,16 +3604,17 @@ static void encode_frame_to_data_rate
else if (cpi->oxcf.number_of_layers==1 &&
(cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame))
{
- // Use the lower of cpi->active_worst_quality and recent
- // average Q as basis for GF/ARF Q limit unless last frame was
- // a key frame.
+ /* Use the lower of cpi->active_worst_quality and recent
+ * average Q as basis for GF/ARF Q limit unless last frame was
+ * a key frame.
+ */
if ( (cpi->frames_since_key > 1) &&
(cpi->avg_frame_qindex < cpi->active_worst_quality) )
{
Q = cpi->avg_frame_qindex;
}
- // For constrained quality dont allow Q less than the cq level
+ /* For constrained quality don't allow Q less than the cq level */
if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
(Q < cpi->cq_target_quality) )
{
@@ -3509,14 +3630,14 @@ static void encode_frame_to_data_rate
else
cpi->active_best_quality = gf_mid_motion_minq[Q];
- // Constrained quality use slightly lower active best.
+ /* Constrained quality uses slightly lower active best. */
if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY )
{
cpi->active_best_quality =
cpi->active_best_quality * 15/16;
}
}
- // One pass more conservative
+ /* One pass more conservative */
else
cpi->active_best_quality = gf_high_motion_minq[Q];
}
@@ -3524,14 +3645,16 @@ static void encode_frame_to_data_rate
{
cpi->active_best_quality = inter_minq[Q];
- // For the constant/constrained quality mode we dont want
- // q to fall below the cq level.
+ /* For the constant/constrained quality mode we don't want
+ * q to fall below the cq level.
+ */
if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
(cpi->active_best_quality < cpi->cq_target_quality) )
{
- // If we are strongly undershooting the target rate in the last
- // frames then use the user passed in cq value not the auto
- // cq value.
+ /* If we are strongly undershooting the target rate in the last
+ * frames then use the user passed in cq value not the auto
+ * cq value.
+ */
if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth )
cpi->active_best_quality = cpi->oxcf.cq_level;
else
@@ -3539,26 +3662,33 @@ static void encode_frame_to_data_rate
}
}
- // If CBR and the buffer is as full then it is reasonable to allow
- // higher quality on the frames to prevent bits just going to waste.
+ /* If CBR and the buffer is nearly full then it is reasonable to allow
+ * higher quality on the frames to prevent bits just going to waste.
+ */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- // Note that the use of >= here elliminates the risk of a devide
- // by 0 error in the else if clause
+ /* Note that the use of >= here eliminates the risk of a divide
+ * by 0 error in the else if clause
+ */
if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size)
cpi->active_best_quality = cpi->best_quality;
else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)
{
- int Fraction = ((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) / (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level);
- int min_qadjustment = ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128;
+ int Fraction = (int)
+ (((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128)
+ / (cpi->oxcf.maximum_buffer_size -
+ cpi->oxcf.optimal_buffer_level));
+ int min_qadjustment = ((cpi->active_best_quality -
+ cpi->best_quality) * Fraction) / 128;
cpi->active_best_quality -= min_qadjustment;
}
}
}
- // Make sure constrained quality mode limits are adhered to for the first
- // few frames of one pass encodes
+ /* Make sure constrained quality mode limits are adhered to for the first
+ * few frames of one pass encodes
+ */
else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
{
if ( (cm->frame_type == KEY_FRAME) ||
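
The CBR branch a few lines up maps buffer fullness onto a 0..128 fixed-point fraction and moves active_best_quality proportionally toward best_quality (recall that a lower Q index means higher quality). A self-contained sketch of that interpolation, with illustrative names and types:

    /* Sketch of the CBR quality boost above: when the buffer is full the
     * best allowed quality is used outright; between the optimal and
     * maximum levels the boost is interpolated in 1/128 steps. */
    static int boost_best_quality_cbr(int active_best_quality,
                                      int best_quality,
                                      long long buffer_level,
                                      long long optimal_level,
                                      long long maximum_size)
    {
        if (buffer_level >= maximum_size)
            return best_quality;
        if (buffer_level > optimal_level)
        {
            int fraction = (int)(((buffer_level - optimal_level) * 128) /
                                 (maximum_size - optimal_level));
            active_best_quality -=
                ((active_best_quality - best_quality) * fraction) / 128;
        }
        return active_best_quality;
    }
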
@@ -3572,7 +3702,7 @@ static void encode_frame_to_data_rate
}
}
- // Clip the active best and worst quality values to limits
+ /* Clip the active best and worst quality values to limits */
if (cpi->active_worst_quality > cpi->worst_quality)
cpi->active_worst_quality = cpi->worst_quality;
@@ -3582,14 +3712,14 @@ static void encode_frame_to_data_rate
if ( cpi->active_worst_quality < cpi->active_best_quality )
cpi->active_worst_quality = cpi->active_best_quality;
- // Determine initial Q to try
+ /* Determine initial Q to try */
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
#if !(CONFIG_REALTIME_ONLY)
- // Set highest allowed value for Zbin over quant
+ /* Set highest allowed value for Zbin over quant */
if (cm->frame_type == KEY_FRAME)
- zbin_oq_high = 0; //ZBIN_OQ_MAX/16
+ zbin_oq_high = 0;
else if ((cpi->oxcf.number_of_layers == 1) && ((cm->refresh_alt_ref_frame ||
(cm->refresh_golden_frame && !cpi->source_alt_ref_active))))
{
@@ -3599,15 +3729,21 @@ static void encode_frame_to_data_rate
zbin_oq_high = ZBIN_OQ_MAX;
#endif
- // Setup background Q adjustment for error resilient mode.
- // For multi-layer encodes only enable this for the base layer.
- if (cpi->cyclic_refresh_mode_enabled && (cpi->current_layer==0))
+ /* Setup background Q adjustment for error resilient mode.
+ * For multi-layer encodes only enable this for the base layer.
+ */
+ if (cpi->cyclic_refresh_mode_enabled)
+ {
+ if (cpi->current_layer==0)
cyclic_background_refresh(cpi, Q, 0);
+ else
+ disable_segmentation(cpi);
+ }
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
#if !(CONFIG_REALTIME_ONLY)
- // Limit Q range for the adaptive loop.
+ /* Limit Q range for the adaptive loop. */
bottom_index = cpi->active_best_quality;
top_index = cpi->active_worst_quality;
q_low = cpi->active_best_quality;
@@ -3652,11 +3788,11 @@ static void encode_frame_to_data_rate
if (cm->frame_type == KEY_FRAME)
{
- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0);
+ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0);
}
else
{
- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0);
+ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0);
src = cpi->Source->y_buffer;
@@ -3675,16 +3811,11 @@ static void encode_frame_to_data_rate
do
{
- vp8_clear_system_state(); //__asm emms;
-
- /*
- if(cpi->is_src_frame_alt_ref)
- Q = 127;
- */
+ vp8_clear_system_state();
vp8_set_quantizer(cpi, Q);
- // setup skip prob for costing in mode/mv decision
+ /* setup skip prob for costing in mode/mv decision */
if (cpi->common.mb_no_coeff_skip)
{
cpi->prob_skip_false = cpi->base_skip_false_prob[Q];
@@ -3728,7 +3859,9 @@ static void encode_frame_to_data_rate
*/
}
- //as this is for cost estimate, let's make sure it does not go extreme eitehr way
+ /* as this is for cost estimate, let's make sure it does not
+ * go extreme either way
+ */
if (cpi->prob_skip_false < 5)
cpi->prob_skip_false = 5;
@@ -3754,7 +3887,22 @@ static void encode_frame_to_data_rate
if (cm->frame_type == KEY_FRAME)
{
- resize_key_frame(cpi);
+ if(resize_key_frame(cpi))
+ {
+ /* If the frame size has changed, need to reset Q, quantizer,
+ * and background refresh.
+ */
+ Q = vp8_regulate_q(cpi, cpi->this_frame_target);
+ if (cpi->cyclic_refresh_mode_enabled)
+ {
+ if (cpi->current_layer==0)
+ cyclic_background_refresh(cpi, Q, 0);
+ else
+ disable_segmentation(cpi);
+ }
+ vp8_set_quantizer(cpi, Q);
+ }
+
vp8_setup_key_frame(cpi);
}
@@ -3773,7 +3921,7 @@ static void encode_frame_to_data_rate
if (cm->refresh_entropy_probs == 0)
{
- // save a copy for later refresh
+ /* save a copy for later refresh */
vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
}
@@ -3781,61 +3929,52 @@ static void encode_frame_to_data_rate
vp8_update_coef_probs(cpi);
- // transform / motion compensation build reconstruction frame
- // +pack coef partitions
+ /* transform / motion compensation build reconstruction frame
+ * +pack coef partitions
+ */
vp8_encode_frame(cpi);
/* cpi->projected_frame_size is not needed for RT mode */
}
#else
- // transform / motion compensation build reconstruction frame
+ /* transform / motion compensation build reconstruction frame */
vp8_encode_frame(cpi);
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
#endif
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
- // Test to see if the stats generated for this frame indicate that we should have coded a key frame
- // (assuming that we didn't)!
- if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
- {
- int key_frame_decision = decide_key_frame(cpi);
+ /* Test to see if the stats generated for this frame indicate that
+ * we should have coded a key frame (assuming that we didn't)!
+ */
- if (cpi->compressor_speed == 2)
- {
- /* we don't do re-encoding in realtime mode
- * if key frame is decided then we force it on next frame */
- cpi->force_next_frame_intra = key_frame_decision;
- }
+ if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME
+ && cpi->compressor_speed != 2)
+ {
#if !(CONFIG_REALTIME_ONLY)
- else if (key_frame_decision)
+ if (decide_key_frame(cpi))
{
- // Reset all our sizing numbers and recode
+ /* Reset all our sizing numbers and recode */
cm->frame_type = KEY_FRAME;
vp8_pick_frame_size(cpi);
- // Clear the Alt reference frame active flag when we have a key frame
+ /* Clear the Alt reference frame active flag when we have
+ * a key frame
+ */
cpi->source_alt_ref_active = 0;
- // Reset the loop filter deltas and segmentation map
+ /* Set the loop filter deltas and segmentation map update */
setup_features(cpi);
- // If segmentation is enabled force a map update for key frames
- if (cpi->mb.e_mbd.segmentation_enabled)
- {
- cpi->mb.e_mbd.update_mb_segmentation_map = 1;
- cpi->mb.e_mbd.update_mb_segmentation_data = 1;
- }
-
vp8_restore_coding_context(cpi);
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
- // Limit Q range for the adaptive loop.
+ /* Limit Q range for the adaptive loop. */
bottom_index = cpi->active_best_quality;
top_index = cpi->active_worst_quality;
q_low = cpi->active_best_quality;
@@ -3854,7 +3993,7 @@ static void encode_frame_to_data_rate
if (frame_over_shoot_limit == 0)
frame_over_shoot_limit = 1;
- // Are we are overshooting and up against the limit of active max Q.
+ /* Are we overshooting and up against the limit of active max Q? */
if (((cpi->pass != 2) || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) &&
(Q == cpi->active_worst_quality) &&
(cpi->active_worst_quality < cpi->worst_quality) &&
@@ -3862,50 +4001,52 @@ static void encode_frame_to_data_rate
{
int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit;
- // If so is there any scope for relaxing it
+ /* If so is there any scope for relaxing it */
while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0))
{
cpi->active_worst_quality++;
-
- over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size.
+ /* Assume 1 qstep = about 4% on frame size. */
+ over_size_percent = (int)(over_size_percent * 0.96);
}
#if !(CONFIG_REALTIME_ONLY)
top_index = cpi->active_worst_quality;
#endif
- // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop.
+ /* If we have updated the active max Q do not call
+ * vp8_update_rate_correction_factors() this loop.
+ */
active_worst_qchanged = 1;
}
else
active_worst_qchanged = 0;
#if !(CONFIG_REALTIME_ONLY)
- // Special case handling for forced key frames
+ /* Special case handling for forced key frames */
if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced )
{
int last_q = Q;
int kf_err = vp8_calc_ss_err(cpi->Source,
&cm->yv12_fb[cm->new_fb_idx]);
- // The key frame is not good enough
+ /* The key frame is not good enough */
if ( kf_err > ((cpi->ambient_err * 7) >> 3) )
{
- // Lower q_high
+ /* Lower q_high */
q_high = (Q > q_low) ? (Q - 1) : q_low;
- // Adjust Q
+ /* Adjust Q */
Q = (q_high + q_low) >> 1;
}
- // The key frame is much better than the previous frame
+ /* The key frame is much better than the previous frame */
else if ( kf_err < (cpi->ambient_err >> 1) )
{
- // Raise q_low
+ /* Raise q_low */
q_low = (Q < q_high) ? (Q + 1) : q_high;
- // Adjust Q
+ /* Adjust Q */
Q = (q_high + q_low + 1) >> 1;
}
- // Clamp Q to upper and lower limits:
+ /* Clamp Q to upper and lower limits: */
if (Q > q_high)
Q = q_high;
else if (Q < q_low)
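
For forced key frames the hunk above performs a bisection on Q: the key frame's reconstruction error is compared against fractions (7/8 and 1/2) of the recorded ambient error, and the [q_low, q_high] bracket is tightened until Q stops moving. One iteration of that search, sketched standalone with illustrative names:

    /* One step of the forced-key-frame Q bisection.  The caller loops
     * while the returned Q differs from the previous one. */
    static int forced_kf_q_step(int Q, int *q_low, int *q_high,
                                int kf_err, int ambient_err)
    {
        if (kf_err > ((ambient_err * 7) >> 3))      /* not good enough */
        {
            *q_high = (Q > *q_low) ? (Q - 1) : *q_low;  /* lower the ceiling */
            Q = (*q_high + *q_low) >> 1;
        }
        else if (kf_err < (ambient_err >> 1))       /* much better than needed */
        {
            *q_low = (Q < *q_high) ? (Q + 1) : *q_high; /* raise the floor */
            Q = (*q_high + *q_low + 1) >> 1;
        }
        if (Q > *q_high)
            Q = *q_high;
        else if (Q < *q_low)
            Q = *q_low;
        return Q;
    }
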
@@ -3914,7 +4055,9 @@ static void encode_frame_to_data_rate
Loop = Q != last_q;
}
- // Is the projected frame size out of range and are we allowed to attempt to recode.
+ /* Is the projected frame size out of range and are we allowed
+ * to attempt to recode?
+ */
else if ( recode_loop_test( cpi,
frame_over_shoot_limit, frame_under_shoot_limit,
Q, top_index, bottom_index ) )
@@ -3922,45 +4065,57 @@ static void encode_frame_to_data_rate
int last_q = Q;
int Retries = 0;
- // Frame size out of permitted range:
- // Update correction factor & compute new Q to try...
+ /* Frame size out of permitted range. Update correction factor
+ * & compute new Q to try...
+ */
- // Frame is too large
+ /* Frame is too large */
if (cpi->projected_frame_size > cpi->this_frame_target)
{
- //if ( cpi->zbin_over_quant == 0 )
- q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
+ /* Raise Qlow as to at least the current value */
+ q_low = (Q < q_high) ? (Q + 1) : q_high;
- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low
- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
+ /* If we are using over quant do the same for zbin_oq_low */
+ if (cpi->mb.zbin_over_quant > 0)
+ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ?
+ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high;
- //if ( undershoot_seen || (Q == MAXQ) )
if (undershoot_seen)
{
- // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ /* Update rate_correction_factor unless
+ * cpi->active_worst_quality has changed.
+ */
if (!active_worst_qchanged)
vp8_update_rate_correction_factors(cpi, 1);
Q = (q_high + q_low + 1) / 2;
- // Adjust cpi->zbin_over_quant (only allowed when Q is max)
+ /* Adjust cpi->zbin_over_quant (only allowed when Q
+ * is max)
+ */
if (Q < MAXQ)
- cpi->zbin_over_quant = 0;
+ cpi->mb.zbin_over_quant = 0;
else
{
- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
+ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ?
+ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high;
+ cpi->mb.zbin_over_quant =
+ (zbin_oq_high + zbin_oq_low) / 2;
}
}
else
{
- // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ /* Update rate_correction_factor unless
+ * cpi->active_worst_quality has changed.
+ */
if (!active_worst_qchanged)
vp8_update_rate_correction_factors(cpi, 0);
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
- while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10))
+ while (((Q < q_low) ||
+ (cpi->mb.zbin_over_quant < zbin_oq_low)) &&
+ (Retries < 10))
{
vp8_update_rate_correction_factors(cpi, 0);
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
@@ -3970,47 +4125,60 @@ static void encode_frame_to_data_rate
overshoot_seen = 1;
}
- // Frame is too small
+ /* Frame is too small */
else
{
- if (cpi->zbin_over_quant == 0)
- q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant
- else // else lower zbin_oq_high
- zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low;
+ if (cpi->mb.zbin_over_quant == 0)
+ /* Lower q_high if not using over quant */
+ q_high = (Q > q_low) ? (Q - 1) : q_low;
+ else
+ /* else lower zbin_oq_high */
+ zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ?
+ (cpi->mb.zbin_over_quant - 1) : zbin_oq_low;
if (overshoot_seen)
{
- // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ /* Update rate_correction_factor unless
+ * cpi->active_worst_quality has changed.
+ */
if (!active_worst_qchanged)
vp8_update_rate_correction_factors(cpi, 1);
Q = (q_high + q_low) / 2;
- // Adjust cpi->zbin_over_quant (only allowed when Q is max)
+ /* Adjust cpi->zbin_over_quant (only allowed when Q
+ * is max)
+ */
if (Q < MAXQ)
- cpi->zbin_over_quant = 0;
+ cpi->mb.zbin_over_quant = 0;
else
- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
+ cpi->mb.zbin_over_quant =
+ (zbin_oq_high + zbin_oq_low) / 2;
}
else
{
- // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ /* Update rate_correction_factor unless
+ * cpi->active_worst_quality has changed.
+ */
if (!active_worst_qchanged)
vp8_update_rate_correction_factors(cpi, 0);
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
- // Special case reset for qlow for constrained quality.
- // This should only trigger where there is very substantial
- // undershoot on a frame and the auto cq level is above
- // the user passsed in value.
+ /* Special case reset for qlow for constrained quality.
+ * This should only trigger where there is very substantial
+ * undershoot on a frame and the auto cq level is above
+ * the user passed in value.
+ */
if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
(Q < q_low) )
{
q_low = Q;
}
- while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10))
+ while (((Q > q_high) ||
+ (cpi->mb.zbin_over_quant > zbin_oq_high)) &&
+ (Retries < 10))
{
vp8_update_rate_correction_factors(cpi, 0);
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
@@ -4021,14 +4189,16 @@ static void encode_frame_to_data_rate
undershoot_seen = 1;
}
- // Clamp Q to upper and lower limits:
+ /* Clamp Q to upper and lower limits: */
if (Q > q_high)
Q = q_high;
else if (Q < q_low)
Q = q_low;
- // Clamp cpi->zbin_over_quant
- cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->zbin_over_quant;
+ /* Clamp cpi->zbin_over_quant */
+ cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ?
+ zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ?
+ zbin_oq_high : cpi->mb.zbin_over_quant;
Loop = Q != last_q;
}
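
Stripped of the zbin_over_quant handling, the overshoot/undershoot logic above follows a standard bracketing pattern: each overshoot raises the floor q_low, each undershoot lowers the ceiling q_high, and once both directions have been seen the search falls back to plain bisection instead of re-querying the rate model. A condensed, illustrative sketch under those simplifications, where model_q stands in for a fresh vp8_regulate_q() result:

    static int clamp_q(int q, int lo, int hi)
    {
        return q < lo ? lo : (q > hi ? hi : q);
    }

    /* One pass of the recode decision, heavily condensed. */
    static int next_q(int Q, int *q_low, int *q_high,
                      int projected_size, int target_size,
                      int overshoot_seen, int undershoot_seen, int model_q)
    {
        if (projected_size > target_size)           /* frame too large */
        {
            *q_low = (Q < *q_high) ? (Q + 1) : *q_high;
            Q = undershoot_seen ? (*q_high + *q_low + 1) / 2 : model_q;
        }
        else                                        /* frame too small */
        {
            *q_high = (Q > *q_low) ? (Q - 1) : *q_low;
            Q = overshoot_seen ? (*q_high + *q_low) / 2 : model_q;
        }
        return clamp_q(Q, *q_low, *q_high);
    }
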
@@ -4051,30 +4221,20 @@ static void encode_frame_to_data_rate
while (Loop == 1);
#if 0
- // Experimental code for lagged and one pass
- // Update stats used for one pass GF selection
- {
- /*
- int frames_so_far;
- double frame_intra_error;
- double frame_coded_error;
- double frame_pcnt_inter;
- double frame_pcnt_motion;
- double frame_mvr;
- double frame_mvr_abs;
- double frame_mvc;
- double frame_mvc_abs;
- */
-
+ /* Experimental code for lagged and one pass
+ * Update stats used for one pass GF selection
+ */
+ {
cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_coded_error = (double)cpi->prediction_error;
cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_intra_error = (double)cpi->intra_error;
cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_pcnt_inter = (double)(100 - cpi->this_frame_percent_intra) / 100.0;
}
#endif
- // Special case code to reduce pulsing when key frames are forced at a
- // fixed interval. Note the reconstruction error if it is the frame before
- // the force key frame
+ /* Special case code to reduce pulsing when key frames are forced at a
+ * fixed interval. Note the reconstruction error if it is the frame before
+ * the forced key frame
+ */
if ( cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0) )
{
cpi->ambient_err = vp8_calc_ss_err(cpi->Source,
@@ -4113,13 +4273,38 @@ static void encode_frame_to_data_rate
}
}
+ /* Count last ref frame 0,0 usage on current encoded frame. */
+ {
+ int mb_row;
+ int mb_col;
+ /* Point to beginning of MODE_INFO arrays. */
+ MODE_INFO *tmp = cm->mi;
+
+ cpi->zeromv_count = 0;
+
+ if(cm->frame_type != KEY_FRAME)
+ {
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
+ {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col ++)
+ {
+ if(tmp->mbmi.mode == ZEROMV)
+ cpi->zeromv_count++;
+ tmp++;
+ }
+ tmp++;
+ }
+ }
+ }
+
#if CONFIG_MULTI_RES_ENCODING
vp8_cal_dissimilarity(cpi);
#endif
- // Update the GF useage maps.
- // This is done after completing the compression of a frame when all
- // modes etc. are finalized but before loop filter
+ /* Update the GF usage maps.
+ * This is done after completing the compression of a frame when all
+ * modes etc. are finalized but before loop filter
+ */
if (cpi->oxcf.number_of_layers == 1)
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
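
The ZEROMV census introduced at the top of this hunk walks the MODE_INFO array row by row, with one extra pointer advance per row. A standalone sketch of the same walk; it assumes the mode-info layout carries one border element at the end of each macroblock row (which is what the extra increment suggests), and the struct and enum value are stand-ins, not the real libvpx definitions:

    typedef struct { int mode; } mode_info;   /* flattened stand-in */
    enum { ZEROMV = 4 };                      /* stand-in value only */

    static int count_zeromv(const mode_info *mi, int mb_rows, int mb_cols)
    {
        int count = 0, r, c;
        for (r = 0; r < mb_rows; r++)
        {
            for (c = 0; c < mb_cols; c++)
            {
                if (mi->mode == ZEROMV)
                    count++;
                mi++;
            }
            mi++;   /* skip the assumed border element ending each row */
        }
        return count;
    }
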
@@ -4134,9 +4319,10 @@ static void encode_frame_to_data_rate
}
#endif
- // For inter frames the current default behavior is that when
- // cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer
- // This is purely an encoder decision at present.
+ /* For inter frames the current default behavior is that when
+ * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer
+ * This is purely an encoder decision at present.
+ */
if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame)
cm->copy_buffer_to_arf = 2;
else
@@ -4147,7 +4333,8 @@ static void encode_frame_to_data_rate
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded)
{
- sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */
+ /* start loopfilter in separate thread */
+ sem_post(&cpi->h_event_start_lpf);
cpi->b_lpf_running = 1;
}
else
@@ -4156,7 +4343,7 @@ static void encode_frame_to_data_rate
vp8_loopfilter_frame(cpi, cm);
}
- update_reference_frames(cm);
+ update_reference_frames(cpi);
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if (cpi->oxcf.error_resilient_mode)
@@ -4171,7 +4358,7 @@ static void encode_frame_to_data_rate
sem_wait(&cpi->h_event_end_lpf);
#endif
- // build the bitstream
+ /* build the bitstream */
vp8_pack_bitstream(cpi, dest, dest_end, size);
#if CONFIG_MULTITHREAD
@@ -4187,7 +4374,7 @@ static void encode_frame_to_data_rate
* needed in motion search besides loopfilter */
cm->last_frame_type = cm->frame_type;
- // Update rate control heuristics
+ /* Update rate control heuristics */
cpi->total_byte_count += (*size);
cpi->projected_frame_size = (*size) << 3;
@@ -4208,18 +4395,21 @@ static void encode_frame_to_data_rate
vp8_adjust_key_frame_context(cpi);
}
- // Keep a record of ambient average Q.
+ /* Keep a record of ambient average Q. */
if (cm->frame_type != KEY_FRAME)
cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2;
- // Keep a record from which we can calculate the average Q excluding GF updates and key frames
+ /* Keep a record from which we can calculate the average Q excluding
+ * GF updates and key frames
+ */
if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) ||
(!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame)))
{
cpi->ni_frames++;
- // Calculate the average Q for normal inter frames (not key or GFU
- // frames).
+ /* Calculate the average Q for normal inter frames (not key or GFU
+ * frames).
+ */
if ( cpi->pass == 2 )
{
cpi->ni_tot_qi += Q;
@@ -4227,81 +4417,62 @@ static void encode_frame_to_data_rate
}
else
{
- // Damp value for first few frames
+ /* Damp value for first few frames */
if (cpi->ni_frames > 150 )
{
cpi->ni_tot_qi += Q;
cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames);
}
- // For one pass, early in the clip ... average the current frame Q
- // value with the worstq entered by the user as a dampening measure
+ /* For one pass, early in the clip ... average the current frame Q
+ * value with the worstq entered by the user as a dampening measure
+ */
else
{
cpi->ni_tot_qi += Q;
cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2;
}
- // If the average Q is higher than what was used in the last frame
- // (after going through the recode loop to keep the frame size within range)
- // then use the last frame value - 1.
- // The -1 is designed to stop Q and hence the data rate, from progressively
- // falling away during difficult sections, but at the same time reduce the number of
- // itterations around the recode loop.
+ /* If the average Q is higher than what was used in the last
+ * frame (after going through the recode loop to keep the frame
+ * size within range) then use the last frame value - 1. The -1
+ * is designed to stop Q, and hence the data rate, from
+ * progressively falling away during difficult sections, but at
+ * the same time reduce the number of iterations around the
+ * recode loop.
+ */
if (Q > cpi->ni_av_qi)
cpi->ni_av_qi = Q - 1;
}
}
-#if 0
-
- // If the frame was massively oversize and we are below optimal buffer level drop next frame
- if ((cpi->drop_frames_allowed) &&
- (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
- (cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) &&
- (cpi->projected_frame_size > (4 * cpi->this_frame_target)))
- {
- cpi->drop_frame = 1;
- }
-
-#endif
-
- // Set the count for maximum consecutive dropped frames based upon the ratio of
- // this frame size to the target average per frame bandwidth.
- // (cpi->av_per_frame_bandwidth > 0) is just a sanity check to prevent / 0.
- if (cpi->drop_frames_allowed && (cpi->av_per_frame_bandwidth > 0))
- {
- cpi->max_drop_count = cpi->projected_frame_size / cpi->av_per_frame_bandwidth;
-
- if (cpi->max_drop_count > cpi->max_consec_dropped_frames)
- cpi->max_drop_count = cpi->max_consec_dropped_frames;
- }
-
- // Update the buffer level variable.
- // Non-viewable frames are a special case and are treated as pure overhead.
+ /* Update the buffer level variable. */
+ /* Non-viewable frames are a special case and are treated as pure overhead. */
if ( !cm->show_frame )
cpi->bits_off_target -= cpi->projected_frame_size;
else
cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
- // Clip the buffer level to the maximum specified buffer size
+ /* Clip the buffer level to the maximum specified buffer size */
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
- // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
+ /* Rolling monitors of whether we are over- or under-spending, used to
+ * help regulate min and max Q in two pass.
+ */
cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32;
- // Actual bits spent
+ /* Actual bits spent */
cpi->total_actual_bits += cpi->projected_frame_size;
- // Debug stats
+ /* Debug stats */
cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size);
cpi->buffer_level = cpi->bits_off_target;
- // Propagate values to higher temporal layers
+ /* Propagate values to higher temporal layers */
if (cpi->oxcf.number_of_layers > 1)
{
unsigned int i;
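
The four rolling_*_bits updates a few lines up are rounded integer exponential moving averages: history weight 3/4 for the short window and 31/32 for the long one. In generic form (illustrative, not the encoder's API):

    /* Rounded integer EMA with history weight (w-1)/w. */
    static int roll_avg(int avg, int sample, int w)
    {
        return (avg * (w - 1) + sample + w / 2) / w;
    }

So rolling_target_bits corresponds to roll_avg(rolling_target_bits, this_frame_target, 4), and the long monitors use w = 32.
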
@@ -4309,12 +4480,13 @@ static void encode_frame_to_data_rate
for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate
- - cpi->projected_frame_size;
+ int bits_off_for_this_layer =
+ (int)(lc->target_bandwidth / lc->frame_rate -
+ cpi->projected_frame_size);
lc->bits_off_target += bits_off_for_this_layer;
- // Clip buffer level to maximum buffer size for the layer
+ /* Clip buffer level to maximum buffer size for the layer */
if (lc->bits_off_target > lc->maximum_buffer_size)
lc->bits_off_target = lc->maximum_buffer_size;
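
The per-layer bookkeeping in this hunk charges every coded frame against each higher temporal layer at that layer's own per-frame budget (target_bandwidth / frame_rate), then clips to the layer's buffer size. Sketched standalone with illustrative array-based state:

    static void charge_higher_layers(long long *bits_off_target,
                                     const long long *max_buffer_size,
                                     const double *target_bandwidth,
                                     const double *frame_rate,
                                     int current_layer, int n_layers,
                                     int frame_bits)
    {
        int i;
        for (i = current_layer + 1; i < n_layers; i++)
        {
            bits_off_target[i] +=
                (long long)(target_bandwidth[i] / frame_rate[i] - frame_bits);
            if (bits_off_target[i] > max_buffer_size[i])
                bits_off_target[i] = max_buffer_size[i];
        }
    }
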
@@ -4324,7 +4496,9 @@ static void encode_frame_to_data_rate
}
}
- // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames
+ /* Update bits left to the kf and gf groups to account for overshoot
+ * or undershoot on these frames
+ */
if (cm->frame_type == KEY_FRAME)
{
cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size;
@@ -4357,7 +4531,7 @@ static void encode_frame_to_data_rate
cpi->last_skip_false_probs[0] = cpi->prob_skip_false;
cpi->last_skip_probs_q[0] = cm->base_qindex;
- //update the baseline
+ /* update the baseline */
cpi->base_skip_false_prob[cm->base_qindex] = cpi->prob_skip_false;
}
@@ -4367,7 +4541,7 @@ static void encode_frame_to_data_rate
{
FILE *f = fopen("tmp.stt", "a");
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
if (cpi->twopass.total_left_stats.coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
@@ -4383,7 +4557,6 @@ static void encode_frame_to_data_rate
cpi->active_best_quality, cpi->active_worst_quality,
cpi->ni_av_qi, cpi->cq_target_quality,
cpi->zbin_over_quant,
- //cpi->avg_frame_qindex, cpi->zbin_over_quant,
cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
@@ -4406,7 +4579,6 @@ static void encode_frame_to_data_rate
cpi->active_best_quality, cpi->active_worst_quality,
cpi->ni_av_qi, cpi->cq_target_quality,
cpi->zbin_over_quant,
- //cpi->avg_frame_qindex, cpi->zbin_over_quant,
cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
@@ -4436,10 +4608,6 @@ static void encode_frame_to_data_rate
#endif
- // If this was a kf or Gf note the Q
- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
- cm->last_kf_gf_q = cm->base_qindex;
-
if (cm->refresh_golden_frame == 1)
cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
else
@@ -4451,49 +4619,55 @@ static void encode_frame_to_data_rate
cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
- if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed
+ if (cm->refresh_last_frame & cm->refresh_golden_frame)
+ /* both refreshed */
cpi->gold_is_last = 1;
- else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ else if (cm->refresh_last_frame ^ cm->refresh_golden_frame)
+ /* 1 refreshed but not the other */
cpi->gold_is_last = 0;
- if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed
+ if (cm->refresh_last_frame & cm->refresh_alt_ref_frame)
+ /* both refreshed */
cpi->alt_is_last = 1;
- else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other
+ else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame)
+ /* 1 refreshed but not the other */
cpi->alt_is_last = 0;
- if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed
+ if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame)
+ /* both refreshed */
cpi->gold_is_alt = 1;
- else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame)
+ /* 1 refreshed but not the other */
cpi->gold_is_alt = 0;
- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
+ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME;
if (cpi->gold_is_last)
- cpi->ref_frame_flags &= ~VP8_GOLD_FLAG;
+ cpi->ref_frame_flags &= ~VP8_GOLD_FRAME;
if (cpi->alt_is_last)
- cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
+ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME;
if (cpi->gold_is_alt)
- cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
+ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME;
if (!cpi->oxcf.error_resilient_mode)
{
if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
- // Update the alternate reference frame stats as appropriate.
+ /* Update the alternate reference frame stats as appropriate. */
update_alt_ref_frame_stats(cpi);
else
- // Update the Golden frame stats as appropriate.
+ /* Update the Golden frame stats as appropriate. */
update_golden_frame_stats(cpi);
}
if (cm->frame_type == KEY_FRAME)
{
- // Tell the caller that the frame was coded as a key frame
+ /* Tell the caller that the frame was coded as a key frame */
*frame_flags = cm->frame_flags | FRAMEFLAGS_KEY;
- // As this frame is a key frame the next defaults to an inter frame.
+ /* As this frame is a key frame the next defaults to an inter frame. */
cm->frame_type = INTER_FRAME;
cpi->last_frame_percent_intra = 100;
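
The reference-flag logic above starts with all three references selectable and masks out any buffer that duplicates another, so motion search is not run twice over identical frames. A compact sketch; the flag values are illustrative, not the renamed VP8_*_FRAME constants:

    #define LAST_FLAG 1
    #define GOLD_FLAG 2
    #define ALTR_FLAG 4

    static int usable_ref_flags(int gold_is_last, int alt_is_last,
                                int gold_is_alt)
    {
        int flags = LAST_FLAG | GOLD_FLAG | ALTR_FLAG;
        if (gold_is_last) flags &= ~GOLD_FLAG;   /* golden duplicates last   */
        if (alt_is_last)  flags &= ~ALTR_FLAG;   /* altref duplicates last   */
        if (gold_is_alt)  flags &= ~ALTR_FLAG;   /* altref duplicates golden */
        return flags;
    }
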
@@ -4505,20 +4679,24 @@ static void encode_frame_to_data_rate
cpi->last_frame_percent_intra = cpi->this_frame_percent_intra;
}
- // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas.
+ /* Clear the one shot update flags for segmentation map and mode/ref
+ * loop filter deltas.
+ */
cpi->mb.e_mbd.update_mb_segmentation_map = 0;
cpi->mb.e_mbd.update_mb_segmentation_data = 0;
cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
- // Dont increment frame counters if this was an altref buffer update not a real frame
+ /* Dont increment frame counters if this was an altref buffer update
+ * not a real frame
+ */
if (cm->show_frame)
{
cm->current_video_frame++;
cpi->frames_since_key++;
}
- // reset to normal state now that we are done.
+ /* reset to normal state now that we are done. */
@@ -4534,67 +4712,11 @@ static void encode_frame_to_data_rate
}
#endif
- // DEBUG
- //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show);
+ /* DEBUG */
+ /* vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); */
}
-
-
-static void check_gf_quality(VP8_COMP *cpi)
-{
- VP8_COMMON *cm = &cpi->common;
- int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);
- int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols);
- int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols);
-
- // Gf refresh is not currently being signalled
- if (cpi->gf_update_recommended == 0)
- {
- if (cpi->common.frames_since_golden > 7)
- {
- // Low use of gf
- if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15))
- {
- // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate
- if (last_ref_zz_useage >= 25)
- {
- cpi->gf_bad_count ++;
-
- if (cpi->gf_bad_count >= 8) // Check that the condition is stable
- {
- cpi->gf_update_recommended = 1;
- cpi->gf_bad_count = 0;
- }
- }
- else
- cpi->gf_bad_count = 0; // Restart count as the background is not stable enough
- }
- else
- cpi->gf_bad_count = 0; // Gf useage has picked up so reset count
- }
- }
- // If the signal is set but has not been read should we cancel it.
- else if (last_ref_zz_useage < 15)
- {
- cpi->gf_update_recommended = 0;
- cpi->gf_bad_count = 0;
- }
-
-#if 0
- {
- FILE *f = fopen("gfneeded.stt", "a");
- fprintf(f, "%10d %10d %10d %10d %10ld \n",
- cm->current_video_frame,
- cpi->common.frames_since_golden,
- gf_active_pct, gf_ref_usage_pct,
- cpi->gf_update_recommended);
- fclose(f);
- }
-
-#endif
-}
-
#if !(CONFIG_REALTIME_ONLY)
static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags)
{
@@ -4614,7 +4736,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
}
#endif
-//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
+/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
#if HAVE_NEON
extern void vp8_push_neon(int64_t *store);
extern void vp8_pop_neon(int64_t *store);
@@ -4721,7 +4843,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->source = NULL;
#if !(CONFIG_REALTIME_ONLY)
- // Should we code an alternate reference frame
+ /* Should we code an alternate reference frame */
if (cpi->oxcf.error_resilient_mode == 0 &&
cpi->oxcf.play_alternate &&
cpi->source_alt_ref_pending)
@@ -4742,7 +4864,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 0;
cm->show_frame = 0;
- cpi->source_alt_ref_pending = 0; // Clear Pending alt Ref flag.
+ /* Clear Pending alt Ref flag. */
+ cpi->source_alt_ref_pending = 0;
cpi->is_src_frame_alt_ref = 0;
}
}
@@ -4814,7 +4937,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->last_end_time_stamp_seen = cpi->source->ts_start;
}
- // adjust frame rates based on timestamps given
+ /* adjust frame rates based on timestamps given */
if (cm->show_frame)
{
int64_t this_duration;
@@ -4832,9 +4955,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen;
last_duration = cpi->last_end_time_stamp_seen
- cpi->last_time_stamp_seen;
- // do a step update if the duration changes by 10%
+ /* do a step update if the duration changes by 10% */
if (last_duration)
- step = ((this_duration - last_duration) * 10 / last_duration);
+ step = (int)(((this_duration - last_duration) *
+ 10 / last_duration));
}
if (this_duration)
@@ -4849,7 +4973,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
* frame rate. If we haven't seen 1 second yet, then average
* over the whole interval seen.
*/
- interval = cpi->source->ts_end - cpi->first_time_stamp_ever;
+ interval = (double)(cpi->source->ts_end -
+ cpi->first_time_stamp_ever);
if(interval > 10000000.0)
interval = 10000000;
@@ -4862,9 +4987,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cpi->oxcf.number_of_layers > 1)
{
- int i;
+ unsigned int i;
- // Update frame rates for each layer
+ /* Update frame rates for each layer */
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
@@ -4886,7 +5011,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
update_layer_contexts (cpi);
- // Restore layer specific context & set frame rate
+ /* Restore layer specific context & set frame rate */
layer = cpi->oxcf.layer_id[
cm->current_video_frame % cpi->oxcf.periodicity];
restore_layer_context (cpi, layer);
@@ -4895,12 +5020,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cpi->compressor_speed == 2)
{
- if (cpi->oxcf.number_of_layers == 1)
- check_gf_quality(cpi);
vpx_usec_timer_start(&tsctimer);
vpx_usec_timer_start(&ticktimer);
}
+ cpi->lf_zeromv_pct = (cpi->zeromv_count * 100)/cm->MBs;
+
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
int i;
@@ -4924,11 +5049,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
}
#endif
- // start with a 0 size frame
+ /* start with a 0 size frame */
*size = 0;
- // Clear down mmx registers
- vp8_clear_system_state(); //__asm emms;
+ /* Clear down mmx registers */
+ vp8_clear_system_state();
cm->frame_type = INTER_FRAME;
cm->frame_flags = *frame_flags;
@@ -4937,7 +5062,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cm->refresh_alt_ref_frame)
{
- //cm->refresh_golden_frame = 1;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 0;
}
@@ -4982,7 +5106,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
vpx_usec_timer_mark(&tsctimer);
vpx_usec_timer_mark(&ticktimer);
- duration = vpx_usec_timer_elapsed(&ticktimer);
+ duration = (int)(vpx_usec_timer_elapsed(&ticktimer));
duration2 = (unsigned int)((double)duration / 2);
if (cm->frame_type != KEY_FRAME)
@@ -4995,7 +5119,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (duration2)
{
- //if(*frame_flags!=1)
{
if (cpi->avg_pick_mode_time == 0)
@@ -5012,8 +5135,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
}
- // Save the contexts separately for alt ref, gold and last.
- // (TODO jbb -> Optimize this with pointers to avoid extra copies. )
+ /* Save the contexts separately for alt ref, gold and last. */
+ /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */
if(cm->refresh_alt_ref_frame)
vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
@@ -5023,12 +5146,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if(cm->refresh_last_frame)
vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
- // if its a dropped frame honor the requests on subsequent frames
+ /* if it's a dropped frame honor the requests on subsequent frames */
if (*size > 0)
{
cpi->droppable = !frame_is_reference(cpi);
- // return to normal state
+ /* return to normal state */
cm->refresh_entropy_probs = 1;
cm->refresh_alt_ref_frame = 0;
cm->refresh_golden_frame = 0;
@@ -5037,7 +5160,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
}
- // Save layer specific state
+ /* Save layer specific state */
if (cpi->oxcf.number_of_layers > 1)
save_layer_context (cpi);
@@ -5062,14 +5185,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cpi->b_calculate_psnr)
{
- double ye,ue,ve;
+ uint64_t ye,ue,ve;
double frame_psnr;
YV12_BUFFER_CONFIG *orig = cpi->Source;
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
int y_samples = orig->y_height * orig->y_width ;
int uv_samples = orig->uv_height * orig->uv_width ;
int t_samples = y_samples + 2 * uv_samples;
- int64_t sq_error, sq_error2;
+ double sq_error, sq_error2;
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height);
@@ -5080,13 +5203,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height);
- sq_error = ye + ue + ve;
+ sq_error = (double)(ye + ue + ve);
frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
- cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye);
- cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue);
- cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve);
+ cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye);
+ cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue);
+ cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve);
cpi->total_sq_error += sq_error;
cpi->total += frame_psnr;
#if CONFIG_POSTPROC
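
The per-plane errors ye/ue/ve above are summed and converted through vp8_mse2psnr(), which implements the usual 10*log10(peak^2 * samples / sse) relation. A stand-in with the same argument order; the clamp value here is an assumption, and the real function's cap may differ:

    #include <math.h>

    /* Stand-in for vp8_mse2psnr(): sample count, peak value, total SSE. */
    static double mse2psnr_sketch(double samples, double peak, double sse)
    {
        double psnr;
        if (sse <= 0.0)
            return 100.0;                 /* assumed cap for lossless frames */
        psnr = 10.0 * log10(peak * peak * samples / sse);
        return psnr > 100.0 ? 100.0 : psnr;
    }

With this stand-in, frame_psnr corresponds to mse2psnr_sketch(t_samples, 255.0, (double)(ye + ue + ve)).
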
@@ -5095,7 +5218,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
double frame_psnr2, frame_ssim2 = 0;
double weight = 0;
- vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0);
+ vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0);
vp8_clear_system_state();
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
@@ -5107,13 +5230,16 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height);
- sq_error2 = ye + ue + ve;
+ sq_error2 = (double)(ye + ue + ve);
frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2);
- cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye);
- cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue);
- cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve);
+ cpi->totalp_y += vp8_mse2psnr(y_samples,
+ 255.0, (double)ye);
+ cpi->totalp_u += vp8_mse2psnr(uv_samples,
+ 255.0, (double)ue);
+ cpi->totalp_v += vp8_mse2psnr(uv_samples,
+ 255.0, (double)ve);
cpi->total_sq_error2 += sq_error2;
cpi->totalp += frame_psnr2;
@@ -5125,7 +5251,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cpi->oxcf.number_of_layers > 1)
{
- int i;
+ unsigned int i;
for (i=cpi->current_layer;
i<cpi->oxcf.number_of_layers; i++)
@@ -5153,7 +5279,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cpi->oxcf.number_of_layers > 1)
{
- int i;
+ unsigned int i;
for (i=cpi->current_layer;
i<cpi->oxcf.number_of_layers; i++)
@@ -5251,7 +5377,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
ret = -1;
}
-#endif //!CONFIG_POSTPROC
+#endif
vp8_clear_system_state();
return ret;
}
@@ -5260,29 +5386,53 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4])
{
signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
+ int internal_delta_q[MAX_MB_SEGMENTS];
+ const int range = 63;
+ int i;
+ /* This method is currently incompatible with the cyclic refresh method */
+ if ( cpi->cyclic_refresh_mode_enabled )
+ return -1;
+
+ /* Check number of rows and columns match */
if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
return -1;
+ /* Range check the delta Q values; they are translated to the internal
+ * Q range further below. */
+ if ( (abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) ||
+ (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range) )
+ return -1;
+
+ /* Range check the delta lf values */
+ if ( (abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) ||
+ (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range) )
+ return -1;
+
if (!map)
{
disable_segmentation(cpi);
return 0;
}
- // Set the segmentation Map
+ /* Translate the external delta q values to internal values. */
+ for ( i = 0; i < MAX_MB_SEGMENTS; i++ )
+ internal_delta_q[i] =
+ ( delta_q[i] >= 0 ) ? q_trans[delta_q[i]] : -q_trans[-delta_q[i]];
+
+ /* Set the segmentation Map */
set_segmentation_map(cpi, map);
- // Activate segmentation.
+ /* Activate segmentation. */
enable_segmentation(cpi);
- // Set up the quant segment data
- feature_data[MB_LVL_ALT_Q][0] = delta_q[0];
- feature_data[MB_LVL_ALT_Q][1] = delta_q[1];
- feature_data[MB_LVL_ALT_Q][2] = delta_q[2];
- feature_data[MB_LVL_ALT_Q][3] = delta_q[3];
+ /* Set up the quant segment data */
+ feature_data[MB_LVL_ALT_Q][0] = internal_delta_q[0];
+ feature_data[MB_LVL_ALT_Q][1] = internal_delta_q[1];
+ feature_data[MB_LVL_ALT_Q][2] = internal_delta_q[2];
+ feature_data[MB_LVL_ALT_Q][3] = internal_delta_q[3];
- // Set up the loop segment data s
+ /* Set up the loop filter segment data */
feature_data[MB_LVL_ALT_LF][0] = delta_lf[0];
feature_data[MB_LVL_ALT_LF][1] = delta_lf[1];
feature_data[MB_LVL_ALT_LF][2] = delta_lf[2];
@@ -5293,8 +5443,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
cpi->segment_encode_breakout[2] = threshold[2];
cpi->segment_encode_breakout[3] = threshold[3];
- // Initialise the feature data structure
- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1
+ /* Initialise the feature data structure */
set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
return 0;
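
The validation pattern added to vp8_set_roimap() above rejects out-of-range segment deltas and then maps the caller's 0..63 quantizer deltas onto the internal q-index scale via q_trans[]. Sketched standalone; q_trans is assumed to be the encoder's external-to-internal quantizer lookup and is passed in here only to keep the sketch self-contained:

    static int translate_deltas(const int *delta_q, int *internal_delta_q,
                                const int *q_trans, int n)
    {
        const int range = 63;
        int i;
        for (i = 0; i < n; i++)
            if (delta_q[i] < -range || delta_q[i] > range)
                return -1;                  /* reject, as the hunk does */
        for (i = 0; i < n; i++)
            internal_delta_q[i] = (delta_q[i] >= 0) ? q_trans[delta_q[i]]
                                                    : -q_trans[-delta_q[i]];
        return 0;
    }
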
@@ -5316,7 +5465,6 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns
}
else
{
- //cpi->active_map_enabled = 0;
return -1 ;
}
}
@@ -5346,7 +5494,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
unsigned char *src = source->y_buffer;
unsigned char *dst = dest->y_buffer;
- // Loop through the Y plane raw and reconstruction data summing (square differences)
+ /* Loop through the Y plane raw and reconstruction data summing
+ * (square differences)
+ */
for (i = 0; i < source->y_height; i += 16)
{
for (j = 0; j < source->y_width; j += 16)
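
vp8_calc_ss_err, whose comment the hunk above reflows, sums squared Y-plane differences in 16x16 blocks (hence the strides of 16 in the loops), typically through a SIMD variance kernel. A plain-C equivalent of the same sum, without the blocking, for illustration only:

    static unsigned long long plane_sse(const unsigned char *a, int a_stride,
                                        const unsigned char *b, int b_stride,
                                        int width, int height)
    {
        unsigned long long sse = 0;
        int r, c;
        for (r = 0; r < height; r++)
        {
            for (c = 0; c < width; c++)
            {
                int diff = a[c] - b[c];
                sse += (unsigned int)(diff * diff);
            }
            a += a_stride;
            b += b_stride;
        }
        return sse;
    }
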
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 900141b..fb8ad35 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -25,6 +25,7 @@
#include "vp8/common/threading.h"
#include "vpx_ports/mem.h"
#include "vpx/internal/vpx_codec_internal.h"
+#include "vpx/vp8.h"
#include "mcomp.h"
#include "vp8/common/findnearmv.h"
#include "lookahead.h"
@@ -32,7 +33,6 @@
#include "vp8/encoder/denoising.h"
#endif
-//#define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
#define DEFAULT_GF_INTERVAL 7
@@ -43,7 +43,7 @@
#define AF_THRESH 25
#define AF_THRESH2 100
#define ARF_DECAY_THRESH 12
-#define MAX_MODES 20
+
#define MIN_THRESHMULT 32
#define MAX_THRESHMULT 512
@@ -73,7 +73,6 @@ typedef struct
int mvcosts[2][MVvals+1];
#ifdef MODE_STATS
- // Stats
int y_modes[5];
int uv_modes[4];
int b_modes[10];
@@ -232,22 +231,22 @@ enum
typedef struct
{
- // Layer configuration
+ /* Layer configuration */
double frame_rate;
int target_bandwidth;
- // Layer specific coding parameters
- int starting_buffer_level;
- int optimal_buffer_level;
- int maximum_buffer_size;
- int starting_buffer_level_in_ms;
- int optimal_buffer_level_in_ms;
- int maximum_buffer_size_in_ms;
+ /* Layer specific coding parameters */
+ int64_t starting_buffer_level;
+ int64_t optimal_buffer_level;
+ int64_t maximum_buffer_size;
+ int64_t starting_buffer_level_in_ms;
+ int64_t optimal_buffer_level_in_ms;
+ int64_t maximum_buffer_size_in_ms;
int avg_frame_size_for_layer;
- int buffer_level;
- int bits_off_target;
+ int64_t buffer_level;
+ int64_t bits_off_target;
int64_t total_actual_bits;
int total_target_vs_actual;
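
The widening of the LAYER_CONTEXT buffer fields above to int64_t matters because these levels are configured in milliseconds and converted by multiplying with the layer's target bandwidth; the intermediate product overflows 32 bits at quite ordinary settings. A worked example with hypothetical numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical settings: a 6 second buffer at a 2 Mbit/s target. */
        int64_t buffer_ms = 6000;
        int64_t bandwidth = 2000000;

        /* Intermediate product is 12,000,000,000, far beyond INT32_MAX
         * (2,147,483,647); the result only fits after the division. */
        int64_t buffer_bits = buffer_ms * bandwidth / 1000;

        printf("%lld bits\n", (long long)buffer_bits); /* 12000000 */
        return 0;
    }
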
@@ -307,7 +306,7 @@ typedef struct VP8_COMP
MACROBLOCK mb;
VP8_COMMON common;
- vp8_writer bc[9]; // one boolcoder for each partition
+ vp8_writer bc[9]; /* one boolcoder for each partition */
VP8_CONFIG oxcf;
@@ -321,16 +320,20 @@ typedef struct VP8_COMP
YV12_BUFFER_CONFIG scaled_source;
YV12_BUFFER_CONFIG *last_frame_unscaled_source;
- int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
- int source_alt_ref_active; // an alt ref frame has been encoded and is usable
-
- int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
+ /* frame in src_buffers has been identified to be encoded as an alt ref */
+ int source_alt_ref_pending;
+ /* an alt ref frame has been encoded and is usable */
+ int source_alt_ref_active;
+ /* source of frame to encode is an exact copy of an alt ref frame */
+ int is_src_frame_alt_ref;
- int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
- int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
- int gold_is_alt; // don't do both alt and gold search ( just do gold).
+ /* golden frame same as last frame ( short circuit gold searches) */
+ int gold_is_last;
+ /* Alt reference frame same as last ( short circuit altref search) */
+ int alt_is_last;
+ /* don't do both alt and gold search ( just do gold). */
+ int gold_is_alt;
- //int refresh_alt_ref_frame;
YV12_BUFFER_CONFIG pick_lf_lvl_frame;
TOKENEXTRA *tok;
@@ -342,55 +345,62 @@ typedef struct VP8_COMP
unsigned int this_key_frame_forced;
unsigned int next_key_frame_forced;
- // Ambient reconstruction err target for force key frames
+ /* Ambient reconstruction err target for force key frames */
int ambient_err;
unsigned int mode_check_freq[MAX_MODES];
- unsigned int mode_test_hit_counts[MAX_MODES];
unsigned int mode_chosen_counts[MAX_MODES];
- unsigned int mbs_tested_so_far;
- int rd_thresh_mult[MAX_MODES];
int rd_baseline_thresh[MAX_MODES];
- int rd_threshes[MAX_MODES];
int RDMULT;
int RDDIV ;
CODING_CONTEXT coding_context;
- // Rate targetting variables
- int64_t prediction_error;
+ /* Rate targetting variables */
int64_t last_prediction_error;
- int64_t intra_error;
int64_t last_intra_error;
int this_frame_target;
int projected_frame_size;
- int last_q[2]; // Separate values for Intra/Inter
+ int last_q[2]; /* Separate values for Intra/Inter */
double rate_correction_factor;
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
- int frames_till_gf_update_due; // Count down till next GF
- int current_gf_interval; // GF interval chosen when we coded the last GF
+ /* Count down till next GF */
+ int frames_till_gf_update_due;
+
+ /* GF interval chosen when we coded the last GF */
+ int current_gf_interval;
- int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative)
+ /* Total bits overspent because of GF boost (cumulative) */
+ int gf_overspend_bits;
- int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF
+ /* Used in the few frames following a GF to recover the extra bits
+ * spent in that GF
+ */
+ int non_gf_bitrate_adjustment;
- int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames
- int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame.
+ /* Extra bits spent on key frames that need to be recovered */
+ int kf_overspend_bits;
+
+ /* Current number of bits to try and recover on each inter frame. */
+ int kf_bitrate_adjustment;
int max_gf_interval;
int baseline_gf_interval;
- int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
+ int active_arnr_frames;
int64_t key_frame_count;
int prior_key_frame_distance[KEY_FRAME_CONTEXT];
- int per_frame_bandwidth; // Current section per frame bandwidth target
- int av_per_frame_bandwidth; // Average frame size target for clip
- int min_frame_bandwidth; // Minimum allocation that should be used for any frame
+ /* Current section per frame bandwidth target */
+ int per_frame_bandwidth;
+ /* Average frame size target for clip */
+ int av_per_frame_bandwidth;
+ /* Minimum allocation that should be used for any frame */
+ int min_frame_bandwidth;
int inter_frame_target;
double output_frame_rate;
int64_t last_time_stamp_seen;
@@ -402,12 +412,6 @@ typedef struct VP8_COMP
int ni_frames;
int avg_frame_qindex;
- int zbin_over_quant;
- int zbin_mode_boost;
- int zbin_mode_boost_enabled;
- int last_zbin_over_quant;
- int last_zbin_mode_boost;
-
int64_t total_byte_count;
int buffered_mode;
@@ -415,7 +419,7 @@ typedef struct VP8_COMP
double frame_rate;
double ref_frame_rate;
int64_t buffer_level;
- int bits_off_target;
+ int64_t bits_off_target;
int rolling_target_bits;
int rolling_actual_bits;
@@ -424,7 +428,7 @@ typedef struct VP8_COMP
int long_rolling_actual_bits;
int64_t total_actual_bits;
- int total_target_vs_actual; // debug stats
+ int total_target_vs_actual; /* debug stats */
int worst_quality;
int active_worst_quality;
@@ -433,22 +437,9 @@ typedef struct VP8_COMP
int cq_target_quality;
- int drop_frames_allowed; // Are we permitted to drop frames?
- int drop_frame; // Drop this frame?
- int drop_count; // How many frames have we dropped?
- int max_drop_count; // How many frames should we drop?
- int max_consec_dropped_frames; // Limit number of consecutive frames that can be dropped.
-
-
- int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */
- int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */
+ int drop_frames_allowed; /* Are we permitted to drop frames? */
+ int drop_frame; /* Drop this frame? */
- unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */
-
- unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
-
- //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more
- //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation
vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
@@ -462,7 +453,7 @@ typedef struct VP8_COMP
struct vpx_codec_pkt_list *output_pkt_list;
#if 0
- // Experimental code for lagged and one pass
+ /* Experimental code for lagged and one pass */
ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS];
int one_pass_frame_index;
#endif
@@ -470,17 +461,14 @@ typedef struct VP8_COMP
int decimation_factor;
int decimation_count;
- // for real time encoding
- int avg_encode_time; //microsecond
- int avg_pick_mode_time; //microsecond
+ /* for real time encoding */
+ int avg_encode_time; /* microsecond */
+ int avg_pick_mode_time; /* microsecond */
int Speed;
- unsigned int cpu_freq; //Mhz
int compressor_speed;
- int interquantizer;
int auto_gold;
int auto_adjust_gold_quantizer;
- int goldfreq;
int auto_worst_q;
int cpu_used;
int pass;
@@ -494,29 +482,28 @@ typedef struct VP8_COMP
int last_skip_probs_q[3];
int recent_ref_frame_usage[MAX_REF_FRAMES];
- int count_mb_ref_frame_usage[MAX_REF_FRAMES];
int this_frame_percent_intra;
int last_frame_percent_intra;
int ref_frame_flags;
SPEED_FEATURES sf;
- int error_bins[1024];
- // Data used for real time conferencing mode to help determine if it would be good to update the gf
- int inter_zz_count;
- int gf_bad_count;
- int gf_update_recommended;
- int skip_true_count;
+ /* Count ZEROMV on all reference frames. */
+ int zeromv_count;
+ int lf_zeromv_pct;
unsigned char *segmentation_map;
- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values)
- int segment_encode_breakout[MAX_MB_SEGMENTS]; // segment threashold for encode breakout
+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
+ int segment_encode_breakout[MAX_MB_SEGMENTS];
unsigned char *active_map;
unsigned int active_map_enabled;
- // Video conferencing cyclic refresh mode flags etc
- // This is a mode designed to clean up the background over time in live encoding scenarious. It uses segmentation
+
+ /* Video conferencing cyclic refresh mode flags. This is a mode
+ * designed to clean up the background over time in live encoding
+ * scenarios. It uses segmentation.
+ */
int cyclic_refresh_mode_enabled;
int cyclic_refresh_mode_max_mbs_perframe;
int cyclic_refresh_mode_index;
@@ -524,7 +511,7 @@ typedef struct VP8_COMP
signed char *cyclic_refresh_map;
#if CONFIG_MULTITHREAD
- // multithread data
+ /* multithread data */
int * mt_current_mb_col;
int mt_sync_range;
int b_multi_threaded;
@@ -538,7 +525,7 @@ typedef struct VP8_COMP
ENCODETHREAD_DATA *en_thread_data;
LPFTHREAD_DATA lpf_thread_data;
- //events
+ /* events */
sem_t *h_event_start_encoding;
sem_t h_event_end_encoding;
sem_t h_event_start_lpf;
@@ -549,7 +536,6 @@ typedef struct VP8_COMP
unsigned int partition_sz[MAX_PARTITIONS];
unsigned char *partition_d[MAX_PARTITIONS];
unsigned char *partition_d_end[MAX_PARTITIONS];
- // end of multithread data
fractional_mv_step_fp *find_fractional_mv_step;
@@ -557,10 +543,10 @@ typedef struct VP8_COMP
vp8_refining_search_fn_t refining_search_sad;
vp8_diamond_search_fn_t diamond_search_sad;
vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
- unsigned int time_receive_data;
- unsigned int time_compress_data;
- unsigned int time_pick_lpf;
- unsigned int time_encode_mb_row;
+ uint64_t time_receive_data;
+ uint64_t time_compress_data;
+ uint64_t time_pick_lpf;
+ uint64_t time_encode_mb_row;
int base_skip_false_prob[128];
@@ -594,16 +580,16 @@ typedef struct VP8_COMP
int gf_decay_rate;
int static_scene_max_gf_interval;
int kf_bits;
- int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only
-
- // Projected total bits available for a key frame group of frames
+ /* Remaining error from uncoded frames in a gf group. */
+ int gf_group_error_left;
+ /* Projected total bits available for a key frame group of frames */
int64_t kf_group_bits;
-
- // Error score of frames still to be coded in kf group
+ /* Error score of frames still to be coded in kf group */
int64_t kf_group_error_left;
-
- int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF
- int gf_bits; // Bits for the golden frame or ARF - 2 pass only
+ /* Projected Bits available for a group including 1 GF or ARF */
+ int gf_group_bits;
+ /* Bits for the golden frame or ARF */
+ int gf_bits;
int alt_extra_bits;
double est_max_qcorrection_factor;
} twopass;
@@ -641,24 +627,25 @@ typedef struct VP8_COMP
#endif
int b_calculate_psnr;
- // Per MB activity measurement
+ /* Per MB activity measurement */
unsigned int activity_avg;
unsigned int * mb_activity_map;
- int * mb_norm_activity_map;
- // Record of which MBs still refer to last golden frame either
- // directly or through 0,0
+ /* Record of which MBs still refer to last golden frame either
+ * directly or through 0,0
+ */
unsigned char *gf_active_flags;
int gf_active_count;
int output_partition;
- //Store last frame's MV info for next frame MV prediction
+ /* Store last frame's MV info for next frame MV prediction */
int_mv *lfmv;
int *lf_ref_frame_sign_bias;
int *lf_ref_frame;
- int force_next_frame_intra; /* force next frame to intra when kf_auto says so */
+ /* force next frame to intra when kf_auto says so */
+ int force_next_frame_intra;
int droppable;
@@ -666,7 +653,7 @@ typedef struct VP8_COMP
VP8_DENOISER denoiser;
#endif
- // Coding layer state variables
+ /* Coding layer state variables */
unsigned int current_layer;
LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
@@ -687,17 +674,29 @@ typedef struct VP8_COMP
#if CONFIG_MULTI_RES_ENCODING
/* Number of MBs per row at lower-resolution level */
int mr_low_res_mb_cols;
+ /* Indicates whether lower-res mv info is available */
+ unsigned char mr_low_res_mv_avail;
+ /* The frame number of each reference frame */
+ unsigned int current_ref_frames[MAX_REF_FRAMES];
#endif
+ struct rd_costs_struct
+ {
+ int mvcosts[2][MVvals+1];
+ int mvsadcosts[2][MVfpvals+1];
+ int mbmode_cost[2][MB_MODE_COUNT];
+ int intra_uv_mode_cost[2][MB_MODE_COUNT];
+ int bmode_costs[10][10][10];
+ int inter_bmode_costs[B_MODE_COUNT];
+ int token_costs[BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+ } rd_costs;
} VP8_COMP;
-void control_data_rate(VP8_COMP *cpi);
-
-void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size);
-
-int rd_cost_intra_mb(MACROBLOCKD *x);
+void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
+ unsigned char *dest_end, unsigned long *size);
-void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);
+void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **);
void vp8_set_speed_features(VP8_COMP *cpi);
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index dafb645..673de2b 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -61,7 +61,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
}
-static int get_inter_mbpred_error(MACROBLOCK *mb,
+int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
const vp8_variance_fn_ptr_t *vfp,
unsigned int *sse,
int_mv this_mv)
@@ -132,7 +132,7 @@ static int pick_intra4x4block(
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
- unsigned int *mode_costs,
+ const int *mode_costs,
int *bestrate,
int *bestdistortion)
@@ -141,20 +141,24 @@ static int pick_intra4x4block(
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
int dst_stride = x->e_mbd.dst.y_stride;
- unsigned char *base_dst = x->e_mbd.dst.y_buffer;
+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
B_PREDICTION_MODE mode;
- int best_rd = INT_MAX; // 1<<30
+ int best_rd = INT_MAX;
int rate;
int distortion;
- for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++)
+ unsigned char *Above = dst - dst_stride;
+ unsigned char *yleft = dst - 1;
+ unsigned char top_left = Above[-1];
+
+ for (mode = B_DC_PRED; mode <= B_HE_PRED; mode++)
{
int this_rd;
rate = mode_costs[mode];
- vp8_intra4x4_predict
- (base_dst + b->offset, dst_stride,
- mode, b->predictor, 16);
+
+ vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
+ b->predictor, 16, top_left);
distortion = get_prediction_error(be, b);
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
@@ -167,7 +171,7 @@ static int pick_intra4x4block(
}
}
- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
+ b->bmi.as_mode = *best_mode;
vp8_encode_intra4x4block(x, ib);
return best_rd;
}
@@ -185,7 +189,7 @@ static int pick_intra4x4mby_modes
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error;
int distortion = 0;
- unsigned int *bmode_costs;
+ const int *bmode_costs;
intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
@@ -214,8 +218,9 @@ static int pick_intra4x4mby_modes
distortion += d;
mic->bmi[i].as_mode = best_mode;
- // Break out case where we have already exceeded best so far value
- // that was passed in
+ /* Break out case where we have already exceeded best so far value
+ * that was passed in
+ */
if (distortion > *best_dist)
break;
}
@@ -384,15 +389,16 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb)
}
-static void update_mvcount(VP8_COMP *cpi, MACROBLOCKD *xd, int_mv *best_ref_mv)
+static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
{
+ MACROBLOCKD *xd = &x->e_mbd;
/* Split MV modes currently not supported when RD is not enabled,
* therefore, only need to modify MVcount in NEWMV mode. */
if (xd->mode_info_context->mbmi.mode == NEWMV)
{
- cpi->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row -
+ x->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row -
best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col -
+ x->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col -
best_ref_mv->as_mv.col) >> 1)]++;
}
}
@@ -405,10 +411,9 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim,
MB_PREDICTION_MODE *parent_mode,
int_mv *parent_ref_mv, int mb_row, int mb_col)
{
- LOWER_RES_INFO* store_mode_info
- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info;
+ LOWER_RES_MB_INFO* store_mode_info
+ = ((LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info)->mb_info;
unsigned int parent_mb_index;
- //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col];
/* Consider different down_sampling_factor. */
{
@@ -440,7 +445,6 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim,
/* Consider different down_sampling_factor.
* The result can be rounded to be more precise, but it takes more time.
*/
- //int round = cpi->oxcf.mr_down_sampling_factor.den/2;
(*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row
*cpi->oxcf.mr_down_sampling_factor.num
/cpi->oxcf.mr_down_sampling_factor.den;
@@ -455,10 +459,18 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim,
static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x)
{
- if (sse < x->encode_breakout)
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ unsigned int threshold = (xd->block[0].dequant[1]
+ * xd->block[0].dequant[1] >> 4);
+
+ if (threshold < x->encode_breakout)
+ threshold = x->encode_breakout;
+
+ if (sse < threshold)
{
- // Check u and v to make sure skip is ok
- int sse2 = 0;
+ /* Check u and v to make sure skip is ok */
+ unsigned int sse2 = 0;
sse2 = VP8_UVSSE(x);
@@ -469,7 +481,8 @@ static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x)
}
}
-static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x)
+static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2,
+ VP8_COMP *cpi, MACROBLOCK *x, int rd_adj)
{
MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
int_mv mv = x->e_mbd.mode_info_context->mbmi.mv;
@@ -486,16 +499,70 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, V
if((this_mode != NEWMV) ||
!(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1)
- *distortion2 = get_inter_mbpred_error(x,
+ *distortion2 = vp8_get_inter_mbpred_error(x,
&cpi->fn_ptr[BLOCK_16X16],
sse, mv);
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2);
+ /* Adjust rd to bias to ZEROMV */
+ if (this_mode == ZEROMV)
+ {
+ /* Bias to ZEROMV on LAST_FRAME reference when it is available. */
+ if ((cpi->ref_frame_flags & VP8_LAST_FRAME) &&
+ cpi->common.refresh_last_frame &&
+ x->e_mbd.mode_info_context->mbmi.ref_frame != LAST_FRAME)
+ rd_adj = 100;
+
+ /* rd_adj <= 100 */
+ this_rd = ((int64_t)this_rd) * rd_adj / 100;
+ }
+
check_for_encode_breakout(*sse, x);
return this_rd;
}
+static void calculate_zeromv_rd_adjustment(VP8_COMP *cpi, MACROBLOCK *x,
+ int *rd_adjustment)
+{
+ MODE_INFO *mic = x->e_mbd.mode_info_context;
+ int_mv mv_l, mv_a, mv_al;
+ int local_motion_check = 0;
+
+ if (cpi->lf_zeromv_pct > 40)
+ {
+ /* left mb */
+ mic -= 1;
+ mv_l = mic->mbmi.mv;
+
+ if (mic->mbmi.ref_frame != INTRA_FRAME)
+ if (abs(mv_l.as_mv.row) < 8 && abs(mv_l.as_mv.col) < 8)
+ local_motion_check++;
+
+ /* above-left mb */
+ mic -= x->e_mbd.mode_info_stride;
+ mv_al = mic->mbmi.mv;
+
+ if (mic->mbmi.ref_frame != INTRA_FRAME)
+ if (abs(mv_al.as_mv.row) < 8 && abs(mv_al.as_mv.col) < 8)
+ local_motion_check++;
+
+ /* above mb */
+ mic += 1;
+ mv_a = mic->mbmi.mv;
+
+ if (mic->mbmi.ref_frame != INTRA_FRAME)
+ if (abs(mv_a.as_mv.row) < 8 && abs(mv_a.as_mv.col) < 8)
+ local_motion_check++;
+
+ if (((!x->e_mbd.mb_to_top_edge || !x->e_mbd.mb_to_left_edge)
+ && local_motion_check > 0) || local_motion_check > 2)
+ *rd_adjustment = 80;
+ else if (local_motion_check > 0)
+ *rd_adjustment = 90;
+ }
+}
+
void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int recon_uvoffset, int *returnrate,
int *returndistortion, int *returnintra, int mb_row,
@@ -513,7 +580,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
MB_PREDICTION_MODE this_mode;
int num00;
int mdcounts[4];
- int best_rd = INT_MAX; // 1 << 30;
+ int best_rd = INT_MAX;
+ int rd_adjustment = 100;
int best_intra_rd = INT_MAX;
int mode_index;
int rate;
@@ -523,14 +591,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int best_mode_index = 0;
unsigned int sse = INT_MAX, best_rd_sse = INT_MAX;
#if CONFIG_TEMPORAL_DENOISING
- unsigned int zero_mv_sse = 0, best_sse = INT_MAX;
+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX;
#endif
int_mv mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
int saddone=0;
- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7)
+ /* Search range obtained from mv_pred(); it uses step_param levels (0-7). */
+ int sr = 0;
unsigned char *plane[4][3];
int ref_frame_map[4];
@@ -539,12 +608,39 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
#if CONFIG_MULTI_RES_ENCODING
int dissim = INT_MAX;
int parent_ref_frame = 0;
+ int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail;
int_mv parent_ref_mv;
MB_PREDICTION_MODE parent_mode = 0;
- if (cpi->oxcf.mr_encoder_id)
+ if (parent_ref_valid)
+ {
+ int parent_ref_flag;
+
get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame,
&parent_mode, &parent_ref_mv, mb_row, mb_col);
+
+ /* TODO(jkoleszar): The references available (ref_frame_flags) to the
+ * lower res encoder should match those available to this encoder, but
+ * there seems to be a situation where this mismatch can happen in the
+ * case of frame dropping and temporal layers. For example,
+ * GOLD being disallowed in ref_frame_flags, but being returned as
+ * parent_ref_frame.
+ *
+ * In this event, take the conservative approach of disabling the
+ * lower res info for this MB.
+ */
+ parent_ref_flag = 0;
+ if (parent_ref_frame == LAST_FRAME)
+ parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME);
+ else if (parent_ref_frame == GOLDEN_FRAME)
+ parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME);
+ else if (parent_ref_frame == ALTREF_FRAME)
+ parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME);
+
+ /* assert(!parent_ref_frame || parent_ref_flag); */
+ if (parent_ref_frame && !parent_ref_flag)
+ parent_ref_valid = 0;
+ }
#endif
mode_mv = mode_mv_sb[sign_bias];
@@ -553,6 +649,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
/* Setup search priorities */
+#if CONFIG_MULTI_RES_ENCODING
+ if (parent_ref_valid && parent_ref_frame && dissim < 8)
+ {
+ ref_frame_map[0] = -1;
+ ref_frame_map[1] = parent_ref_frame;
+ ref_frame_map[2] = -1;
+ ref_frame_map[3] = -1;
+ } else
+#endif
get_reference_search_order(cpi, ref_frame_map);
/* Check to see if there is at least 1 valid reference frame that we need
@@ -574,22 +679,29 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame
+ /* Count of the number of MBs tested so far this frame */
+ x->mbs_tested_so_far++;
*returnintra = INT_MAX;
x->skip = 0;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
- // if we encode a new mv this is important
- // find the best new motion vector
+ /* If the frame has a big static background and the current MB is in
+ * a low-motion area, its mode decision is biased toward ZEROMV.
+ */
+ calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
+
+ /* if we encode a new mv this is important
+ * find the best new motion vector
+ */
for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
{
int frame_cost;
int this_rd = INT_MAX;
int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
- if (best_rd <= cpi->rd_threshes[mode_index])
+ if (best_rd <= x->rd_threshes[mode_index])
continue;
if (this_ref_frame < 0)
@@ -597,23 +709,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
-#if CONFIG_MULTI_RES_ENCODING
- if (cpi->oxcf.mr_encoder_id)
- {
- /* If parent MB is intra, child MB is intra. */
- if (!parent_ref_frame && this_ref_frame)
- continue;
-
- /* If parent MB is inter, and it is unlikely there are multiple
- * objects in parent MB, we use parent ref frame as child MB's
- * ref frame. */
- if (parent_ref_frame && dissim < 8
- && parent_ref_frame != this_ref_frame)
- continue;
- }
-#endif
-
- // everything but intra
+ /* everything but intra */
if (x->e_mbd.mode_info_context->mbmi.ref_frame)
{
x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
@@ -628,7 +724,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
#if CONFIG_MULTI_RES_ENCODING
- if (cpi->oxcf.mr_encoder_id)
+ if (parent_ref_valid)
{
if (vp8_mode_order[mode_index] == NEARESTMV &&
mode_mv[NEARESTMV].as_int ==0)
@@ -638,7 +734,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
continue;
if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV
- && best_ref_mv.as_int==0) //&& dissim==0
+ && best_ref_mv.as_int==0)
continue;
else if(vp8_mode_order[mode_index] == NEWMV && dissim==0
&& best_ref_mv.as_int==parent_ref_mv.as_int)
@@ -650,22 +746,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
/* Check to see if the testing frequency for this mode is at its max
* If so then prevent it from being tested and increase the threshold
* for its testing */
- if (cpi->mode_test_hit_counts[mode_index] &&
+ if (x->mode_test_hit_counts[mode_index] &&
(cpi->mode_check_freq[mode_index] > 1))
{
- if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] *
- cpi->mode_test_hit_counts[mode_index]))
+ if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] *
+ x->mode_test_hit_counts[mode_index]))
{
/* Increase the threshold for coding this mode to make it less
* likely to be chosen */
- cpi->rd_thresh_mult[mode_index] += 4;
+ x->rd_thresh_mult[mode_index] += 4;
- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
- cpi->rd_threshes[mode_index] =
+ x->rd_threshes[mode_index] =
(cpi->rd_baseline_thresh[mode_index] >> 7) *
- cpi->rd_thresh_mult[mode_index];
+ x->rd_thresh_mult[mode_index];
continue;
}
}
@@ -673,7 +769,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
/* We have now reached the point where we are going to test the current
* mode so increment the counter for the number of times it has been
* tested */
- cpi->mode_test_hit_counts[mode_index] ++;
+ x->mode_test_hit_counts[mode_index] ++;
rate2 = 0;
distortion2 = 0;
@@ -728,7 +824,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
case SPLITMV:
- // Split MV modes currently not supported when RD is nopt enabled.
+ /* Split MV modes currently not supported when RD is not enabled. */
break;
case DC_PRED:
@@ -777,13 +873,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1;
- // Further step/diamond searches as necessary
+ /* Further step/diamond searches as necessary */
step_param = cpi->sf.first_step + speed_adjust;
#if CONFIG_MULTI_RES_ENCODING
- if (cpi->oxcf.mr_encoder_id)
+ /* If the lower-res encoder drops this frame, the higher-res encoder
+ does motion search without any prior knowledge. Also, since last
+ frame motion info is not stored, improved_mv_pred cannot be
+ used. */
+ if (cpi->oxcf.mr_encoder_id && !parent_ref_valid)
+ cpi->sf.improved_mv_pred = 0;
+
+ if (parent_ref_valid && parent_ref_frame)
{
- // Use parent MV as predictor. Adjust search range accordingly.
+ /* Use parent MV as predictor. Adjust search range
+ * accordingly.
+ */
mvp.as_int = parent_ref_mv.as_int;
mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3;
mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3;
@@ -808,7 +913,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
&near_sadidx[0]);
sr += speed_adjust;
- //adjust search range according to sr from mv prediction
+ /* adjust search range according to sr from mv prediction */
if(sr > step_param)
step_param = sr;
@@ -823,7 +928,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
#if CONFIG_MULTI_RES_ENCODING
- if (cpi->oxcf.mr_encoder_id && dissim <= 2 &&
+ if (parent_ref_valid && parent_ref_frame && dissim <= 2 &&
MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4)
{
@@ -860,7 +965,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
* change the behavior in lowest-resolution encoder.
* Will improve it later.
*/
- if (!cpi->oxcf.mr_encoder_id)
+ /* Set step_param to 0 to ensure a large-range motion search
+ when the encoder drops this frame at the lower resolution.
+ */
+ if (!parent_ref_valid)
step_param = 0;
#endif
bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv,
@@ -877,10 +985,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->mvcost, &best_ref_mv);
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
- // Further step/diamond searches as necessary
- n = 0;
- //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
+ /* Further step/diamond searches as necessary */
n = num00;
num00 = 0;
@@ -927,7 +1032,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
- // mv cost;
+ /* mv cost; */
rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv,
cpi->mb.mvcost, 128);
}
@@ -954,7 +1059,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
x->e_mbd.mode_info_context->mbmi.mv.as_int =
mode_mv[this_mode].as_int;
- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x,
+ rd_adjustment);
break;
default:
@@ -964,31 +1070,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- // Store for later use by denoiser.
- if (this_mode == ZEROMV &&
- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
- {
- zero_mv_sse = sse;
- }
-
- // Store the best NEWMV in x for later use in the denoiser.
- // We are restricted to the LAST_FRAME since the denoiser only keeps
- // one filter state.
- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
- {
- best_sse = sse;
- x->e_mbd.best_sse_inter_mode = NEWMV;
- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
- x->e_mbd.need_to_clamp_best_mvs =
- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
- }
+
+ /* Store for later use by denoiser. */
+ if (this_mode == ZEROMV && sse < zero_mv_sse)
+ {
+ zero_mv_sse = sse;
+ x->best_zeromv_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
+
+ /* Store the best NEWMV in x for later use in the denoiser. */
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
+ sse < best_sse)
+ {
+ best_sse = sse;
+ x->best_sse_inter_mode = NEWMV;
+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
+ x->need_to_clamp_best_mvs =
+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
+ x->best_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
}
#endif
if (this_rd < best_rd || x->skip)
{
- // Note index of best mode
+ /* Note index of best mode */
best_mode_index = mode_index;
*returnrate = rate2;
@@ -1001,12 +1109,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
/* Testing this mode gave rise to an improvement in best error
* score. Lower threshold a bit for next time
*/
- cpi->rd_thresh_mult[mode_index] =
- (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
- cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
- cpi->rd_threshes[mode_index] =
+ x->rd_thresh_mult[mode_index] =
+ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
+ x->rd_threshes[mode_index] =
(cpi->rd_baseline_thresh[mode_index] >> 7) *
- cpi->rd_thresh_mult[mode_index];
+ x->rd_thresh_mult[mode_index];
}
/* If the mode did not help improve the best error case then raise the
@@ -1014,33 +1122,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*/
else
{
- cpi->rd_thresh_mult[mode_index] += 4;
+ x->rd_thresh_mult[mode_index] += 4;
- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
- cpi->rd_threshes[mode_index] =
+ x->rd_threshes[mode_index] =
(cpi->rd_baseline_thresh[mode_index] >> 7) *
- cpi->rd_thresh_mult[mode_index];
+ x->rd_thresh_mult[mode_index];
}
if (x->skip)
break;
}
- // Reduce the activation RD thresholds for the best choice mode
+ /* Reduce the activation RD thresholds for the best choice mode */
if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
{
- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3);
+ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3);
- cpi->rd_thresh_mult[best_mode_index] =
- (cpi->rd_thresh_mult[best_mode_index]
+ x->rd_thresh_mult[best_mode_index] =
+ (x->rd_thresh_mult[best_mode_index]
>= (MIN_THRESHMULT + best_adjustment)) ?
- cpi->rd_thresh_mult[best_mode_index] - best_adjustment :
+ x->rd_thresh_mult[best_mode_index] - best_adjustment :
MIN_THRESHMULT;
- cpi->rd_threshes[best_mode_index] =
+ x->rd_threshes[best_mode_index] =
(cpi->rd_baseline_thresh[best_mode_index] >> 7) *
- cpi->rd_thresh_mult[best_mode_index];
+ x->rd_thresh_mult[best_mode_index];
}
@@ -1052,43 +1160,54 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
this_rdbin = 1023;
}
- cpi->error_bins[this_rdbin] ++;
+ x->error_bins[this_rdbin] ++;
}
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
- // No best MV found.
- x->e_mbd.best_sse_inter_mode = best_mbmode.mode;
- x->e_mbd.best_sse_mv = best_mbmode.mv;
- x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
- best_sse = best_rd_sse;
- }
- vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
- recon_yoffset, recon_uvoffset);
-
- // Reevaluate ZEROMV after denoising.
- if (best_mbmode.ref_frame == INTRA_FRAME)
- {
- int this_rd = 0;
- rate2 = 0;
- distortion2 = 0;
- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
- this_mode = ZEROMV;
- rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
- x->e_mbd.mode_info_context->mbmi.mode = this_mode;
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
+ if (x->best_sse_inter_mode == DC_PRED)
+ {
+ /* No best MV found. */
+ x->best_sse_inter_mode = best_mbmode.mode;
+ x->best_sse_mv = best_mbmode.mv;
+ x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
+ x->best_reference_frame = best_mbmode.ref_frame;
+ best_sse = best_rd_sse;
+ }
+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
+ recon_yoffset, recon_uvoffset);
- if (this_rd < best_rd || x->skip)
+
+ /* Reevaluate ZEROMV after denoising. */
+ if (best_mbmode.ref_frame == INTRA_FRAME &&
+ x->best_zeromv_reference_frame != INTRA_FRAME)
{
- vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
- sizeof(MB_MODE_INFO));
+ int this_rd = 0;
+ int this_ref_frame = x->best_zeromv_reference_frame;
+ rate2 = x->ref_frame_cost[this_ref_frame] +
+ vp8_cost_mv_ref(ZEROMV, mdcounts);
+ distortion2 = 0;
+
+ /* set up the proper prediction buffers for the frame */
+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
+
+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x,
+ rd_adjustment);
+
+ if (this_rd < best_rd)
+ {
+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
+ sizeof(MB_MODE_INFO));
+ }
}
- }
+
}
#endif
@@ -1122,11 +1241,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
!= cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
- update_mvcount(cpi, &x->e_mbd, &best_ref_mv);
+ update_mvcount(cpi, x, &best_ref_mv);
}
-void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
+void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
{
int error4x4, error16x16 = INT_MAX;
int rate, best_rate = 0, distortion, best_sse;
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index 3d83782..35011ca 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -18,6 +18,10 @@ extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int recon_uvoffset, int *returnrate,
int *returndistortion, int *returnintra,
int mb_row, int mb_col);
-extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
+extern void vp8_pick_intra_mode(MACROBLOCK *x, int *rate);
+extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
+ const vp8_variance_fn_ptr_t *vfp,
+ unsigned int *sse,
+ int_mv this_mv);
#endif
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 21af45a..4121349 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -74,7 +74,9 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
src += srcoffset;
dst += dstoffset;
- // Loop through the Y plane raw and reconstruction data summing (square differences)
+ /* Loop through the Y plane raw and reconstruction data summing
+ * (square differences)
+ */
for (i = 0; i < linestocopy; i += 16)
{
for (j = 0; j < source->y_width; j += 16)
@@ -92,7 +94,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
return Total;
}
-// Enforce a minimum filter level based upon baseline Q
+/* Enforce a minimum filter level based upon baseline Q */
static int get_min_filter_level(VP8_COMP *cpi, int base_qindex)
{
int min_filter_level;
@@ -113,14 +115,15 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex)
return min_filter_level;
}
-// Enforce a maximum filter level based upon baseline Q
+/* Enforce a maximum filter level based upon baseline Q */
static int get_max_filter_level(VP8_COMP *cpi, int base_qindex)
{
- // PGW August 2006: Highest filter values almost always a bad idea
+ /* PGW August 2006: Highest filter values almost always a bad idea */
- // jbb chg: 20100118 - not so any more with this overquant stuff allow high values
- // with lots of intra coming in.
- int max_filter_level = MAX_LOOP_FILTER ;//* 3 / 4;
+ /* jbb chg: 20100118 - not so any more; with this overquant stuff,
+ * allow high values with lots of intra coming in.
+ */
+ int max_filter_level = MAX_LOOP_FILTER;
(void)base_qindex;
if (cpi->twopass.section_intra_rating > 8)
@@ -155,7 +158,9 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
cm->last_sharpness_level = cm->sharpness_level;
}
- // Start the search at the previous frame filter level unless it is now out of range.
+ /* Start the search at the previous frame filter level unless it is
+ * now out of range.
+ */
if (cm->filter_level < min_filter_level)
cm->filter_level = min_filter_level;
else if (cm->filter_level > max_filter_level)
@@ -164,7 +169,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
filt_val = cm->filter_level;
best_filt_val = filt_val;
- // Get the err using the previous frame's filter value.
+ /* Get the err using the previous frame's filter value. */
/* Copy the unfiltered / processed recon buffer to the new buffer */
vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
@@ -174,17 +179,17 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
filt_val -= 1 + (filt_val > 10);
- // Search lower filter levels
+ /* Search lower filter levels */
while (filt_val >= min_filter_level)
{
- // Apply the loop filter
+ /* Apply the loop filter */
vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
- // Get the err for filtered frame
+ /* Get the err for filtered frame */
filt_err = calc_partial_ssl_err(sd, cm->frame_to_show);
- // Update the best case record or exit loop.
+ /* Update the best case record or exit loop. */
if (filt_err < best_err)
{
best_err = filt_err;
@@ -193,32 +198,34 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
else
break;
- // Adjust filter level
+ /* Adjust filter level */
filt_val -= 1 + (filt_val > 10);
}
- // Search up (note that we have already done filt_val = cm->filter_level)
+ /* Search up (note that we have already done filt_val = cm->filter_level) */
filt_val = cm->filter_level + 1 + (filt_val > 10);
if (best_filt_val == cm->filter_level)
{
- // Resist raising filter level for very small gains
+ /* Resist raising filter level for very small gains */
best_err -= (best_err >> 10);
while (filt_val < max_filter_level)
{
- // Apply the loop filter
+ /* Apply the loop filter */
vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
- // Get the err for filtered frame
+ /* Get the err for filtered frame */
filt_err = calc_partial_ssl_err(sd, cm->frame_to_show);
- // Update the best case record or exit loop.
+ /* Update the best case record or exit loop. */
if (filt_err < best_err)
{
- // Do not raise filter level if improvement is < 1 part in 4096
+ /* Do not raise filter level if improvement is < 1 part
+ * in 4096
+ */
best_err = filt_err - (filt_err >> 10);
best_filt_val = filt_val;
@@ -226,7 +233,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
else
break;
- // Adjust filter level
+ /* Adjust filter level */
filt_val += 1 + (filt_val > 10);
}
}
@@ -243,7 +250,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
cm->frame_to_show = saved_frame;
}
-// Stub function for now Alt LF not used
+/* Stub function for now Alt LF not used */
void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val)
{
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
@@ -266,12 +273,14 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
int filter_step;
int filt_high = 0;
- int filt_mid = cm->filter_level; // Start search at previous frame filter level
+ /* Start search at previous frame filter level */
+ int filt_mid = cm->filter_level;
int filt_low = 0;
int filt_best;
int filt_direction = 0;
- int Bias = 0; // Bias against raising loop filter and in favor of lowering it
+ /* Bias against raising loop filter and in favor of lowering it */
+ int Bias = 0;
int ss_err[MAX_LOOP_FILTER + 1];
@@ -287,7 +296,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
else
cm->sharpness_level = cpi->oxcf.Sharpness;
- // Start the search at the previous frame filter level unless it is now out of range.
+ /* Start the search at the previous frame filter level unless it is
+ * now out of range.
+ */
filt_mid = cm->filter_level;
if (filt_mid < min_filter_level)
@@ -295,10 +306,10 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
else if (filt_mid > max_filter_level)
filt_mid = max_filter_level;
- // Define the initial step size
+ /* Define the initial step size */
filter_step = (filt_mid < 16) ? 4 : filt_mid / 4;
- // Get baseline error score
+ /* Get baseline error score */
/* Copy the unfiltered / processed recon buffer to the new buffer */
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
@@ -314,9 +325,8 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
while (filter_step > 0)
{
- Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; //PGW change 12/12/06 for small images
+ Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
- // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value
if (cpi->twopass.section_intra_rating < 20)
Bias = Bias * cpi->twopass.section_intra_rating / 20;
@@ -327,7 +337,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
{
if(ss_err[filt_low] == 0)
{
- // Get Low filter error score
+ /* Get Low filter error score */
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
@@ -338,10 +348,12 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
else
filt_err = ss_err[filt_low];
- // If value is close to the best so far then bias towards a lower loop filter value.
+ /* If value is close to the best so far then bias towards a
+ * lower loop filter value.
+ */
if ((filt_err - Bias) < best_err)
{
- // Was it actually better than the previous best?
+ /* Was it actually better than the previous best? */
if (filt_err < best_err)
best_err = filt_err;
@@ -349,7 +361,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
}
}
- // Now look at filt_high
+ /* Now look at filt_high */
if ((filt_direction >= 0) && (filt_high != filt_mid))
{
if(ss_err[filt_high] == 0)
@@ -364,7 +376,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
else
filt_err = ss_err[filt_high];
- // Was it better than the previous best?
+ /* Was it better than the previous best? */
if (filt_err < (best_err - Bias))
{
best_err = filt_err;
@@ -372,7 +384,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
}
}
- // Half the step distance if the best filter value was the same as last time
+ /* Halve the step distance if the best filter value was the same
+ * as last time
+ */
if (filt_best == filt_mid)
{
filter_step = filter_step / 2;
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index 5119bb8..5bb49ad 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -22,7 +22,7 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse)
if ((double)Mse > 0.0)
psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
else
- psnr = MAX_PSNR; // Limit to prevent / 0
+ psnr = MAX_PSNR; /* Limit to prevent division by zero */
if (psnr > MAX_PSNR)
psnr = MAX_PSNR;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 766d2b2..33c8ef0 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -44,21 +44,21 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
z = coeff_ptr[rc];
zbin = zbin_ptr[rc] ;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); /* sign of z */
+ x = (z ^ sz) - sz; /* x = abs(z) */
if (x >= zbin)
{
x += round_ptr[rc];
y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ >> quant_shift_ptr[rc]; /* quantize (x) */
+ x = (y ^ sz) - sz; /* get the sign back */
+ qcoeff_ptr[rc] = x; /* write to destination */
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
if (y)
{
- eob = i; // last nonzero coeffs
+ eob = i; /* last nonzero coeffs */
}
}
}
@@ -84,17 +84,17 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
rc = vp8_default_zig_zag1d[i];
z = coeff_ptr[rc];
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); /* sign of z */
+ x = (z ^ sz) - sz; /* x = abs(z) */
- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */
+ x = (y ^ sz) - sz; /* get the sign back */
+ qcoeff_ptr[rc] = x; /* write to destination */
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
if (y)
{
- eob = i; // last nonzero coeffs
+ eob = i; /* last nonzero coeffs */
}
}
*d->eob = (char)(eob + 1);
@@ -132,22 +132,22 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
zbin_boost_ptr ++;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); /* sign of z */
+ x = (z ^ sz) - sz; /* x = abs(z) */
if (x >= zbin)
{
x += round_ptr[rc];
y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ >> quant_shift_ptr[rc]; /* quantize (x) */
+ x = (y ^ sz) - sz; /* get the sign back */
+ qcoeff_ptr[rc] = x; /* write to destination */
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
if (y)
{
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ eob = i; /* last nonzero coeffs */
+ zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */
}
}
}
@@ -240,26 +240,23 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
rc = vp8_default_zig_zag1d[i];
z = coeff_ptr[rc];
- //if ( i == 0 )
- // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
- //else
zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
zbin_boost_ptr ++;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); /* sign of z */
+ x = (z ^ sz) - sz; /* x = abs(z) */
if (x >= zbin)
{
- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */
+ x = (y ^ sz) - sz; /* get the sign back */
+ qcoeff_ptr[rc] = x; /* write to destination */
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
if (y)
{
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength
+ eob = i; /* last nonzero coeffs */
+ zbin_boost_ptr = &b->zrun_zbin_boost[0]; /* reset zrl */
}
}
}
@@ -441,7 +438,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
for (Q = 0; Q < QINDEX_RANGE; Q++)
{
- // dc values
+ /* dc values */
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
@@ -469,7 +466,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->common.UVdequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- // all the ac values = ;
+ /* all the ac values */
quant_val = vp8_ac_yquant(Q);
cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
@@ -536,7 +533,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
for (Q = 0; Q < QINDEX_RANGE; Q++)
{
- // dc values
+ /* dc values */
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -558,7 +555,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->common.UVdequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- // all the ac values = ;
+ /* all the ac values */
for (i = 1; i < 16; i++)
{
int rc = vp8_default_zig_zag1d[i];
@@ -590,20 +587,20 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
#define ZBIN_EXTRA_Y \
(( cpi->common.Y1dequant[QIndex][1] * \
- ( cpi->zbin_over_quant + \
- cpi->zbin_mode_boost + \
+ ( x->zbin_over_quant + \
+ x->zbin_mode_boost + \
x->act_zbin_adj ) ) >> 7)
#define ZBIN_EXTRA_UV \
(( cpi->common.UVdequant[QIndex][1] * \
- ( cpi->zbin_over_quant + \
- cpi->zbin_mode_boost + \
+ ( x->zbin_over_quant + \
+ x->zbin_mode_boost + \
x->act_zbin_adj ) ) >> 7)
#define ZBIN_EXTRA_Y2 \
(( cpi->common.Y2dequant[QIndex][1] * \
- ( (cpi->zbin_over_quant / 2) + \
- cpi->zbin_mode_boost + \
+ ( (x->zbin_over_quant / 2) + \
+ x->zbin_mode_boost + \
x->act_zbin_adj ) ) >> 7)
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
@@ -613,18 +610,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
MACROBLOCKD *xd = &x->e_mbd;
int zbin_extra;
- // Select the baseline MB Q index.
+ /* Select the baseline MB Q index. */
if (xd->segmentation_enabled)
{
- // Abs Value
+ /* Abs Value */
if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-
QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
- // Delta Value
+ /* Delta Value */
else
{
QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range
+ /* Clamp to valid range */
+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;
}
}
else
@@ -657,13 +654,13 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
* This will also require modifications to the x86 and neon assembly.
* */
for (i = 0; i < 16; i++)
- x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex];
+ x->e_mbd.block[i].dequant = xd->dequant_y1;
for (i = 16; i < 24; i++)
- x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex];
- x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex];
+ x->e_mbd.block[i].dequant = xd->dequant_uv;
+ x->e_mbd.block[24].dequant = xd->dequant_y2;
#endif
- // Y
+ /* Y */
zbin_extra = ZBIN_EXTRA_Y;
for (i = 0; i < 16; i++)
@@ -677,7 +674,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[i].zbin_extra = (short)zbin_extra;
}
- // UV
+ /* UV */
zbin_extra = ZBIN_EXTRA_UV;
for (i = 16; i < 24; i++)
@@ -691,7 +688,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[i].zbin_extra = (short)zbin_extra;
}
- // Y2
+ /* Y2 */
zbin_extra = ZBIN_EXTRA_Y2;
x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
@@ -705,35 +702,35 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
/* save this macroblock QIndex for vp8_update_zbin_extra() */
x->q_index = QIndex;
- cpi->last_zbin_over_quant = cpi->zbin_over_quant;
- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;
+ x->last_zbin_over_quant = x->zbin_over_quant;
+ x->last_zbin_mode_boost = x->zbin_mode_boost;
x->last_act_zbin_adj = x->act_zbin_adj;
}
- else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant
- || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost
+ else if(x->last_zbin_over_quant != x->zbin_over_quant
+ || x->last_zbin_mode_boost != x->zbin_mode_boost
|| x->last_act_zbin_adj != x->act_zbin_adj)
{
- // Y
+ /* Y */
zbin_extra = ZBIN_EXTRA_Y;
for (i = 0; i < 16; i++)
x->block[i].zbin_extra = (short)zbin_extra;
- // UV
+ /* UV */
zbin_extra = ZBIN_EXTRA_UV;
for (i = 16; i < 24; i++)
x->block[i].zbin_extra = (short)zbin_extra;
- // Y2
+ /* Y2 */
zbin_extra = ZBIN_EXTRA_Y2;
x->block[24].zbin_extra = (short)zbin_extra;
- cpi->last_zbin_over_quant = cpi->zbin_over_quant;
- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;
+ x->last_zbin_over_quant = x->zbin_over_quant;
+ x->last_zbin_mode_boost = x->zbin_mode_boost;
x->last_act_zbin_adj = x->act_zbin_adj;
}
}
@@ -744,19 +741,19 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
int QIndex = x->q_index;
int zbin_extra;
- // Y
+ /* Y */
zbin_extra = ZBIN_EXTRA_Y;
for (i = 0; i < 16; i++)
x->block[i].zbin_extra = (short)zbin_extra;
- // UV
+ /* UV */
zbin_extra = ZBIN_EXTRA_UV;
for (i = 16; i < 24; i++)
x->block[i].zbin_extra = (short)zbin_extra;
- // Y2
+ /* Y2 */
zbin_extra = ZBIN_EXTRA_Y2;
x->block[24].zbin_extra = (short)zbin_extra;
}
@@ -766,10 +763,10 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
- // Clear Zbin mode boost for default case
- cpi->zbin_mode_boost = 0;
+ /* Clear Zbin mode boost for default case */
+ cpi->mb.zbin_mode_boost = 0;
- // MB level quantizer setup
+ /* MB level quantizer setup */
vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);
}
@@ -801,7 +798,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
cm->y2dc_delta_q = new_delta_q;
- // Set Segment specific quatizers
+ /* Set segment-specific quantizers */
mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 472e85f..a399a38 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -41,15 +41,16 @@ extern int inter_uv_modes[4];
extern int inter_b_modes[10];
#endif
-// Bits Per MB at different Q (Multiplied by 512)
+/* Bits Per MB at different Q (Multiplied by 512) */
#define BPER_MB_NORMBITS 9
-// Work in progress recalibration of baseline rate tables based on
-// the assumption that bits per mb is inversely proportional to the
-// quantizer value.
+/* Work in progress recalibration of baseline rate tables based on
+ * the assumption that bits per mb is inversely proportional to the
+ * quantizer value.
+ */
const int vp8_bits_per_mb[2][QINDEX_RANGE] =
{
- // Intra case 450000/Qintra
+ /* Intra case 450000/Qintra */
{
1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000,
409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705,
@@ -68,7 +69,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] =
36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088,
32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662,
},
- // Inter case 285000/Qinter
+ /* Inter case 285000/Qinter */
{
712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090,
237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000,
@@ -109,7 +110,7 @@ static const int kf_boost_qadjustment[QINDEX_RANGE] =
220, 220, 220, 220, 220, 220, 220, 220,
};
-//#define GFQ_ADJUSTMENT (Q+100)
+/* #define GFQ_ADJUSTMENT (Q+100) */
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
const int vp8_gf_boost_qadjustment[QINDEX_RANGE] =
{
@@ -173,7 +174,7 @@ static const int kf_gf_boost_qlimits[QINDEX_RANGE] =
600, 600, 600, 600, 600, 600, 600, 600,
};
-// % adjustment to target kf size based on seperation from previous frame
+/* % adjustment to target kf size based on separation from previous frame */
static const int kf_boost_seperation_adjustment[16] =
{
30, 40, 50, 55, 60, 65, 70, 75,
@@ -224,10 +225,11 @@ void vp8_save_coding_context(VP8_COMP *cpi)
{
CODING_CONTEXT *const cc = & cpi->coding_context;
- // Stores a snapshot of key state variables which can subsequently be
- // restored with a call to vp8_restore_coding_context. These functions are
- // intended for use in a re-code loop in vp8_compress_frame where the
- // quantizer value is adjusted between loop iterations.
+ /* Stores a snapshot of key state variables which can subsequently be
+ * restored with a call to vp8_restore_coding_context. These functions are
+ * intended for use in a re-code loop in vp8_compress_frame where the
+ * quantizer value is adjusted between loop iterations.
+ */
cc->frames_since_key = cpi->frames_since_key;
cc->filter_level = cpi->common.filter_level;
@@ -235,18 +237,16 @@ void vp8_save_coding_context(VP8_COMP *cpi)
cc->frames_since_golden = cpi->common.frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
- vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
+ vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
- vp8_copy(cc->kf_ymode_prob, cpi->common.kf_ymode_prob);
vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob);
- vp8_copy(cc->kf_uv_mode_prob, cpi->common.kf_uv_mode_prob);
vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob);
- vp8_copy(cc->ymode_count, cpi->ymode_count);
- vp8_copy(cc->uv_mode_count, cpi->uv_mode_count);
+ vp8_copy(cc->ymode_count, cpi->mb.ymode_count);
+ vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count);
- // Stats
+ /* Stats */
#ifdef MODE_STATS
vp8_copy(cc->y_modes, y_modes);
vp8_copy(cc->uv_modes, uv_modes);
@@ -264,8 +264,9 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
{
CODING_CONTEXT *const cc = & cpi->coding_context;
- // Restore key state variables to the snapshot state stored in the
- // previous call to vp8_save_coding_context.
+ /* Restore key state variables to the snapshot state stored in the
+ * previous call to vp8_save_coding_context.
+ */
cpi->frames_since_key = cc->frames_since_key;
cpi->common.filter_level = cc->filter_level;
@@ -274,17 +275,15 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
vp8_copy(cpi->common.fc.mvc, cc->mvc);
- vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
+ vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts);
- vp8_copy(cpi->common.kf_ymode_prob, cc->kf_ymode_prob);
vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob);
- vp8_copy(cpi->common.kf_uv_mode_prob, cc->kf_uv_mode_prob);
vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob);
- vp8_copy(cpi->ymode_count, cc->ymode_count);
- vp8_copy(cpi->uv_mode_count, cc->uv_mode_count);
+ vp8_copy(cpi->mb.ymode_count, cc->ymode_count);
+ vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count);
- // Stats
+ /* Stats */
#ifdef MODE_STATS
vp8_copy(y_modes, cc->y_modes);
vp8_copy(uv_modes, cc->uv_modes);
@@ -301,36 +300,30 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
void vp8_setup_key_frame(VP8_COMP *cpi)
{
- // Setup for Key frame:
+ /* Setup for Key frame: */
vp8_default_coef_probs(& cpi->common);
-
- vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob);
-
vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
{
int flag[2] = {1, 1};
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
}
- vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc)); //initialize pre_mvc to all zero.
-
- // Make sure we initialize separate contexts for altref,gold, and normal.
- // TODO shouldn't need 3 different copies of structure to do this!
+ /* Make sure we initialize separate contexts for altref, gold, and normal.
+ * TODO: shouldn't need 3 different copies of the structure to do this!
+ */
vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
- //cpi->common.filter_level = 0; // Reset every key frame.
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
- // Provisional interval before next GF
+ /* Provisional interval before next GF */
if (cpi->auto_gold)
- //cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL;
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
else
- cpi->frames_till_gf_update_due = cpi->goldfreq;
+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL;
cpi->common.refresh_golden_frame = 1;
cpi->common.refresh_alt_ref_frame = 1;
@@ -355,12 +348,12 @@ static int estimate_bits_at_q(int frame_kind, int Q, int MBs,
static void calc_iframe_target_size(VP8_COMP *cpi)
{
- // boost defaults to half second
+ /* boost defaults to half second */
int kf_boost;
- int target;
+ uint64_t target;
- // Clear down mmx registers to allow floating point in what follows
- vp8_clear_system_state(); //__asm emms;
+ /* Clear down mmx registers to allow floating point in what follows */
+ vp8_clear_system_state();
if (cpi->oxcf.fixed_q >= 0)
{
@@ -371,10 +364,10 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
}
else if (cpi->pass == 2)
{
- // New Two pass RC
+ /* New Two pass RC */
target = cpi->per_frame_bandwidth;
}
- // First Frame is a special case
+ /* First Frame is a special case */
else if (cpi->common.current_video_frame == 0)
{
/* 1 Pass there is no information on which to base size so use
@@ -388,29 +381,29 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
}
else
{
- // if this keyframe was forced, use a more recent Q estimate
+ /* if this keyframe was forced, use a more recent Q estimate */
int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY)
? cpi->avg_frame_qindex : cpi->ni_av_qi;
- int initial_boost = 24; // Corresponds to: |2.5 * per_frame_bandwidth|
- // Boost depends somewhat on frame rate: only used for 1 layer case.
+ int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
+ /* Boost depends somewhat on frame rate: only used for 1 layer case. */
if (cpi->oxcf.number_of_layers == 1) {
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16));
}
else {
- // Initial factor: set target size to: |2.5 * per_frame_bandwidth|.
+ /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
kf_boost = initial_boost;
}
- // adjustment up based on q: this factor ranges from ~1.2 to 2.2.
+ /* adjustment up based on q: this factor ranges from ~1.2 to 2.2. */
kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;
- // frame separation adjustment ( down)
+ /* frame separation adjustment (down) */
if (cpi->frames_since_key < cpi->output_frame_rate / 2)
kf_boost = (int)(kf_boost
* cpi->frames_since_key / (cpi->output_frame_rate / 2));
- // Minimal target size is |2* per_frame_bandwidth|.
+ /* Minimal target size is |2 * per_frame_bandwidth|. */
if (kf_boost < 16)
kf_boost = 16;
@@ -427,10 +420,11 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
target = max_rate;
}
- cpi->this_frame_target = target;
+ cpi->this_frame_target = (int)target;
- // TODO: if we separate rate targeting from Q targetting, move this.
- // Reset the active worst quality to the baseline value for key frames.
+ /* TODO: if we separate rate targeting from Q targeting, move this.
+ * Reset the active worst quality to the baseline value for key frames.
+ */
if (cpi->pass != 2)
cpi->active_worst_quality = cpi->worst_quality;
@@ -439,9 +433,6 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
FILE *f;
f = fopen("kf_boost.stt", "a");
- //fprintf(f, " %8d %10d %10d %10d %10d %10d %10d\n",
- // cpi->common.current_video_frame, cpi->target_bandwidth, cpi->frames_to_key, kf_boost_qadjustment[cpi->ni_av_qi], cpi->kf_boost, (cpi->this_frame_target *100 / cpi->per_frame_bandwidth), cpi->this_frame_target );
-
fprintf(f, " %8u %10d %10d %10d\n",
cpi->common.current_video_frame, cpi->gfu_boost, cpi->baseline_gf_interval, cpi->source_alt_ref_pending);
@@ -451,14 +442,15 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
}
-// Do the best we can to define the parameters for the next GF based on what
-// information we have available.
+/* Do the best we can to define the parameters for the next GF based on what
+ * information we have available.
+ */
static void calc_gf_params(VP8_COMP *cpi)
{
int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
int Boost = 0;
- int gf_frame_useage = 0; // Golden frame useage since last GF
+    int gf_frame_useage = 0; /* Golden frame usage since last GF */
int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] +
cpi->recent_ref_frame_usage[LAST_FRAME] +
cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
@@ -466,33 +458,30 @@ static void calc_gf_params(VP8_COMP *cpi)
int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
- // Reset the last boost indicator
- //cpi->last_boost = 100;
-
if (tot_mbs)
gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs;
if (pct_gf_active > gf_frame_useage)
gf_frame_useage = pct_gf_active;
- // Not two pass
+ /* Not two pass */
if (cpi->pass != 2)
{
- // Single Pass lagged mode: TBD
+ /* Single Pass lagged mode: TBD */
if (0)
{
}
- // Single Pass compression: Has to use current and historical data
+ /* Single Pass compression: Has to use current and historical data */
else
{
#if 0
- // Experimental code
+ /* Experimental code */
int index = cpi->one_pass_frame_index;
int frames_to_scan = (cpi->max_gf_interval <= MAX_LAG_BUFFERS) ? cpi->max_gf_interval : MAX_LAG_BUFFERS;
+ /* ************** Experimental code - incomplete */
/*
- // *************** Experimental code - incomplete
double decay_val = 1.0;
double IIAccumulator = 0.0;
double last_iiaccumulator = 0.0;
@@ -535,48 +524,51 @@ static void calc_gf_params(VP8_COMP *cpi)
#else
/*************************************************************/
- // OLD code
+ /* OLD code */
- // Adjust boost based upon ambient Q
+ /* Adjust boost based upon ambient Q */
Boost = GFQ_ADJUSTMENT;
- // Adjust based upon most recently measure intra useage
+            /* Adjust based upon most recently measured intra usage */
Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100;
- // Adjust gf boost based upon GF usage since last GF
+ /* Adjust gf boost based upon GF usage since last GF */
Boost = Boost * gf_adjust_table[gf_frame_useage] / 100;
#endif
}
- // golden frame boost without recode loop often goes awry. be safe by keeping numbers down.
+ /* golden frame boost without recode loop often goes awry. be
+ * safe by keeping numbers down.
+ */
if (!cpi->sf.recode_loop)
{
if (cpi->compressor_speed == 2)
Boost = Boost / 2;
}
- // Apply an upper limit based on Q for 1 pass encodes
+ /* Apply an upper limit based on Q for 1 pass encodes */
if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0))
Boost = kf_gf_boost_qlimits[Q];
- // Apply lower limits to boost.
+ /* Apply lower limits to boost. */
else if (Boost < 110)
Boost = 110;
- // Note the boost used
+ /* Note the boost used */
cpi->last_boost = Boost;
}
- // Estimate next interval
- // This is updated once the real frame size/boost is known.
+ /* Estimate next interval
+ * This is updated once the real frame size/boost is known.
+ */
if (cpi->oxcf.fixed_q == -1)
{
- if (cpi->pass == 2) // 2 Pass
+ if (cpi->pass == 2) /* 2 Pass */
{
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
}
- else // 1 Pass
+ else /* 1 Pass */
{
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
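
The single-pass golden-frame boost above is a base value scaled by two table-driven percentage adjustments, then clamped from below at 110 and, for 1-pass encodes, from above by a Q-dependent limit. A compact sketch; the adjustment tables (gf_intra_usage_adjustment, gf_adjust_table) and the real GFQ_ADJUSTMENT value live outside this hunk, so they appear here as parameters and a placeholder:

#define GFQ_ADJUSTMENT 80   /* placeholder; real value defined elsewhere */

/* Sketch of the 1-pass GF boost computation above. */
static int gf_boost_sketch(int intra_adj_pct, int gf_adj_pct,
                           int q_limit, int one_pass)
{
    int boost = GFQ_ADJUSTMENT;
    boost = boost * intra_adj_pct / 100;   /* recent intra usage */
    boost = boost * gf_adj_pct / 100;      /* GF usage since last GF */

    if (boost > q_limit && one_pass)       /* Q-dependent upper limit */
        boost = q_limit;
    else if (boost < 110)                  /* lower limit */
        boost = 110;

    return boost;
}
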
@@ -602,10 +594,10 @@ static void calc_gf_params(VP8_COMP *cpi)
else
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
- // ARF on or off
+ /* ARF on or off */
if (cpi->pass != 2)
{
- // For now Alt ref is not allowed except in 2 pass modes.
+ /* For now Alt ref is not allowed except in 2 pass modes. */
cpi->source_alt_ref_pending = 0;
/*if ( cpi->oxcf.fixed_q == -1)
@@ -642,89 +634,34 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
min_frame_target = cpi->per_frame_bandwidth / 4;
- // Special alt reference frame case
+ /* Special alt reference frame case */
if((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1))
{
if (cpi->pass == 2)
{
- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame
+ /* Per frame bit target for the alt ref frame */
+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
cpi->this_frame_target = cpi->per_frame_bandwidth;
}
/* One Pass ??? TBD */
- /*else
- {
- int frames_in_section;
- int allocation_chunks;
- int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
- int alt_boost;
- int max_arf_rate;
-
- alt_boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
- alt_boost += (cpi->frames_till_gf_update_due * 50);
-
- // If alt ref is not currently active then we have a pottential double hit with GF and ARF so reduce the boost a bit.
- // A similar thing is done on GFs that preceed a arf update.
- if ( !cpi->source_alt_ref_active )
- alt_boost = alt_boost * 3 / 4;
-
- frames_in_section = cpi->frames_till_gf_update_due+1; // Standard frames + GF
- allocation_chunks = (frames_in_section * 100) + alt_boost;
-
- // Normalize Altboost and allocations chunck down to prevent overflow
- while ( alt_boost > 1000 )
- {
- alt_boost /= 2;
- allocation_chunks /= 2;
- }
-
- else
- {
- int bits_in_section;
-
- if ( cpi->kf_overspend_bits > 0 )
- {
- Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits;
-
- if ( Adjustment > (cpi->per_frame_bandwidth - min_frame_target) )
- Adjustment = (cpi->per_frame_bandwidth - min_frame_target);
-
- cpi->kf_overspend_bits -= Adjustment;
-
- // Calculate an inter frame bandwidth target for the next few frames designed to recover
- // any extra bits spent on the key frame.
- cpi->inter_frame_target = cpi->per_frame_bandwidth - Adjustment;
- if ( cpi->inter_frame_target < min_frame_target )
- cpi->inter_frame_target = min_frame_target;
- }
- else
- cpi->inter_frame_target = cpi->per_frame_bandwidth;
-
- bits_in_section = cpi->inter_frame_target * frames_in_section;
-
- // Avoid loss of precision but avoid overflow
- if ( (bits_in_section>>7) > allocation_chunks )
- cpi->this_frame_target = alt_boost * (bits_in_section / allocation_chunks);
- else
- cpi->this_frame_target = (alt_boost * bits_in_section) / allocation_chunks;
- }
- }
- */
}
- // Normal frames (gf,and inter)
+    /* Normal frames (gf and inter) */
else
{
- // 2 pass
+ /* 2 pass */
if (cpi->pass == 2)
{
cpi->this_frame_target = cpi->per_frame_bandwidth;
}
- // 1 pass
+ /* 1 pass */
else
{
- // Make rate adjustment to recover bits spent in key frame
- // Test to see if the key frame inter data rate correction should still be in force
+ /* Make rate adjustment to recover bits spent in key frame
+ * Test to see if the key frame inter data rate correction
+ * should still be in force
+ */
if (cpi->kf_overspend_bits > 0)
{
Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits;
@@ -734,8 +671,10 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
cpi->kf_overspend_bits -= Adjustment;
- // Calculate an inter frame bandwidth target for the next few frames designed to recover
- // any extra bits spent on the key frame.
+ /* Calculate an inter frame bandwidth target for the next
+ * few frames designed to recover any extra bits spent on
+ * the key frame.
+ */
cpi->this_frame_target = cpi->per_frame_bandwidth - Adjustment;
if (cpi->this_frame_target < min_frame_target)
@@ -744,7 +683,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
else
cpi->this_frame_target = cpi->per_frame_bandwidth;
- // If appropriate make an adjustment to recover bits spent on a recent GF
+ /* If appropriate make an adjustment to recover bits spent on a
+ * recent GF
+ */
if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target))
{
int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits;
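
The key-frame branch above and the golden-frame branch that continues in the next hunk recover overspend the same way: each frame gives back the smaller of a fixed per-frame adjustment and the remaining overspend, never pushing the target below the minimum. A generic sketch of that shared pattern:

/* Shared overspend-recovery step used for both KF and GF overspend.
 * Returns the adjusted per-frame target; *overspend_bits is reduced by
 * the amount actually recovered this frame. */
static int recover_overspend(int per_frame_target, int min_frame_target,
                             int per_frame_adjustment, int *overspend_bits)
{
    int adj = per_frame_adjustment;

    if (*overspend_bits <= 0)
        return per_frame_target;

    if (adj > *overspend_bits)
        adj = *overspend_bits;
    if (adj > per_frame_target - min_frame_target)
        adj = per_frame_target - min_frame_target;

    *overspend_bits -= adj;
    return per_frame_target - adj;
}
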
@@ -756,11 +697,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
cpi->this_frame_target -= Adjustment;
}
- // Apply small + and - boosts for non gf frames
+ /* Apply small + and - boosts for non gf frames */
if ((cpi->last_boost > 150) && (cpi->frames_till_gf_update_due > 0) &&
(cpi->current_gf_interval >= (MIN_GF_INTERVAL << 1)))
{
- // % Adjustment limited to the range 1% to 10%
+ /* % Adjustment limited to the range 1% to 10% */
Adjustment = (cpi->last_boost - 100) >> 5;
if (Adjustment < 1)
@@ -768,7 +709,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
else if (Adjustment > 10)
Adjustment = 10;
- // Convert to bits
+ /* Convert to bits */
Adjustment = (cpi->this_frame_target * Adjustment) / 100;
if (Adjustment > (cpi->this_frame_target - min_frame_target))
@@ -782,47 +723,53 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
}
}
- // Sanity check that the total sum of adjustments is not above the maximum allowed
- // That is that having allowed for KF and GF penalties we have not pushed the
- // current interframe target to low. If the adjustment we apply here is not capable of recovering
- // all the extra bits we have spent in the KF or GF then the remainder will have to be recovered over
- // a longer time span via other buffer / rate control mechanisms.
+    /* Sanity check that the total sum of adjustments is not above the
+     * maximum allowed. That is, having allowed for KF and GF penalties,
+     * we have not pushed the current interframe target too low. If the
+ * adjustment we apply here is not capable of recovering all the extra
+ * bits we have spent in the KF or GF then the remainder will have to
+ * be recovered over a longer time span via other buffer / rate control
+ * mechanisms.
+ */
if (cpi->this_frame_target < min_frame_target)
cpi->this_frame_target = min_frame_target;
if (!cpi->common.refresh_alt_ref_frame)
- // Note the baseline target data rate for this inter frame.
+ /* Note the baseline target data rate for this inter frame. */
cpi->inter_frame_target = cpi->this_frame_target;
- // One Pass specific code
+ /* One Pass specific code */
if (cpi->pass == 0)
{
- // Adapt target frame size with respect to any buffering constraints:
+ /* Adapt target frame size with respect to any buffering constraints: */
if (cpi->buffered_mode)
{
- int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100;
+ int one_percent_bits = (int)
+ (1 + cpi->oxcf.optimal_buffer_level / 100);
if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) ||
(cpi->bits_off_target < cpi->oxcf.optimal_buffer_level))
{
int percent_low = 0;
- // Decide whether or not we need to adjust the frame data rate target.
- //
- // If we are are below the optimal buffer fullness level and adherence
- // to buffering constraints is important to the end usage then adjust
- // the per frame target.
+ /* Decide whether or not we need to adjust the frame data
+ * rate target.
+ *
+                 * If we are below the optimal buffer fullness level
+ * and adherence to buffering constraints is important to
+ * the end usage then adjust the per frame target.
+ */
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
(cpi->buffer_level < cpi->oxcf.optimal_buffer_level))
{
- percent_low =
- (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) /
- one_percent_bits;
+ percent_low = (int)
+ ((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) /
+ one_percent_bits);
}
- // Are we overshooting the long term clip data rate...
+ /* Are we overshooting the long term clip data rate... */
else if (cpi->bits_off_target < 0)
{
- // Adjust per frame data target downwards to compensate.
+ /* Adjust per frame data target downwards to compensate. */
percent_low = (int)(100 * -cpi->bits_off_target /
(cpi->total_byte_count * 8));
}
@@ -832,40 +779,46 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
else if (percent_low < 0)
percent_low = 0;
- // lower the target bandwidth for this frame.
+ /* lower the target bandwidth for this frame. */
cpi->this_frame_target -=
(cpi->this_frame_target * percent_low) / 200;
- // Are we using allowing control of active_worst_allowed_q
- // according to buffer level.
+                /* Are we allowing control of active_worst_allowed_q
+ * according to buffer level.
+ */
if (cpi->auto_worst_q && cpi->ni_frames > 150)
{
- int critical_buffer_level;
-
- // For streaming applications the most important factor is
- // cpi->buffer_level as this takes into account the
- // specified short term buffering constraints. However,
- // hitting the long term clip data rate target is also
- // important.
+ int64_t critical_buffer_level;
+
+ /* For streaming applications the most important factor is
+ * cpi->buffer_level as this takes into account the
+ * specified short term buffering constraints. However,
+ * hitting the long term clip data rate target is also
+ * important.
+ */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- // Take the smaller of cpi->buffer_level and
- // cpi->bits_off_target
+ /* Take the smaller of cpi->buffer_level and
+ * cpi->bits_off_target
+ */
critical_buffer_level =
(cpi->buffer_level < cpi->bits_off_target)
? cpi->buffer_level : cpi->bits_off_target;
}
- // For local file playback short term buffering constraints
- // are less of an issue
+ /* For local file playback short term buffering constraints
+ * are less of an issue
+ */
else
{
- // Consider only how we are doing for the clip as a
- // whole
+ /* Consider only how we are doing for the clip as a
+ * whole
+ */
critical_buffer_level = cpi->bits_off_target;
}
- // Set the active worst quality based upon the selected
- // buffer fullness number.
+ /* Set the active worst quality based upon the selected
+ * buffer fullness number.
+ */
if (critical_buffer_level < cpi->oxcf.optimal_buffer_level)
{
if ( critical_buffer_level >
@@ -877,15 +830,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
(critical_buffer_level -
(cpi->oxcf.optimal_buffer_level >> 2));
- // Step active worst quality down from
- // cpi->ni_av_qi when (critical_buffer_level ==
- // cpi->optimal_buffer_level) to
- // cpi->worst_quality when
- // (critical_buffer_level ==
- // cpi->optimal_buffer_level >> 2)
+ /* Step active worst quality down from
+ * cpi->ni_av_qi when (critical_buffer_level ==
+ * cpi->optimal_buffer_level) to
+ * cpi->worst_quality when
+ * (critical_buffer_level ==
+ * cpi->optimal_buffer_level >> 2)
+ */
cpi->active_worst_quality =
cpi->worst_quality -
- ((qadjustment_range * above_base) /
+ (int)((qadjustment_range * above_base) /
(cpi->oxcf.optimal_buffer_level*3>>2));
}
else
@@ -910,9 +864,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
&& (cpi->buffer_level > cpi->oxcf.optimal_buffer_level))
{
- percent_high = (cpi->buffer_level
+ percent_high = (int)((cpi->buffer_level
- cpi->oxcf.optimal_buffer_level)
- / one_percent_bits;
+ / one_percent_bits);
}
else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level)
{
@@ -928,11 +882,14 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
cpi->this_frame_target += (cpi->this_frame_target *
percent_high) / 200;
- // Are we allowing control of active_worst_allowed_q according
- // to buffer level.
+ /* Are we allowing control of active_worst_allowed_q according
+ * to buffer level.
+ */
if (cpi->auto_worst_q && cpi->ni_frames > 150)
{
- // When using the relaxed buffer model stick to the user specified value
+ /* When using the relaxed buffer model stick to the
+ * user specified value
+ */
cpi->active_worst_quality = cpi->ni_av_qi;
}
else
@@ -941,26 +898,27 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
}
}
- // Set active_best_quality to prevent quality rising too high
+ /* Set active_best_quality to prevent quality rising too high */
cpi->active_best_quality = cpi->best_quality;
- // Worst quality obviously must not be better than best quality
+ /* Worst quality obviously must not be better than best quality */
if (cpi->active_worst_quality <= cpi->active_best_quality)
cpi->active_worst_quality = cpi->active_best_quality + 1;
if(cpi->active_worst_quality > 127)
cpi->active_worst_quality = 127;
}
- // Unbuffered mode (eg. video conferencing)
+ /* Unbuffered mode (eg. video conferencing) */
else
{
- // Set the active worst quality
+ /* Set the active worst quality */
cpi->active_worst_quality = cpi->worst_quality;
}
- // Special trap for constrained quality mode
- // "active_worst_quality" may never drop below cq level
- // for any frame type.
+ /* Special trap for constrained quality mode
+ * "active_worst_quality" may never drop below cq level
+ * for any frame type.
+ */
if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
cpi->active_worst_quality < cpi->cq_target_quality)
{
@@ -968,16 +926,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
}
}
- // Test to see if we have to drop a frame
- // The auto-drop frame code is only used in buffered mode.
- // In unbufferd mode (eg vide conferencing) the descision to
- // code or drop a frame is made outside the codec in response to real
- // world comms or buffer considerations.
- if (cpi->drop_frames_allowed && cpi->buffered_mode &&
+ /* Test to see if we have to drop a frame
+ * The auto-drop frame code is only used in buffered mode.
+     * In unbuffered mode (e.g. video conferencing) the decision to
+ * code or drop a frame is made outside the codec in response to real
+ * world comms or buffer considerations.
+ */
+ if (cpi->drop_frames_allowed &&
(cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
- ((cpi->common.frame_type != KEY_FRAME))) //|| !cpi->oxcf.allow_spatial_resampling) )
+ ((cpi->common.frame_type != KEY_FRAME)))
{
- // Check for a buffer underun-crisis in which case we have to drop a frame
+        /* Check for a buffer underrun crisis, in which case we have to drop
+ * a frame
+ */
if ((cpi->buffer_level < 0))
{
#if 0
@@ -988,41 +949,23 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
(cpi->buffer_level * 100) / cpi->oxcf.optimal_buffer_level);
fclose(f);
#endif
- //vpx_log("Decoder: Drop frame due to bandwidth: %d \n",cpi->buffer_level, cpi->av_per_frame_bandwidth);
-
- cpi->drop_frame = 1;
- }
-
-#if 0
- // Check for other drop frame crtieria (Note 2 pass cbr uses decimation on whole KF sections)
- else if ((cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) &&
- (cpi->drop_count < cpi->max_drop_count) && (cpi->pass == 0))
- {
cpi->drop_frame = 1;
- }
-
-#endif
- if (cpi->drop_frame)
- {
- // Update the buffer level variable.
+ /* Update the buffer level variable. */
cpi->bits_off_target += cpi->av_per_frame_bandwidth;
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
- cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
+ cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size;
cpi->buffer_level = cpi->bits_off_target;
}
- else
- cpi->drop_count = 0;
}
- // Adjust target frame size for Golden Frames:
+ /* Adjust target frame size for Golden Frames: */
if (cpi->oxcf.error_resilient_mode == 0 &&
(cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame)
{
- //int Boost = 0;
int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
- int gf_frame_useage = 0; // Golden frame useage since last GF
+        int gf_frame_useage = 0; /* Golden frame usage since last GF */
int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] +
cpi->recent_ref_frame_usage[LAST_FRAME] +
cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
@@ -1030,30 +973,29 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
- // Reset the last boost indicator
- //cpi->last_boost = 100;
-
if (tot_mbs)
gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs;
if (pct_gf_active > gf_frame_useage)
gf_frame_useage = pct_gf_active;
- // Is a fixed manual GF frequency being used
+ /* Is a fixed manual GF frequency being used */
if (cpi->auto_gold)
{
- // For one pass throw a GF if recent frame intra useage is low or the GF useage is high
+            /* For one pass throw a GF if recent frame intra usage is
+             * low or the GF usage is high
+ */
if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5))
cpi->common.refresh_golden_frame = 1;
- // Two pass GF descision
+            /* Two pass GF decision */
else if (cpi->pass == 2)
cpi->common.refresh_golden_frame = 1;
}
#if 0
- // Debug stats
+ /* Debug stats */
if (0)
{
FILE *f;
@@ -1070,7 +1012,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
{
#if 0
- if (0) // p_gw
+ if (0)
{
FILE *f;
@@ -1086,16 +1028,20 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
calc_gf_params(cpi);
}
- // If we are using alternate ref instead of gf then do not apply the boost
- // It will instead be applied to the altref update
- // Jims modified boost
+    /* If we are using alternate ref instead of gf then do not apply the
+     * boost. It will instead be applied to the altref update (Jim's
+     * modified boost).
+     */
if (!cpi->source_alt_ref_active)
{
if (cpi->oxcf.fixed_q < 0)
{
if (cpi->pass == 2)
{
- cpi->this_frame_target = cpi->per_frame_bandwidth; // The spend on the GF is defined in the two pass code for two pass encodes
+ /* The spend on the GF is defined in the two pass
+ * code for two pass encodes
+ */
+ cpi->this_frame_target = cpi->per_frame_bandwidth;
}
else
{
@@ -1104,14 +1050,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
int allocation_chunks = (frames_in_section * 100) + (Boost - 100);
int bits_in_section = cpi->inter_frame_target * frames_in_section;
- // Normalize Altboost and allocations chunck down to prevent overflow
+                    /* Normalize Altboost and allocation chunks down to
+                     * prevent overflow
+ */
while (Boost > 1000)
{
Boost /= 2;
allocation_chunks /= 2;
}
- // Avoid loss of precision but avoid overflow
+ /* Avoid loss of precision but avoid overflow */
if ((bits_in_section >> 7) > allocation_chunks)
cpi->this_frame_target = Boost * (bits_in_section / allocation_chunks);
else
@@ -1124,10 +1072,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
* cpi->last_boost) / 100;
}
- // If there is an active ARF at this location use the minimum
- // bits on this frame even if it is a contructed arf.
- // The active maximum quantizer insures that an appropriate
- // number of bits will be spent if needed for contstructed ARFs.
+ /* If there is an active ARF at this location use the minimum
+             * bits on this frame even if it is a constructed arf.
+             * The active maximum quantizer ensures that an appropriate
+             * number of bits will be spent if needed for constructed ARFs.
+ */
else
{
cpi->this_frame_target = 0;
@@ -1151,8 +1100,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var)
int projected_size_based_on_q = 0;
- // Clear down mmx registers to allow floating point in what follows
- vp8_clear_system_state(); //__asm emms;
+ /* Clear down mmx registers to allow floating point in what follows */
+ vp8_clear_system_state();
if (cpi->common.frame_type == KEY_FRAME)
{
@@ -1160,23 +1109,26 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var)
}
else
{
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->oxcf.number_of_layers == 1 &&
+ (cpi->common.refresh_alt_ref_frame ||
+ cpi->common.refresh_golden_frame))
rate_correction_factor = cpi->gf_rate_correction_factor;
else
rate_correction_factor = cpi->rate_correction_factor;
}
- // Work out how big we would have expected the frame to be at this Q given the current correction factor.
- // Stay in double to avoid int overflow when values are large
- //projected_size_based_on_q = ((int)(.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) >> BPER_MB_NORMBITS;
+ /* Work out how big we would have expected the frame to be at this Q
+ * given the current correction factor. Stay in double to avoid int
+ * overflow when values are large
+ */
projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS));
- // Make some allowance for cpi->zbin_over_quant
- if (cpi->zbin_over_quant > 0)
+ /* Make some allowance for cpi->zbin_over_quant */
+ if (cpi->mb.zbin_over_quant > 0)
{
- int Z = cpi->zbin_over_quant;
+ int Z = cpi->mb.zbin_over_quant;
double Factor = 0.99;
- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX;
+ double factor_adjustment = 0.01 / 256.0;
while (Z > 0)
{
@@ -1190,13 +1142,13 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var)
}
}
- // Work out a size correction factor.
- //if ( cpi->this_frame_target > 0 )
- // correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target;
+ /* Work out a size correction factor. */
if (projected_size_based_on_q > 0)
correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q;
- // More heavily damped adjustment used if we have been oscillating either side of target
+ /* More heavily damped adjustment used if we have been oscillating
+ * either side of target
+ */
switch (damp_var)
{
case 0:
@@ -1211,25 +1163,23 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var)
break;
}
- //if ( (correction_factor > 102) && (Q < cpi->active_worst_quality) )
if (correction_factor > 102)
{
- // We are not already at the worst allowable quality
+ /* We are not already at the worst allowable quality */
correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit));
rate_correction_factor = ((rate_correction_factor * correction_factor) / 100);
- // Keep rate_correction_factor within limits
+ /* Keep rate_correction_factor within limits */
if (rate_correction_factor > MAX_BPB_FACTOR)
rate_correction_factor = MAX_BPB_FACTOR;
}
- //else if ( (correction_factor < 99) && (Q > cpi->active_best_quality) )
else if (correction_factor < 99)
{
- // We are not already at the best allowable quality
+ /* We are not already at the best allowable quality */
correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit));
rate_correction_factor = ((rate_correction_factor * correction_factor) / 100);
- // Keep rate_correction_factor within limits
+ /* Keep rate_correction_factor within limits */
if (rate_correction_factor < MIN_BPB_FACTOR)
rate_correction_factor = MIN_BPB_FACTOR;
}
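
vp8_update_rate_correction_factors, shown above, compares the actual frame size against the size the current factor predicts at this Q, then nudges the factor toward the observation, damped by the adjustment_limit chosen in the damp_var switch and clamped to fixed bounds. A condensed sketch; the bound values are assumptions, since the defines sit outside this diff:

#define MIN_BPB_FACTOR 0.01   /* assumed bounds; defined elsewhere */
#define MAX_BPB_FACTOR 50.0

/* Condensed sketch of the correction-factor update above. */
static double update_rate_correction(double factor,
                                     int projected_size_based_on_q,
                                     int actual_frame_size,
                                     double adjustment_limit)
{
    int correction;

    if (projected_size_based_on_q <= 0)
        return factor;

    correction = (100 * actual_frame_size) / projected_size_based_on_q;

    if (correction > 102)        /* produced more bits than predicted */
    {
        correction = (int)(100.5 + (correction - 100) * adjustment_limit);
        factor = factor * correction / 100;
        if (factor > MAX_BPB_FACTOR)
            factor = MAX_BPB_FACTOR;
    }
    else if (correction < 99)    /* produced fewer bits than predicted */
    {
        correction = (int)(100.5 - (100 - correction) * adjustment_limit);
        factor = factor * correction / 100;
        if (factor < MIN_BPB_FACTOR)
            factor = MIN_BPB_FACTOR;
    }
    return factor;
}
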
@@ -1238,7 +1188,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var)
cpi->key_frame_rate_correction_factor = rate_correction_factor;
else
{
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->oxcf.number_of_layers == 1 &&
+ (cpi->common.refresh_alt_ref_frame ||
+ cpi->common.refresh_golden_frame))
cpi->gf_rate_correction_factor = rate_correction_factor;
else
cpi->rate_correction_factor = rate_correction_factor;
@@ -1250,8 +1202,8 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
{
int Q = cpi->active_worst_quality;
- // Reset Zbin OQ value
- cpi->zbin_over_quant = 0;
+ /* Reset Zbin OQ value */
+ cpi->mb.zbin_over_quant = 0;
if (cpi->oxcf.fixed_q >= 0)
{
@@ -1261,11 +1213,13 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
{
Q = cpi->oxcf.key_q;
}
- else if (cpi->common.refresh_alt_ref_frame)
+ else if (cpi->oxcf.number_of_layers == 1 &&
+ cpi->common.refresh_alt_ref_frame)
{
Q = cpi->oxcf.alt_q;
}
- else if (cpi->common.refresh_golden_frame)
+ else if (cpi->oxcf.number_of_layers == 1 &&
+ cpi->common.refresh_golden_frame)
{
Q = cpi->oxcf.gold_q;
}
@@ -1279,20 +1233,25 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
int bits_per_mb_at_this_q;
double correction_factor;
- // Select the appropriate correction factor based upon type of frame.
+ /* Select the appropriate correction factor based upon type of frame. */
if (cpi->common.frame_type == KEY_FRAME)
correction_factor = cpi->key_frame_rate_correction_factor;
else
{
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->oxcf.number_of_layers == 1 &&
+ (cpi->common.refresh_alt_ref_frame ||
+ cpi->common.refresh_golden_frame))
correction_factor = cpi->gf_rate_correction_factor;
else
correction_factor = cpi->rate_correction_factor;
}
- // Calculate required scaling factor based on target frame size and size of frame produced using previous Q
+ /* Calculate required scaling factor based on target frame size and
+ * size of frame produced using previous Q
+ */
if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS))
- target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int
+ /* Case where we would overflow int */
+ target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS;
else
target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs;
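
The branch above avoids 32-bit overflow when converting a frame-level bit budget into the fixed-point per-macroblock budget: normally the target is shifted up by BPER_MB_NORMBITS before dividing, but for very large targets the divide happens first. A sketch, with the shift amount treated as an assumption since the define is outside this diff:

#include <limits.h>

#define BPER_MB_NORMBITS 9   /* assumed; defined in the encoder headers */

/* Sketch of the overflow-safe bits-per-MB conversion above. */
static int target_bits_per_mb_sketch(int target_bits_per_frame, int mbs)
{
    if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS))
        /* shifting first would overflow: divide, then shift */
        return (target_bits_per_frame / mbs) << BPER_MB_NORMBITS;
    else
        /* normal case: shift first to keep fractional precision */
        return (target_bits_per_frame << BPER_MB_NORMBITS) / mbs;
}
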
@@ -1317,18 +1276,23 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
while (++i <= cpi->active_worst_quality);
- // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like
- // the RD multiplier and zero bin size.
+ /* If we are at MAXQ then enable Q over-run which seeks to claw
+ * back additional bits through things like the RD multiplier
+ * and zero bin size.
+ */
if (Q >= MAXQ)
{
int zbin_oqmax;
double Factor = 0.99;
- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX;
+ double factor_adjustment = 0.01 / 256.0;
if (cpi->common.frame_type == KEY_FRAME)
- zbin_oqmax = 0; //ZBIN_OQ_MAX/16
- else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active))
+ zbin_oqmax = 0;
+ else if (cpi->oxcf.number_of_layers == 1 &&
+ (cpi->common.refresh_alt_ref_frame ||
+ (cpi->common.refresh_golden_frame &&
+ !cpi->source_alt_ref_active)))
zbin_oqmax = 16;
else
zbin_oqmax = ZBIN_OQ_MAX;
@@ -1347,25 +1311,29 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
cpi->zbin_over_quant = (int)Oq;
}*/
- // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true.
- // The effect will be highly clip dependent and may well have sudden steps.
- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero
- // bin and hence decreasing the number of low magnitude non zero coefficients.
- while (cpi->zbin_over_quant < zbin_oqmax)
+            /* Each increment in the zbin is assumed to have a fixed effect
+             * on bitrate. This is of course not true. The effect will be
+             * highly clip dependent and may well have sudden steps. The
+             * idea here is to achieve higher effective quantizers than the
+ * normal maximum by expanding the zero bin and hence
+ * decreasing the number of low magnitude non zero coefficients.
+ */
+ while (cpi->mb.zbin_over_quant < zbin_oqmax)
{
- cpi->zbin_over_quant ++;
+ cpi->mb.zbin_over_quant ++;
- if (cpi->zbin_over_quant > zbin_oqmax)
- cpi->zbin_over_quant = zbin_oqmax;
+ if (cpi->mb.zbin_over_quant > zbin_oqmax)
+ cpi->mb.zbin_over_quant = zbin_oqmax;
- // Adjust bits_per_mb_at_this_q estimate
+ /* Adjust bits_per_mb_at_this_q estimate */
bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
Factor += factor_adjustment;
if (Factor >= 0.999)
Factor = 0.999;
- if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate
+ /* Break out if we get down to the target rate */
+ if (bits_per_mb_at_this_q <= target_bits_per_mb)
break;
}
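
Once Q reaches MAXQ, the loop above widens the quantizer's zero bin one step at a time; each step is modeled as shrinking the estimated bits/MB by a factor that starts at 0.99 and decays toward 0.999, stopping when the estimate meets the target or the cap is hit. A sketch:

/* Sketch of the zbin over-quant search above: keep enlarging the zero
 * bin until the modeled bits/MB drops to the target (or the cap). */
static int pick_zbin_over_quant(int bits_per_mb_at_maxq,
                                int target_bits_per_mb, int zbin_oqmax)
{
    double factor = 0.99;                  /* modeled per-step shrink */
    double factor_adjustment = 0.01 / 256.0;
    int bits = bits_per_mb_at_maxq;
    int zbin_oq = 0;

    while (zbin_oq < zbin_oqmax)
    {
        zbin_oq++;

        /* adjust the bits_per_mb estimate for this step */
        bits = (int)(factor * bits);
        factor += factor_adjustment;
        if (factor >= 0.999)
            factor = 0.999;

        /* break out once we get down to the target rate */
        if (bits <= target_bits_per_mb)
            break;
    }
    return zbin_oq;
}
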
@@ -1380,7 +1348,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
{
int i;
- // Average key frame frequency
+ /* Average key frame frequency */
int av_key_frame_frequency = 0;
/* First key frame at start of sequence is a special case. We have no
@@ -1431,11 +1399,11 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
void vp8_adjust_key_frame_context(VP8_COMP *cpi)
{
- // Clear down mmx registers to allow floating point in what follows
+ /* Clear down mmx registers to allow floating point in what follows */
vp8_clear_system_state();
- // Do we have any key frame overspend to recover?
- // Two-pass overspend handled elsewhere.
+ /* Do we have any key frame overspend to recover? */
+ /* Two-pass overspend handled elsewhere. */
if ((cpi->pass != 2)
&& (cpi->projected_frame_size > cpi->per_frame_bandwidth))
{
@@ -1469,10 +1437,12 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit)
{
- // Set-up bounds on acceptable frame size:
+ /* Set-up bounds on acceptable frame size: */
if (cpi->oxcf.fixed_q >= 0)
{
- // Fixed Q scenario: frame size never outranges target (there is no target!)
+ /* Fixed Q scenario: frame size never outranges target
+ * (there is no target!)
+ */
*frame_under_shoot_limit = 0;
*frame_over_shoot_limit = INT_MAX;
}
@@ -1494,18 +1464,22 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
}
else
{
- // For CBR take buffer fullness into account
+ /* For CBR take buffer fullness into account */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
if (cpi->buffer_level >= ((cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1))
{
- // Buffer is too full so relax overshoot and tighten undershoot
+ /* Buffer is too full so relax overshoot and tighten
+ * undershoot
+ */
*frame_over_shoot_limit = cpi->this_frame_target * 12 / 8;
*frame_under_shoot_limit = cpi->this_frame_target * 6 / 8;
}
else if (cpi->buffer_level <= (cpi->oxcf.optimal_buffer_level >> 1))
{
- // Buffer is too low so relax undershoot and tighten overshoot
+ /* Buffer is too low so relax undershoot and tighten
+ * overshoot
+ */
*frame_over_shoot_limit = cpi->this_frame_target * 10 / 8;
*frame_under_shoot_limit = cpi->this_frame_target * 4 / 8;
}
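
For CBR, the over/undershoot window above is expressed in eighths of the frame target and slides with buffer fullness: a full buffer tolerates overshoot (12/8, 6/8) and a draining buffer tolerates undershoot (10/8, 4/8). A sketch; the default branch's overshoot limit is not visible in this hunk, so its value here is an assumption (the 5/8 undershoot is shown in the hunk that follows):

#include <stdint.h>

/* Sketch of the CBR frame-size window above (limits in eighths of the
 * frame target; the +/-200 bit minimum range is applied afterwards). */
static void cbr_size_bounds(int target, int64_t buffer_level,
                            int64_t optimal, int64_t maximum,
                            int *under, int *over)
{
    if (buffer_level >= (optimal + maximum) >> 1)
    {   /* buffer too full: relax overshoot, tighten undershoot */
        *over  = target * 12 / 8;
        *under = target * 6 / 8;
    }
    else if (buffer_level <= optimal >> 1)
    {   /* buffer too low: relax undershoot, tighten overshoot */
        *over  = target * 10 / 8;
        *under = target * 4 / 8;
    }
    else
    {   /* default window */
        *over  = target * 9 / 8;   /* assumed; value sits outside this hunk */
        *under = target * 5 / 8;
    }
}
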
@@ -1515,11 +1489,13 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
*frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
}
}
- // VBR and CQ mode
- // Note that tighter restrictions here can help quality but hurt encode speed
+ /* VBR and CQ mode */
+ /* Note that tighter restrictions here can help quality
+ * but hurt encode speed
+ */
else
{
- // Stron overshoot limit for constrained quality
+            /* Strong overshoot limit for constrained quality */
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
{
*frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
@@ -1534,9 +1510,10 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
}
}
- // For very small rate targets where the fractional adjustment
- // (eg * 7/8) may be tiny make sure there is at least a minimum
- // range.
+ /* For very small rate targets where the fractional adjustment
+ * (eg * 7/8) may be tiny make sure there is at least a minimum
+ * range.
+ */
*frame_over_shoot_limit += 200;
*frame_under_shoot_limit -= 200;
if ( *frame_under_shoot_limit < 0 )
@@ -1546,7 +1523,7 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
}
-// return of 0 means drop frame
+/* return of 0 means drop frame */
int vp8_pick_frame_size(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
@@ -1557,11 +1534,10 @@ int vp8_pick_frame_size(VP8_COMP *cpi)
{
calc_pframe_target_size(cpi);
- // Check if we're dropping the frame:
+ /* Check if we're dropping the frame: */
if (cpi->drop_frame)
{
cpi->drop_frame = 0;
- cpi->drop_count++;
return 0;
}
}
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index d4f7796..c43f08d 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -22,7 +22,7 @@ extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame);
extern void vp8_adjust_key_frame_context(VP8_COMP *cpi);
extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit);
-// return of 0 means drop frame
+/* return of 0 means drop frame */
extern int vp8_pick_frame_size(VP8_COMP *cpi);
#endif
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 2b706ba..ceb817c 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -21,6 +21,7 @@
#include "onyx_int.h"
#include "modecosts.h"
#include "encodeintra.h"
+#include "pickinter.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra4x4.h"
@@ -36,7 +37,6 @@
#if CONFIG_TEMPORAL_DENOISING
#include "denoising.h"
#endif
-
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
@@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] =
};
static void fill_token_costs(
- unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS],
- const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]
+ int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
+ const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
)
{
int i, j, k;
@@ -159,21 +159,26 @@ static void fill_token_costs(
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < COEF_BANDS; j++)
for (k = 0; k < PREV_COEF_CONTEXTS; k++)
- // check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1
- if(k==0 && j>(i==0) )
- vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2);
+
+                    /* check for pt == 0 and band > 1 if block type is 0,
+                     * or band > 0 for any other block type
+ */
+ if (k == 0 && j > (i == 0))
+ vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
else
- vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
+ vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
}
-static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- };
+static const int rd_iifactor[32] =
+{
+ 4, 4, 3, 2, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
/* values are now correlated to quantizer */
-static int sad_per_bit16lut[QINDEX_RANGE] =
+static const int sad_per_bit16lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
@@ -192,7 +197,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] =
11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 13, 13, 13, 13, 14, 14
};
-static int sad_per_bit4lut[QINDEX_RANGE] =
+static const int sad_per_bit4lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
@@ -218,30 +223,30 @@ void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
}
-void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
+void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
{
int q;
int i;
double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
double rdconst = 2.80;
- vp8_clear_system_state(); //__asm emms;
+ vp8_clear_system_state();
- // Further tests required to see if optimum is different
- // for key frames, golden frames and arf frames.
- // if (cpi->common.refresh_golden_frame ||
- // cpi->common.refresh_alt_ref_frame)
+ /* Further tests required to see if optimum is different
+ * for key frames, golden frames and arf frames.
+ */
cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
- // Extend rate multiplier along side quantizer zbin increases
- if (cpi->zbin_over_quant > 0)
+ /* Extend rate multiplier along side quantizer zbin increases */
+ if (cpi->mb.zbin_over_quant > 0)
{
double oq_factor;
double modq;
- // Experimental code using the same basic equation as used for Q above
- // The units of cpi->zbin_over_quant are 1/128 of Q bin size
- oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
+ /* Experimental code using the same basic equation as used for Q above
+ * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
+ */
+ oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
modq = (int)((double)capped_q * oq_factor);
cpi->RDMULT = (int)(rdconst * (modq * modq));
}
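
The RD multiplier above grows with the square of a capped Q, and an active zbin over-quant first inflates the effective Q by a small per-unit factor. A sketch of that computation:

/* Sketch of the RD multiplier computation above. */
static int rdmult_sketch(int Qvalue, int zbin_over_quant)
{
    const double rdconst = 2.80;
    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    int rdmult = (int)(rdconst * capped_q * capped_q);

    if (zbin_over_quant > 0)
    {
        /* each zbin_over_quant unit widens the effective Q slightly */
        double oq_factor = 1.0 + 0.0015625 * zbin_over_quant;
        double modq = (int)(capped_q * oq_factor); /* truncated, as above */
        rdmult = (int)(rdconst * modq * modq);
    }
    return rdmult;
}
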
@@ -260,6 +265,11 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
vp8_set_speed_features(cpi);
+ for (i = 0; i < MAX_MODES; i++)
+ {
+ x->mode_test_hit_counts[i] = 0;
+ }
+
q = (int)pow(Qvalue, 1.25);
if (q < 8)
@@ -274,14 +284,14 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
{
if (cpi->sf.thresh_mult[i] < INT_MAX)
{
- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
+ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
}
else
{
- cpi->rd_threshes[i] = INT_MAX;
+ x->rd_threshes[i] = INT_MAX;
}
- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
+ cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
}
}
else
@@ -292,19 +302,19 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
{
if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
{
- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
+ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
}
else
{
- cpi->rd_threshes[i] = INT_MAX;
+ x->rd_threshes[i] = INT_MAX;
}
- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
+ cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
}
}
{
- // build token cost array for the type of frame we have now
+ /* build token cost array for the type of frame we have now */
FRAME_CONTEXT *l = &cpi->lfc_n;
if(cpi->common.refresh_alt_ref_frame)
@@ -323,12 +333,8 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
*/
- // TODO make these mode costs depend on last,alt or gold too. (jbb)
+    /* TODO make these mode costs depend on last, alt or gold too. (jbb) */
vp8_init_mode_costs(cpi);
-
- // TODO figure onnnnuut why making mv cost frame type dependent didn't help (jbb)
- //vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) l->mvc, flags);
-
}
}
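
The per-mode RD thresholds above scale each mode's multiplier by q = Qvalue^1.25 (floored at 8), with INT_MAX guards against overflow. The condition selecting the divide-by-100 branch over the direct-multiply branch is not visible in this diff, so it is a parameter in this sketch:

#include <limits.h>
#include <math.h>

/* Sketch of the per-mode RD threshold scaling above. */
static void scale_rd_threshes(int *rd_threshes, const int *thresh_mult,
                              int n_modes, int Qvalue, int divide_by_100)
{
    int i;
    int q = (int)pow(Qvalue, 1.25);
    if (q < 8)
        q = 8;

    for (i = 0; i < n_modes; i++)
    {
        if (divide_by_100)
            rd_threshes[i] = (thresh_mult[i] < INT_MAX)
                           ? thresh_mult[i] * q / 100 : INT_MAX;
        else
            rd_threshes[i] = (thresh_mult[i] < INT_MAX / q)
                           ? thresh_mult[i] * q : INT_MAX;
    }
}
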
@@ -353,14 +359,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi)
#endif
- /*
- // this is done during parameter valid check
- if( cpi->oxcf.cpu_used > 16)
- cpi->oxcf.cpu_used = 16;
- if( cpi->oxcf.cpu_used < -16)
- cpi->oxcf.cpu_used = -16;
- */
-
if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
{
if (cpi->avg_pick_mode_time == 0)
@@ -387,10 +385,10 @@ void vp8_auto_select_speed(VP8_COMP *cpi)
cpi->avg_pick_mode_time = 0;
cpi->avg_encode_time = 0;
- // In real-time mode, cpi->speed is in [4, 16].
- if (cpi->Speed < 4) //if ( cpi->Speed < 0 )
+ /* In real-time mode, cpi->speed is in [4, 16]. */
+ if (cpi->Speed < 4)
{
- cpi->Speed = 4; //cpi->Speed = 0;
+ cpi->Speed = 4;
}
}
}
@@ -546,7 +544,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
if (c < 16)
cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
- pt = (c != !type); // is eob first coefficient;
+ pt = (c != !type); /* is eob first coefficient; */
*a = *l = pt;
return cost;
@@ -592,7 +590,7 @@ static void macro_block_yrd( MACROBLOCK *mb,
vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src),
mb->block[0].src_stride, mb->e_mbd.predictor, 16);
- // Fdct and building the 2nd order block
+ /* Fdct and building the 2nd order block */
for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
{
mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
@@ -600,25 +598,25 @@ static void macro_block_yrd( MACROBLOCK *mb,
*Y2DCPtr++ = beptr->coeff[16];
}
- // 2nd order fdct
+ /* 2nd order fdct */
mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
- // Quantization
+ /* Quantization */
for (b = 0; b < 16; b++)
{
mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
}
- // DC predication and Quantization of 2nd Order block
+    /* DC prediction and Quantization of 2nd Order block */
mb->quantize_b(mb_y2, x_y2);
- // Distortion
+ /* Distortion */
d = vp8_mbblock_error(mb, 1) << 2;
d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
*Distortion = (d >> 4);
- // rate
+ /* rate */
*Rate = vp8_rdcost_mby(mb);
}
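
macro_block_yrd above produces the luma (Rate, Distortion) pair: 4x4 FDCTs with the DC coefficients gathered into a second-order Walsh block, quantization, summed block error for distortion, and token cost for rate. Such pairs are compared through the RDCOST(rdmult, rddiv, rate, distortion) convention used throughout this file; one plausible shape of that combination, treated as an assumption here since the macro itself is defined in a header outside this diff:

/* Assumed form of the rate-distortion cost combination; the exact
 * RDCOST macro and its constants live outside this diff. */
static int rdcost_sketch(int rdmult, int rddiv, int rate, int distortion)
{
    /* fold rate (scaled by the RD multiplier) and distortion into one
     * comparable number */
    return ((128 + rate * rdmult) >> 8) + rddiv * distortion;
}
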
@@ -632,12 +630,11 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
d[12] = p[12];
}
static int rd_pick_intra4x4block(
- VP8_COMP *cpi,
MACROBLOCK *x,
BLOCK *be,
BLOCKD *b,
B_PREDICTION_MODE *best_mode,
- unsigned int *bmode_costs,
+ const int *bmode_costs,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
@@ -660,7 +657,11 @@ static int rd_pick_intra4x4block(
DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4);
DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
int dst_stride = x->e_mbd.dst.y_stride;
- unsigned char *base_dst = x->e_mbd.dst.y_buffer;
+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
+
+ unsigned char *Above = dst - dst_stride;
+ unsigned char *yleft = dst - 1;
+ unsigned char top_left = Above[-1];
for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
{
@@ -669,8 +670,8 @@ static int rd_pick_intra4x4block(
rate = bmode_costs[mode];
- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, mode,
- b->predictor, 16);
+ vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
+ b->predictor, 16, top_left);
vp8_subtract_b(be, b, 16);
x->short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b(be, b);
@@ -697,15 +698,14 @@ static int rd_pick_intra4x4block(
vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
}
}
- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
+ b->bmi.as_mode = *best_mode;
- vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, base_dst + b->offset,
- dst_stride);
+ vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
return best_rd;
}
-static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
+static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
int *rate_y, int *Distortion, int best_rd)
{
MACROBLOCKD *const xd = &mb->e_mbd;
@@ -717,7 +717,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
- unsigned int *bmode_costs;
+ const int *bmode_costs;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -745,7 +745,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
}
total_rd += rd_pick_intra4x4block(
- cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
+ mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
ta + vp8_block2above[i],
tl + vp8_block2left[i], &r, &ry, &d);
@@ -770,8 +770,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
}
-static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
- MACROBLOCK *x,
+static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,
int *Rate,
int *rate_y,
int *Distortion)
@@ -784,7 +783,7 @@ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
int this_rd;
MACROBLOCKD *xd = &x->e_mbd;
- //Y Search for 16x16 intra prediction mode
+ /* Y Search for 16x16 intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; mode++)
{
xd->mode_info_context->mbmi.mode = mode;
@@ -873,7 +872,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
+static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int *distortion)
{
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
@@ -981,8 +981,9 @@ static int labels2mode(
m = ABOVE4X4;
else
{
- // the only time we should do costing for new motion vector or mode
- // is when we are on a new label (jbb May 08, 2007)
+ /* the only time we should do costing for new motion vector
+ * or mode is when we are on a new label (jbb May 08, 2007)
+ */
switch (m = this_mode)
{
case NEW4X4 :
@@ -1001,7 +1002,7 @@ static int labels2mode(
break;
}
- if (m == ABOVE4X4) // replace above with left if same
+ if (m == ABOVE4X4) /* replace above with left if same */
{
int_mv left_mv;
@@ -1062,9 +1063,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict);
vp8_subtract_b(be, bd, 16);
x->short_fdct4x4(be->src_diff, be->coeff, 32);
-
- // set to 0 no way to account for 2nd order DC so discount
- //be->coeff[0] = 0;
x->quantize_b(be, bd);
distortion += vp8_block_error(be->coeff, bd->dqcoeff);
@@ -1095,8 +1093,8 @@ typedef struct
int mvthresh;
int *mdcounts;
- int_mv sv_mvp[4]; // save 4 mvp from 8x8
- int sv_istep[2]; // save 2 initial step_param for 16x8/8x16
+ int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
+ int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */
} BEST_SEG_INFO;
@@ -1143,13 +1141,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
labels = vp8_mbsplits[segmentation];
label_count = vp8_mbsplit_count[segmentation];
- // 64 makes this threshold really big effectively
- // making it so that we very rarely check mvs on
- // segments. setting this to 1 would make mv thresh
- // roughly equal to what it is for macroblocks
+ /* 64 makes this threshold really big effectively making it so that we
+ * very rarely check mvs on segments. setting this to 1 would make mv
+ * thresh roughly equal to what it is for macroblocks
+ */
label_mv_thresh = 1 * bsi->mvthresh / label_count ;
- // Segmentation method overheads
+ /* Segmentation method overheads */
rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
@@ -1162,7 +1160,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
B_PREDICTION_MODE mode_selected = ZERO4X4;
int bestlabelyrate = 0;
- // search for the best motion vector on this segment
+ /* search for the best motion vector on this segment */
for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
{
int this_rd;
@@ -1191,7 +1189,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
BLOCK *c;
BLOCKD *e;
- // Is the best so far sufficiently good that we cant justify doing and new motion search.
+                    /* Is the best so far sufficiently good that we can't justify
+ * doing a new motion search.
+ */
if (best_label_rd < label_mv_thresh)
break;
@@ -1206,7 +1206,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
step_param = bsi->sv_istep[i];
}
- // use previous block's result as next block's MV predictor.
+ /* use previous block's result as next block's MV
+ * predictor.
+ */
if (segmentation == BLOCK_4X4 && i>0)
{
bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
@@ -1225,7 +1227,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
- // find first label
+ /* find first label */
n = vp8_mbsplit_offset[segmentation][i];
c = &x->block[n];
@@ -1265,7 +1267,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
sseshift = segmentation_to_sseshift[segmentation];
- // Should we do a full search (best quality only)
+ /* Should we do a full search (best quality only) */
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
{
/* Check if mvp_full is within the range. */
@@ -1282,7 +1284,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
}
else
{
- // The full search result is actually worse so re-instate the previous best vector
+ /* The full search result is actually worse so
+ * re-instate the previous best vector
+ */
e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
}
}
@@ -1302,7 +1306,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
bsi->ref_mv, x->mvcost);
- // Trap vectors that reach beyond the UMV borders
+ /* Trap vectors that reach beyond the UMV borders */
if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
{
@@ -1354,7 +1358,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bsi->segment_rd = this_segment_rd;
bsi->segment_num = segmentation;
- // store everything needed to come back to this!!
+ /* store everything needed to come back to this!! */
for (i = 0; i < 16; i++)
{
bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
@@ -1516,7 +1520,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
return bsi.segment_rd;
}
-//The improved MV prediction
+/* The improved MV prediction */
void vp8_mv_pred
(
VP8_COMP *cpi,
@@ -1550,7 +1554,9 @@ void vp8_mv_pred
near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
- // read in 3 nearby block's MVs from current frame as prediction candidates.
+ /* read in 3 nearby block's MVs from current frame as prediction
+ * candidates.
+ */
if (above->mbmi.ref_frame != INTRA_FRAME)
{
near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
@@ -1573,12 +1579,12 @@ void vp8_mv_pred
}
vcnt++;
- // read in 5 nearby block's MVs from last frame.
+ /* read in 5 nearby block's MVs from last frame. */
if(cpi->common.last_frame_type != KEY_FRAME)
{
mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
- // current in last frame
+ /* current in last frame */
if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
{
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
@@ -1587,7 +1593,7 @@ void vp8_mv_pred
}
vcnt++;
- // above in last frame
+ /* above in last frame */
if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
{
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
@@ -1596,7 +1602,7 @@ void vp8_mv_pred
}
vcnt++;
- // left in last frame
+ /* left in last frame */
if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
{
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
@@ -1605,7 +1611,7 @@ void vp8_mv_pred
}
vcnt++;
- // right in last frame
+ /* right in last frame */
if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
{
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
@@ -1614,7 +1620,7 @@ void vp8_mv_pred
}
vcnt++;
- // below in last frame
+ /* below in last frame */
if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
{
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
@@ -1655,7 +1661,9 @@ void vp8_mv_pred
mv.as_mv.col = mvy[vcnt/2];
find = 1;
- //sr is set to 0 to allow calling function to decide the search range.
+ /* sr is set to 0 to allow calling function to decide the search
+ * range.
+ */
*sr = 0;
}
}
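
vp8_mv_pred above gathers up to eight neighboring vectors (three from the current frame, five from the last frame) and, when enough of them share the dominant reference, takes the middle element of each sorted component list (mvx[vcnt/2], mvy[vcnt/2]) as the predictor. A sketch of that median step; qsort stands in for whatever sorting the encoder actually uses:

#include <stdlib.h>

static int cmp_int(const void *a, const void *b)
{
    return *(const int *)a - *(const int *)b;
}

/* Sketch of the component-wise median used by vp8_mv_pred above.
 * Assumes vcnt <= 8, matching the candidate arrays in the function. */
static void median_mv(const int *rows, const int *cols, int vcnt,
                      int *pred_row, int *pred_col)
{
    int mvx[8], mvy[8];
    int i;

    for (i = 0; i < vcnt; i++)
    {
        mvx[i] = rows[i];
        mvy[i] = cols[i];
    }
    qsort(mvx, vcnt, sizeof(mvx[0]), cmp_int);
    qsort(mvy, vcnt, sizeof(mvy[0]), cmp_int);

    /* middle element of each sorted component list */
    *pred_row = mvx[vcnt / 2];
    *pred_col = mvy[vcnt / 2];
}
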
@@ -1667,33 +1675,36 @@ void vp8_mv_pred
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
{
-
- int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
+ /* near_sad indexes:
+ * 0-cf above, 1-cf left, 2-cf aboveleft,
+ * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
+ */
+ int near_sad[8] = {0};
BLOCK *b = &x->block[0];
unsigned char *src_y_ptr = *(b->base_src);
- //calculate sad for current frame 3 nearby MBs.
+ /* calculate sad for current frame 3 nearby MBs. */
if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
{
near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
}else if(xd->mb_to_top_edge==0)
- { //only has left MB for sad calculation.
+ { /* only has left MB for sad calculation. */
near_sad[0] = near_sad[2] = INT_MAX;
- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
}else if(xd->mb_to_left_edge ==0)
- { //only has left MB for sad calculation.
+    { /* only has above MB for sad calculation. */
near_sad[1] = near_sad[2] = INT_MAX;
- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
}else
{
- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
- near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
+ near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
}
if(cpi->common.last_frame_type != KEY_FRAME)
{
- //calculate sad for last frame 5 nearby MBs.
+ /* calculate sad for last frame 5 nearby MBs. */
unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
@@ -1703,14 +1714,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
if(near_sad[4] != INT_MAX)
- near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
+ near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
if(near_sad[5] != INT_MAX)
- near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
- near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+ near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
+ near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
if(near_sad[6] != INT_MAX)
- near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
+ near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
if(near_sad[7] != INT_MAX)
- near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
+ near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
}
if(cpi->common.last_frame_type != KEY_FRAME)
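
The 0x7fffffff to UINT_MAX changes above appear to match the sdf prototype, whose last argument (the early-out SAD bound) is unsigned; passing UINT_MAX disables the cut-off entirely rather than capping it at INT_MAX. For reference, a plain scalar version of the 16x16 SAD these function pointers dispatch to (a sketch, not the library's optimized kernel):

    #include <stdlib.h>

    static unsigned int sad16x16_c(const unsigned char *src, int src_stride,
                                   const unsigned char *ref, int ref_stride)
    {
        unsigned int sad = 0;
        int r, c;
        for (r = 0; r < 16; ++r)
        {
            for (c = 0; c < 16; ++c)
                sad += abs(src[c] - ref[c]); /* sum of absolute differences */
            src += src_stride;
            ref += ref_stride;
        }
        return sad;
    }
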
@@ -1732,18 +1743,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
{
if (x->partition_info->bmi[i].mode == NEW4X4)
{
- cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
+ x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
+ x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
- best_ref_mv->as_mv.col) >> 1)]++;
}
}
}
else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
{
- cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
+ x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
- best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
+ x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
- best_ref_mv->as_mv.col) >> 1)]++;
}
}
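
The cpi-to-x moves in this hunk relocate the MV histograms from the shared encoder context into the per-macroblock MACROBLOCK, presumably so concurrent encoding threads no longer update a shared counter. The indexing itself is a symmetric histogram: the delta from the reference MV is halved and offset by mv_max so negative deltas land in the lower half of the table. A sketch with an illustrative bound (the real mv_max is a codec constant):

    #define MV_MAX_ 1023 /* illustrative stand-in for the codec's mv_max */

    static unsigned int MVcount[2][2 * MV_MAX_ + 1];

    static void count_mv_delta(int row_delta, int col_delta)
    {
        MVcount[0][MV_MAX_ + (row_delta >> 1)]++; /* row component */
        MVcount[1][MV_MAX_ + (col_delta >> 1)]++; /* col component */
    }
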
@@ -1766,7 +1777,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
{
unsigned int sse;
unsigned int var;
- int threshold = (xd->block[0].dequant[1]
+ unsigned int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >>4);
if(threshold < x->encode_breakout)
@@ -1784,8 +1795,8 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
if ((sse - var < q2dc * q2dc >>4) ||
(sse /2 > var && sse-var < 64))
{
- // Check u and v to make sure skip is ok
- int sse2= VP8_UVSSE(x);
+ /* Check u and v to make sure skip is ok */
+ unsigned int sse2 = VP8_UVSSE(x);
if (sse2 * 2 < threshold)
{
x->skip = 1;
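
The encode_breakout path above skips residual coding when the prediction is already close: the base threshold is the squared AC dequantizer step divided by 16, raised to the user's encode_breakout setting if that is larger, and twice the UV SSE must also stay under it before the skip is taken. The threshold selection as a standalone sketch, assuming dequant[1] is the AC step:

    static unsigned int breakout_threshold(unsigned int ac_step,
                                           unsigned int encode_breakout)
    {
        unsigned int threshold = (ac_step * ac_step) >> 4; /* step^2 / 16 */
        if (threshold < encode_breakout)
            threshold = encode_breakout;
        return threshold;
    }
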
@@ -1805,17 +1816,15 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
}
- //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code
-
- // Add in the Mv/mode cost
+ /* Add in the Mv/mode cost */
rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
- // Y cost and distortion
+ /* Y cost and distortion */
macro_block_yrd(x, &rd->rate_y, &distortion);
rd->rate2 += rd->rate_y;
rd->distortion2 += distortion;
- // UV cost and distortion
+ /* UV cost and distortion */
rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
cpi->common.full_pixel);
rd->rate2 += rd->rate_uv;
@@ -1832,9 +1841,11 @@ static int calculate_final_rd_costs(int this_rd,
VP8_COMP *cpi, MACROBLOCK *x)
{
MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
- // Where skip is allowable add in the default per mb cost for the no skip case.
- // where we then decide to skip we have to delete this and replace it with the
- // cost of signallying a skip
+
+ /* Where skip is allowable add in the default per mb cost for the no
+     * skip case. Where we then decide to skip we have to delete this and
+ * replace it with the cost of signalling a skip
+ */
if (cpi->common.mb_no_coeff_skip)
{
*other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
@@ -1849,7 +1860,10 @@ static int calculate_final_rd_costs(int this_rd,
if (!disable_skip)
{
- // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate
+ /* Test for the condition where skip block will be activated
+         * because there are no non-zero coefficients and make any
+ * necessary adjustment for rate
+ */
if (cpi->common.mb_no_coeff_skip)
{
int i;
@@ -1874,10 +1888,10 @@ static int calculate_final_rd_costs(int this_rd,
if (tteob == 0)
{
rd->rate2 -= (rd->rate_y + rd->rate_uv);
- //for best_yrd calculation
+ /* for best_yrd calculation */
rd->rate_uv = 0;
- // Back out no skip flag costing and add in skip flag costing
+ /* Back out no skip flag costing and add in skip flag costing */
if (cpi->prob_skip_false)
{
int prob_skip_cost;
@@ -1889,7 +1903,7 @@ static int calculate_final_rd_costs(int this_rd,
}
}
}
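
The branch elided at the hunk boundary computes the net rate change for signalling a skip, as the comment above it says: back out the "not skipped" bit cost added earlier and add the "skipped" cost. A self-contained stand-in for the boolean cost lookup (units of 1/256 bit; the library uses a precomputed table, and cost_bit here is hypothetical):

    #include <math.h>

    static int cost_bit(int prob, int bit) /* prob: 8-bit chance of a 0 bit */
    {
        double p = bit ? (256 - prob) / 256.0 : prob / 256.0;
        return (int)(-256.0 * log2(p) + 0.5);
    }

    /* rate2 -= cost_bit(prob_skip_false, 0);  back out "not skipped"   */
    /* rate2 += cost_bit(prob_skip_false, 1);  add "skipped" signalling */
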
- // Calculate the final RD estimate for this mode
+ /* Calculate the final RD estimate for this mode */
this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
== INTRA_FRAME)
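
RDCOST folds the running rate and distortion totals into a single figure of merit. A sketch of the usual fixed-point form, assuming rate is scaled by rdmult with an 8-bit shift and distortion by rddiv (the exact macro lives in the encoder headers):

    static int rdcost(int rdmult, int rddiv, int rate, int distortion)
    {
        return ((128 + rate * rdmult) >> 8) + rddiv * distortion;
    }
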
@@ -1953,7 +1967,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int_mv mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
int saddone=0;
- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7)
+    /* search range obtained from mv_pred(); it uses step_param levels (0-7) */
+ int sr=0;
unsigned char *plane[4][3];
int ref_frame_map[4];
@@ -1962,6 +1977,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex,
cpi->common.y1dc_delta_q);
+#if CONFIG_TEMPORAL_DENOISING
+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
+ best_rd_sse = INT_MAX;
+#endif
+
mode_mv = mode_mv_sb[sign_bias];
best_ref_mv.as_int = 0;
best_mode.rd = INT_MAX;
@@ -1994,7 +2014,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
*returnintra = INT_MAX;
- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame
+ /* Count of the number of MBs tested so far this frame */
+ x->mbs_tested_so_far++;
x->skip = 0;
@@ -2005,14 +2026,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int other_cost = 0;
int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
- // Test best rd so far against threshold for trying this mode.
- if (best_mode.rd <= cpi->rd_threshes[mode_index])
+ /* Test best rd so far against threshold for trying this mode. */
+ if (best_mode.rd <= x->rd_threshes[mode_index])
continue;
if (this_ref_frame < 0)
continue;
- // These variables hold are rolling total cost and distortion for this mode
+        /* These variables hold our rolling total cost and distortion for
+ * this mode
+ */
rd.rate2 = 0;
rd.distortion2 = 0;
@@ -2021,9 +2044,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.mode = this_mode;
x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
- // unless ARNR filtering is enabled in which case we want
- // an unfiltered alternative
+ /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ * unless ARNR filtering is enabled in which case we want
+ * an unfiltered alternative
+ */
if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
{
if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
@@ -2045,45 +2069,56 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
}
- // Check to see if the testing frequency for this mode is at its max
- // If so then prevent it from being tested and increase the threshold for its testing
- if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
+ /* Check to see if the testing frequency for this mode is at its
+         * max. If so then prevent it from being tested and increase the
+ * threshold for its testing
+ */
+ if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
{
- if (cpi->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index])
+ if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])
{
- // Increase the threshold for coding this mode to make it less likely to be chosen
- cpi->rd_thresh_mult[mode_index] += 4;
+ /* Increase the threshold for coding this mode to make it
+ * less likely to be chosen
+ */
+ x->rd_thresh_mult[mode_index] += 4;
- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+ x->rd_threshes[mode_index] =
+ (cpi->rd_baseline_thresh[mode_index] >> 7) *
+ x->rd_thresh_mult[mode_index];
continue;
}
}
- // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested
- cpi->mode_test_hit_counts[mode_index] ++;
+ /* We have now reached the point where we are going to test the
+         * current mode, so increment the counter for the number of times
+ * it has been tested
+ */
+ x->mode_test_hit_counts[mode_index] ++;
- // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
- if (cpi->zbin_mode_boost_enabled)
+ /* Experimental code. Special case for gf and arf zeromv modes.
+         * Increase zbin size to suppress noise
+ */
+ if (x->zbin_mode_boost_enabled)
{
if ( this_ref_frame == INTRA_FRAME )
- cpi->zbin_mode_boost = 0;
+ x->zbin_mode_boost = 0;
else
{
if (vp8_mode_order[mode_index] == ZEROMV)
{
if (this_ref_frame != LAST_FRAME)
- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
else
- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
}
else if (vp8_mode_order[mode_index] == SPLITMV)
- cpi->zbin_mode_boost = 0;
+ x->zbin_mode_boost = 0;
else
- cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ x->zbin_mode_boost = MV_ZBIN_BOOST;
}
vp8_update_zbin_extra(cpi, x);
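
The boost selected above widens the quantizer's zero bin for inter modes, so near-zero coefficients in static golden/altref blocks are dropped rather than coded as noise. The decision tree restated as one function, with illustrative constants (the real *_ZBIN_BOOST values are encoder defines):

    #define GF_ZEROMV_BOOST_ 12 /* illustrative values only */
    #define LF_ZEROMV_BOOST_ 6
    #define MV_BOOST_        4

    static int select_zbin_boost(int is_intra, int is_zeromv,
                                 int is_splitmv, int ref_is_last)
    {
        if (is_intra || is_splitmv)
            return 0;
        if (is_zeromv)
            return ref_is_last ? LF_ZEROMV_BOOST_ : GF_ZEROMV_BOOST_;
        return MV_BOOST_;
    }
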
@@ -2091,7 +2126,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if(!uv_intra_done && this_ref_frame == INTRA_FRAME)
{
- rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate,
+ rd_pick_intra_mbuv_mode(x, &uv_intra_rate,
&uv_intra_rate_tokenonly,
&uv_intra_distortion);
uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
@@ -2113,9 +2148,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
int tmp_rd;
- // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
+ /* Note the rate value returned here includes the cost of
+ * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
+ */
int distortion;
- tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
+ tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
rd.rate2 += rate;
rd.distortion2 += distortion;
@@ -2140,8 +2177,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int this_rd_thresh;
int distortion;
- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3];
- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh;
+ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ?
+ x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3];
+ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ?
+ x->rd_threshes[THR_NEW2] : this_rd_thresh;
tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
best_mode.yrd, mdcounts,
@@ -2150,10 +2189,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
rd.rate2 += rate;
rd.distortion2 += distortion;
- // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV
+ /* If even the 'Y' rd value of split is higher than best so far
+             * then don't bother looking at UV
+ */
if (tmp_rd < best_mode.yrd)
{
- // Now work out UV cost and add it in
+ /* Now work out UV cost and add it in */
rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
rd.rate2 += rd.rate_uv;
rd.distortion2 += rd.distortion_uv;
@@ -2225,7 +2266,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mvp_full.as_mv.col = mvp.as_mv.col>>3;
mvp_full.as_mv.row = mvp.as_mv.row>>3;
- // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
+ /* Get intersection of UMV window and valid MV window to
+ * reduce # of checks in diamond search.
+ */
if (x->mv_col_min < col_min )
x->mv_col_min = col_min;
if (x->mv_col_max > col_max )
@@ -2235,11 +2278,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if (x->mv_row_max > row_max )
x->mv_row_max = row_max;
- //adjust search range according to sr from mv prediction
+ /* adjust search range according to sr from mv prediction */
if(sr > step_param)
step_param = sr;
- // Initial step/diamond search
+ /* Initial step/diamond search */
{
bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
step_param, sadpb, &num00,
@@ -2247,7 +2290,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->mvcost, &best_ref_mv);
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
- // Further step/diamond searches as necessary
+ /* Further step/diamond searches as necessary */
n = 0;
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
@@ -2293,11 +2336,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
int search_range;
- //It seems not a good way to set search_range. Need further investigation.
- //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
search_range = 8;
- //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb,
search_range, &cpi->fn_ptr[BLOCK_16X16],
x->mvcost, &best_ref_mv);
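
The refining search above polishes the diamond-search result within a fixed range (8 steps here) by repeatedly probing the four neighbours of the current best vector and recentring on any improvement. A compilable sketch over an abstract cost probe, illustrative rather than the library's routine:

    typedef unsigned int (*cost_fn)(int row, int col);

    static unsigned int refine_mv(cost_fn cost, int *row, int *col, int range)
    {
        unsigned int best = cost(*row, *col);
        int i;
        for (i = 0; i < range; ++i)
        {
            static const int dr[4] = { -1, 1,  0, 0 };
            static const int dc[4] = {  0, 0, -1, 1 };
            int j, best_j = -1;
            for (j = 0; j < 4; ++j)
            {
                unsigned int c = cost(*row + dr[j], *col + dc[j]);
                if (c < best)
                {
                    best = c;
                    best_j = j;
                }
            }
            if (best_j < 0)
                break; /* no neighbour improved: done */
            *row += dr[best_j];
            *col += dc[best_j];
        }
        return best;
    }
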
@@ -2330,24 +2370,31 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
- // Add the new motion vector cost to our rolling cost variable
+ /* Add the new motion vector cost to our rolling cost variable */
rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
}
case NEARESTMV:
case NEARMV:
- // Clip "next_nearest" so that it does not extend to far out of image
+                /* Clip "next_nearest" so that it does not extend too far out
+                 * of the image
+ */
vp8_clamp_mv2(&mode_mv[this_mode], xd);
- // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode.
+ /* Do not bother proceeding if the vector (from newmv, nearest
+ * or near) is 0,0 as this should then be coded using the zeromv
+ * mode.
+ */
if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0))
continue;
case ZEROMV:
- // Trap vectors that reach beyond the UMV borders
- // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point
- // because of the lack of break statements in the previous two cases.
+ /* Trap vectors that reach beyond the UMV borders
+                 * Note that ALL New MV, Nearest MV, Near MV and Zero MV code
+ * drops through to this point because of the lack of break
+ * statements in the previous two cases.
+ */
if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
continue;
@@ -2365,35 +2412,52 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
disable_skip, uv_intra_tteob,
intra_rd_penalty, cpi, x);
- // Keep record of best intra distortion
+ /* Keep record of best intra distortion */
if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
(this_rd < best_mode.intra_rd) )
{
best_mode.intra_rd = this_rd;
*returnintra = rd.distortion2 ;
}
-
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- // Store the best NEWMV in x for later use in the denoiser.
- // We are restricted to the LAST_FRAME since the denoiser only keeps
- // one filter state.
- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
- {
- x->e_mbd.best_sse_inter_mode = NEWMV;
- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
- x->e_mbd.need_to_clamp_best_mvs =
- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
- }
+ unsigned int sse;
+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
+ mode_mv[this_mode]);
+
+ if (sse < best_rd_sse)
+ best_rd_sse = sse;
+
+ /* Store for later use by denoiser. */
+ if (this_mode == ZEROMV && sse < zero_mv_sse )
+ {
+ zero_mv_sse = sse;
+ x->best_zeromv_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
+
+ /* Store the best NEWMV in x for later use in the denoiser. */
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
+ sse < best_sse)
+ {
+ best_sse = sse;
+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
+ mode_mv[this_mode]);
+ x->best_sse_inter_mode = NEWMV;
+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
+ x->need_to_clamp_best_mvs =
+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
+ x->best_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
}
#endif
- // Did this mode help.. i.i is it the new best mode
+            /* Did this mode help, i.e. is it the new best mode? */
if (this_rd < best_mode.rd || x->skip)
{
- // Note index of best mode so far
+ /* Note index of best mode so far */
best_mode_index = mode_index;
*returnrate = rd.rate2;
*returndistortion = rd.distortion2;
@@ -2406,95 +2470,103 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
- // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
- cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+ /* Testing this mode gave rise to an improvement in best error
+ * score. Lower threshold a bit for next time
+ */
+ x->rd_thresh_mult[mode_index] =
+ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
}
- // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
+ /* If the mode did not help improve the best error case then raise
+ * the threshold for testing that mode next time around.
+ */
else
{
- cpi->rd_thresh_mult[mode_index] += 4;
-
- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+ x->rd_thresh_mult[mode_index] += 4;
- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
}
+ x->rd_threshes[mode_index] =
+ (cpi->rd_baseline_thresh[mode_index] >> 7) *
+ x->rd_thresh_mult[mode_index];
if (x->skip)
break;
}
- // Reduce the activation RD thresholds for the best choice mode
+ /* Reduce the activation RD thresholds for the best choice mode */
if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
{
- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
-
- cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
- cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
-
- // If we chose a split mode then reset the new MV thresholds as well
- /*if ( vp8_mode_order[best_mode_index] == SPLITMV )
- {
- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4);
- cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT;
- cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV];
-
- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4);
- cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT;
- cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG];
-
- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4);
- cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT;
- cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA];
- }*/
-
+ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
+
+ x->rd_thresh_mult[best_mode_index] =
+ (x->rd_thresh_mult[best_mode_index] >=
+ (MIN_THRESHMULT + best_adjustment)) ?
+ x->rd_thresh_mult[best_mode_index] - best_adjustment :
+ MIN_THRESHMULT;
+ x->rd_threshes[best_mode_index] =
+ (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
+ x->rd_thresh_mult[best_mode_index];
}
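
Taken together, the branches above implement a simple feedback loop on the per-mode pruning thresholds: a winning mode's multiplier is decreased (so it is tested more readily next time), a losing mode's is increased, and the effective threshold is rebuilt from the baseline in both cases. The same logic in one place, with illustrative bounds (the real MIN/MAX_THRESHMULT are encoder defines):

    #define MIN_THRESHMULT_ 32 /* illustrative values only */
    #define MAX_THRESHMULT_ 512

    static void update_mode_threshold(int won, int *thresh_mult,
                                      int *threshold, int baseline)
    {
        if (won)
            *thresh_mult = (*thresh_mult >= MIN_THRESHMULT_ + 2) ?
                           *thresh_mult - 2 : MIN_THRESHMULT_;
        else
        {
            *thresh_mult += 4;
            if (*thresh_mult > MAX_THRESHMULT_)
                *thresh_mult = MAX_THRESHMULT_;
        }
        *threshold = (baseline >> 7) * *thresh_mult;
    }
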
- // Note how often each mode chosen as best
+    /* Note how often each mode is chosen as best */
cpi->mode_chosen_counts[best_mode_index] ++;
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
- // No best MV found.
- x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode;
- x->e_mbd.best_sse_mv = best_mode.mbmode.mv;
- x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
- }
-
- // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used?
- vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0,
- recon_yoffset, recon_uvoffset);
- // Reevalute ZEROMV if the current mode is INTRA.
- if (best_mode.mbmode.ref_frame == INTRA_FRAME)
- {
- int this_rd = INT_MAX;
- int disable_skip = 0;
- int other_cost = 0;
- vpx_memset(&rd, 0, sizeof(rd));
- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- rd.rate2 += x->ref_frame_cost[LAST_FRAME];
- rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts);
- x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
- this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
- this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
- disable_skip, uv_intra_tteob,
- intra_rd_penalty, cpi, x);
- if (this_rd < best_mode.rd || x->skip)
+ if (x->best_sse_inter_mode == DC_PRED)
{
- // Note index of best mode so far
- best_mode_index = mode_index;
- *returnrate = rd.rate2;
- *returndistortion = rd.distortion2;
- update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
+ /* No best MV found. */
+ x->best_sse_inter_mode = best_mode.mbmode.mode;
+ x->best_sse_mv = best_mode.mbmode.mv;
+ x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
+ x->best_reference_frame = best_mode.mbmode.ref_frame;
+ best_sse = best_rd_sse;
+ }
+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
+ recon_yoffset, recon_uvoffset);
+
+
+ /* Reevaluate ZEROMV after denoising. */
+ if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
+ x->best_zeromv_reference_frame != INTRA_FRAME)
+ {
+ int this_rd = INT_MAX;
+ int disable_skip = 0;
+ int other_cost = 0;
+ int this_ref_frame = x->best_zeromv_reference_frame;
+ rd.rate2 = x->ref_frame_cost[this_ref_frame] +
+ vp8_cost_mv_ref(ZEROMV, mdcounts);
+ rd.distortion2 = 0;
+
+ /* set up the proper prediction buffers for the frame */
+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
+
+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+
+ this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
+ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
+ disable_skip, uv_intra_tteob,
+ intra_rd_penalty, cpi, x);
+ if (this_rd < best_mode.rd || x->skip)
+ {
+ /* Note index of best mode so far */
+ best_mode_index = mode_index;
+ *returnrate = rd.rate2;
+ *returndistortion = rd.distortion2;
+ update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
+ }
}
- }
+
}
#endif
@@ -2512,7 +2584,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
- // macroblock modes
+ /* macroblock modes */
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
if (best_mode.mbmode.mode == B_PRED)
@@ -2539,7 +2611,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
rd_update_mvcount(cpi, x, &best_ref_mv);
}
-void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
+void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
{
int error4x4, error16x16;
int rate4x4, rate16x16 = 0, rateuv;
@@ -2551,15 +2623,13 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
- rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
+ rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
rate = rateuv;
- error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
- &rate16x16, &rate16x16_tokenonly,
+ error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
&dist16x16);
- error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
- &rate4x4, &rate4x4_tokenonly,
+ error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
&dist4x4, error16x16);
if (error4x4 < error16x16)
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index db939f9..1e11fa7 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -65,9 +65,9 @@ static void insertsortsad(int arr[],int idx[], int len)
}
}
-extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
+extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue);
extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
-extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
+extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate);
static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb,
@@ -86,15 +86,15 @@ static void get_predictor_pointers(const VP8_COMP *cpi,
unsigned int recon_yoffset,
unsigned int recon_uvoffset)
{
- if (cpi->ref_frame_flags & VP8_LAST_FLAG)
+ if (cpi->ref_frame_flags & VP8_LAST_FRAME)
get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx],
plane[LAST_FRAME], recon_yoffset, recon_uvoffset);
- if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
get_plane_pointers(&cpi->common.yv12_fb[cpi->common.gld_fb_idx],
plane[GOLDEN_FRAME], recon_yoffset, recon_uvoffset);
- if (cpi->ref_frame_flags & VP8_ALT_FLAG)
+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
get_plane_pointers(&cpi->common.yv12_fb[cpi->common.alt_fb_idx],
plane[ALTREF_FRAME], recon_yoffset, recon_uvoffset);
}
@@ -106,11 +106,11 @@ static void get_reference_search_order(const VP8_COMP *cpi,
int i=0;
ref_frame_map[i++] = INTRA_FRAME;
- if (cpi->ref_frame_flags & VP8_LAST_FLAG)
+ if (cpi->ref_frame_flags & VP8_LAST_FRAME)
ref_frame_map[i++] = LAST_FRAME;
- if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
ref_frame_map[i++] = GOLDEN_FRAME;
- if (cpi->ref_frame_flags & VP8_ALT_FLAG)
+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
ref_frame_map[i++] = ALTREF_FRAME;
for(; i<4; i++)
ref_frame_map[i] = -1;
diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c
index fc0967d..37972e2 100644
--- a/vp8/encoder/segmentation.c
+++ b/vp8/encoder/segmentation.c
@@ -22,22 +22,24 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
{
- // Reset Gf useage monitors
+        /* Reset GF usage monitors */
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
}
else
{
- // for each macroblock row in image
+ /* for each macroblock row in image */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
- // for each macroblock col in image
+ /* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- // If using golden then set GF active flag if not already set.
- // If using last frame 0,0 mode then leave flag as it is
- // else if using non 0,0 motion or intra modes then clear flag if it is currently set
+ /* If using golden then set GF active flag if not already set.
+ * If using last frame 0,0 mode then leave flag as it is
+ * else if using non 0,0 motion or intra modes then clear
+ * flag if it is currently set
+ */
if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
{
if (*(x->gf_active_ptr) == 0)
@@ -52,12 +54,12 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
cpi->gf_active_count--;
}
- x->gf_active_ptr++; // Step onto next entry
- this_mb_mode_info++; // skip to next mb
+ x->gf_active_ptr++; /* Step onto next entry */
+ this_mb_mode_info++; /* skip to next mb */
}
- // this is to account for the border
+ /* this is to account for the border */
this_mb_mode_info++;
}
}
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 6c61b36..b83ae89 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -30,8 +30,8 @@
#include <math.h>
#include <limits.h>
-#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
-#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
+#define ALT_REF_MC_ENABLED 1 /* dis/enable MC in AltRef filtering */
+#define ALT_REF_SUBPEL_ENABLED 1 /* dis/enable subpel in MC AltRef filtering */
#if VP8_TEMPORAL_ALT_REF
@@ -50,7 +50,7 @@ static void vp8_temporal_filter_predictors_mb_c
int offset;
unsigned char *yptr, *uptr, *vptr;
- // Y
+ /* Y */
yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
@@ -63,7 +63,7 @@ static void vp8_temporal_filter_predictors_mb_c
vp8_copy_mem16x16(yptr, stride, &pred[0], 16);
}
- // U & V
+ /* U & V */
mv_row >>= 1;
mv_col >>= 1;
stride = (stride + 1) >> 1;
@@ -109,9 +109,10 @@ void vp8_temporal_filter_apply_c
int pixel_value = *frame2++;
modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
+ /* This is an integer approximation of:
+     * float coeff = (3.0 * modifier * modifier) / pow(2, strength);
+ * modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
+ */
modifier *= modifier;
modifier *= 3;
modifier += 1 << (strength - 1);
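
Worked through for strength 6: the weight given to a pixel from frame2 is 16 - min(16, round(3*d*d / 64)) for a difference d, so identical pixels get the full weight of 16 and the weight reaches zero once |d| is about 19. The approximation as a standalone function (the clamp and the final 16 - modifier step, per the comment's formula, complete the computation):

    static int filter_weight_for_diff(int src_byte, int pixel_value,
                                      int strength)
    {
        int modifier = src_byte - pixel_value;
        modifier *= modifier;            /* d^2              */
        modifier *= 3;                   /* 3 * d^2          */
        modifier += 1 << (strength - 1); /* round to nearest */
        modifier >>= strength;           /* / 2^strength     */
        if (modifier > 16)
            modifier = 16;
        return 16 - modifier;
    }
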
@@ -134,7 +135,6 @@ void vp8_temporal_filter_apply_c
}
#if ALT_REF_MC_ENABLED
-static int dummy_cost[2*mv_max+1];
static int vp8_temporal_filter_find_matching_mb_c
(
@@ -155,10 +155,7 @@ static int vp8_temporal_filter_find_matching_mb_c
int_mv best_ref_mv1;
int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
- int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
- int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
-
- // Save input state
+ /* Save input state */
unsigned char **base_src = b->base_src;
int src = b->src;
int src_stride = b->src_stride;
@@ -170,7 +167,7 @@ static int vp8_temporal_filter_find_matching_mb_c
best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3;
best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3;
- // Setup frame pointers
+ /* Setup frame pointers */
b->base_src = &arf_frame->y_buffer;
b->src_stride = arf_frame->y_stride;
b->src = mb_offset;
@@ -179,7 +176,7 @@ static int vp8_temporal_filter_find_matching_mb_c
x->e_mbd.pre.y_stride = frame_ptr->y_stride;
d->offset = mb_offset;
- // Further step/diamond searches as necessary
+ /* Further step/diamond searches as necessary */
if (cpi->Speed < 8)
{
step_param = cpi->sf.first_step + (cpi->Speed > 5);
@@ -189,29 +186,29 @@ static int vp8_temporal_filter_find_matching_mb_c
step_param = cpi->sf.first_step + 2;
}
- /*cpi->sf.search_method == HEX*/
- // TODO Check that the 16x16 vf & sdf are selected here
- bestsme = vp8_hex_search(x, b, d,
- &best_ref_mv1_full, &d->bmi.mv,
- step_param,
- sadpb,
- &cpi->fn_ptr[BLOCK_16X16],
- mvsadcost, mvcost, &best_ref_mv1);
+ /* TODO Check that the 16x16 vf & sdf are selected here */
+ /* Ignore mv costing by sending NULL cost arrays */
+ bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv,
+ step_param, sadpb,
+ &cpi->fn_ptr[BLOCK_16X16],
+ NULL, NULL, &best_ref_mv1);
#if ALT_REF_SUBPEL_ENABLED
- // Try sub-pixel MC?
- //if (bestsme > error_thresh && bestsme < INT_MAX)
+ /* Try sub-pixel MC? */
{
int distortion;
unsigned int sse;
+ /* Ignore mv costing by sending NULL cost array */
bestsme = cpi->find_fractional_mv_step(x, b, d,
- &d->bmi.mv, &best_ref_mv1,
- x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
- mvcost, &distortion, &sse);
+ &d->bmi.mv,
+ &best_ref_mv1,
+ x->errorperbit,
+ &cpi->fn_ptr[BLOCK_16X16],
+ NULL, &distortion, &sse);
}
#endif
- // Save input state
+    /* Restore input state */
b->base_src = base_src;
b->src = src;
b->src_stride = src_stride;
@@ -246,7 +243,7 @@ static void vp8_temporal_filter_iterate_c
unsigned char *dst1, *dst2;
DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);
- // Save input state
+ /* Save input state */
unsigned char *y_buffer = mbd->pre.y_buffer;
unsigned char *u_buffer = mbd->pre.u_buffer;
unsigned char *v_buffer = mbd->pre.v_buffer;
@@ -254,16 +251,17 @@ static void vp8_temporal_filter_iterate_c
for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
#if ALT_REF_MC_ENABLED
- // Source frames are extended to 16 pixels. This is different than
- // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
- // A 6 tap filter is used for motion search. This requires 2 pixels
- // before and 3 pixels after. So the largest Y mv on a border would
- // then be 16 - 3. The UV blocks are half the size of the Y and
- // therefore only extended by 8. The largest mv that a UV block
- // can support is 8 - 3. A UV mv is half of a Y mv.
- // (16 - 3) >> 1 == 6 which is greater than 8 - 3.
- // To keep the mv in play for both Y and UV planes the max that it
- // can be on a border is therefore 16 - 5.
+ /* Source frames are extended to 16 pixels. This is different than
+ * L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
+ * A 6 tap filter is used for motion search. This requires 2 pixels
+ * before and 3 pixels after. So the largest Y mv on a border would
+ * then be 16 - 3. The UV blocks are half the size of the Y and
+ * therefore only extended by 8. The largest mv that a UV block
+ * can support is 8 - 3. A UV mv is half of a Y mv.
+ * (16 - 3) >> 1 == 6 which is greater than 8 - 3.
+ * To keep the mv in play for both Y and UV planes the max that it
+ * can be on a border is therefore 16 - 5.
+ */
cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+ (16 - 5);
@@ -285,36 +283,41 @@ static void vp8_temporal_filter_iterate_c
for (frame = 0; frame < frame_count; frame++)
{
- int err = 0;
-
if (cpi->frames[frame] == NULL)
continue;
mbd->block[0].bmi.mv.as_mv.row = 0;
mbd->block[0].bmi.mv.as_mv.col = 0;
+ if (frame == alt_ref_index)
+ {
+ filter_weight = 2;
+ }
+ else
+ {
+ int err = 0;
#if ALT_REF_MC_ENABLED
#define THRESH_LOW 10000
#define THRESH_HIGH 20000
-
- // Find best match in this frame by MC
- err = vp8_temporal_filter_find_matching_mb_c
- (cpi,
- cpi->frames[alt_ref_index],
- cpi->frames[frame],
- mb_y_offset,
- THRESH_LOW);
-
+ /* Find best match in this frame by MC */
+ err = vp8_temporal_filter_find_matching_mb_c
+ (cpi,
+ cpi->frames[alt_ref_index],
+ cpi->frames[frame],
+ mb_y_offset,
+ THRESH_LOW);
#endif
- // Assign higher weight to matching MB if it's error
- // score is lower. If not applying MC default behavior
- // is to weight all MBs equal.
- filter_weight = err<THRESH_LOW
- ? 2 : err<THRESH_HIGH ? 1 : 0;
+                    /* Assign higher weight to matching MB if its error
+                     * score is lower. If not applying MC, the default
+                     * behavior is to weight all MBs equally.
+ */
+ filter_weight = err<THRESH_LOW
+ ? 2 : err<THRESH_HIGH ? 1 : 0;
+ }
if (filter_weight != 0)
{
- // Construct the predictors
+ /* Construct the predictors */
vp8_temporal_filter_predictors_mb_c
(mbd,
cpi->frames[frame]->y_buffer + mb_y_offset,
@@ -325,7 +328,7 @@ static void vp8_temporal_filter_iterate_c
mbd->block[0].bmi.mv.as_mv.col,
predictor);
- // Apply the filter (YUV)
+ /* Apply the filter (YUV) */
vp8_temporal_filter_apply
(f->y_buffer + mb_y_offset,
f->y_stride,
@@ -358,7 +361,7 @@ static void vp8_temporal_filter_iterate_c
}
}
- // Normalize filter output to produce AltRef frame
+ /* Normalize filter output to produce AltRef frame */
dst1 = cpi->alt_ref_buffer.y_buffer;
stride = cpi->alt_ref_buffer.y_stride;
byte = mb_y_offset;
@@ -372,7 +375,7 @@ static void vp8_temporal_filter_iterate_c
dst1[byte] = (unsigned char)pval;
- // move to next pixel
+ /* move to next pixel */
byte++;
}
@@ -389,19 +392,19 @@ static void vp8_temporal_filter_iterate_c
{
int m=k+64;
- // U
+ /* U */
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= cpi->fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (unsigned char)pval;
- // V
+ /* V */
pval = accumulator[m] + (count[m] >> 1);
pval *= cpi->fixed_divide[count[m]];
pval >>= 19;
dst2[byte] = (unsigned char)pval;
- // move to next pixel
+ /* move to next pixel */
byte++;
}
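
The pval arithmetic above is a rounded divide by the accumulated weight done with a reciprocal table: adding count/2 rounds to nearest, and fixed_divide[count] supplies a scaled reciprocal that the 19-bit shift undoes. A sketch, assuming the table holds (1 << 19) / i, which is what the shift implies:

    static unsigned int fixed_divide[512];

    static void init_fixed_divide(void)
    {
        unsigned int i;
        fixed_divide[0] = 0;               /* index 0 unused */
        for (i = 1; i < 512; ++i)
            fixed_divide[i] = 0x80000 / i; /* (1 << 19) / i */
    }

    static unsigned char normalize(unsigned int accumulator, unsigned int count)
    {
        unsigned int pval = accumulator + (count >> 1); /* round to nearest */
        pval *= fixed_divide[count];
        pval >>= 19;
        return (unsigned char)pval;
    }
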
@@ -416,7 +419,7 @@ static void vp8_temporal_filter_iterate_c
mb_uv_offset += 8*(f->uv_stride-mb_cols);
}
- // Restore input state
+ /* Restore input state */
mbd->pre.y_buffer = y_buffer;
mbd->pre.u_buffer = u_buffer;
mbd->pre.v_buffer = v_buffer;
@@ -450,8 +453,7 @@ void vp8_temporal_filter_prepare_c
switch (blur_type)
{
case 1:
- /////////////////////////////////////////
- // Backward Blur
+ /* Backward Blur */
frames_to_blur_backward = num_frames_backward;
@@ -462,8 +464,7 @@ void vp8_temporal_filter_prepare_c
break;
case 2:
- /////////////////////////////////////////
- // Forward Blur
+ /* Forward Blur */
frames_to_blur_forward = num_frames_forward;
@@ -475,8 +476,7 @@ void vp8_temporal_filter_prepare_c
case 3:
default:
- /////////////////////////////////////////
- // Center Blur
+ /* Center Blur */
frames_to_blur_forward = num_frames_forward;
frames_to_blur_backward = num_frames_backward;
@@ -486,7 +486,7 @@ void vp8_temporal_filter_prepare_c
if (frames_to_blur_backward > frames_to_blur_forward)
frames_to_blur_backward = frames_to_blur_forward;
- // When max_frames is even we have 1 more frame backward than forward
+ /* When max_frames is even we have 1 more frame backward than forward */
if (frames_to_blur_forward > (max_frames - 1) / 2)
frames_to_blur_forward = ((max_frames - 1) / 2);
@@ -499,21 +499,7 @@ void vp8_temporal_filter_prepare_c
start_frame = distance + frames_to_blur_forward;
-#ifdef DEBUGFWG
- // DEBUG FWG
- printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
- , max_frames
- , num_frames_backward
- , num_frames_forward
- , frames_to_blur
- , frames_to_blur_backward
- , frames_to_blur_forward
- , cpi->source_encode_index
- , cpi->last_alt_ref_sei
- , start_frame);
-#endif
-
- // Setup frame pointers, NULL indicates frame not included in filter
+    /* Set up frame pointers; NULL indicates a frame not included in the filter */
vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
for (frame = 0; frame < frames_to_blur; frame++)
{
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index ef41fa8..3b5268b 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -23,7 +23,7 @@
#ifdef ENTROPY_STATS
_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
#endif
-void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ;
void vp8_fix_contexts(MACROBLOCKD *x);
#include "dct_value_tokens.h"
@@ -102,11 +102,12 @@ static void fill_value_tokens()
static void tokenize2nd_order_b
(
- MACROBLOCKD *x,
+ MACROBLOCK *x,
TOKENEXTRA **tp,
VP8_COMP *cpi
)
{
+ MACROBLOCKD *xd = &x->e_mbd;
int pt; /* near block/prev token context index */
int c; /* start at DC */
TOKENEXTRA *t = *tp;/* store tokens starting here */
@@ -117,11 +118,11 @@ static void tokenize2nd_order_b
int band, rc, v, token;
int eob;
- b = x->block + 24;
+ b = xd->block + 24;
qcoeff_ptr = b->qcoeff;
- a = (ENTROPY_CONTEXT *)x->above_context + 8;
- l = (ENTROPY_CONTEXT *)x->left_context + 8;
- eob = x->eobs[24];
+ a = (ENTROPY_CONTEXT *)xd->above_context + 8;
+ l = (ENTROPY_CONTEXT *)xd->left_context + 8;
+ eob = xd->eobs[24];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
if(!eob)
@@ -131,7 +132,7 @@ static void tokenize2nd_order_b
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
t++;
*tp = t;
*a = *l = 0;
@@ -145,7 +146,7 @@ static void tokenize2nd_order_b
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [1] [0] [pt] [token];
+ ++x->coef_counts [1] [0] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
c = 1;
@@ -164,7 +165,7 @@ static void tokenize2nd_order_b
t->skip_eob_node = ((pt == 0));
- ++cpi->coef_counts [1] [band] [pt] [token];
+ ++x->coef_counts [1] [band] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
@@ -177,7 +178,7 @@ static void tokenize2nd_order_b
t->skip_eob_node = 0;
- ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN];
t++;
}
@@ -189,12 +190,13 @@ static void tokenize2nd_order_b
static void tokenize1st_order_b
(
- MACROBLOCKD *x,
+ MACROBLOCK *x,
TOKENEXTRA **tp,
int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
VP8_COMP *cpi
)
{
+ MACROBLOCKD *xd = &x->e_mbd;
unsigned int block;
const BLOCKD *b;
int pt; /* near block/prev token context index */
@@ -207,15 +209,15 @@ static void tokenize1st_order_b
int band, rc, v;
int tmp1, tmp2;
- b = x->block;
+ b = xd->block;
/* Luma */
for (block = 0; block < 16; block++, b++)
{
tmp1 = vp8_block2above[block];
tmp2 = vp8_block2left[block];
qcoeff_ptr = b->qcoeff;
- a = (ENTROPY_CONTEXT *)x->above_context + tmp1;
- l = (ENTROPY_CONTEXT *)x->left_context + tmp2;
+ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;
+ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
@@ -228,7 +230,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN];
t++;
*tp = t;
*a = *l = 0;
@@ -243,7 +245,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [type] [c] [pt] [token];
+ ++x->coef_counts [type] [c] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
c++;
@@ -261,7 +263,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = (pt == 0);
- ++cpi->coef_counts [type] [band] [pt] [token];
+ ++x->coef_counts [type] [band] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
@@ -273,7 +275,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
t++;
}
@@ -287,8 +289,8 @@ static void tokenize1st_order_b
tmp1 = vp8_block2above[block];
tmp2 = vp8_block2left[block];
qcoeff_ptr = b->qcoeff;
- a = (ENTROPY_CONTEXT *)x->above_context + tmp1;
- l = (ENTROPY_CONTEXT *)x->left_context + tmp2;
+ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;
+ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
@@ -299,7 +301,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN];
t++;
*tp = t;
*a = *l = 0;
@@ -314,7 +316,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [2] [0] [pt] [token];
+ ++x->coef_counts [2] [0] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
c = 1;
@@ -333,7 +335,7 @@ static void tokenize1st_order_b
t->skip_eob_node = (pt == 0);
- ++cpi->coef_counts [2] [band] [pt] [token];
+ ++x->coef_counts [2] [band] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
@@ -346,7 +348,7 @@ static void tokenize1st_order_b
t->skip_eob_node = 0;
- ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN];
t++;
}
@@ -374,16 +376,18 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
}
-void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
+void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
+ MACROBLOCKD *xd = &x->e_mbd;
int plane_type;
int has_y2_block;
- has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
- && x->mode_info_context->mbmi.mode != SPLITMV);
+ has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED
+ && xd->mode_info_context->mbmi.mode != SPLITMV);
- x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block);
- if (x->mode_info_context->mbmi.mb_skip_coeff)
+ xd->mode_info_context->mbmi.mb_skip_coeff =
+ mb_is_skippable(xd, has_y2_block);
+ if (xd->mode_info_context->mbmi.mb_skip_coeff)
{
if (!cpi->common.mb_no_coeff_skip)
{
@@ -391,8 +395,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
}
else
{
- vp8_fix_contexts(x);
- cpi->skip_true_count++;
+ vp8_fix_contexts(xd);
+ x->skip_true_count++;
}
return;
@@ -488,7 +492,8 @@ static void stuff2nd_order_b
TOKENEXTRA **tp,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
- VP8_COMP *cpi
+ VP8_COMP *cpi,
+ MACROBLOCK *x
)
{
int pt; /* near block/prev token context index */
@@ -498,13 +503,12 @@ static void stuff2nd_order_b
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
++t;
*tp = t;
pt = 0;
*a = *l = pt;
-
}
static void stuff1st_order_b
@@ -513,7 +517,8 @@ static void stuff1st_order_b
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
int type,
- VP8_COMP *cpi
+ VP8_COMP *cpi,
+ MACROBLOCK *x
)
{
int pt; /* near block/prev token context index */
@@ -524,20 +529,21 @@ static void stuff1st_order_b
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
++t;
*tp = t;
pt = 0; /* 0 <-> all coeff data is zero */
*a = *l = pt;
-
}
+
static
void stuff1st_order_buv
(
TOKENEXTRA **tp,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
- VP8_COMP *cpi
+ VP8_COMP *cpi,
+ MACROBLOCK *x
)
{
int pt; /* near block/prev token context index */
@@ -547,38 +553,38 @@ void stuff1st_order_buv
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->skip_eob_node = 0;
- ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];
+ ++x->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];
++t;
*tp = t;
pt = 0; /* 0 <-> all coeff data is zero */
*a = *l = pt;
-
}
-void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
- ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
- ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
+ MACROBLOCKD *xd = &x->e_mbd;
+ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context;
+ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context;
int plane_type;
int b;
plane_type = 3;
- if((x->mode_info_context->mbmi.mode != B_PRED
- && x->mode_info_context->mbmi.mode != SPLITMV))
+ if((xd->mode_info_context->mbmi.mode != B_PRED
+ && xd->mode_info_context->mbmi.mode != SPLITMV))
{
stuff2nd_order_b(t,
- A + vp8_block2above[24], L + vp8_block2left[24], cpi);
+ A + vp8_block2above[24], L + vp8_block2left[24], cpi, x);
plane_type = 0;
}
for (b = 0; b < 16; b++)
stuff1st_order_b(t,
A + vp8_block2above[b],
- L + vp8_block2left[b], plane_type, cpi);
+ L + vp8_block2left[b], plane_type, cpi, x);
for (b = 16; b < 24; b++)
stuff1st_order_buv(t,
A + vp8_block2above[b],
- L + vp8_block2left[b], cpi);
+ L + vp8_block2left[b], cpi, x);
}
void vp8_fix_contexts(MACROBLOCKD *x)
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index f07b030..6f188cb 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
-global sym(vp8_short_fdct4x4_mmx)
+global sym(vp8_short_fdct4x4_mmx) PRIVATE
sym(vp8_short_fdct4x4_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 3d52a5d..d880ce0 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -61,7 +61,7 @@
%endmacro
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_fdct4x4_sse2)
+global sym(vp8_short_fdct4x4_sse2) PRIVATE
sym(vp8_short_fdct4x4_sse2):
STACK_FRAME_CREATE
@@ -166,7 +166,7 @@ sym(vp8_short_fdct4x4_sse2):
STACK_FRAME_DESTROY
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_fdct8x4_sse2)
+global sym(vp8_short_fdct8x4_sse2) PRIVATE
sym(vp8_short_fdct8x4_sse2):
STACK_FRAME_CREATE
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
new file mode 100644
index 0000000..c1ac6c1
--- /dev/null
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp8/encoder/denoising.h"
+#include "vp8/common/reconinter.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_rtcd.h"
+
+#include <emmintrin.h>
+
+union sum_union {
+ __m128i v;
+ signed char e[16];
+};
+
+int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
+ YV12_BUFFER_CONFIG *running_avg,
+ MACROBLOCK *signal, unsigned int motion_magnitude,
+ int y_offset, int uv_offset)
+{
+ unsigned char *sig = signal->thismb;
+ int sig_stride = 16;
+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
+ int mc_avg_y_stride = mc_running_avg->y_stride;
+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
+ int avg_y_stride = running_avg->y_stride;
+ int r;
+ __m128i acc_diff = _mm_setzero_si128();
+ const __m128i k_0 = _mm_setzero_si128();
+ const __m128i k_4 = _mm_set1_epi8(4);
+ const __m128i k_8 = _mm_set1_epi8(8);
+ const __m128i k_16 = _mm_set1_epi8(16);
+ /* Modify each level's adjustment according to motion_magnitude. */
+ const __m128i l3 = _mm_set1_epi8(
+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6);
+ /* Difference between level 3 and level 2 is 2. */
+ const __m128i l32 = _mm_set1_epi8(2);
+ /* Difference between level 2 and level 1 is 1. */
+ const __m128i l21 = _mm_set1_epi8(1);
+
+ for (r = 0; r < 16; ++r)
+ {
+ /* Calculate differences */
+ const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
+ const __m128i v_mc_running_avg_y = _mm_loadu_si128(
+ (__m128i *)(&mc_running_avg_y[0]));
+ __m128i v_running_avg_y;
+ const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
+ const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
+ /* Obtain the sign. FF if diff is negative. */
+ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
+ /* Clamp absolute difference to 16 to be used to get mask. Doing this
+ * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
+ const __m128i clamped_absdiff = _mm_min_epu8(
+ _mm_or_si128(pdiff, ndiff), k_16);
+ /* Get masks for l2 l1 and l0 adjustments */
+ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
+ const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
+ const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
+ /* Get adjustments for l2, l1, and l0 */
+ __m128i adj2 = _mm_and_si128(mask2, l32);
+ const __m128i adj1 = _mm_and_si128(mask1, l21);
+ const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
+ __m128i adj, padj, nadj;
+
+ /* Combine the adjustments and get absolute adjustments. */
+ adj2 = _mm_add_epi8(adj2, adj1);
+ adj = _mm_sub_epi8(l3, adj2);
+ adj = _mm_andnot_si128(mask0, adj);
+ adj = _mm_or_si128(adj, adj0);
+
+ /* Restore the sign and get positive and negative adjustments. */
+ padj = _mm_andnot_si128(diff_sign, adj);
+ nadj = _mm_and_si128(diff_sign, adj);
+
+ /* Calculate filtered value. */
+ v_running_avg_y = _mm_adds_epu8(v_sig, padj);
+ v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj);
+ _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
+
+ /* Adjustments <=7, and each element in acc_diff can fit in signed
+ * char.
+ */
+ acc_diff = _mm_adds_epi8(acc_diff, padj);
+ acc_diff = _mm_subs_epi8(acc_diff, nadj);
+
+ /* Update pointers for next iteration. */
+ sig += sig_stride;
+ mc_running_avg_y += mc_avg_y_stride;
+ running_avg_y += avg_y_stride;
+ }
+
+ {
+ /* Compute the sum of all pixel differences of this MB. */
+ union sum_union s;
+ int sum_diff = 0;
+ s.v = acc_diff;
+ sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5]
+ + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
+ + s.e[12] + s.e[13] + s.e[14] + s.e[15];
+
+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+ {
+ return COPY_BLOCK;
+ }
+ }
+
+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride,
+ signal->thismb, sig_stride);
+ return FILTER_BLOCK;
+}
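
For readability, the per-pixel rule the SSE2 kernel above implements can be restated in scalar form: each output pixel is the input signal nudged toward the motion-compensated running average by an amount that grows with the absolute difference, capped at l3 (7, or 6 when motion is large). A sketch, not a drop-in replacement:

    static unsigned char denoise_pixel(unsigned char sig, unsigned char mc_avg,
                                       int low_motion)
    {
        int diff = (int)mc_avg - (int)sig;
        int absdiff = diff < 0 ? -diff : diff;
        int l3 = low_motion ? 7 : 6; /* the MOTION_MAGNITUDE_THRESHOLD test */
        int adj, out;

        if (absdiff < 4)
            adj = absdiff;           /* take small differences fully */
        else if (absdiff < 8)
            adj = l3 - 3;
        else if (absdiff < 16)
            adj = l3 - 2;
        else
            adj = l3;                /* cap the correction */

        out = diff < 0 ? (int)sig - adj : (int)sig + adj;
        if (out < 0)   out = 0;      /* the SIMD code saturates instead */
        if (out > 255) out = 255;
        return (unsigned char)out;
    }

The signed adjustments are also accumulated (acc_diff above); when their total magnitude exceeds SUM_DIFF_THRESHOLD the block is judged poorly predicted and COPY_BLOCK is returned instead of filtering.
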
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 7ec7d60..fe26b18 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
-global sym(vp8_block_error_xmm)
+global sym(vp8_block_error_xmm) PRIVATE
sym(vp8_block_error_xmm):
push rbp
mov rbp, rsp
@@ -60,7 +60,7 @@ sym(vp8_block_error_xmm):
ret
;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
-global sym(vp8_block_error_mmx)
+global sym(vp8_block_error_mmx) PRIVATE
sym(vp8_block_error_mmx):
push rbp
mov rbp, rsp
@@ -126,7 +126,7 @@ sym(vp8_block_error_mmx):
;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-global sym(vp8_mbblock_error_mmx_impl)
+global sym(vp8_mbblock_error_mmx_impl) PRIVATE
sym(vp8_mbblock_error_mmx_impl):
push rbp
mov rbp, rsp
@@ -203,7 +203,7 @@ sym(vp8_mbblock_error_mmx_impl):
;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-global sym(vp8_mbblock_error_xmm_impl)
+global sym(vp8_mbblock_error_xmm_impl) PRIVATE
sym(vp8_mbblock_error_xmm_impl):
push rbp
mov rbp, rsp
@@ -273,7 +273,7 @@ sym(vp8_mbblock_error_xmm_impl):
;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-global sym(vp8_mbuverror_mmx_impl)
+global sym(vp8_mbuverror_mmx_impl) PRIVATE
sym(vp8_mbuverror_mmx_impl):
push rbp
mov rbp, rsp
@@ -330,7 +330,7 @@ sym(vp8_mbuverror_mmx_impl):
;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-global sym(vp8_mbuverror_xmm_impl)
+global sym(vp8_mbuverror_xmm_impl) PRIVATE
sym(vp8_mbuverror_xmm_impl):
push rbp
mov rbp, rsp
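The PRIVATE suffix added to every `global sym(...)` in this and the following assembly files comes from vpx_ports/x86_abi_support.asm; on ELF targets it expands to roughly `:function hidden`, giving the hand-written kernels hidden visibility so they no longer appear in the dynamic symbol table of a shared libvpx build. The same one-line change repeats through the quantize, ssim, subtract, and temporal-filter files below.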
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 71efd56..f498927 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_walsh4x4_sse2)
+global sym(vp8_short_walsh4x4_sse2) PRIVATE
sym(vp8_short_walsh4x4_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index f29a54e..2864ce1 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -15,7 +15,7 @@
; short *qcoeff_ptr,short *dequant_ptr,
; short *scan_mask, short *round_ptr,
; short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp8_fast_quantize_b_impl_mmx)
+global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
sym(vp8_fast_quantize_b_impl_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index 7c249ff..724e54c 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -16,7 +16,7 @@
; (BLOCK *b, | 0
; BLOCKD *d) | 1
-global sym(vp8_regular_quantize_b_sse2)
+global sym(vp8_regular_quantize_b_sse2) PRIVATE
sym(vp8_regular_quantize_b_sse2):
push rbp
mov rbp, rsp
@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15
; (BLOCK *b, | 0
; BLOCKD *d) | 1
-global sym(vp8_fast_quantize_b_sse2)
+global sym(vp8_fast_quantize_b_sse2) PRIVATE
sym(vp8_fast_quantize_b_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
index 70eac0c..f0e5d40 100644
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -16,7 +16,7 @@
; (BLOCK *b, | 0
; BLOCKD *d) | 1
-global sym(vp8_regular_quantize_b_sse4)
+global sym(vp8_regular_quantize_b_sse4) PRIVATE
sym(vp8_regular_quantize_b_sse4):
%if ABI_IS_32BIT
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index e698e90..dd526f4 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -17,7 +17,7 @@
; BLOCKD *d) | 1
;
-global sym(vp8_fast_quantize_b_ssse3)
+global sym(vp8_fast_quantize_b_ssse3) PRIVATE
sym(vp8_fast_quantize_b_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm
index c6db3d1..5964a85 100644
--- a/vp8/encoder/x86/ssim_opt.asm
+++ b/vp8/encoder/x86/ssim_opt.asm
@@ -61,7 +61,7 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_16x16_sse2)
+global sym(vp8_ssim_parms_16x16_sse2) PRIVATE
sym(vp8_ssim_parms_16x16_sse2):
push rbp
mov rbp, rsp
@@ -151,7 +151,7 @@ sym(vp8_ssim_parms_16x16_sse2):
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_8x8_sse2)
+global sym(vp8_ssim_parms_8x8_sse2) PRIVATE
sym(vp8_ssim_parms_8x8_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index 75e8aa3..794dd22 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -14,7 +14,7 @@
;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
-global sym(vp8_subtract_b_mmx_impl)
+global sym(vp8_subtract_b_mmx_impl) PRIVATE
sym(vp8_subtract_b_mmx_impl):
push rbp
mov rbp, rsp
@@ -75,7 +75,7 @@ sym(vp8_subtract_b_mmx_impl):
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;unsigned char *pred, int pred_stride)
-global sym(vp8_subtract_mby_mmx)
+global sym(vp8_subtract_mby_mmx) PRIVATE
sym(vp8_subtract_mby_mmx):
push rbp
mov rbp, rsp
@@ -150,7 +150,7 @@ sym(vp8_subtract_mby_mmx):
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
-global sym(vp8_subtract_mbuv_mmx)
+global sym(vp8_subtract_mbuv_mmx) PRIVATE
sym(vp8_subtract_mbuv_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index 008e9c7..a5d17f5 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -14,7 +14,7 @@
;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
-global sym(vp8_subtract_b_sse2_impl)
+global sym(vp8_subtract_b_sse2_impl) PRIVATE
sym(vp8_subtract_b_sse2_impl):
push rbp
mov rbp, rsp
@@ -73,7 +73,7 @@ sym(vp8_subtract_b_sse2_impl):
;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
;unsigned char *pred, int pred_stride)
-global sym(vp8_subtract_mby_sse2)
+global sym(vp8_subtract_mby_sse2) PRIVATE
sym(vp8_subtract_mby_sse2):
push rbp
mov rbp, rsp
@@ -146,7 +146,7 @@ sym(vp8_subtract_mby_sse2):
;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
-global sym(vp8_subtract_mbuv_sse2)
+global sym(vp8_subtract_mbuv_sse2) PRIVATE
sym(vp8_subtract_mbuv_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
index b97c694..ce9d983 100644
--- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm
+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
@@ -20,7 +20,7 @@
; int filter_weight, | 5
; unsigned int *accumulator, | 6
; unsigned short *count) | 7
-global sym(vp8_temporal_filter_apply_sse2)
+global sym(vp8_temporal_filter_apply_sse2) PRIVATE
sym(vp8_temporal_filter_apply_sse2):
push rbp
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 3a7b146..a328f46 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -30,7 +30,6 @@ VP8_COMMON_SRCS-yes += common/findnearmv.c
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
VP8_COMMON_SRCS-yes += common/idct_blk.c
VP8_COMMON_SRCS-yes += common/idctllm.c
-VP8_COMMON_SRCS-yes += common/idctllm_test.cc
VP8_COMMON_SRCS-yes += common/alloccommon.h
VP8_COMMON_SRCS-yes += common/blockd.h
VP8_COMMON_SRCS-yes += common/common.h
@@ -85,7 +84,6 @@ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
-VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx_test.cc
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
@@ -122,6 +120,14 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm
endif
# common (c)
+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c
+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c
+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/loopfilter_filters_dspr2.c
+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c
+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c
+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c
+
+# common (c)
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c
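These additions follow the build system's usual conditional-list convention: when configure detects a dspr2-capable MIPS target it sets HAVE_DSPR2=yes, so `VP8_COMMON_SRCS-$(HAVE_DSPR2)` expands to VP8_COMMON_SRCS-yes and the files join the build; on any other target the lines expand to VP8_COMMON_SRCS-no and are discarded.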
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 5fb74c4..eeac3a8 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -9,6 +9,7 @@
*/
+#include "vpx_rtcd.h"
#include "vpx/vpx_codec.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
@@ -22,7 +23,6 @@
struct vp8_extracfg
{
struct vpx_codec_pkt_list *pkt_list;
- vp8e_encoding_mode encoding_mode; /** best, good, realtime */
int cpu_used; /** available cpu percentage in 1/16*/
unsigned int enable_auto_alt_ref; /** if encoder decides to uses alternate reference frame */
unsigned int noise_sensitivity;
@@ -51,10 +51,8 @@ static const struct extraconfig_map extracfg_map[] =
{
NULL,
#if !(CONFIG_REALTIME_ONLY)
- VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */
0, /* cpu_used */
#else
- VP8_REAL_TIME_ENCODING, /* Encoding Mode */
4, /* cpu_used */
#endif
0, /* enable_auto_alt_ref */
@@ -88,7 +86,8 @@ struct vpx_codec_alg_priv
vpx_image_t preview_img;
unsigned int next_frame_flag;
vp8_postproc_cfg_t preview_ppcfg;
- vpx_codec_pkt_list_decl(64) pkt_list; // changed to accomendate the maximum number of lagged frames allowed
+ /* pkt_list size depends on the maximum number of lagged frames allowed. */
+ vpx_codec_pkt_list_decl(64) pkt_list;
unsigned int fixed_kf_cntr;
};
@@ -146,25 +145,39 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
RANGE_CHECK_HI(cfg, g_threads, 64);
-#if !(CONFIG_REALTIME_ONLY)
- RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
-#else
+#if CONFIG_REALTIME_ONLY
RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
+#elif CONFIG_MULTI_RES_ENCODING
+ if (ctx->base.enc.total_encoders > 1)
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
+#else
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
#endif
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
- //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile);
- RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
+
+/* TODO: add spatial re-sampling and frame-dropping support to the
+ * multi-res encoder. */
+#if CONFIG_MULTI_RES_ENCODING
+ if (ctx->base.enc.total_encoders > 1)
+ RANGE_CHECK_HI(cfg, rc_resize_allowed, 0);
+#else
+ RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
+#endif
RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100);
RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100);
RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
-#if !(CONFIG_REALTIME_ONLY)
- RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
-#else
+
+#if CONFIG_REALTIME_ONLY
RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS);
+#elif CONFIG_MULTI_RES_ENCODING
+ if (ctx->base.enc.total_encoders > 1)
+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS);
+#else
+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
#endif
/* VP8 does not support a lower bound on the keyframe interval in
@@ -177,11 +190,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref);
RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
-#if !(CONFIG_REALTIME_ONLY)
- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING);
-#else
- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING);
-#endif
#if CONFIG_REALTIME_ONLY && !CONFIG_TEMPORAL_DENOISING
RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0);
@@ -189,7 +197,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
#endif
- RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
+ RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION,
+ VP8_EIGHT_TOKENPARTITION);
RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
@@ -203,7 +212,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
if (cfg->g_pass == VPX_RC_LAST_PASS)
{
size_t packet_sz = sizeof(FIRSTPASS_STATS);
- int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
+ int n_packets = (int)(cfg->rc_twopass_stats_in.sz /
+ packet_sz);
FIRSTPASS_STATS *stats;
if (!cfg->rc_twopass_stats_in.buf)
@@ -227,7 +237,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
if (cfg->ts_number_layers > 1)
{
- int i;
+ unsigned int i;
RANGE_CHECK_HI(cfg, ts_periodicity, 16);
for (i=1; i<cfg->ts_number_layers; i++)
@@ -299,7 +309,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
break;
}
- if (cfg.g_pass == VPX_RC_FIRST_PASS)
+ if (cfg.g_pass == VPX_RC_FIRST_PASS || cfg.g_pass == VPX_RC_ONE_PASS)
{
oxcf->allow_lag = 0;
oxcf->lag_in_frames = 0;
@@ -355,7 +365,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO
&& cfg.kf_min_dist != cfg.kf_max_dist;
- //oxcf->kf_min_dist = cfg.kf_min_dis;
oxcf->key_freq = cfg.kf_max_dist;
oxcf->number_of_layers = cfg.ts_number_layers;
@@ -385,9 +394,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
}
#endif
- //oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile;
- //strcpy(oxcf->first_pass_file, cfg.g_firstpass_file);
-
oxcf->cpu_used = vp8_cfg.cpu_used;
oxcf->encode_breakout = vp8_cfg.static_thresh;
oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref;
@@ -447,7 +453,7 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx,
vpx_codec_err_t res;
if (((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h))
- && cfg->g_lag_in_frames > 1)
+ && (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS))
ERROR("Cannot change width or height after initialization");
/* Prevent increasing lag_in_frames. This check is stricter than it needs
@@ -542,19 +548,27 @@ static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg,
vpx_codec_err_t res = 0;
#if CONFIG_MULTI_RES_ENCODING
+ LOWER_RES_FRAME_INFO *shared_mem_loc;
int mb_rows = ((cfg->g_w + 15) >>4);
int mb_cols = ((cfg->g_h + 15) >>4);
- *mem_loc = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_INFO));
- if(!(*mem_loc))
+ shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO));
+ if(!shared_mem_loc)
+ {
+ res = VPX_CODEC_MEM_ERROR;
+ }
+
+ shared_mem_loc->mb_info = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_MB_INFO));
+ if(!(shared_mem_loc->mb_info))
{
- free(*mem_loc);
res = VPX_CODEC_MEM_ERROR;
}
else
+ {
+ *mem_loc = (void *)shared_mem_loc;
res = VPX_CODEC_OK;
+ }
#endif
-
return res;
}
@@ -568,6 +582,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
struct VP8_COMP *optr;
+ vpx_rtcd();
+
if (!ctx->priv)
{
priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
@@ -616,15 +632,15 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
return VPX_CODEC_MEM_ERROR;
}
+ if(mr_cfg)
+ ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions;
+ else
+ ctx->priv->enc.total_encoders = 1;
+
res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0);
if (!res)
{
- if(mr_cfg)
- ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions;
- else
- ctx->priv->enc.total_encoders = 1;
-
set_vp8e_config(&ctx->priv->alg_priv->oxcf,
ctx->priv->alg_priv->cfg,
ctx->priv->alg_priv->vp8_cfg,
@@ -647,7 +663,11 @@ static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx)
#if CONFIG_MULTI_RES_ENCODING
/* Free multi-encoder shared memory */
if (ctx->oxcf.mr_total_resolutions > 0 && (ctx->oxcf.mr_encoder_id == ctx->oxcf.mr_total_resolutions-1))
+ {
+ LOWER_RES_FRAME_INFO *shared_mem_loc = (LOWER_RES_FRAME_INFO *)ctx->oxcf.mr_low_res_mode_info;
+ free(shared_mem_loc->mb_info);
free(ctx->oxcf.mr_low_res_mode_info);
+ }
#endif
free(ctx->cx_data);
@@ -673,7 +693,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
- yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); //REG_YUV = 0
+ yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
return res;
}
@@ -733,6 +753,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
if (!ctx->cfg.rc_target_bitrate)
return res;
+ if (!ctx->cfg.rc_target_bitrate)
+ return res;
+
if (img)
res = validate_img(ctx, img);
@@ -756,13 +779,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
int ref = 7;
if (flags & VP8_EFLAG_NO_REF_LAST)
- ref ^= VP8_LAST_FLAG;
+ ref ^= VP8_LAST_FRAME;
if (flags & VP8_EFLAG_NO_REF_GF)
- ref ^= VP8_GOLD_FLAG;
+ ref ^= VP8_GOLD_FRAME;
if (flags & VP8_EFLAG_NO_REF_ARF)
- ref ^= VP8_ALT_FLAG;
+ ref ^= VP8_ALTR_FRAME;
vp8_use_as_reference(ctx->cpi, ref);
}
@@ -774,13 +797,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
int upd = 7;
if (flags & VP8_EFLAG_NO_UPD_LAST)
- upd ^= VP8_LAST_FLAG;
+ upd ^= VP8_LAST_FRAME;
if (flags & VP8_EFLAG_NO_UPD_GF)
- upd ^= VP8_GOLD_FLAG;
+ upd ^= VP8_GOLD_FRAME;
if (flags & VP8_EFLAG_NO_UPD_ARF)
- upd ^= VP8_ALT_FLAG;
+ upd ^= VP8_ALTR_FRAME;
vp8_update_reference(ctx->cpi, upd);
}
@@ -869,15 +892,16 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
/* Add the frame packet to the list of returned packets. */
- round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1;
+ round = (vpx_codec_pts_t)1000000
+ * ctx->cfg.g_timebase.num / 2 - 1;
delta = (dst_end_time_stamp - dst_time_stamp);
pkt.kind = VPX_CODEC_CX_FRAME_PKT;
pkt.data.frame.pts =
(dst_time_stamp * ctx->cfg.g_timebase.den + round)
/ ctx->cfg.g_timebase.num / 10000000;
- pkt.data.frame.duration =
- (delta * ctx->cfg.g_timebase.den + round)
- / ctx->cfg.g_timebase.num / 10000000;
+ pkt.data.frame.duration = (unsigned long)
+ ((delta * ctx->cfg.g_timebase.den + round)
+ / ctx->cfg.g_timebase.num / 10000000);
pkt.data.frame.flags = lib_flags << 16;
if (lib_flags & FRAMEFLAGS_KEY)
@@ -887,10 +911,11 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
{
pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
- // This timestamp should be as close as possible to the
- // prior PTS so that if a decoder uses pts to schedule when
- // to do this, we start right after last frame was decoded.
- // Invisible frames have no duration.
+ /* This timestamp should be as close as possible to the
+ * prior PTS so that if a decoder uses pts to schedule when
+ * to do this, we start right after last frame was decoded.
+ * Invisible frames have no duration.
+ */
pkt.data.frame.pts = ((cpi->last_time_stamp_seen
* ctx->cfg.g_timebase.den + round)
/ ctx->cfg.g_timebase.num / 10000000) + 1;
@@ -942,8 +967,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
cx_data += size;
cx_data_sz -= size;
}
-
- //printf("timestamp: %lld, duration: %d\n", pkt->data.frame.pts, pkt->data.frame.duration);
}
}
}
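The multi-resolution hunks above replace the flat LOWER_RES_INFO array with a two-level structure. A defensively ordered sketch of the intended allocate/free pairing (illustrative only; note the hunk as committed still falls through to the mb_info calloc when the outer calloc fails):

    LOWER_RES_FRAME_INFO *info = calloc(1, sizeof(*info));
    if (!info)
        return VPX_CODEC_MEM_ERROR;
    info->mb_info = calloc(mb_rows * mb_cols, sizeof(LOWER_RES_MB_INFO));
    if (!info->mb_info)
    {
        free(info);
        return VPX_CODEC_MEM_ERROR;
    }
    *mem_loc = (void *)info;
    /* ...and in vp8e_destroy, the last encoder frees both levels: */
    free(shared_mem_loc->mb_info);
    free(ctx->oxcf.mr_low_res_mode_info);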
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 37773db..c13d697 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -11,12 +11,19 @@
#include <stdlib.h>
#include <string.h>
+#include "vpx_rtcd.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
#include "common/onyxd.h"
#include "decoder/onyxd_int.h"
+#include "common/alloccommon.h"
+#include "vpx_mem/vpx_mem.h"
+#if CONFIG_ERROR_CONCEALMENT
+#include "decoder/error_concealment.h"
+#endif
+#include "decoder/decoderthreading.h"
#define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
#define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \
@@ -69,7 +76,7 @@ struct vpx_codec_alg_priv
#endif
vpx_image_t img;
int img_setup;
- int img_avail;
+ void *user_priv;
};
static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags)
@@ -187,6 +194,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
vpx_codec_err_t res = VPX_CODEC_OK;
(void) data;
+ vpx_rtcd();
+
/* This function only allocates space for the vpx_codec_alg_priv_t
* structure. More memory may be required at the time the stream
* information becomes known.
@@ -341,16 +350,30 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
long deadline)
{
vpx_codec_err_t res = VPX_CODEC_OK;
-
- ctx->img_avail = 0;
+ unsigned int resolution_change = 0;
+ unsigned int w, h;
/* Determine the stream parameters. Note that we rely on peek_si to
* validate that we have a buffer that does not wrap around the top
* of the heap.
*/
- if (!ctx->si.h)
- res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si);
+ w = ctx->si.w;
+ h = ctx->si.h;
+
+ res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si);
+
+ if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf)
+ {
+ /* The peek function returns an error for non-keyframes; in this
+ * case, however, that is not an error. */
+ res = VPX_CODEC_OK;
+ }
+
+ if(!ctx->decoder_init && !ctx->si.is_kf)
+ res = VPX_CODEC_UNSUP_BITSTREAM;
+ if ((ctx->si.h != h) || (ctx->si.w != w))
+ resolution_change = 1;
/* Perform deferred allocations, if required */
if (!res && ctx->defer_alloc)
@@ -426,6 +449,122 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
if (!res && ctx->pbi)
{
+ if(resolution_change)
+ {
+ VP8D_COMP *pbi = ctx->pbi;
+ VP8_COMMON *const pc = & pbi->common;
+ MACROBLOCKD *const xd = & pbi->mb;
+#if CONFIG_MULTITHREAD
+ int i;
+#endif
+ pc->Width = ctx->si.w;
+ pc->Height = ctx->si.h;
+ {
+ int prev_mb_rows = pc->mb_rows;
+
+ if (setjmp(pbi->common.error.jmp))
+ {
+ pbi->common.error.setjmp = 0;
+ /* same return value as used in vp8dx_receive_compressed_data */
+ return -1;
+ }
+
+ pbi->common.error.setjmp = 1;
+
+ if (pc->Width <= 0)
+ {
+ pc->Width = w;
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame width");
+ }
+
+ if (pc->Height <= 0)
+ {
+ pc->Height = h;
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame height");
+ }
+
+ if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffers");
+
+ xd->pre = pc->yv12_fb[pc->lst_fb_idx];
+ xd->dst = pc->yv12_fb[pc->new_fb_idx];
+
+#if CONFIG_MULTITHREAD
+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ {
+ pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx];
+ vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd);
+ }
+#endif
+ vp8_build_block_doffsets(&pbi->mb);
+
+ /* allocate memory for last frame MODE_INFO array */
+#if CONFIG_ERROR_CONCEALMENT
+
+ if (pbi->ec_enabled)
+ {
+ /* old prev_mip was released by vp8_de_alloc_frame_buffers()
+ * called in vp8_alloc_frame_buffers() */
+ pc->prev_mip = vpx_calloc(
+ (pc->mb_cols + 1) * (pc->mb_rows + 1),
+ sizeof(MODE_INFO));
+
+ if (!pc->prev_mip)
+ {
+ vp8_de_alloc_frame_buffers(pc);
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate"
+ "last frame MODE_INFO array");
+ }
+
+ pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1;
+
+ if (vp8_alloc_overlap_lists(pbi))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate overlap lists "
+ "for error concealment");
+ }
+
+#endif
+
+#if CONFIG_MULTITHREAD
+ if (pbi->b_multithreaded_rd)
+ vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
+#else
+ (void)prev_mb_rows;
+#endif
+ }
+
+ pbi->common.error.setjmp = 0;
+
+ /* required to get past the first get_free_fb() call */
+ ctx->pbi->common.fb_idx_ref_cnt[0] = 0;
+ }
+
+ ctx->user_priv = user_priv;
+ if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline))
+ {
+ VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
+ res = update_error_state(ctx, &pbi->common.error);
+ }
+ }
+
+ return res;
+}
+
+static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx,
+ vpx_codec_iter_t *iter)
+{
+ vpx_image_t *img = NULL;
+
+ /* iter acts as a flip flop, so an image is only returned on the first
+ * call to get_frame.
+ */
+ if (!(*iter))
+ {
YV12_BUFFER_CONFIG sd;
int64_t time_stamp = 0, time_end_stamp = 0;
vp8_ppflags_t flags = {0};
@@ -451,34 +590,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
#endif
}
- if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline))
- {
- VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
- res = update_error_state(ctx, &pbi->common.error);
- }
-
- if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags))
+ if (0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags))
{
- yuvconfig2image(&ctx->img, &sd, user_priv);
- ctx->img_avail = 1;
- }
- }
+ yuvconfig2image(&ctx->img, &sd, ctx->user_priv);
- return res;
-}
-
-static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx,
- vpx_codec_iter_t *iter)
-{
- vpx_image_t *img = NULL;
-
- if (ctx->img_avail)
- {
- /* iter acts as a flip flop, so an image is only returned on the first
- * call to get_frame.
- */
- if (!(*iter))
- {
img = &ctx->img;
*iter = img;
}
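With the frame-retrieval logic moved out of vp8_decode, callers see the standard two-step pattern: vpx_codec_decode() consumes one compressed frame and vpx_codec_get_frame() drains at most one image per iterator pass. A minimal usage sketch (write_image is a hypothetical sink):

    vpx_codec_iter_t iter = NULL;
    vpx_image_t *img;

    if (vpx_codec_decode(&decoder, frame_buf, frame_sz, NULL, 0))
        die_codec(&decoder, "Failed to decode frame");
    /* iter acts as a flip-flop: one image, then NULL. */
    while ((img = vpx_codec_get_frame(&decoder, &iter)) != NULL)
        write_image(img);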
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 019edbd..0ae2f10 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -22,16 +22,9 @@ ifeq ($(ARCH_ARM),yes)
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk
endif
-VP8_CX_SRCS-yes += vp8_cx_iface.c
+VP8_CX_SRCS-yes += vp8cx.mk
-# encoder
-#INCLUDES += algo/vpx_common/vpx_mem/include
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += algo/vpx_ref/cpu_id/include
-#INCLUDES += common
-#INCLUDES += encoder
+VP8_CX_SRCS-yes += vp8_cx_iface.c
VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c
VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h
@@ -99,6 +92,14 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+
+ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
+ifeq ($(HAVE_SSE2),yes)
+vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2
+endif
+endif
+
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index b16615d..b030ee5 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -9,7 +9,7 @@
##
-#VP8_CX_SRCS list is modified according to different platforms.
+VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk
#File list for arm
# encoder
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 2cfd280..dd39190 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -18,6 +18,8 @@ VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no)
VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no)
+VP8_DX_SRCS-yes += vp8dx.mk
+
VP8_DX_SRCS-yes += vp8_dx_iface.c
# common
diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c
index 78f50c2..eae36a4 100644
--- a/vp8_multi_resolution_encoder.c
+++ b/vp8_multi_resolution_encoder.c
@@ -164,7 +164,7 @@ static void write_ivf_file_header(FILE *outfile,
mem_put_le32(header+24, frame_cnt); /* length */
mem_put_le32(header+28, 0); /* unused */
- if(fwrite(header, 1, 32, outfile));
+ (void) fwrite(header, 1, 32, outfile);
}
static void write_ivf_frame_header(FILE *outfile,
@@ -181,7 +181,7 @@ static void write_ivf_frame_header(FILE *outfile,
mem_put_le32(header+4, pts&0xFFFFFFFF);
mem_put_le32(header+8, pts >> 32);
- if(fwrite(header, 1, 12, outfile));
+ (void) fwrite(header, 1, 12, outfile);
}
int main(int argc, char **argv)
@@ -273,7 +273,7 @@ int main(int argc, char **argv)
cfg[0].g_w = width;
cfg[0].g_h = height;
cfg[0].g_threads = 1; /* number of threads used */
- cfg[0].rc_dropframe_thresh = 0;
+ cfg[0].rc_dropframe_thresh = 30;
cfg[0].rc_end_usage = VPX_CBR;
cfg[0].rc_resize_allowed = 0;
cfg[0].rc_min_quantizer = 4;
@@ -283,13 +283,17 @@ int main(int argc, char **argv)
cfg[0].rc_buf_initial_sz = 500;
cfg[0].rc_buf_optimal_sz = 600;
cfg[0].rc_buf_sz = 1000;
- //cfg[0].rc_dropframe_thresh = 10;
cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
cfg[0].g_lag_in_frames = 0;
/* Disable automatic keyframe placement */
+ /* Note: These 3 settings are copied to all levels. However, internally
+ * every level except the lowest-resolution one is set to VPX_KF_DISABLED.
+ */
//cfg[0].kf_mode = VPX_KF_DISABLED;
- cfg[0].kf_min_dist = cfg[0].kf_max_dist = 1000;
+ cfg[0].kf_mode = VPX_KF_AUTO;
+ cfg[0].kf_min_dist = 3000;
+ cfg[0].kf_max_dist = 3000;
cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */
cfg[0].g_timebase.num = 1; /* Set fps */
@@ -361,6 +365,12 @@ int main(int argc, char **argv)
if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, static_thresh))
die_codec(&codec[i], "Failed to set static threshold");
}
+ /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
+ for ( i=0; i< NUM_ENCODERS; i++)
+ {
+ if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
+ die_codec(&codec[i], "Failed to set noise_sensitivity");
+ }
frame_avail = 1;
got_data = 0;
@@ -405,8 +415,8 @@ int main(int argc, char **argv)
switch(pkt[i]->kind) {
case VPX_CODEC_CX_FRAME_PKT:
write_ivf_frame_header(outfile[i], pkt[i]);
- if(fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz,
- outfile[i]));
+ (void) fwrite(pkt[i]->data.frame.buf, 1,
+ pkt[i]->data.frame.sz, outfile[i]);
break;
case VPX_CODEC_PSNR_PKT:
if (show_psnr)
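The fwrite changes in this and the next example replace a construct whose `if` guarded only an empty statement; the (void) cast keeps the deliberate discard of the return value explicit:

    if (fwrite(header, 1, 32, outfile));   /* old: dangling empty statement */
    (void) fwrite(header, 1, 32, outfile); /* new: result explicitly ignored */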
diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c
index 4311b1a..06270fe 100644
--- a/vp8_scalable_patterns.c
+++ b/vp8_scalable_patterns.c
@@ -93,7 +93,7 @@ static void write_ivf_file_header(FILE *outfile,
mem_put_le32(header+24, frame_cnt); /* length */
mem_put_le32(header+28, 0); /* unused */
- if(fwrite(header, 1, 32, outfile));
+ (void) fwrite(header, 1, 32, outfile);
}
@@ -111,10 +111,10 @@ static void write_ivf_frame_header(FILE *outfile,
mem_put_le32(header+4, pts&0xFFFFFFFF);
mem_put_le32(header+8, pts >> 32);
- if(fwrite(header, 1, 12, outfile));
+ (void) fwrite(header, 1, 12, outfile);
}
-static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3};
+static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
int main(int argc, char **argv) {
FILE *infile, *outfile[VPX_TS_MAX_LAYERS];
@@ -129,8 +129,8 @@ int main(int argc, char **argv) {
int got_data;
int flags = 0;
int i;
- int pts = 0; // PTS starts at 0
- int frame_duration = 1; // 1 timebase tick per frame
+ int pts = 0; /* PTS starts at 0 */
+ int frame_duration = 1; /* 1 timebase tick per frame */
int layering_mode = 0;
int frames_in_layer[VPX_TS_MAX_LAYERS] = {0};
@@ -138,7 +138,7 @@ int main(int argc, char **argv) {
int flag_periodicity;
int max_intra_size_pct;
- // Check usage and arguments
+ /* Check usage and arguments */
if (argc < 9)
die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
" <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
@@ -150,43 +150,43 @@ int main(int argc, char **argv) {
if (!sscanf(argv[7], "%d", &layering_mode))
die ("Invalid mode %s", argv[7]);
- if (layering_mode<0 || layering_mode>8)
- die ("Invalid mode (0..8) %s", argv[7]);
+ if (layering_mode<0 || layering_mode>11)
+ die ("Invalid mode (0..11) %s", argv[7]);
if (argc != 8+mode_to_num_layers[layering_mode])
die ("Invalid number of arguments");
- if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1))
+ if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32))
die ("Failed to allocate image", width, height);
printf("Using %s\n",vpx_codec_iface_name(interface));
- // Populate encoder configuration
+ /* Populate encoder configuration */
res = vpx_codec_enc_config_default(interface, &cfg, 0);
if(res) {
printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
return EXIT_FAILURE;
}
- // Update the default configuration with our settings
+ /* Update the default configuration with our settings */
cfg.g_w = width;
cfg.g_h = height;
- // Timebase format e.g. 30fps: numerator=1, demoninator=30
+ /* Timebase format e.g. 30fps: numerator=1, denominator=30 */
if (!sscanf (argv[5], "%d", &cfg.g_timebase.num ))
die ("Invalid timebase numerator %s", argv[5]);
if (!sscanf (argv[6], "%d", &cfg.g_timebase.den ))
die ("Invalid timebase denominator %s", argv[6]);
for (i=8; i<8+mode_to_num_layers[layering_mode]; i++)
- if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8]))
+ if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8]))
die ("Invalid data rate %s", argv[i]);
- // Real time parameters
- cfg.rc_dropframe_thresh = 0; // 30
+ /* Real time parameters */
+ cfg.rc_dropframe_thresh = 0;
cfg.rc_end_usage = VPX_CBR;
cfg.rc_resize_allowed = 0;
- cfg.rc_min_quantizer = 8;
+ cfg.rc_min_quantizer = 2;
cfg.rc_max_quantizer = 56;
cfg.rc_undershoot_pct = 100;
cfg.rc_overshoot_pct = 15;
@@ -194,25 +194,44 @@ int main(int argc, char **argv) {
cfg.rc_buf_optimal_sz = 600;
cfg.rc_buf_sz = 1000;
- // Enable error resilient mode
+ /* Enable error resilient mode */
cfg.g_error_resilient = 1;
cfg.g_lag_in_frames = 0;
cfg.kf_mode = VPX_KF_DISABLED;
- // Disable automatic keyframe placement
- cfg.kf_min_dist = cfg.kf_max_dist = 1000;
+ /* Disable automatic keyframe placement */
+ cfg.kf_min_dist = cfg.kf_max_dist = 3000;
- // Temporal scaling parameters:
- // NOTE: The 3 prediction frames cannot be used interchangeably due to
- // differences in the way they are handled throughout the code. The
- // frames should be allocated to layers in the order LAST, GF, ARF.
- // Other combinations work, but may produce slightly inferior results.
+ /* Default setting for bitrate: used in special case of 1 layer (case 0). */
+ cfg.rc_target_bitrate = cfg.ts_target_bitrate[0];
+
+ /* Temporal scaling parameters: */
+ /* NOTE: The 3 prediction frames cannot be used interchangeably due to
+ * differences in the way they are handled throughout the code. The
+ * frames should be allocated to layers in the order LAST, GF, ARF.
+ * Other combinations work, but may produce slightly inferior results.
+ */
switch (layering_mode)
{
-
case 0:
{
- // 2-layers, 2-frame period
+ /* 1-layer */
+ int ids[1] = {0};
+ cfg.ts_number_layers = 1;
+ cfg.ts_periodicity = 1;
+ cfg.ts_rate_decimator[0] = 1;
+ memcpy(cfg.ts_layer_id, ids, sizeof(ids));
+
+ flag_periodicity = cfg.ts_periodicity;
+
+ // Update L only.
+ layer_flags[0] = VPX_EFLAG_FORCE_KF |
+ VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+ break;
+ }
+ case 1:
+ {
+ /* 2-layers, 2-frame period */
int ids[2] = {0,1};
cfg.ts_number_layers = 2;
cfg.ts_periodicity = 2;
@@ -222,14 +241,14 @@ int main(int argc, char **argv) {
flag_periodicity = cfg.ts_periodicity;
#if 1
- // 0=L, 1=GF, Intra-layer prediction enabled
+ /* 0=L, 1=GF, Intra-layer prediction enabled */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_REF_ARF;
#else
- // 0=L, 1=GF, Intra-layer prediction disabled
+ /* 0=L, 1=GF, Intra-layer prediction disabled */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
@@ -239,9 +258,9 @@ int main(int argc, char **argv) {
break;
}
- case 1:
+ case 2:
{
- // 2-layers, 3-frame period
+ /* 2-layers, 3-frame period */
int ids[3] = {0,1,1};
cfg.ts_number_layers = 2;
cfg.ts_periodicity = 3;
@@ -251,7 +270,7 @@ int main(int argc, char **argv) {
flag_periodicity = cfg.ts_periodicity;
- // 0=L, 1=GF, Intra-layer prediction enabled
+ /* 0=L, 1=GF, Intra-layer prediction enabled */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -262,9 +281,9 @@ int main(int argc, char **argv) {
break;
}
- case 2:
+ case 3:
{
- // 3-layers, 6-frame period
+ /* 3-layers, 6-frame period */
int ids[6] = {0,2,2,1,2,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 6;
@@ -275,7 +294,7 @@ int main(int argc, char **argv) {
flag_periodicity = cfg.ts_periodicity;
- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -288,9 +307,9 @@ int main(int argc, char **argv) {
break;
}
- case 3:
+ case 4:
{
- // 3-layers, 4-frame period
+ /* 3-layers, 4-frame period */
int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
@@ -301,7 +320,7 @@ int main(int argc, char **argv) {
flag_periodicity = cfg.ts_periodicity;
- // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled
+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -315,9 +334,9 @@ int main(int argc, char **argv) {
break;
}
- case 4:
+ case 5:
{
- // 3-layers, 4-frame period
+ /* 3-layers, 4-frame period */
int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
@@ -328,8 +347,9 @@ int main(int argc, char **argv) {
flag_periodicity = cfg.ts_periodicity;
- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1,
- // disabled in layer 2
+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1,
+ * disabled in layer 2
+ */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -342,9 +362,9 @@ int main(int argc, char **argv) {
break;
}
- case 5:
+ case 6:
{
- // 3-layers, 4-frame period
+ /* 3-layers, 4-frame period */
int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
@@ -355,7 +375,7 @@ int main(int argc, char **argv) {
flag_periodicity = cfg.ts_periodicity;
- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -366,11 +386,11 @@ int main(int argc, char **argv) {
break;
}
- case 6:
+ case 7:
{
- // NOTE: Probably of academic interest only
+ /* NOTE: Probably of academic interest only */
- // 5-layers, 16-frame period
+ /* 5-layers, 16-frame period */
int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4};
cfg.ts_number_layers = 5;
cfg.ts_periodicity = 16;
@@ -405,9 +425,9 @@ int main(int argc, char **argv) {
break;
}
- case 7:
+ case 8:
{
- // 2-layers
+ /* 2-layers, with sync point at first frame of layer 1. */
int ids[2] = {0,1};
cfg.ts_number_layers = 2;
cfg.ts_periodicity = 2;
@@ -417,30 +437,49 @@ int main(int argc, char **argv) {
flag_periodicity = 8;
- // 0=L, 1=GF
+ /* 0=L, 1=GF */
+ // ARF is used as predictor for all frames, and is only updated on
+ // key frame. Sync point every 8 frames.
+
+ // Layer 0: predict from L and ARF, update L and G.
layer_flags[0] = VPX_EFLAG_FORCE_KF |
- VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
- layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
- layer_flags[2] =
- layer_flags[4] =
- layer_flags[6] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
- layer_flags[3] =
- layer_flags[5] = VP8_EFLAG_NO_REF_ARF |
- VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
- layer_flags[7] = VP8_EFLAG_NO_REF_ARF |
- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
- VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_REF_GF |
+ VP8_EFLAG_NO_UPD_ARF;
+
+ // Layer 1: sync point: predict from L and ARF, and update G.
+ layer_flags[1] = VP8_EFLAG_NO_REF_GF |
+ VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ARF;
+
+ // Layer 0, predict from L and ARF, update L.
+ layer_flags[2] = VP8_EFLAG_NO_REF_GF |
+ VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF;
+
+ // Layer 1: predict from L, G and ARF, and update G.
+ layer_flags[3] = VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_UPD_ENTROPY;
+
+ // Layer 0
+ layer_flags[4] = layer_flags[2];
+
+ // Layer 1
+ layer_flags[5] = layer_flags[3];
+
+ // Layer 0
+ layer_flags[6] = layer_flags[4];
+
+ // Layer 1
+ layer_flags[7] = layer_flags[5];
break;
}
- case 8:
- default:
+ case 9:
{
- // 3-layers
+ /* 3-layers */
+ // Sync points for layer 1 and 2 every 8 frames.
+
int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
@@ -451,7 +490,7 @@ int main(int argc, char **argv) {
flag_periodicity = 8;
- // 0=L, 1=GF, 2=ARF
+ /* 0=L, 1=GF, 2=ARF */
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -470,13 +509,109 @@ int main(int argc, char **argv) {
VP8_EFLAG_NO_UPD_ENTROPY;
break;
}
+ case 10:
+ {
+ // 3-layers structure where ARF is used as predictor for all frames,
+ // and is only updated on key frame.
+ // Sync points for layer 1 and 2 every 8 frames.
+
+ int ids[4] = {0,2,1,2};
+ cfg.ts_number_layers = 3;
+ cfg.ts_periodicity = 4;
+ cfg.ts_rate_decimator[0] = 4;
+ cfg.ts_rate_decimator[1] = 2;
+ cfg.ts_rate_decimator[2] = 1;
+ memcpy(cfg.ts_layer_id, ids, sizeof(ids));
+
+ flag_periodicity = 8;
+
+ /* 0=L, 1=GF, 2=ARF */
+
+ // Layer 0: predict from L and ARF; update L and G.
+ layer_flags[0] = VPX_EFLAG_FORCE_KF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_REF_GF;
+
+ // Layer 2: sync point: predict from L and ARF; update none.
+ layer_flags[1] = VP8_EFLAG_NO_REF_GF |
+ VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ENTROPY;
+
+ // Layer 1: sync point: predict from L and ARF; update G.
+ layer_flags[2] = VP8_EFLAG_NO_REF_GF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST;
+
+ // Layer 2: predict from L, G, ARF; update none.
+ layer_flags[3] = VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ENTROPY;
+
+ // Layer 0: predict from L and ARF; update L.
+ layer_flags[4] = VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_REF_GF;
+
+ // Layer 2: predict from L, G, ARF; update none.
+ layer_flags[5] = layer_flags[3];
+
+ // Layer 1: predict from L, G, ARF; update G.
+ layer_flags[6] = VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST;
+
+ // Layer 2: predict from L, G, ARF; update none.
+ layer_flags[7] = layer_flags[3];
+ break;
+ }
+ case 11:
+ default:
+ {
+ // 3-layers structure as in case 10, but no sync/refresh points for
+ // layer 1 and 2.
+
+ int ids[4] = {0,2,1,2};
+ cfg.ts_number_layers = 3;
+ cfg.ts_periodicity = 4;
+ cfg.ts_rate_decimator[0] = 4;
+ cfg.ts_rate_decimator[1] = 2;
+ cfg.ts_rate_decimator[2] = 1;
+ memcpy(cfg.ts_layer_id, ids, sizeof(ids));
+
+ flag_periodicity = 8;
+
+ /* 0=L, 1=GF, 2=ARF */
+
+ // Layer 0: predict from L and ARF; update L.
+ layer_flags[0] = VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_REF_GF;
+ layer_flags[4] = layer_flags[0];
+
+ // Layer 1: predict from L, G, ARF; update G.
+ layer_flags[2] = VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST;
+ layer_flags[6] = layer_flags[2];
+
+ // Layer 2: predict from L, G, ARF; update none.
+ layer_flags[1] = VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ENTROPY;
+ layer_flags[3] = layer_flags[1];
+ layer_flags[5] = layer_flags[1];
+ layer_flags[7] = layer_flags[1];
+ break;
+ }
}
- // Open input file
+ /* Open input file */
if(!(infile = fopen(argv[1], "rb")))
die("Failed to open %s for reading", argv[1]);
- // Open an output file for each stream
+ /* Open an output file for each stream */
for (i=0; i<cfg.ts_number_layers; i++)
{
char file_name[512];
@@ -486,24 +621,23 @@ int main(int argc, char **argv) {
write_ivf_file_header(outfile[i], &cfg, 0);
}
- // Initialize codec
+ /* Initialize codec */
if (vpx_codec_enc_init (&codec, interface, &cfg, 0))
die_codec (&codec, "Failed to initialize encoder");
- // Cap CPU & first I-frame size
+ /* Cap CPU & first I-frame size */
vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6);
- vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD, 800);
- vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 2);
+ vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD, 1);
+ vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 1);
+ vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
* ((double) cfg.g_timebase.den / cfg.g_timebase.num)
/ 10.0);
- //printf ("max_intra_size_pct=%d\n", max_intra_size_pct);
+ /* printf ("max_intra_size_pct=%d\n", max_intra_size_pct); */
vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
max_intra_size_pct);
- // vpx_codec_control (&codec, VP8E_SET_TOKEN_PARTITIONS,
- // static_cast<vp8e_token_partitions>(_tokenPartitions));
frame_avail = 1;
while (frame_avail || got_data) {
@@ -517,8 +651,8 @@ int main(int argc, char **argv) {
1, flags, VPX_DL_REALTIME))
die_codec(&codec, "Failed to encode frame");
- // Reset KF flag
- if (layering_mode != 6)
+ /* Reset KF flag */
+ if (layering_mode != 7)
layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
got_data = 0;
@@ -530,29 +664,25 @@ int main(int argc, char **argv) {
i<cfg.ts_number_layers; i++)
{
write_ivf_frame_header(outfile[i], pkt);
- if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
- outfile[i]));
+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+ outfile[i]);
frames_in_layer[i]++;
}
break;
default:
break;
}
- printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT
- && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
- fflush (stdout);
}
frame_cnt++;
pts += frame_duration;
}
- printf ("\n");
fclose (infile);
printf ("Processed %d frames.\n",frame_cnt-1);
if (vpx_codec_destroy(&codec))
die_codec (&codec, "Failed to destroy codec");
- // Try to rewrite the output file headers with the actual frame count
+ /* Try to rewrite the output file headers with the actual frame count */
for (i=0; i<cfg.ts_number_layers; i++)
{
if (!fseek(outfile[i], 0, SEEK_SET))
@@ -562,4 +692,3 @@ int main(int argc, char **argv) {
return EXIT_SUCCESS;
}
-
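For orientation, the layering fields in these patterns combine as follows; the figures are a worked example for the 3-layer, 4-frame-period modes (ids {0,2,1,2}, decimators {4,2,1}) at 30 fps, not extra configuration:

    /* Frame k belongs to layer cfg.ts_layer_id[k % cfg.ts_periodicity]
     * and is encoded with layer_flags[k % flag_periodicity].
     * With rate decimators {4,2,1} at 30 fps:
     *   layer 0 alone  -> 30/4 = 7.5 fps
     *   layers 0..1    -> 30/2 = 15  fps
     *   layers 0..2    -> 30/1 = 30  fps
     * ts_target_bitrate[i] is the cumulative rate up to layer i. */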
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 0703d6a..4474331 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -165,7 +165,7 @@ typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx,
* mapping. This implies that ctrl_id values chosen by the algorithm
* \ref MUST be non-zero.
*/
-typedef const struct
+typedef const struct vpx_codec_ctrl_fn_map
{
int ctrl_id;
vpx_codec_control_fn_t fn;
@@ -280,7 +280,7 @@ typedef vpx_codec_err_t
* one mapping must be present, in addition to the end-of-list.
*
*/
-typedef const struct
+typedef const struct vpx_codec_enc_cfg_map
{
int usage;
vpx_codec_enc_cfg_t cfg;
@@ -302,14 +302,14 @@ struct vpx_codec_iface
vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */
vpx_codec_get_mmap_fn_t get_mmap; /**< \copydoc ::vpx_codec_get_mmap_fn_t */
vpx_codec_set_mmap_fn_t set_mmap; /**< \copydoc ::vpx_codec_set_mmap_fn_t */
- struct
+ struct vpx_codec_dec_iface
{
vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */
vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */
vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */
vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */
} dec;
- struct
+ struct vpx_codec_enc_iface
{
vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */
vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index 03ddc62..73c1c66 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -117,6 +117,13 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx,
mr_cfg.mr_down_sampling_factor.num = dsf->num;
mr_cfg.mr_down_sampling_factor.den = dsf->den;
+ /* Force key-frame synchronization: encoders at higher resolutions
+ * always use the frame_type chosen by the lowest-resolution
+ * encoder.
+ */
+ if(mr_cfg.mr_encoder_id)
+ cfg->kf_mode = VPX_KF_DISABLED;
+
ctx->iface = iface;
ctx->name = iface->name;
ctx->priv = NULL;
@@ -126,8 +133,20 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx,
if (res)
{
- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
+ const char *error_detail =
+ ctx->priv ? ctx->priv->err_detail : NULL;
+ /* Destroy current ctx */
+ ctx->err_detail = error_detail;
vpx_codec_destroy(ctx);
+
+ /* Destroy already allocated high-level ctx */
+ while (i)
+ {
+ ctx--;
+ ctx->err_detail = error_detail;
+ vpx_codec_destroy(ctx);
+ i--;
+ }
}
if (ctx->priv)
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 0af631c..a3c95d2 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -204,8 +204,8 @@ typedef struct vpx_roi_map
unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */
unsigned int rows; /**< number of rows */
unsigned int cols; /**< number of cols */
- int delta_q[4]; /**< quantizer delta [-64, 64] off baseline for regions with id between 0 and 3*/
- int delta_lf[4]; /**< loop filter strength delta [-32, 32] for regions with id between 0 and 3 */
+ int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/
+ int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */
unsigned int static_threshold[4];/**< threshold for region to be treated as static */
} vpx_roi_map_t;
@@ -234,18 +234,6 @@ typedef struct vpx_scaling_mode
VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */
} vpx_scaling_mode_t;
-/*!\brief VP8 encoding mode
- *
- * This defines VP8 encoding mode
- *
- */
-typedef enum
-{
- VP8_BEST_QUALITY_ENCODING,
- VP8_GOOD_QUALITY_ENCODING,
- VP8_REAL_TIME_ENCODING
-} vp8e_encoding_mode;
-
/*!\brief VP8 token partition mode
*
* This defines VP8 partitioning mode for compressed data, i.e., the number of
@@ -298,12 +286,12 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int)
-VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions)
+VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int)
-VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning)
+VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */
VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
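Declaring these controls as int rather than the enum type matches what actually crosses the varargs boundary: an enum argument is promoted when passed through `...`, and using int in the type-checked wrapper sidesteps enum/int conversion warnings at call sites; the original enum is kept as a comment. Call sites are unchanged, e.g.:

    /* The enum constant still documents intent; it travels as an int. */
    vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS,
                      VP8_TWO_TOKENPARTITION);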
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index d92e165..243b7a5 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -49,15 +49,22 @@ extern "C" {
#ifndef DEPRECATED
#if defined(__GNUC__) && __GNUC__
#define DEPRECATED __attribute__ ((deprecated))
-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
#elif defined(_MSC_VER)
#define DEPRECATED
-#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */
#else
#define DEPRECATED
-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
#endif
+#endif /* DEPRECATED */
+
+#ifndef DECLSPEC_DEPRECATED
+#if defined(__GNUC__) && __GNUC__
+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
+#elif defined(_MSC_VER)
+#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */
+#else
+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
#endif
+#endif /* DECLSPEC_DEPRECATED */
/*!\brief Decorator indicating a function is potentially unused */
#ifdef UNUSED
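Splitting the single #ifndef into one guard per macro lets a build predefine either macro independently; previously, predefining DEPRECATED alone skipped the whole block and left DECLSPEC_DEPRECATED undefined. Illustrative use (the attribute text is an example, not from the library):

    #define DEPRECATED __attribute__((deprecated))
    #include "vpx/vpx_codec.h"  /* DECLSPEC_DEPRECATED still gets defined */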
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 427fd0f..ffa123f 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -11,6 +11,21 @@
API_EXPORTS += exports
+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h
+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h
+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h
+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h
+
+API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h
+API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h
+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h
+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h
+
+API_DOC_SRCS-yes += vpx_codec.h
+API_DOC_SRCS-yes += vpx_decoder.h
+API_DOC_SRCS-yes += vpx_encoder.h
+API_DOC_SRCS-yes += vpx_image.h
+
API_SRCS-yes += src/vpx_decoder.c
API_SRCS-yes += vpx_decoder.h
API_SRCS-yes += src/vpx_encoder.c
@@ -23,3 +38,4 @@ API_SRCS-yes += vpx_codec.mk
API_SRCS-yes += vpx_codec_impl_bottom.h
API_SRCS-yes += vpx_codec_impl_top.h
API_SRCS-yes += vpx_image.h
+API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 7992cc4..1ccf1c5 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -113,6 +113,10 @@ extern "C" {
* function directly, to ensure that the ABI version number parameter
* is properly initialized.
*
+ * If the library was configured with --disable-multithread, this call
+ * is not thread safe and should be guarded with a lock if being used
+ * in a multithreaded context.
+ *
* In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
* parameter), the storage pointed to by the cfg parameter must be
* kept readable and stable until all memory maps have been set.
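The same caution is added to vpx_encoder.h below. Under a --disable-multithread build, a caller initializing codecs from several threads would have to provide its own serialization, along these lines (pthread usage is illustrative):

    #include <pthread.h>

    static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

    pthread_mutex_lock(&init_lock);
    if (vpx_codec_dec_init(&ctx, vpx_codec_vp8_dx(), &cfg, 0))
        die_codec(&ctx, "Failed to initialize decoder");
    pthread_mutex_unlock(&init_lock);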
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 239036e..67d9033 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -655,6 +655,10 @@ extern "C" {
* function directly, to ensure that the ABI version number parameter
* is properly initialized.
*
+ * If the library was configured with --disable-multithread, this call
+ * is not thread safe and should be guarded with a lock if being used
+ * in a multithreaded context.
+ *
* In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
* parameter), the storage pointed to by the cfg parameter must be
* kept readable and stable until all memory maps have been set.
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index ebe428d..8ff95a1 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -32,8 +32,33 @@ static int arm_cpu_env_mask(void)
return env && *env ? (int)strtol(env, NULL, 0) : ~0;
}
+#if !CONFIG_RUNTIME_CPU_DETECT
-#if defined(_MSC_VER)
+int arm_cpu_caps(void)
+{
+ /* This function should actually be a no-op. There is no way to adjust any of
+ * these because the RTCD tables do not exist: the functions are called
+ * statically */
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags))
+ {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+#if HAVE_EDSP
+ flags |= HAS_EDSP;
+#endif /* HAVE_EDSP */
+#if HAVE_MEDIA
+ flags |= HAS_MEDIA;
+#endif /* HAVE_MEDIA */
+#if HAVE_NEON
+ flags |= HAS_NEON;
+#endif /* HAVE_NEON */
+ return flags & mask;
+}
+
+#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
#define WIN32_LEAN_AND_MEAN
#define WIN32_EXTRA_LEAN
@@ -52,7 +77,7 @@ int arm_cpu_caps(void)
* instructions via their assembled hex code.
* All of these instructions should be essentially nops.
*/
-#if defined(HAVE_EDSP)
+#if HAVE_EDSP
if (mask & HAS_EDSP)
{
__try
@@ -66,7 +91,7 @@ int arm_cpu_caps(void)
/*Ignore exception.*/
}
}
-#if defined(HAVE_MEDIA)
+#if HAVE_MEDIA
if (mask & HAS_MEDIA)
__try
{
@@ -79,7 +104,7 @@ int arm_cpu_caps(void)
/*Ignore exception.*/
}
}
-#if defined(HAVE_NEON)
+#if HAVE_NEON
if (mask & HAS_NEON)
{
__try
@@ -93,14 +118,13 @@ int arm_cpu_caps(void)
/*Ignore exception.*/
}
}
-#endif
-#endif
-#endif
+#endif /* HAVE_NEON */
+#endif /* HAVE_MEDIA */
+#endif /* HAVE_EDSP */
return flags & mask;
}
-#elif defined(__linux__)
-#if defined(__ANDROID__)
+#elif defined(__ANDROID__) /* end _MSC_VER */
#include <cpu-features.h>
int arm_cpu_caps(void)
@@ -115,19 +139,20 @@ int arm_cpu_caps(void)
mask = arm_cpu_env_mask();
features = android_getCpuFeatures();
-#if defined(HAVE_EDSP)
+#if HAVE_EDSP
flags |= HAS_EDSP;
-#endif
-#if defined(HAVE_MEDIA)
+#endif /* HAVE_EDSP */
+#if HAVE_MEDIA
flags |= HAS_MEDIA;
-#endif
-#if defined(HAVE_NEON)
+#endif /* HAVE_MEDIA */
+#if HAVE_NEON
if (features & ANDROID_CPU_ARM_FEATURE_NEON)
flags |= HAS_NEON;
-#endif
+#endif /* HAVE_NEON */
return flags & mask;
}
-#else // !defined(__ANDROID__)
+
+#elif defined(__linux__) /* end __ANDROID__ */
#include <stdio.h>
int arm_cpu_caps(void)
@@ -153,27 +178,27 @@ int arm_cpu_caps(void)
char buf[512];
while (fgets(buf, 511, fin) != NULL)
{
-#if defined(HAVE_EDSP) || defined(HAVE_NEON)
+#if HAVE_EDSP || HAVE_NEON
if (memcmp(buf, "Features", 8) == 0)
{
char *p;
-#if defined(HAVE_EDSP)
+#if HAVE_EDSP
p=strstr(buf, " edsp");
if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
{
flags |= HAS_EDSP;
}
-#if defined(HAVE_NEON)
+#if HAVE_NEON
p = strstr(buf, " neon");
if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
{
flags |= HAS_NEON;
}
-#endif
-#endif
+#endif /* HAVE_NEON */
+#endif /* HAVE_EDSP */
}
-#endif
-#if defined(HAVE_MEDIA)
+#endif /* HAVE_EDSP || HAVE_NEON */
+#if HAVE_MEDIA
if (memcmp(buf, "CPU architecture:",17) == 0){
int version;
version = atoi(buf+17);
@@ -182,37 +207,13 @@ int arm_cpu_caps(void)
flags |= HAS_MEDIA;
}
}
-#endif
+#endif /* HAVE_MEDIA */
}
fclose(fin);
}
return flags & mask;
}
-#endif // defined(__linux__)
-#elif !CONFIG_RUNTIME_CPU_DETECT
-
-int arm_cpu_caps(void)
-{
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
-#if defined(HAVE_EDSP)
- flags |= HAS_EDSP;
-#endif
-#if defined(HAVE_MEDIA)
- flags |= HAS_MEDIA;
-#endif
-#if defined(HAVE_NEON)
- flags |= HAS_NEON;
-#endif
- return flags & mask;
-}
-
-#else
+#else /* end __linux__ */
#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
- "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+ "available for your platform. Reconfigure with --disable-runtime-cpu-detect."
#endif
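
For reference, a small sketch of how callers consume the value returned by arm_cpu_caps(); the dispatcher function is hypothetical, and the HAS_* flags are assumed to come from vpx_ports/arm.h as elsewhere in this tree.

    #include "vpx_ports/arm.h"

    /* Hypothetical dispatcher keyed off the detected capability bits. */
    void pick_copy_path(void) {
      int caps = arm_cpu_caps();
      if (caps & HAS_NEON) {
        /* use the NEON implementation */
      } else if (caps & HAS_MEDIA) {
        /* use the ARMv6 media-extension implementation */
      } else {
        /* fall back to plain C */
      }
    }
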
diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h
index d3b4fc7..7b6ae4a 100644
--- a/vpx_ports/asm_offsets.h
+++ b/vpx_ports/asm_offsets.h
@@ -19,11 +19,11 @@
static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
#if INLINE_ASM
-#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val));
+#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val))
#define BEGIN int main(void) {
#define END return 0; }
#else
-#define DEFINE(sym, val) int sym = val;
+#define DEFINE(sym, val) const int sym = val
#define BEGIN
#define END
#endif
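
The trailing semicolons move from the macro bodies to the use sites, so an offsets file now reads like ordinary C. A self-contained sketch, with a made-up struct, of how DEFINE/BEGIN/END and the ct_assert macro above are meant to be used:

    #include <stddef.h>
    #include "vpx_ports/asm_offsets.h"

    typedef struct { int rows; int cols; char *buf; } blockd; /* hypothetical */

    BEGIN
    /* Each DEFINE emits an assembler-visible constant (via inline asm) or
     * a const int the build can extract; the semicolon now lives here. */
    DEFINE(blockd_rows, offsetof(blockd, rows));
    DEFINE(blockd_buf, offsetof(blockd, buf));
    END

    /* ct_assert expands to a whole function, so no semicolon is needed. */
    ct_assert(blockd_is_small, sizeof(blockd) <= 32)
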
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 306e235..efad1a5 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -12,14 +12,14 @@
%include "vpx_ports/x86_abi_support.asm"
section .text
- global sym(vpx_reset_mmx_state)
+global sym(vpx_reset_mmx_state) PRIVATE
sym(vpx_reset_mmx_state):
emms
ret
%ifidn __OUTPUT_FORMAT__,x64
-global sym(vpx_winx64_fldcw)
+global sym(vpx_winx64_fldcw) PRIVATE
sym(vpx_winx64_fldcw):
sub rsp, 8
mov [rsp], rcx ; win x64 specific
@@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw):
ret
-global sym(vpx_winx64_fstcw)
+global sym(vpx_winx64_fstcw) PRIVATE
sym(vpx_winx64_fstcw):
sub rsp, 8
fstcw [rsp]
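
On the C side, these entry points restore x87/MMX state after SIMD code has run. A sketch of the usual call pattern; the caller function is hypothetical, and the extern declaration is written out only to keep the example self-contained:

    /* Provided by emms.asm; normally picked up via the vpx_ports headers. */
    extern void vpx_reset_mmx_state(void);

    void after_mmx_work(void) { /* hypothetical */
      /* ... MMX register usage ... */
      vpx_reset_mmx_state(); /* executes emms, re-enabling x87 math */
    }
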
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index 0e52368..dec28d5 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -145,27 +145,27 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
#undef mem_get_sbe16
#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16)
-mem_get_s_generic(be, 16);
+mem_get_s_generic(be, 16)
#undef mem_get_sbe24
#define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24)
-mem_get_s_generic(be, 24);
+mem_get_s_generic(be, 24)
#undef mem_get_sbe32
#define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32)
-mem_get_s_generic(be, 32);
+mem_get_s_generic(be, 32)
#undef mem_get_sle16
#define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16)
-mem_get_s_generic(le, 16);
+mem_get_s_generic(le, 16)
#undef mem_get_sle24
#define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24)
-mem_get_s_generic(le, 24);
+mem_get_s_generic(le, 24)
#undef mem_get_sle32
#define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32)
-mem_get_s_generic(le, 32);
+mem_get_s_generic(le, 32)
#undef mem_put_be16
#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16)
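
Each of these macro instantiations expands to a complete function definition, so the trailing semicolon removed above was a stray empty declaration that pedantic compilers warn about. A small sketch of using the generated accessors (the function name is made up):

    #include "vpx_ports/mem_ops.h"

    int check_mem_ops(void) { /* hypothetical */
      unsigned char buf[4] = { 0x12, 0x34, 0x56, 0x78 };
      unsigned int be = mem_get_be32(buf); /* 0x12345678: MSB first */
      unsigned int le = mem_get_le16(buf); /* 0x3412: LSB first */
      return be == 0x12345678 && le == 0x3412;
    }
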
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index 0fbba65..fca653a 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -99,51 +99,51 @@
#undef mem_get_be16_aligned
#define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned)
-mem_get_be_aligned_generic(16);
+mem_get_be_aligned_generic(16)
#undef mem_get_be32_aligned
#define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned)
-mem_get_be_aligned_generic(32);
+mem_get_be_aligned_generic(32)
#undef mem_get_le16_aligned
#define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned)
-mem_get_le_aligned_generic(16);
+mem_get_le_aligned_generic(16)
#undef mem_get_le32_aligned
#define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned)
-mem_get_le_aligned_generic(32);
+mem_get_le_aligned_generic(32)
#undef mem_get_sbe16_aligned
#define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned)
-mem_get_sbe_aligned_generic(16);
+mem_get_sbe_aligned_generic(16)
#undef mem_get_sbe32_aligned
#define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned)
-mem_get_sbe_aligned_generic(32);
+mem_get_sbe_aligned_generic(32)
#undef mem_get_sle16_aligned
#define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned)
-mem_get_sle_aligned_generic(16);
+mem_get_sle_aligned_generic(16)
#undef mem_get_sle32_aligned
#define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned)
-mem_get_sle_aligned_generic(32);
+mem_get_sle_aligned_generic(32)
#undef mem_put_be16_aligned
#define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned)
-mem_put_be_aligned_generic(16);
+mem_put_be_aligned_generic(16)
#undef mem_put_be32_aligned
#define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned)
-mem_put_be_aligned_generic(32);
+mem_put_be_aligned_generic(32)
#undef mem_put_le16_aligned
#define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned)
-mem_put_le_aligned_generic(16);
+mem_put_le_aligned_generic(16)
#undef mem_put_le32_aligned
#define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned)
-mem_put_le_aligned_generic(32);
+mem_put_le_aligned_generic(32)
#undef mem_get_ne_aligned_generic
#undef mem_get_se_aligned_generic
diff --git a/vpx_ports/vpx_ports.mk b/vpx_ports/vpx_ports.mk
new file mode 100644
index 0000000..e6cb52f
--- /dev/null
+++ b/vpx_ports/vpx_ports.mk
@@ -0,0 +1,26 @@
+##
+## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+
+PORTS_SRCS-yes += vpx_ports.mk
+
+PORTS_SRCS-$(BUILD_LIBVPX) += asm_offsets.h
+PORTS_SRCS-$(BUILD_LIBVPX) += mem.h
+PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h
+
+ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
+PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm
+PORTS_SRCS-$(BUILD_LIBVPX) += x86.h
+PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm
+PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c
+endif
+
+PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c
+PORTS_SRCS-$(ARCH_ARM) += arm.h
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 1341c7f..9dd8c4b 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -162,7 +162,7 @@ x86_readtsc(void)
return tsc;
#else
#if ARCH_X86_64
- return __rdtsc();
+ return (unsigned int)__rdtsc();
#else
__asm rdtsc;
#endif
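
The cast keeps the 64-bit MSVC path consistent with the function's declared unsigned int return. A sketch of the usual coarse timing idiom built on it (the helper is hypothetical):

    #include "vpx_ports/x86.h"

    unsigned int cycles_for(void (*fn)(void)) { /* hypothetical helper */
      unsigned int start = x86_readtsc();
      fn();
      /* Unsigned subtraction gives the elapsed low-order cycle count and
       * stays correct across a single wrap of the truncated counter. */
      return x86_readtsc() - start;
    }
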
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index cef6a0b..0c9fe37 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -88,12 +88,41 @@
%define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf64
%define sym(x) x
+%elifidn __OUTPUT_FORMAT__,elfx32
+%define sym(x) x
%elifidn __OUTPUT_FORMAT__,x64
%define sym(x) x
%else
%define sym(x) _ %+ x
%endif
+; PRIVATE
+; Macro for the attribute to hide a global symbol for the target ABI.
+; This is only active if CHROMIUM is defined.
+;
+; Chromium doesn't like exported global symbols due to symbol clashing with
+; plugins among other things.
+;
+; Requires Chromium's patched copy of yasm:
+; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
+; http://www.tortall.net/projects/yasm/ticket/236
+;
+%ifdef CHROMIUM
+ %ifidn __OUTPUT_FORMAT__,elf32
+ %define PRIVATE :hidden
+ %elifidn __OUTPUT_FORMAT__,elf64
+ %define PRIVATE :hidden
+ %elifidn __OUTPUT_FORMAT__,elfx32
+ %define PRIVATE :hidden
+ %elifidn __OUTPUT_FORMAT__,x64
+ %define PRIVATE
+ %else
+ %define PRIVATE :private_extern
+ %endif
+%else
+ %define PRIVATE
+%endif
+
; arg()
; Return the address specification of the given argument
;
@@ -181,7 +210,16 @@
%endmacro
%endif
%endif
- %define HIDDEN_DATA(x) x
+
+ %ifdef CHROMIUM
+ %ifidn __OUTPUT_FORMAT__,macho32
+ %define HIDDEN_DATA(x) x:private_extern
+ %else
+ %define HIDDEN_DATA(x) x
+ %endif
+ %else
+ %define HIDDEN_DATA(x) x
+ %endif
%else
%macro GET_GOT 1
%endmacro
@@ -189,6 +227,9 @@
%ifidn __OUTPUT_FORMAT__,elf64
%define WRT_PLT wrt ..plt
%define HIDDEN_DATA(x) x:data hidden
+ %elifidn __OUTPUT_FORMAT__,elfx32
+ %define WRT_PLT wrt ..plt
+ %define HIDDEN_DATA(x) x:data hidden
%else
%define HIDDEN_DATA(x) x
%endif
@@ -330,5 +371,8 @@ section .text
%elifidn __OUTPUT_FORMAT__,elf64
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
+%elifidn __OUTPUT_FORMAT__,elfx32
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
%endif
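
For comparison, the C-level analogue of the PRIVATE marker, hidden symbol visibility, looks like this on ELF targets with GCC-compatible compilers; the function is hypothetical:

    /* Kept out of the shared object's dynamic export table, like a
     * symbol marked PRIVATE (:hidden) in the assembly above. */
    __attribute__((visibility("hidden")))
    void vp8_internal_helper(void) { /* hypothetical */
      /* internal-only work */
    }
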
diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c
index 7529fc6..eabd495 100644
--- a/vpx_scale/arm/neon/yv12extend_arm.c
+++ b/vpx_scale/arm/neon/yv12extend_arm.c
@@ -8,18 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_rtcd.h"
-#include "vpx_scale/yv12config.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_scale/vpxscale.h"
+extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc,
+ struct yv12_buffer_config *dst_ybc);
-extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc);
+void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc,
+ struct yv12_buffer_config *dst_ybc) {
+ vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
-void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc)
-{
- vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
-
- vp8_yv12_extend_frame_borders_neon(dst_ybc);
+ vp8_yv12_extend_frame_borders_neon(dst_ybc);
}
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index 4468e9d..c116740 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -46,245 +46,229 @@ static float a = -0.6;
// 3 2
// C0 = a*t - a*t
//
-static short c0_fixed(unsigned int t)
-{
- // put t in Q16 notation
- unsigned short v1, v2;
-
- // Q16
- v1 = (a_i * t) >> 16;
- v1 = (v1 * t) >> 16;
-
- // Q16
- v2 = (a_i * t) >> 16;
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q12
- return -((v1 - v2) >> 4);
+static short c0_fixed(unsigned int t) {
+ // put t in Q16 notation
+ unsigned short v1, v2;
+
+ // Q16
+ v1 = (a_i * t) >> 16;
+ v1 = (v1 * t) >> 16;
+
+ // Q16
+ v2 = (a_i * t) >> 16;
+ v2 = (v2 * t) >> 16;
+ v2 = (v2 * t) >> 16;
+
+ // Q12
+ return -((v1 - v2) >> 4);
}
// 2 3
// C1 = a*t + (3-2*a)*t - (2-a)*t
//
-static short c1_fixed(unsigned int t)
-{
- unsigned short v1, v2, v3;
- unsigned short two, three;
-
- // Q16
- v1 = (a_i * t) >> 16;
-
- // Q13
- two = 2 << 13;
- v2 = two - (a_i >> 3);
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q13
- three = 3 << 13;
- v3 = three - (2 * (a_i >> 3));
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
-
- // Q12
- return (((v1 >> 3) - v2 + v3) >> 1);
+static short c1_fixed(unsigned int t) {
+ unsigned short v1, v2, v3;
+ unsigned short two, three;
+
+ // Q16
+ v1 = (a_i * t) >> 16;
+
+ // Q13
+ two = 2 << 13;
+ v2 = two - (a_i >> 3);
+ v2 = (v2 * t) >> 16;
+ v2 = (v2 * t) >> 16;
+ v2 = (v2 * t) >> 16;
+
+ // Q13
+ three = 3 << 13;
+ v3 = three - (2 * (a_i >> 3));
+ v3 = (v3 * t) >> 16;
+ v3 = (v3 * t) >> 16;
+
+ // Q12
+ return (((v1 >> 3) - v2 + v3) >> 1);
}
// 2 3
// C2 = 1 - (3-a)*t + (2-a)*t
//
-static short c2_fixed(unsigned int t)
-{
- unsigned short v1, v2, v3;
- unsigned short two, three;
-
- // Q13
- v1 = 1 << 13;
-
- // Q13
- three = 3 << 13;
- v2 = three - (a_i >> 3);
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q13
- two = 2 << 13;
- v3 = two - (a_i >> 3);
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
-
- // Q12
- return (v1 - v2 + v3) >> 1;
+static short c2_fixed(unsigned int t) {
+ unsigned short v1, v2, v3;
+ unsigned short two, three;
+
+ // Q13
+ v1 = 1 << 13;
+
+ // Q13
+ three = 3 << 13;
+ v2 = three - (a_i >> 3);
+ v2 = (v2 * t) >> 16;
+ v2 = (v2 * t) >> 16;
+
+ // Q13
+ two = 2 << 13;
+ v3 = two - (a_i >> 3);
+ v3 = (v3 * t) >> 16;
+ v3 = (v3 * t) >> 16;
+ v3 = (v3 * t) >> 16;
+
+ // Q12
+ return (v1 - v2 + v3) >> 1;
}
// 2 3
// C3 = a*t - 2*a*t + a*t
//
-static short c3_fixed(unsigned int t)
-{
- int v1, v2, v3;
+static short c3_fixed(unsigned int t) {
+ int v1, v2, v3;
- // Q16
- v1 = (a_i * t) >> 16;
+ // Q16
+ v1 = (a_i * t) >> 16;
- // Q15
- v2 = 2 * (a_i >> 1);
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
+ // Q15
+ v2 = 2 * (a_i >> 1);
+ v2 = (v2 * t) >> 16;
+ v2 = (v2 * t) >> 16;
- // Q16
- v3 = (a_i * t) >> 16;
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
+ // Q16
+ v3 = (a_i * t) >> 16;
+ v3 = (v3 * t) >> 16;
+ v3 = (v3 * t) >> 16;
- // Q12
- return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
+ // Q12
+ return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
}
#else
// 3 2
// C0 = -a*t + a*t
//
-float C0(float t)
-{
- return -a * t * t * t + a * t * t;
+float C0(float t) {
+ return -a * t * t * t + a * t * t;
}
// 2 3
// C1 = -a*t + (2*a+3)*t - (a+2)*t
//
-float C1(float t)
-{
- return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
+float C1(float t) {
+ return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
}
// 2 3
// C2 = 1 - (a+3)*t + (a+2)*t
//
-float C2(float t)
-{
- return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
+float C2(float t) {
+ return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
}
// 2 3
// C3 = a*t - 2*a*t + a*t
//
-float C3(float t)
-{
- return a * t * t * t - 2.0f * a * t * t + a * t;
+float C3(float t) {
+ return a * t * t * t - 2.0f * a * t * t + a * t;
}
#endif
#if 0
-int compare_real_fixed()
-{
- int i, errors = 0;
- float mult = 1.0 / 10000.0;
- unsigned int fixed_mult = mult * 4294967296;//65536;
- unsigned int phase_offset_int;
- float phase_offset_real;
-
- for (i = 0; i < 10000; i++)
- {
- int fixed0, fixed1, fixed2, fixed3, fixed_total;
- int real0, real1, real2, real3, real_total;
-
- phase_offset_real = (float)i * mult;
- phase_offset_int = (fixed_mult * i) >> 16;
+int compare_real_fixed() {
+ int i, errors = 0;
+ float mult = 1.0 / 10000.0;
+ unsigned int fixed_mult = mult * 4294967296;// 65536;
+ unsigned int phase_offset_int;
+ float phase_offset_real;
+
+ for (i = 0; i < 10000; i++) {
+ int fixed0, fixed1, fixed2, fixed3, fixed_total;
+ int real0, real1, real2, real3, real_total;
+
+ phase_offset_real = (float)i * mult;
+ phase_offset_int = (fixed_mult * i) >> 16;
// phase_offset_int = phase_offset_real * 65536;
- fixed0 = c0_fixed(phase_offset_int);
- real0 = C0(phase_offset_real) * 4096.0;
+ fixed0 = c0_fixed(phase_offset_int);
+ real0 = C0(phase_offset_real) * 4096.0;
- if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
- errors++;
+ if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
+ errors++;
- fixed1 = c1_fixed(phase_offset_int);
- real1 = C1(phase_offset_real) * 4096.0;
+ fixed1 = c1_fixed(phase_offset_int);
+ real1 = C1(phase_offset_real) * 4096.0;
- if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
- errors++;
+ if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
+ errors++;
- fixed2 = c2_fixed(phase_offset_int);
- real2 = C2(phase_offset_real) * 4096.0;
+ fixed2 = c2_fixed(phase_offset_int);
+ real2 = C2(phase_offset_real) * 4096.0;
- if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
- errors++;
+ if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
+ errors++;
- fixed3 = c3_fixed(phase_offset_int);
- real3 = C3(phase_offset_real) * 4096.0;
+ fixed3 = c3_fixed(phase_offset_int);
+ real3 = C3(phase_offset_real) * 4096.0;
- if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
- errors++;
+ if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
+ errors++;
- fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
- real_total = real0 + real1 + real2 + real3;
+ fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
+ real_total = real0 + real1 + real2 + real3;
- if ((fixed_total > 4097) || (fixed_total < 4094))
- errors ++;
+ if ((fixed_total > 4097) || (fixed_total < 4094))
+ errors++;
- if ((real_total > 4097) || (real_total < 4095))
- errors ++;
- }
+ if ((real_total > 4097) || (real_total < 4095))
+ errors++;
+ }
- return errors;
+ return errors;
}
#endif
// Find greatest common denominator between two integers. Method used here is
// slow compared to Euclid's algorithm, but does not require any division.
-int gcd(int a, int b)
-{
- // Problem with this algorithm is that if a or b = 0 this function
- // will never exit. Don't want to return 0 because any computation
- // that was based on a common denoninator and tried to reduce by
- // dividing by 0 would fail. Best solution that could be thought of
- // would to be fail by returing a 1;
- if (a <= 0 || b <= 0)
- return 1;
-
- while (a != b)
- {
- if (b > a)
- b = b - a;
- else
- {
- int tmp = a;//swap large and
- a = b; //small
- b = tmp;
- }
+int gcd(int a, int b) {
+ // Problem with this algorithm is that if a or b = 0 this function
+ // will never exit. Don't want to return 0 because any computation
+  // that was based on a common denominator and tried to reduce by
+  // dividing by 0 would fail. The best solution that could be thought
+  // of is to fail by returning 1.
+ if (a <= 0 || b <= 0)
+ return 1;
+
+ while (a != b) {
+ if (b > a)
+ b = b - a;
+ else {
+ int tmp = a;// swap large and
+ a = b; // small
+ b = tmp;
}
+ }
- return b;
+ return b;
}
-void bicubic_coefficient_init()
-{
- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
- g_first_time = 0;
+void bicubic_coefficient_init() {
+ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
+ g_first_time = 0;
}
-void bicubic_coefficient_destroy()
-{
- if (!g_first_time)
- {
- vpx_free(g_b_scaler.l_w);
+void bicubic_coefficient_destroy() {
+ if (!g_first_time) {
+ vpx_free(g_b_scaler.l_w);
- vpx_free(g_b_scaler.l_h);
+ vpx_free(g_b_scaler.l_h);
- vpx_free(g_b_scaler.l_h_uv);
+ vpx_free(g_b_scaler.l_h_uv);
- vpx_free(g_b_scaler.c_w);
+ vpx_free(g_b_scaler.c_w);
- vpx_free(g_b_scaler.c_h);
+ vpx_free(g_b_scaler.c_h);
- vpx_free(g_b_scaler.c_h_uv);
+ vpx_free(g_b_scaler.c_h_uv);
- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
- }
+ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
+ }
}
// Create the coeffients that will be used for the cubic interpolation.
@@ -292,311 +276,294 @@ void bicubic_coefficient_destroy()
// regimes the phase offsets will be different. There are 4 coefficents
// for each point, two on each side. The layout is that there are the
// 4 coefficents for each phase in the array and then the next phase.
-int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height)
-{
- int i;
+int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) {
+ int i;
#ifdef FIXED_POINT
- int phase_offset_int;
- unsigned int fixed_mult;
- int product_val = 0;
+ int phase_offset_int;
+ unsigned int fixed_mult;
+ int product_val = 0;
#else
- float phase_offset;
+ float phase_offset;
#endif
- int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
+ int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
- if (g_first_time)
- bicubic_coefficient_init();
+ if (g_first_time)
+ bicubic_coefficient_init();
- // check to see if the coefficents have already been set up correctly
- if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
- && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
- return 0;
+ // check to see if the coefficents have already been set up correctly
+ if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
+ && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
+ return 0;
- g_b_scaler.in_width = in_width;
- g_b_scaler.in_height = in_height;
- g_b_scaler.out_width = out_width;
- g_b_scaler.out_height = out_height;
+ g_b_scaler.in_width = in_width;
+ g_b_scaler.in_height = in_height;
+ g_b_scaler.out_width = out_width;
+ g_b_scaler.out_height = out_height;
- // Don't want to allow crazy scaling, just try and prevent a catastrophic
- // failure here. Want to fail after setting the member functions so if
- // if the scaler is called the member functions will not scale.
- if (out_width <= 0 || out_height <= 0)
- return -1;
+ // Don't want to allow crazy scaling, just try and prevent a catastrophic
+  // failure here. Want to fail after setting the member functions so
+  // that if the scaler is called the member functions will not scale.
+ if (out_width <= 0 || out_height <= 0)
+ return -1;
- // reduce in/out width and height ratios using the gcd
- gcd_w = gcd(out_width, in_width);
- gcd_h = gcd(out_height, in_height);
- gcd_h_uv = gcd(out_height, in_height / 2);
+ // reduce in/out width and height ratios using the gcd
+ gcd_w = gcd(out_width, in_width);
+ gcd_h = gcd(out_height, in_height);
+ gcd_h_uv = gcd(out_height, in_height / 2);
- // the numerator width and height are to be saved in
- // globals so they can be used during the scaling process
- // without having to be recalculated.
- g_b_scaler.nw = out_width / gcd_w;
- d_w = in_width / gcd_w;
+ // the numerator width and height are to be saved in
+ // globals so they can be used during the scaling process
+ // without having to be recalculated.
+ g_b_scaler.nw = out_width / gcd_w;
+ d_w = in_width / gcd_w;
- g_b_scaler.nh = out_height / gcd_h;
- d_h = in_height / gcd_h;
+ g_b_scaler.nh = out_height / gcd_h;
+ d_h = in_height / gcd_h;
- g_b_scaler.nh_uv = out_height / gcd_h_uv;
- d_h_uv = (in_height / 2) / gcd_h_uv;
+ g_b_scaler.nh_uv = out_height / gcd_h_uv;
+ d_h_uv = (in_height / 2) / gcd_h_uv;
- // allocate memory for the coefficents
- vpx_free(g_b_scaler.l_w);
+ // allocate memory for the coefficents
+ vpx_free(g_b_scaler.l_w);
- vpx_free(g_b_scaler.l_h);
+ vpx_free(g_b_scaler.l_h);
- vpx_free(g_b_scaler.l_h_uv);
+ vpx_free(g_b_scaler.l_h_uv);
- g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
- g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
- g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
+ g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
+ g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
+ g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
- vpx_free(g_b_scaler.c_w);
+ vpx_free(g_b_scaler.c_w);
- vpx_free(g_b_scaler.c_h);
+ vpx_free(g_b_scaler.c_h);
- vpx_free(g_b_scaler.c_h_uv);
+ vpx_free(g_b_scaler.c_h_uv);
- g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
- g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
- g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
+ g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
+ g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
+ g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
- g_b_scaler.hbuf = g_hbuf;
- g_b_scaler.hbuf_uv = g_hbuf_uv;
+ g_b_scaler.hbuf = g_hbuf;
+ g_b_scaler.hbuf_uv = g_hbuf_uv;
- // Set up polyphase filter taps. This needs to be done before
- // the scaling because of the floating point math required. The
- // coefficients are multiplied by 2^12 so that fixed point math
- // can be used in the main scaling loop.
+ // Set up polyphase filter taps. This needs to be done before
+ // the scaling because of the floating point math required. The
+ // coefficients are multiplied by 2^12 so that fixed point math
+ // can be used in the main scaling loop.
#ifdef FIXED_POINT
- fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
+ fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
- product_val = 0;
+ product_val = 0;
- for (i = 0; i < g_b_scaler.nw; i++)
- {
- if (product_val > g_b_scaler.nw)
- product_val -= g_b_scaler.nw;
+ for (i = 0; i < g_b_scaler.nw; i++) {
+ if (product_val > g_b_scaler.nw)
+ product_val -= g_b_scaler.nw;
- phase_offset_int = (fixed_mult * product_val) >> 16;
+ phase_offset_int = (fixed_mult * product_val) >> 16;
- g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int);
- g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int);
- g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int);
- g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int);
+ g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int);
+ g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int);
+ g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int);
+ g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int);
- product_val += d_w;
- }
+ product_val += d_w;
+ }
- fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
+ fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
- product_val = 0;
+ product_val = 0;
- for (i = 0; i < g_b_scaler.nh; i++)
- {
- if (product_val > g_b_scaler.nh)
- product_val -= g_b_scaler.nh;
+ for (i = 0; i < g_b_scaler.nh; i++) {
+ if (product_val > g_b_scaler.nh)
+ product_val -= g_b_scaler.nh;
- phase_offset_int = (fixed_mult * product_val) >> 16;
+ phase_offset_int = (fixed_mult * product_val) >> 16;
- g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int);
- g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int);
- g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int);
- g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int);
+ g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int);
+ g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int);
+ g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int);
+ g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int);
- product_val += d_h;
- }
+ product_val += d_h;
+ }
- fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
+ fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
- product_val = 0;
+ product_val = 0;
- for (i = 0; i < g_b_scaler.nh_uv; i++)
- {
- if (product_val > g_b_scaler.nh_uv)
- product_val -= g_b_scaler.nh_uv;
+ for (i = 0; i < g_b_scaler.nh_uv; i++) {
+ if (product_val > g_b_scaler.nh_uv)
+ product_val -= g_b_scaler.nh_uv;
- phase_offset_int = (fixed_mult * product_val) >> 16;
+ phase_offset_int = (fixed_mult * product_val) >> 16;
- g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int);
- g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int);
- g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int);
- g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int);
+ g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int);
+ g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int);
+ g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int);
+ g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int);
- product_val += d_h_uv;
- }
+ product_val += d_h_uv;
+ }
#else
- for (i = 0; i < g_nw; i++)
- {
- phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
- g_c_w[i*4] = (C3(phase_offset) * 4096.0);
- g_c_w[i*4+1] = (C2(phase_offset) * 4096.0);
- g_c_w[i*4+2] = (C1(phase_offset) * 4096.0);
- g_c_w[i*4+3] = (C0(phase_offset) * 4096.0);
- }
-
- for (i = 0; i < g_nh; i++)
- {
- phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
- g_c_h[i*4] = (C0(phase_offset) * 4096.0);
- g_c_h[i*4+1] = (C1(phase_offset) * 4096.0);
- g_c_h[i*4+2] = (C2(phase_offset) * 4096.0);
- g_c_h[i*4+3] = (C3(phase_offset) * 4096.0);
- }
-
- for (i = 0; i < g_nh_uv; i++)
- {
- phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
- g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0);
- g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0);
- g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0);
- g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0);
- }
+ for (i = 0; i < g_nw; i++) {
+ phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
+ g_c_w[i * 4] = (C3(phase_offset) * 4096.0);
+ g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0);
+ g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0);
+ g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0);
+ }
+
+ for (i = 0; i < g_nh; i++) {
+ phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
+ g_c_h[i * 4] = (C0(phase_offset) * 4096.0);
+ g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0);
+ g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0);
+ g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0);
+ }
+
+ for (i = 0; i < g_nh_uv; i++) {
+ phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
+ g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0);
+ g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0);
+ g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0);
+ g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0);
+ }
#endif
- // Create an array that corresponds input lines to output lines.
- // This doesn't require floating point math, but it does require
- // a division and because hardware division is not present that
- // is a call.
- for (i = 0; i < out_width; i++)
- {
- g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
+  // Create an array that maps input lines to output lines.
+  // This doesn't require floating point math, but it does require
+  // a division; because hardware division is not present, that
+  // division becomes a function call.
+ for (i = 0; i < out_width; i++) {
+ g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
- if ((g_b_scaler.l_w[i] + 2) <= in_width)
- g_b_scaler.max_usable_out_width = i;
+ if ((g_b_scaler.l_w[i] + 2) <= in_width)
+ g_b_scaler.max_usable_out_width = i;
- }
+ }
- for (i = 0; i < out_height + 1; i++)
- {
- g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
- g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
- }
+ for (i = 0; i < out_height + 1; i++) {
+ g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
+ g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
+ }
- return 0;
+ return 0;
}
int bicubic_scale(int in_width, int in_height, int in_stride,
int out_width, int out_height, int out_stride,
- unsigned char *input_image, unsigned char *output_image)
-{
- short *RESTRICT l_w, * RESTRICT l_h;
- short *RESTRICT c_w, * RESTRICT c_h;
- unsigned char *RESTRICT ip, * RESTRICT op;
- unsigned char *RESTRICT hbuf;
- int h, w, lw, lh;
- int temp_sum;
- int phase_offset_w, phase_offset_h;
-
- c_w = g_b_scaler.c_w;
- c_h = g_b_scaler.c_h;
-
- op = output_image;
-
- l_w = g_b_scaler.l_w;
- l_h = g_b_scaler.l_h;
-
- phase_offset_h = 0;
-
- for (h = 0; h < out_height; h++)
- {
- // select the row to work on
- lh = l_h[h];
- ip = input_image + (in_stride * lh);
-
- // vp8_filter the row vertically into an temporary buffer.
- // If the phase offset == 0 then all the multiplication
- // is going to result in the output equalling the input.
- // So instead point the temporary buffer to the input.
- // Also handle the boundry condition of not being able to
- // filter that last lines.
- if (phase_offset_h && (lh < in_height - 2))
- {
- hbuf = g_b_scaler.hbuf;
-
- for (w = 0; w < in_width; w++)
- {
- temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride];
- temp_sum += c_h[phase_offset_h*4+2] * ip[w];
- temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride];
- temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride];
-
- hbuf[w] = temp_sum >> 12;
- }
- }
- else
- hbuf = ip;
-
- // increase the phase offset for the next time around.
- if (++phase_offset_h >= g_b_scaler.nh)
- phase_offset_h = 0;
-
- // now filter and expand it horizontally into the final
- // output buffer
+ unsigned char *input_image, unsigned char *output_image) {
+ short *RESTRICT l_w, * RESTRICT l_h;
+ short *RESTRICT c_w, * RESTRICT c_h;
+ unsigned char *RESTRICT ip, * RESTRICT op;
+ unsigned char *RESTRICT hbuf;
+ int h, w, lw, lh;
+ int temp_sum;
+ int phase_offset_w, phase_offset_h;
+
+ c_w = g_b_scaler.c_w;
+ c_h = g_b_scaler.c_h;
+
+ op = output_image;
+
+ l_w = g_b_scaler.l_w;
+ l_h = g_b_scaler.l_h;
+
+ phase_offset_h = 0;
+
+ for (h = 0; h < out_height; h++) {
+ // select the row to work on
+ lh = l_h[h];
+ ip = input_image + (in_stride * lh);
+
+    // vp8_filter the row vertically into a temporary buffer.
+    // If the phase offset == 0 then all the multiplication
+    // is going to result in the output equalling the input.
+    // So instead point the temporary buffer to the input.
+    // Also handle the boundary condition of not being able to
+    // filter the last lines.
+ if (phase_offset_h && (lh < in_height - 2)) {
+ hbuf = g_b_scaler.hbuf;
+
+ for (w = 0; w < in_width; w++) {
+ temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride];
+ temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w];
+ temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride];
+ temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride];
+
+ hbuf[w] = temp_sum >> 12;
+ }
+ } else
+ hbuf = ip;
+
+ // increase the phase offset for the next time around.
+ if (++phase_offset_h >= g_b_scaler.nh)
+ phase_offset_h = 0;
+
+ // now filter and expand it horizontally into the final
+ // output buffer
+ phase_offset_w = 0;
+
+ for (w = 0; w < out_width; w++) {
+ // get the index to use to expand the image
+ lw = l_w[w];
+
+ temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1];
+ temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw];
+ temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1];
+ temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2];
+ temp_sum = temp_sum >> 12;
+
+ if (++phase_offset_w >= g_b_scaler.nw)
phase_offset_w = 0;
- for (w = 0; w < out_width; w++)
- {
- // get the index to use to expand the image
- lw = l_w[w];
-
- temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1];
- temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw];
- temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1];
- temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2];
- temp_sum = temp_sum >> 12;
+      // boundary conditions
+ if ((lw + 2) >= in_width)
+ temp_sum = hbuf[lw];
- if (++phase_offset_w >= g_b_scaler.nw)
- phase_offset_w = 0;
+ if (lw == 0)
+ temp_sum = hbuf[0];
- // boundry conditions
- if ((lw + 2) >= in_width)
- temp_sum = hbuf[lw];
-
- if (lw == 0)
- temp_sum = hbuf[0];
-
- op[w] = temp_sum;
- }
-
- op += out_stride;
+ op[w] = temp_sum;
}
- return 0;
+ op += out_stride;
+ }
+
+ return 0;
}
-void bicubic_scale_frame_reset()
-{
- g_b_scaler.out_width = 0;
- g_b_scaler.out_height = 0;
+void bicubic_scale_frame_reset() {
+ g_b_scaler.out_width = 0;
+ g_b_scaler.out_height = 0;
}
void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int new_width, int new_height)
-{
+ int new_width, int new_height) {
- dst->y_width = new_width;
- dst->y_height = new_height;
- dst->uv_width = new_width / 2;
- dst->uv_height = new_height / 2;
+ dst->y_width = new_width;
+ dst->y_height = new_height;
+ dst->uv_width = new_width / 2;
+ dst->uv_height = new_height / 2;
- dst->y_stride = dst->y_width;
- dst->uv_stride = dst->uv_width;
+ dst->y_stride = dst->y_width;
+ dst->uv_stride = dst->uv_width;
- bicubic_scale(src->y_width, src->y_height, src->y_stride,
- new_width, new_height, dst->y_stride,
- src->y_buffer, dst->y_buffer);
+ bicubic_scale(src->y_width, src->y_height, src->y_stride,
+ new_width, new_height, dst->y_stride,
+ src->y_buffer, dst->y_buffer);
- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
- new_width / 2, new_height / 2, dst->uv_stride,
- src->u_buffer, dst->u_buffer);
+ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
+ new_width / 2, new_height / 2, dst->uv_stride,
+ src->u_buffer, dst->u_buffer);
- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
- new_width / 2, new_height / 2, dst->uv_stride,
- src->v_buffer, dst->v_buffer);
+ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
+ new_width / 2, new_height / 2, dst->uv_stride,
+ src->v_buffer, dst->v_buffer);
}
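
The four cubic kernels above form a partition of unity: for every phase t the cubic, quadratic, and linear terms cancel and C0(t)+C1(t)+C2(t)+C3(t) = 1, which is why compare_real_fixed() checks the Q12 totals against 4096. A self-contained sketch of that check on the floating point path, restating the kernels with the same a = -0.6:

    #include <assert.h>
    #include <math.h>

    static const float ka = -0.6f;
    static float k0(float t) { return -ka*t*t*t + ka*t*t; }
    static float k1(float t) { return -(ka+2)*t*t*t + (2*ka+3)*t*t - ka*t; }
    static float k2(float t) { return (ka+2)*t*t*t - (ka+3)*t*t + 1; }
    static float k3(float t) { return ka*t*t*t - 2*ka*t*t + ka*t; }

    void check_partition_of_unity(void) {
      float t;
      for (t = 0.0f; t <= 1.0f; t += 0.01f) {
        /* All t terms cancel, leaving exactly 1 (4096 in Q12). */
        assert(fabsf(k0(t) + k1(t) + k2(t) + k3(t) - 1.0f) < 1e-5f);
      }
    }
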
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index 9beb162..60c21fb 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -34,47 +34,42 @@
* SPECIAL NOTES : None.
*
****************************************************************************/
-void vp8_horizontal_line_4_5_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 4; i += 4)
- {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char) a;
- des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
- c = src[2] * 154;
- a = src[3];
- des [2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [3] = (unsigned char)((c + 102 * a + 128) >> 8);
- b = src[4];
- des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8);
-
- src += 4;
- des += 5;
- }
-
+void vp8_horizontal_line_4_5_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width - 4; i += 4) {
a = src[0];
b = src[1];
- des [0] = (unsigned char)(a);
+ des [0] = (unsigned char) a;
des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
c = src[2] * 154;
a = src[3];
des [2] = (unsigned char)((b * 102 + c + 128) >> 8);
des [3] = (unsigned char)((c + 102 * a + 128) >> 8);
- des [4] = (unsigned char)(a);
+ b = src[4];
+ des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8);
+
+ src += 4;
+ des += 5;
+ }
+
+ a = src[0];
+ b = src[1];
+ des [0] = (unsigned char)(a);
+ des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
+ c = src[2] * 154;
+ a = src[3];
+ des [2] = (unsigned char)((b * 102 + c + 128) >> 8);
+ des [3] = (unsigned char)((c + 102 * a + 128) >> 8);
+ des [4] = (unsigned char)(a);
}
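
The constants here are Q8 linear-interpolation weights: output sample j of the 4-to-5 upscale lands at input position j*4/5, each weight pair sums to 256, and the +128 rounds the Q8 product. A sketch of the underlying primitive (the helper name is made up):

    /* Hypothetical helper: blend two taps with Q8 weights, wx + wy == 256. */
    static unsigned char lerp_q8(unsigned int x, unsigned int y,
                                 unsigned int wx, unsigned int wy) {
      return (unsigned char)((x * wx + y * wy + 128) >> 8);
    }
    /* des[1] above is lerp_q8(a, b, 51, 205): phase 0.8, i.e. weights
     * 0.2 and 0.8 rounded to 51/256 and 205/256. */
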
@@ -97,31 +92,31 @@ void vp8_horizontal_line_4_5_scale_c
* the current band.
*
****************************************************************************/
-void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c, d;
- unsigned char *des = dest;
+void vp8_vertical_band_4_5_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c, d;
+ unsigned char *des = dest;
- for (i = 0; i < dest_width; i++)
- {
- a = des [0];
- b = des [dest_pitch];
+ for (i = 0; i < dest_width; i++) {
+ a = des [0];
+ b = des [dest_pitch];
- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
+ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
- c = des[dest_pitch*2] * 154;
- d = des[dest_pitch*3];
+ c = des[dest_pitch * 2] * 154;
+ d = des[dest_pitch * 3];
- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8);
+ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8);
+ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8);
- /* First line in next band */
- a = des [dest_pitch * 5];
- des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8);
+ /* First line in next band */
+ a = des [dest_pitch * 5];
+ des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8);
- des ++;
- }
+ des++;
+ }
}
/****************************************************************************
@@ -144,30 +139,30 @@ void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch,
* last band.
*
****************************************************************************/
-void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c, d;
- unsigned char *des = dest;
+void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c, d;
+ unsigned char *des = dest;
- for (i = 0; i < dest_width; ++i)
- {
- a = des[0];
- b = des[dest_pitch];
+ for (i = 0; i < dest_width; ++i) {
+ a = des[0];
+ b = des[dest_pitch];
- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
+ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
- c = des[dest_pitch*2] * 154;
- d = des[dest_pitch*3];
+ c = des[dest_pitch * 2] * 154;
+ d = des[dest_pitch * 3];
- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8);
+ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8);
+ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8);
- /* No other line for interplation of this line, so .. */
- des[dest_pitch*4] = (unsigned char) d;
+    /* No other line for interpolation of this line, so .. */
+ des[dest_pitch * 4] = (unsigned char) d;
- des++;
- }
+ des++;
+ }
}
/****************************************************************************
@@ -190,40 +185,35 @@ void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_p
*
*
****************************************************************************/
-void vp8_horizontal_line_2_3_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 2; i += 2)
- {
- a = src[0];
- b = src[1];
- c = src[2];
-
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8);
-
- src += 2;
- des += 3;
- }
-
+void vp8_horizontal_line_2_3_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width - 2; i += 2) {
a = src[0];
b = src[1];
+ c = src[2];
+
des [0] = (unsigned char)(a);
des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [2] = (unsigned char)(b);
+ des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8);
+
+ src += 2;
+ des += 3;
+ }
+
+ a = src[0];
+ b = src[1];
+ des [0] = (unsigned char)(a);
+ des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
+ des [2] = (unsigned char)(b);
}
@@ -246,22 +236,22 @@ void vp8_horizontal_line_2_3_scale_c
* the current band.
*
****************************************************************************/
-void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++)
- {
- a = des [0];
- b = des [dest_pitch];
- c = des[dest_pitch*3];
- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [dest_pitch*2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8);
-
- des++;
- }
+void vp8_vertical_band_2_3_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+
+ for (i = 0; i < dest_width; i++) {
+ a = des [0];
+ b = des [dest_pitch];
+ c = des[dest_pitch * 3];
+ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
+ des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8);
+
+ des++;
+ }
}
/****************************************************************************
@@ -284,21 +274,21 @@ void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch,
* last band.
*
****************************************************************************/
-void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i)
- {
- a = des [0];
- b = des [dest_pitch];
-
- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [dest_pitch*2] = (unsigned char)(b);
- des++;
- }
+void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b;
+ unsigned char *des = dest;
+
+ for (i = 0; i < dest_width; ++i) {
+ a = des [0];
+ b = des [dest_pitch];
+
+ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
+ des [dest_pitch * 2] = (unsigned char)(b);
+ des++;
+ }
}
/****************************************************************************
@@ -321,49 +311,44 @@ void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_p
*
*
****************************************************************************/
-void vp8_horizontal_line_3_5_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 3; i += 3)
- {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
-
- c = src[2] ;
- des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
-
- a = src[3];
- des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
-
- src += 3;
- des += 5;
- }
-
+void vp8_horizontal_line_3_5_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width - 3; i += 3) {
a = src[0];
b = src[1];
des [0] = (unsigned char)(a);
-
des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
- c = src[2] ;
+
+ c = src[2];
des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
- des [4] = (unsigned char)(c);
+ a = src[3];
+ des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
+
+ src += 3;
+ des += 5;
+ }
+
+ a = src[0];
+ b = src[1];
+ des [0] = (unsigned char)(a);
+
+ des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
+ c = src[2];
+ des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
+ des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
+
+ des [4] = (unsigned char)(c);
}
/****************************************************************************
@@ -385,28 +370,28 @@ void vp8_horizontal_line_3_5_scale_c
* the current band.
*
****************************************************************************/
-void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++)
- {
- a = des [0];
- b = des [dest_pitch];
- des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
-
- c = des[dest_pitch*2];
- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
-
- /* First line in next band... */
- a = des [dest_pitch * 5];
- des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
-
- des++;
- }
+void vp8_vertical_band_3_5_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+
+ for (i = 0; i < dest_width; i++) {
+ a = des [0];
+ b = des [dest_pitch];
+ des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
+
+ c = des[dest_pitch * 2];
+ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
+ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
+
+ /* First line in next band... */
+ a = des [dest_pitch * 5];
+ des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
+
+ des++;
+ }
}
/****************************************************************************
@@ -429,28 +414,28 @@ void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch,
* last band.
*
****************************************************************************/
-void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
+void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
- for (i = 0; i < dest_width; ++i)
- {
- a = des [0];
- b = des [dest_pitch];
+ for (i = 0; i < dest_width; ++i) {
+ a = des [0];
+ b = des [dest_pitch];
- des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
+ des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
- c = des[dest_pitch*2];
- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
+ c = des[dest_pitch * 2];
+ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
+ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
- /* No other line for interplation of this line, so .. */
- des [ dest_pitch * 4 ] = (unsigned char)(c) ;
+    /* No other line for interpolation of this line, so .. */
+ des [ dest_pitch * 4 ] = (unsigned char)(c);
- des++;
- }
+ des++;
+ }
}
/****************************************************************************
@@ -473,46 +458,41 @@ void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_p
*
*
****************************************************************************/
-void vp8_horizontal_line_3_4_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 3; i += 3)
- {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
-
- c = src[2];
- des [2] = (unsigned char)((b + c + 1) >> 1);
-
- a = src[3];
- des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
-
- src += 3;
- des += 4;
- }
-
+void vp8_horizontal_line_3_4_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width - 3; i += 3) {
a = src[0];
b = src[1];
des [0] = (unsigned char)(a);
des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
- c = src[2] ;
+ c = src[2];
des [2] = (unsigned char)((b + c + 1) >> 1);
- des [3] = (unsigned char)(c);
+
+ a = src[3];
+ des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
+
+ src += 3;
+ des += 4;
+ }
+
+ a = src[0];
+ b = src[1];
+ des [0] = (unsigned char)(a);
+ des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
+
+ c = src[2];
+ des [2] = (unsigned char)((b + c + 1) >> 1);
+ des [3] = (unsigned char)(c);
}
/****************************************************************************
@@ -534,27 +514,27 @@ void vp8_horizontal_line_3_4_scale_c
* the current band.
*
****************************************************************************/
-void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++)
- {
- a = des [0];
- b = des [dest_pitch];
- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
-
- c = des[dest_pitch*2];
- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1);
-
- /* First line in next band... */
- a = des [dest_pitch*4];
- des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
-
- des++;
- }
+void vp8_vertical_band_3_4_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
+
+ for (i = 0; i < dest_width; i++) {
+ a = des [0];
+ b = des [dest_pitch];
+ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
+
+ c = des[dest_pitch * 2];
+ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1);
+
+ /* First line in next band... */
+ a = des [dest_pitch * 4];
+ des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
+
+ des++;
+ }
}
/****************************************************************************
@@ -577,27 +557,27 @@ void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch,
* last band.
*
****************************************************************************/
-void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
+void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c;
+ unsigned char *des = dest;
- for (i = 0; i < dest_width; ++i)
- {
- a = des [0];
- b = des [dest_pitch];
+ for (i = 0; i < dest_width; ++i) {
+ a = des [0];
+ b = des [dest_pitch];
- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
+ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
- c = des[dest_pitch*2];
- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1);
+ c = des[dest_pitch * 2];
+ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1);
- /* No other line for interplation of this line, so .. */
- des [dest_pitch*3] = (unsigned char)(c);
+    /* No other line for interpolation of this line, so .. */
+ des [dest_pitch * 3] = (unsigned char)(c);
- des++;
- }
+ des++;
+ }
}
/****************************************************************************
@@ -619,34 +599,29 @@ void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_p
* SPECIAL NOTES : None.
*
****************************************************************************/
-void vp8_horizontal_line_1_2_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned int i;
- unsigned int a, b;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 1; i += 1)
- {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a + b + 1) >> 1);
- src += 1;
- des += 2;
- }
-
+void vp8_horizontal_line_1_2_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width - 1; i += 1) {
a = src[0];
+ b = src[1];
des [0] = (unsigned char)(a);
- des [1] = (unsigned char)(a);
+ des [1] = (unsigned char)((a + b + 1) >> 1);
+ src += 1;
+ des += 2;
+ }
+
+ a = src[0];
+ des [0] = (unsigned char)(a);
+ des [1] = (unsigned char)(a);
}
/****************************************************************************
@@ -668,21 +643,21 @@ void vp8_horizontal_line_1_2_scale_c
* the current band.
*
****************************************************************************/
-void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b;
- unsigned char *des = dest;
+void vp8_vertical_band_1_2_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b;
+ unsigned char *des = dest;
- for (i = 0; i < dest_width; i++)
- {
- a = des [0];
- b = des [dest_pitch * 2];
+ for (i = 0; i < dest_width; i++) {
+ a = des [0];
+ b = des [dest_pitch * 2];
- des[dest_pitch] = (unsigned char)((a + b + 1) >> 1);
+ des[dest_pitch] = (unsigned char)((a + b + 1) >> 1);
- des++;
- }
+ des++;
+ }
}
/****************************************************************************
@@ -705,16 +680,16 @@ void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch,
* last band.
*
****************************************************************************/
-void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i)
- {
- des[dest_pitch] = des[0];
- des++;
- }
+void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned char *des = dest;
+
+ for (i = 0; i < dest_width; ++i) {
+ des[dest_pitch] = des[0];
+ des++;
+ }
}
@@ -740,67 +715,64 @@ void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_p
* SPECIAL NOTES : None.
*
****************************************************************************/
-void vp8_horizontal_line_5_4_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned i;
- unsigned int a, b, c, d, e;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width; i += 5)
- {
- a = src[0];
- b = src[1];
- c = src[2];
- d = src[3];
- e = src[4];
-
- des[0] = (unsigned char) a;
- des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8);
- des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8);
- des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8);
-
- src += 5;
- des += 4;
- }
+void vp8_horizontal_line_5_4_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned i;
+ unsigned int a, b, c, d, e;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width; i += 5) {
+ a = src[0];
+ b = src[1];
+ c = src[2];
+ d = src[3];
+ e = src[4];
+
+ des[0] = (unsigned char) a;
+ des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8);
+ des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8);
+ des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8);
+
+ src += 5;
+ des += 4;
+ }
}
-void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c, d, e;
- unsigned char *des = dest;
- unsigned char *src = source;
+void vp8_vertical_band_5_4_scale_c(unsigned char *source,
+ unsigned int src_pitch,
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c, d, e;
+ unsigned char *des = dest;
+ unsigned char *src = source;
- for (i = 0; i < dest_width; i++)
- {
+ for (i = 0; i < dest_width; i++) {
- a = src[0 * src_pitch];
- b = src[1 * src_pitch];
- c = src[2 * src_pitch];
- d = src[3 * src_pitch];
- e = src[4 * src_pitch];
+ a = src[0 * src_pitch];
+ b = src[1 * src_pitch];
+ c = src[2 * src_pitch];
+ d = src[3 * src_pitch];
+ e = src[4 * src_pitch];
- des[0 * dest_pitch] = (unsigned char) a;
- des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8);
- des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8);
- des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8);
+ des[0 * dest_pitch] = (unsigned char) a;
+ des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8);
+ des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8);
+ des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8);
- src ++;
- des ++;
+ src++;
+ des++;
- }
+ }
}
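
The hard-coded 5:4 weights fall out of the sampling positions: output pixel n samples source position 5n/4, and in 8-bit fixed point the fractional part becomes the right-hand weight. A small sketch that re-derives the 192/64, 128/128 and 64/192 pairs (variable names are illustrative; n = 0 degenerates to a plain copy):

#include <stdio.h>

int main(void) {
  const int N = 5, M = 4;  /* N source pixels -> M output pixels */
  int n;
  for (n = 0; n < M; n++) {
    int pos = n * N * 256 / M;  /* source position in 24.8 fixed point */
    int right = pos & 255;      /* weight on the right neighbour */
    int left = 256 - right;     /* weight on the left neighbour  */
    printf("des[%d] = src[%d]*%d + src[%d]*%d\n",
           n, pos >> 8, left, (pos >> 8) + 1, right);
  }
  return 0;
}
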
@@ -824,63 +796,60 @@ void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch
*
*
****************************************************************************/
-void vp8_horizontal_line_5_3_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned int i;
- unsigned int a, b, c, d , e;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width; i += 5)
- {
- a = src[0];
- b = src[1];
- c = src[2];
- d = src[3];
- e = src[4];
-
- des[0] = (unsigned char) a;
- des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8);
- des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8);
-
- src += 5;
- des += 3;
- }
+void vp8_horizontal_line_5_3_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c, d, e;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width; i += 5) {
+ a = src[0];
+ b = src[1];
+ c = src[2];
+ d = src[3];
+ e = src[4];
+
+ des[0] = (unsigned char) a;
+ des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8);
+ des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8);
+
+ src += 5;
+ des += 3;
+ }
}
-void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- unsigned int i;
- unsigned int a, b, c, d, e;
- unsigned char *des = dest;
- unsigned char *src = source;
+void vp8_vertical_band_5_3_scale_c(unsigned char *source,
+ unsigned int src_pitch,
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a, b, c, d, e;
+ unsigned char *des = dest;
+ unsigned char *src = source;
- for (i = 0; i < dest_width; i++)
- {
+ for (i = 0; i < dest_width; i++) {
- a = src[0 * src_pitch];
- b = src[1 * src_pitch];
- c = src[2 * src_pitch];
- d = src[3 * src_pitch];
- e = src[4 * src_pitch];
+ a = src[0 * src_pitch];
+ b = src[1 * src_pitch];
+ c = src[2 * src_pitch];
+ d = src[3 * src_pitch];
+ e = src[4 * src_pitch];
- des[0 * dest_pitch] = (unsigned char) a;
- des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8);
- des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8);
+ des[0 * dest_pitch] = (unsigned char) a;
+ des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8);
+ des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8);
- src ++;
- des ++;
+ src++;
+ des++;
- }
+ }
}
/****************************************************************************
@@ -902,55 +871,52 @@ void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch
* SPECIAL NOTES : None.
*
****************************************************************************/
-void vp8_horizontal_line_2_1_scale_c
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- unsigned int i;
- unsigned int a;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width; i += 2)
- {
- a = src[0];
- des [0] = (unsigned char)(a);
- src += 2;
- des += 1;
- }
-
-
-
-}
-void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- (void) dest_pitch;
- (void) src_pitch;
- vpx_memcpy(dest, source, dest_width);
+void vp8_horizontal_line_2_1_scale_c(const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width) {
+ unsigned int i;
+ unsigned int a;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
+
+ (void) dest_width;
+
+ for (i = 0; i < source_width; i += 2) {
+ a = src[0];
+ des [0] = (unsigned char)(a);
+ src += 2;
+ des += 1;
+ }
}
-void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- int i;
- int temp;
- int width = dest_width;
-
- (void) dest_pitch;
-
- for (i = 0; i < width; i++)
- {
- temp = 8;
- temp += source[i-(int)src_pitch] * 3;
- temp += source[i] * 10;
- temp += source[i+src_pitch] * 3;
- temp >>= 4 ;
- dest[i] = (unsigned char)(temp);
- }
+void vp8_vertical_band_2_1_scale_c(unsigned char *source,
+ unsigned int src_pitch,
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ (void) dest_pitch;
+ (void) src_pitch;
+ vpx_memcpy(dest, source, dest_width);
+}
+void vp8_vertical_band_2_1_scale_i_c(unsigned char *source,
+ unsigned int src_pitch,
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width) {
+ int i;
+ int temp;
+ int width = dest_width;
+
+ (void) dest_pitch;
+
+ for (i = 0; i < width; i++) {
+ temp = 8;
+ temp += source[i - (int)src_pitch] * 3;
+ temp += source[i] * 10;
+ temp += source[i + src_pitch] * 3;
+ temp >>= 4;
+ dest[i] = (unsigned char)(temp);
+ }
}
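
For the progressive 2:1 path, the vertical filter is a (3, 10, 3)/16 tap across three source rows, with 8 as the rounding term; since the weights sum to 16, flat areas pass through unchanged. A quick standalone check of the arithmetic for one pixel:

#include <stdio.h>

int main(void) {
  int above = 100, cur = 120, below = 140;
  int temp = 8;                    /* rounding: half of the 16 divisor */
  temp += above * 3;
  temp += cur * 10;
  temp += below * 3;
  printf("%d\n", temp >> 4);       /* (300 + 1200 + 420 + 8) >> 4 = 120 */
  return 0;
}
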
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index c02e4ff..7de85ca 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -20,23 +20,22 @@
/****************************************************************************
* Header Files
****************************************************************************/
-#include "vpx_rtcd.h"
+#include "./vpx_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/yv12config.h"
#include "vpx_scale/scale_mode.h"
-typedef struct
-{
- int expanded_frame_width;
- int expanded_frame_height;
+typedef struct {
+ int expanded_frame_width;
+ int expanded_frame_height;
- int HScale;
- int HRatio;
- int VScale;
- int VRatio;
+ int HScale;
+ int HRatio;
+ int VScale;
+ int VRatio;
- YV12_BUFFER_CONFIG *src_yuv_config;
- YV12_BUFFER_CONFIG *dst_yuv_config;
+ YV12_BUFFER_CONFIG *src_yuv_config;
+ YV12_BUFFER_CONFIG *dst_yuv_config;
} SCALE_VARS;
@@ -60,15 +59,14 @@ typedef struct
****************************************************************************/
static
void horizontal_line_copy(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
-
- duck_memcpy(dest, source, source_width);
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
+
+ duck_memcpy(dest, source, source_width);
}
/****************************************************************************
*
@@ -90,16 +88,15 @@ void horizontal_line_copy(
****************************************************************************/
static
void null_scale(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- (void) dest;
- (void) dest_pitch;
- (void) dest_width;
-
- return;
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ (void) dest;
+ (void) dest_pitch;
+ (void) dest_width;
+
+ return;
}
/****************************************************************************
@@ -127,35 +124,33 @@ void null_scale(
static
void scale1d_2t1_i
(
- const unsigned char *source,
- int source_step,
- unsigned int source_scale,
- unsigned int source_length,
- unsigned char *dest,
- int dest_step,
- unsigned int dest_scale,
- unsigned int dest_length
-)
-{
- unsigned int i, j;
- unsigned int temp;
- int source_pitch = source_step;
- (void) source_length;
- (void) source_scale;
- (void) dest_scale;
-
- source_step *= 2;
- dest[0] = source[0];
-
- for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step)
- {
- temp = 8;
- temp += 3 * source[j-source_pitch];
- temp += 10 * source[j];
- temp += 3 * source[j+source_pitch];
- temp >>= 4;
- dest[i] = (char)(temp);
- }
+ const unsigned char *source,
+ int source_step,
+ unsigned int source_scale,
+ unsigned int source_length,
+ unsigned char *dest,
+ int dest_step,
+ unsigned int dest_scale,
+ unsigned int dest_length
+) {
+ unsigned int i, j;
+ unsigned int temp;
+ int source_pitch = source_step;
+ (void) source_length;
+ (void) source_scale;
+ (void) dest_scale;
+
+ source_step *= 2;
+ dest[0] = source[0];
+
+ for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) {
+ temp = 8;
+ temp += 3 * source[j - source_pitch];
+ temp += 10 * source[j];
+ temp += 3 * source[j + source_pitch];
+ temp >>= 4;
+ dest[i] = (char)(temp);
+ }
}
/****************************************************************************
@@ -183,27 +178,26 @@ void scale1d_2t1_i
static
void scale1d_2t1_ps
(
- const unsigned char *source,
- int source_step,
- unsigned int source_scale,
- unsigned int source_length,
- unsigned char *dest,
- int dest_step,
- unsigned int dest_scale,
- unsigned int dest_length
-)
-{
- unsigned int i, j;
-
- (void) source_length;
- (void) source_scale;
- (void) dest_scale;
-
- source_step *= 2;
- j = 0;
-
- for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step)
- dest[i] = source[j];
+ const unsigned char *source,
+ int source_step,
+ unsigned int source_scale,
+ unsigned int source_length,
+ unsigned char *dest,
+ int dest_step,
+ unsigned int dest_scale,
+ unsigned int dest_length
+) {
+ unsigned int i, j;
+
+ (void) source_length;
+ (void) source_scale;
+ (void) dest_scale;
+
+ source_step *= 2;
+ j = 0;
+
+ for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step)
+ dest[i] = source[j];
}
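
The source_step/dest_step arguments are what let one 1-D routine serve both axes: step 1 walks a row, step = pitch walks a column. A self-contained rerun of the point-sampling loop above, halving column 0 of a 4x4 image (names are illustrative):

#include <stdio.h>

int main(void) {
  const unsigned char img[16] = { 0,  1,  2,  3,
                                  4,  5,  6,  7,
                                  8,  9, 10, 11,
                                 12, 13, 14, 15};
  unsigned char out[2];
  const int pitch = 4;
  int i, j = 0;
  int source_step = pitch * 2;     /* 2:1 -> skip every other row */
  int dest_step = 1, dest_length = 2;

  for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step)
    out[i] = img[j];               /* picks rows 0 and 2 of column 0 */

  printf("%d %d\n", out[0], out[1]);  /* prints: 0 8 */
  return 0;
}
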
/****************************************************************************
*
@@ -230,45 +224,42 @@ void scale1d_2t1_ps
static
void scale1d_c
(
- const unsigned char *source,
- int source_step,
- unsigned int source_scale,
- unsigned int source_length,
- unsigned char *dest,
- int dest_step,
- unsigned int dest_scale,
- unsigned int dest_length
-)
-{
- unsigned int i;
- unsigned int round_value = dest_scale / 2;
- unsigned int left_modifier = dest_scale;
- unsigned int right_modifier = 0;
- unsigned char left_pixel = *source;
- unsigned char right_pixel = *(source + source_step);
-
- (void) source_length;
-
- /* These asserts are needed if there are boundary issues... */
- /*assert ( dest_scale > source_scale );*/
- /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/
-
- for (i = 0; i < dest_length * dest_step; i += dest_step)
- {
- dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale);
-
- right_modifier += source_scale;
-
- while (right_modifier > dest_scale)
- {
- right_modifier -= dest_scale;
- source += source_step;
- left_pixel = *source;
- right_pixel = *(source + source_step);
- }
-
- left_modifier = dest_scale - right_modifier;
+ const unsigned char *source,
+ int source_step,
+ unsigned int source_scale,
+ unsigned int source_length,
+ unsigned char *dest,
+ int dest_step,
+ unsigned int dest_scale,
+ unsigned int dest_length
+) {
+ unsigned int i;
+ unsigned int round_value = dest_scale / 2;
+ unsigned int left_modifier = dest_scale;
+ unsigned int right_modifier = 0;
+ unsigned char left_pixel = *source;
+ unsigned char right_pixel = *(source + source_step);
+
+ (void) source_length;
+
+ /* These asserts are needed if there are boundary issues... */
+ /*assert ( dest_scale > source_scale );*/
+ /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/
+
+ for (i = 0; i < dest_length * dest_step; i += dest_step) {
+ dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale);
+
+ right_modifier += source_scale;
+
+ while (right_modifier > dest_scale) {
+ right_modifier -= dest_scale;
+ source += source_step;
+ left_pixel = *source;
+ right_pixel = *(source + source_step);
}
+
+ left_modifier = dest_scale - right_modifier;
+ }
}
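
scale1d_c is a fixed-point DDA: left_modifier/right_modifier are the interpolation weights in units of dest_scale, the phase advances by source_scale per output pixel, and the source pointer only steps when the phase overflows. A miniature run, upscaling 3 pixels to 5 (note the one-pixel pad, which is why callers pass source_width + 1):

#include <stdio.h>

int main(void) {
  const unsigned char src[4] = {0, 50, 100, 100};  /* last entry is pad */
  unsigned char dst[5];
  unsigned int i, ss = 3, ds = 5, rnd = ds / 2;
  unsigned int left = ds, right = 0, s = 0;

  for (i = 0; i < 5; i++) {
    dst[i] = (unsigned char)((left * src[s] + right * src[s + 1] + rnd) / ds);
    right += ss;
    while (right > ds) {  /* phase overflow: advance the source pixel */
      right -= ds;
      s++;
    }
    left = ds - right;
  }
  for (i = 0; i < 5; i++)
    printf("%d ", dst[i]);          /* prints: 0 30 60 90 100 */
  printf("\n");
  return 0;
}
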
/****************************************************************************
@@ -304,246 +295,221 @@ void scale1d_c
static
void Scale2D
(
- /*const*/
- unsigned char *source,
- int source_pitch,
- unsigned int source_width,
- unsigned int source_height,
- unsigned char *dest,
- int dest_pitch,
- unsigned int dest_width,
- unsigned int dest_height,
- unsigned char *temp_area,
- unsigned char temp_area_height,
- unsigned int hscale,
- unsigned int hratio,
- unsigned int vscale,
- unsigned int vratio,
- unsigned int interlaced
-)
-{
- /*unsigned*/
- int i, j, k;
- int bands;
- int dest_band_height;
- int source_band_height;
-
- typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length,
- unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length);
-
- Scale1D Scale1Dv = scale1d_c;
- Scale1D Scale1Dh = scale1d_c;
-
- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL;
-
- int ratio_scalable = 1;
- int interpolation = 0;
-
- unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */
- unsigned char *line_src;
-
-
- source_base = (unsigned char *)source;
-
- if (source_pitch < 0)
- {
- int offset;
-
- offset = (source_height - 1);
- offset *= source_pitch;
-
- source_base += offset;
- }
-
- /* find out the ratio for each direction */
- switch (hratio * 10 / hscale)
- {
+ /*const*/
+ unsigned char *source,
+ int source_pitch,
+ unsigned int source_width,
+ unsigned int source_height,
+ unsigned char *dest,
+ int dest_pitch,
+ unsigned int dest_width,
+ unsigned int dest_height,
+ unsigned char *temp_area,
+ unsigned char temp_area_height,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced
+) {
+ /*unsigned*/
+ int i, j, k;
+ int bands;
+ int dest_band_height;
+ int source_band_height;
+
+ typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length,
+ unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length);
+
+ Scale1D Scale1Dv = scale1d_c;
+ Scale1D Scale1Dh = scale1d_c;
+
+ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
+ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL;
+
+ int ratio_scalable = 1;
+ int interpolation = 0;
+
+ unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */
+ unsigned char *line_src;
+
+
+ source_base = (unsigned char *)source;
+
+ if (source_pitch < 0) {
+ int offset;
+
+ offset = (source_height - 1);
+ offset *= source_pitch;
+
+ source_base += offset;
+ }
+
+ /* find out the ratio for each direction */
+ switch (hratio * 10 / hscale) {
case 8:
- /* 4-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_5_4_scale;
- break;
+ /* 4-5 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_5_4_scale;
+ break;
case 6:
- /* 3-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_5_3_scale;
- break;
+ /* 3-5 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_5_3_scale;
+ break;
case 5:
- /* 1-2 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_2_1_scale;
- break;
+ /* 1-2 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_2_1_scale;
+ break;
default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
+ /* The ratio is not acceptable now */
+ /* throw("The ratio is not acceptable for now!"); */
+ ratio_scalable = 0;
+ break;
+ }
- switch (vratio * 10 / vscale)
- {
+ switch (vratio * 10 / vscale) {
case 8:
- /* 4-5 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_5_4_scale;
- source_band_height = 5;
- dest_band_height = 4;
- break;
+ /* 4-5 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_5_4_scale;
+ source_band_height = 5;
+ dest_band_height = 4;
+ break;
case 6:
- /* 3-5 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_5_3_scale;
- source_band_height = 5;
- dest_band_height = 3;
- break;
+ /* 3-5 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_5_3_scale;
+ source_band_height = 5;
+ dest_band_height = 3;
+ break;
case 5:
- /* 1-2 Scale in vertical direction */
+ /* 1-2 Scale in vertical direction */
- if (interlaced)
- {
- /* if the content is interlaced, point sampling is used */
- vert_band_scale = vp8_vertical_band_2_1_scale;
- }
- else
- {
+ if (interlaced) {
+ /* if the content is interlaced, point sampling is used */
+ vert_band_scale = vp8_vertical_band_2_1_scale;
+ } else {
- interpolation = 1;
- /* if the content is progressive, interplo */
- vert_band_scale = vp8_vertical_band_2_1_scale_i;
+ interpolation = 1;
+        /* if the content is progressive, interpolation is used */
+ vert_band_scale = vp8_vertical_band_2_1_scale_i;
- }
+ }
- source_band_height = 2;
- dest_band_height = 1;
- break;
+ source_band_height = 2;
+ dest_band_height = 1;
+ break;
default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
+ /* The ratio is not acceptable now */
+ /* throw("The ratio is not acceptable for now!"); */
+ ratio_scalable = 0;
+ break;
+ }
+
+ if (ratio_scalable) {
+ if (source_height == dest_height) {
+ /* for each band of the image */
+ for (k = 0; k < (int)dest_height; k++) {
+ horiz_line_scale(source, source_width, dest, dest_width);
+ source += source_pitch;
+ dest += dest_pitch;
+ }
+
+ return;
}
- if (ratio_scalable)
- {
- if (source_height == dest_height)
- {
- /* for each band of the image */
- for (k = 0; k < (int)dest_height; k++)
- {
- horiz_line_scale(source, source_width, dest, dest_width);
- source += source_pitch;
- dest += dest_pitch;
- }
-
- return;
- }
-
- if (interpolation)
- {
- if (source < source_base)
- source = source_base;
-
- horiz_line_scale(source, source_width, temp_area, dest_width);
- }
-
- for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++)
- {
- /* scale one band horizontally */
- for (i = 0; i < source_band_height; i++)
- {
- /* Trap case where we could read off the base of the source buffer */
-
- line_src = (unsigned char *)source + i * source_pitch;
-
- if (line_src < source_base)
- line_src = source_base;
-
- horiz_line_scale(line_src, source_width,
- temp_area + (i + 1)*dest_pitch, dest_width);
- }
-
- /* Vertical scaling is in place */
- vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width);
-
- if (interpolation)
- vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width);
-
- /* Next band... */
- source += (unsigned long) source_band_height * source_pitch;
- dest += (unsigned long) dest_band_height * dest_pitch;
- }
-
- return;
+ if (interpolation) {
+ if (source < source_base)
+ source = source_base;
+
+ horiz_line_scale(source, source_width, temp_area, dest_width);
}
- if (hscale == 2 && hratio == 1)
- Scale1Dh = scale1d_2t1_ps;
+ for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) {
+ /* scale one band horizontally */
+ for (i = 0; i < source_band_height; i++) {
+ /* Trap case where we could read off the base of the source buffer */
- if (vscale == 2 && vratio == 1)
- {
- if (interlaced)
- Scale1Dv = scale1d_2t1_ps;
- else
- Scale1Dv = scale1d_2t1_i;
- }
+ line_src = (unsigned char *)source + i * source_pitch;
- if (source_height == dest_height)
- {
- /* for each band of the image */
- for (k = 0; k < (int)dest_height; k++)
- {
- Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width);
- source += source_pitch;
- dest += dest_pitch;
- }
-
- return;
- }
+ if (line_src < source_base)
+ line_src = source_base;
+
+ horiz_line_scale(line_src, source_width,
+ temp_area + (i + 1)*dest_pitch, dest_width);
+ }
+
+ /* Vertical scaling is in place */
+ vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width);
- if (dest_height > source_height)
- {
- dest_band_height = temp_area_height - 1;
- source_band_height = dest_band_height * source_height / dest_height;
+ if (interpolation)
+ vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width);
+
+ /* Next band... */
+ source += (unsigned long) source_band_height * source_pitch;
+ dest += (unsigned long) dest_band_height * dest_pitch;
}
+
+ return;
+ }
+
+ if (hscale == 2 && hratio == 1)
+ Scale1Dh = scale1d_2t1_ps;
+
+ if (vscale == 2 && vratio == 1) {
+ if (interlaced)
+ Scale1Dv = scale1d_2t1_ps;
else
- {
- source_band_height = temp_area_height - 1;
- dest_band_height = source_band_height * vratio / vscale;
+ Scale1Dv = scale1d_2t1_i;
+ }
+
+ if (source_height == dest_height) {
+ /* for each band of the image */
+ for (k = 0; k < (int)dest_height; k++) {
+ Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width);
+ source += source_pitch;
+ dest += dest_pitch;
}
- /* first row needs to be done so that we can stay one row ahead for vertical zoom */
- Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width);
+ return;
+ }
+
+ if (dest_height > source_height) {
+ dest_band_height = temp_area_height - 1;
+ source_band_height = dest_band_height * source_height / dest_height;
+ } else {
+ source_band_height = temp_area_height - 1;
+ dest_band_height = source_band_height * vratio / vscale;
+ }
- /* for each band of the image */
- bands = (dest_height + dest_band_height - 1) / dest_band_height;
-
- for (k = 0; k < bands; k++)
- {
- /* scale one band horizontally */
- for (i = 1; i < source_band_height + 1; i++)
- {
- if (k * source_band_height + i < (int) source_height)
- {
- Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1,
- temp_area + i * dest_pitch, 1, hratio, dest_width);
- }
- else /* Duplicate the last row */
- {
- /* copy temp_area row 0 over from last row in the past */
- duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch);
- }
- }
-
- /* scale one band vertically */
- for (j = 0; j < (int)dest_width; j++)
- {
- Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1,
- &dest[j], dest_pitch, vratio, dest_band_height);
- }
+ /* first row needs to be done so that we can stay one row ahead for vertical zoom */
+ Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width);
+ /* for each band of the image */
+ bands = (dest_height + dest_band_height - 1) / dest_band_height;
+
+ for (k = 0; k < bands; k++) {
+ /* scale one band horizontally */
+ for (i = 1; i < source_band_height + 1; i++) {
+ if (k * source_band_height + i < (int) source_height) {
+ Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1,
+ temp_area + i * dest_pitch, 1, hratio, dest_width);
+ } else { /* Duplicate the last row */
/* copy temp_area row 0 over from last row in the past */
- duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch);
+ duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch);
+ }
+ }
- /* move to the next band */
- source += source_band_height * source_pitch;
- dest += dest_band_height * dest_pitch;
+ /* scale one band vertically */
+ for (j = 0; j < (int)dest_width; j++) {
+ Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1,
+ &dest[j], dest_pitch, vratio, dest_band_height);
}
+
+ /* copy temp_area row 0 over from last row in the past */
+ duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch);
+
+ /* move to the next band */
+ source += source_band_height * source_pitch;
+ dest += dest_band_height * dest_pitch;
+ }
}
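
Scale2D picks its kernels from a truncated dispatch key, ratio * 10 / scale, so only a few ratios hit the specialized paths (8 for 4/5, 6 for 3/5, 5 for 1/2); everything else clears ratio_scalable and falls through to the generic scale1d machinery. A quick check of the key arithmetic:

#include <stdio.h>

int main(void) {
  const struct { unsigned int ratio, scale; } r[] =
      {{4, 5}, {3, 5}, {1, 2}, {2, 7}};
  int i;
  for (i = 0; i < 4; i++)
    printf("%u/%u -> key %u\n", r[i].ratio, r[i].scale,
           r[i].ratio * 10 / r[i].scale);   /* 8, 6, 5, 2 (unsupported) */
  return 0;
}
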
/****************************************************************************
@@ -572,57 +538,56 @@ void Scale2D
****************************************************************************/
void vp8_scale_frame
(
- YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst,
- unsigned char *temp_area,
- unsigned char temp_height,
- unsigned int hscale,
- unsigned int hratio,
- unsigned int vscale,
- unsigned int vratio,
- unsigned int interlaced
-)
-{
- int i;
- int dw = (hscale - 1 + src->y_width * hratio) / hscale;
- int dh = (vscale - 1 + src->y_height * vratio) / vscale;
-
- /* call our internal scaling routines!! */
- Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height,
- (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh,
- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
-
- if (dw < (int)dst->y_width)
- for (i = 0; i < dh; i++)
- duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1);
-
- if (dh < (int)dst->y_height)
- for (i = dh - 1; i < (int)dst->y_height; i++)
- duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1);
-
- Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height,
- (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2,
- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
-
- if (dw / 2 < (int)dst->uv_width)
- for (i = 0; i < dst->uv_height; i++)
- duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1);
-
- if (dh / 2 < (int)dst->uv_height)
- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++)
- duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width);
-
- Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height,
- (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2,
- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
-
- if (dw / 2 < (int)dst->uv_width)
- for (i = 0; i < dst->uv_height; i++)
- duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1);
-
- if (dh / 2 < (int) dst->uv_height)
- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++)
- duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width);
+ YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst,
+ unsigned char *temp_area,
+ unsigned char temp_height,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced
+) {
+ int i;
+ int dw = (hscale - 1 + src->y_width * hratio) / hscale;
+ int dh = (vscale - 1 + src->y_height * vratio) / vscale;
+
+ /* call our internal scaling routines!! */
+ Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height,
+ (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh,
+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
+
+ if (dw < (int)dst->y_width)
+ for (i = 0; i < dh; i++)
+ duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1);
+
+ if (dh < (int)dst->y_height)
+ for (i = dh - 1; i < (int)dst->y_height; i++)
+ duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1);
+
+ Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height,
+ (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2,
+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
+
+ if (dw / 2 < (int)dst->uv_width)
+ for (i = 0; i < dst->uv_height; i++)
+ duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1);
+
+ if (dh / 2 < (int)dst->uv_height)
+ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++)
+ duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width);
+
+ Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height,
+ (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2,
+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
+
+ if (dw / 2 < (int)dst->uv_width)
+ for (i = 0; i < dst->uv_height; i++)
+ duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1);
+
+ if (dh / 2 < (int) dst->uv_height)
+ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++)
+ duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width);
}
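
The destination dimensions here are rounded-up integer ratios: dw = (hscale - 1 + src_width * hratio) / hscale is ceil(src_width * hratio / hscale). A standalone check for a 352x288 frame scaled by 4/5 in each direction:

#include <stdio.h>

int main(void) {
  unsigned int w = 352, h = 288;
  unsigned int hscale = 5, hratio = 4, vscale = 5, vratio = 4;
  unsigned int dw = (hscale - 1 + w * hratio) / hscale;  /* 282 */
  unsigned int dh = (vscale - 1 + h * vratio) / vscale;  /* 231 */
  printf("%ux%u -> %ux%u\n", w, h, dw, dh);
  return 0;
}
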
/****************************************************************************
*
@@ -651,183 +616,177 @@ void vp8_scale_frame
static
int any_ratio_2d_scale
(
- SCALE_VARS *si,
- const unsigned char *source,
- int source_pitch,
- unsigned int source_width,
- unsigned int source_height,
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width,
- unsigned int dest_height
-)
-{
- unsigned int i, k;
- unsigned int src_band_height = 0;
- unsigned int dest_band_height = 0;
-
- /* suggested scale factors */
- int hs = si->HScale;
- int hr = si->HRatio;
- int vs = si->VScale;
- int vr = si->VRatio;
-
- /* assume the ratios are scalable instead of should be centered */
- int ratio_scalable = 1;
-
- const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch)));
- const unsigned char *line_src;
-
- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL;
- void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL;
-
- (void) si;
-
- /* find out the ratio for each direction */
- switch (hr * 30 / hs)
- {
+ SCALE_VARS *si,
+ const unsigned char *source,
+ int source_pitch,
+ unsigned int source_width,
+ unsigned int source_height,
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width,
+ unsigned int dest_height
+) {
+ unsigned int i, k;
+ unsigned int src_band_height = 0;
+ unsigned int dest_band_height = 0;
+
+ /* suggested scale factors */
+ int hs = si->HScale;
+ int hr = si->HRatio;
+ int vs = si->VScale;
+ int vr = si->VRatio;
+
+ /* assume the ratios are scalable instead of should be centered */
+ int ratio_scalable = 1;
+
+ const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch)));
+ const unsigned char *line_src;
+
+ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
+ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL;
+ void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL;
+
+ (void) si;
+
+ /* find out the ratio for each direction */
+ switch (hr * 30 / hs) {
case 24:
- /* 4-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_4_5_scale;
- break;
+ /* 4-5 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_4_5_scale;
+ break;
case 22:
- /* 3-4 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_3_4_scale;
- break;
+ /* 3-4 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_3_4_scale;
+ break;
case 20:
- /* 4-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_2_3_scale;
- break;
+      /* 2-3 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_2_3_scale;
+ break;
case 18:
- /* 3-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_3_5_scale;
- break;
+ /* 3-5 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_3_5_scale;
+ break;
case 15:
- /* 1-2 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_1_2_scale;
- break;
+ /* 1-2 Scale in Width direction */
+ horiz_line_scale = vp8_horizontal_line_1_2_scale;
+ break;
case 30:
- /* no scale in Width direction */
- horiz_line_scale = horizontal_line_copy;
- break;
+ /* no scale in Width direction */
+ horiz_line_scale = horizontal_line_copy;
+ break;
default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
+ /* The ratio is not acceptable now */
+ /* throw("The ratio is not acceptable for now!"); */
+ ratio_scalable = 0;
+ break;
+ }
- switch (vr * 30 / vs)
- {
+ switch (vr * 30 / vs) {
case 24:
- /* 4-5 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_4_5_scale;
- last_vert_band_scale = vp8_last_vertical_band_4_5_scale;
- src_band_height = 4;
- dest_band_height = 5;
- break;
+ /* 4-5 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_4_5_scale;
+ last_vert_band_scale = vp8_last_vertical_band_4_5_scale;
+ src_band_height = 4;
+ dest_band_height = 5;
+ break;
case 22:
- /* 3-4 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_3_4_scale;
- last_vert_band_scale = vp8_last_vertical_band_3_4_scale;
- src_band_height = 3;
- dest_band_height = 4;
- break;
+ /* 3-4 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_3_4_scale;
+ last_vert_band_scale = vp8_last_vertical_band_3_4_scale;
+ src_band_height = 3;
+ dest_band_height = 4;
+ break;
case 20:
- /* 2-3 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_2_3_scale;
- last_vert_band_scale = vp8_last_vertical_band_2_3_scale;
- src_band_height = 2;
- dest_band_height = 3;
- break;
+ /* 2-3 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_2_3_scale;
+ last_vert_band_scale = vp8_last_vertical_band_2_3_scale;
+ src_band_height = 2;
+ dest_band_height = 3;
+ break;
case 18:
- /* 3-5 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_3_5_scale;
- last_vert_band_scale = vp8_last_vertical_band_3_5_scale;
- src_band_height = 3;
- dest_band_height = 5;
- break;
+ /* 3-5 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_3_5_scale;
+ last_vert_band_scale = vp8_last_vertical_band_3_5_scale;
+ src_band_height = 3;
+ dest_band_height = 5;
+ break;
case 15:
- /* 1-2 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_1_2_scale;
- last_vert_band_scale = vp8_last_vertical_band_1_2_scale;
- src_band_height = 1;
- dest_band_height = 2;
- break;
+ /* 1-2 Scale in vertical direction */
+ vert_band_scale = vp8_vertical_band_1_2_scale;
+ last_vert_band_scale = vp8_last_vertical_band_1_2_scale;
+ src_band_height = 1;
+ dest_band_height = 2;
+ break;
case 30:
- /* no scale in Width direction */
- vert_band_scale = null_scale;
- last_vert_band_scale = null_scale;
- src_band_height = 4;
- dest_band_height = 4;
- break;
+      /* no scale in vertical direction */
+ vert_band_scale = null_scale;
+ last_vert_band_scale = null_scale;
+ src_band_height = 4;
+ dest_band_height = 4;
+ break;
default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
+ /* The ratio is not acceptable now */
+ /* throw("The ratio is not acceptable for now!"); */
+ ratio_scalable = 0;
+ break;
+ }
- if (ratio_scalable == 0)
- return ratio_scalable;
+ if (ratio_scalable == 0)
+ return ratio_scalable;
- horiz_line_scale(source, source_width, dest, dest_width);
+ horiz_line_scale(source, source_width, dest, dest_width);
- /* except last band */
- for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++)
- {
- /* scale one band horizontally */
- for (i = 1; i < src_band_height; i++)
- {
- /* Trap case where we could read off the base of the source buffer */
- line_src = source + i * source_pitch;
+ /* except last band */
+ for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) {
+ /* scale one band horizontally */
+ for (i = 1; i < src_band_height; i++) {
+ /* Trap case where we could read off the base of the source buffer */
+ line_src = source + i * source_pitch;
- if (line_src < source_base)
- line_src = source_base;
+ if (line_src < source_base)
+ line_src = source_base;
- horiz_line_scale(line_src, source_width,
- dest + i * dest_pitch, dest_width);
- }
+ horiz_line_scale(line_src, source_width,
+ dest + i * dest_pitch, dest_width);
+ }
- /* first line of next band */
- /* Trap case where we could read off the base of the source buffer */
- line_src = source + src_band_height * source_pitch;
+ /* first line of next band */
+ /* Trap case where we could read off the base of the source buffer */
+ line_src = source + src_band_height * source_pitch;
- if (line_src < source_base)
- line_src = source_base;
+ if (line_src < source_base)
+ line_src = source_base;
- horiz_line_scale(line_src, source_width,
- dest + dest_band_height * dest_pitch,
- dest_width);
+ horiz_line_scale(line_src, source_width,
+ dest + dest_band_height * dest_pitch,
+ dest_width);
- /* Vertical scaling is in place */
- vert_band_scale(dest, dest_pitch, dest_width);
+ /* Vertical scaling is in place */
+ vert_band_scale(dest, dest_pitch, dest_width);
- /* Next band... */
- source += src_band_height * source_pitch;
- dest += dest_band_height * dest_pitch;
- }
+ /* Next band... */
+ source += src_band_height * source_pitch;
+ dest += dest_band_height * dest_pitch;
+ }
- /* scale one band horizontally */
- for (i = 1; i < src_band_height; i++)
- {
- /* Trap case where we could read off the base of the source buffer */
- line_src = source + i * source_pitch;
+ /* scale one band horizontally */
+ for (i = 1; i < src_band_height; i++) {
+ /* Trap case where we could read off the base of the source buffer */
+ line_src = source + i * source_pitch;
- if (line_src < source_base)
- line_src = source_base;
+ if (line_src < source_base)
+ line_src = source_base;
- horiz_line_scale(line_src, source_width,
- dest + i * dest_pitch,
- dest_width);
- }
+ horiz_line_scale(line_src, source_width,
+ dest + i * dest_pitch,
+ dest_width);
+ }
- /* Vertical scaling is in place */
- last_vert_band_scale(dest, dest_pitch, dest_width);
+ /* Vertical scaling is in place */
+ last_vert_band_scale(dest, dest_pitch, dest_width);
- return ratio_scalable;
+ return ratio_scalable;
}
/****************************************************************************
@@ -849,70 +808,69 @@ int any_ratio_2d_scale
*
****************************************************************************/
static
-int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset)
-{
- int i;
- int ew;
- int eh;
-
- /* suggested scale factors */
- int hs = scale_vars->HScale;
- int hr = scale_vars->HRatio;
- int vs = scale_vars->VScale;
- int vr = scale_vars->VRatio;
-
- int ratio_scalable = 1;
-
- int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs;
- int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs;
- int dw = scale_vars->expanded_frame_width;
- int dh = scale_vars->expanded_frame_height;
- YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config;
- YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config;
-
- if (hr == 3)
- ew = (sw + 2) / 3 * 3 * hs / hr;
- else
- ew = (sw + 7) / 8 * 8 * hs / hr;
-
- if (vr == 3)
- eh = (sh + 2) / 3 * 3 * vs / vr;
- else
- eh = (sh + 7) / 8 * 8 * vs / vr;
-
- ratio_scalable = any_ratio_2d_scale(scale_vars,
- (const unsigned char *)src_yuv_config->y_buffer,
- src_yuv_config->y_stride, sw, sh,
- (unsigned char *) dst_yuv_config->y_buffer + YOffset,
- dst_yuv_config->y_stride, dw, dh);
-
- for (i = 0; i < eh; i++)
- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw);
-
- for (i = dh; i < eh; i++)
- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew);
-
- if (ratio_scalable == 0)
- return ratio_scalable;
+int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) {
+ int i;
+ int ew;
+ int eh;
+
+ /* suggested scale factors */
+ int hs = scale_vars->HScale;
+ int hr = scale_vars->HRatio;
+ int vs = scale_vars->VScale;
+ int vr = scale_vars->VRatio;
+
+ int ratio_scalable = 1;
+
+ int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs;
+ int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs;
+ int dw = scale_vars->expanded_frame_width;
+ int dh = scale_vars->expanded_frame_height;
+ YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config;
+ YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config;
+
+ if (hr == 3)
+ ew = (sw + 2) / 3 * 3 * hs / hr;
+ else
+ ew = (sw + 7) / 8 * 8 * hs / hr;
+
+ if (vr == 3)
+ eh = (sh + 2) / 3 * 3 * vs / vr;
+ else
+ eh = (sh + 7) / 8 * 8 * vs / vr;
+
+ ratio_scalable = any_ratio_2d_scale(scale_vars,
+ (const unsigned char *)src_yuv_config->y_buffer,
+ src_yuv_config->y_stride, sw, sh,
+ (unsigned char *) dst_yuv_config->y_buffer + YOffset,
+ dst_yuv_config->y_stride, dw, dh);
+
+ for (i = 0; i < eh; i++)
+ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw);
+
+ for (i = dh; i < eh; i++)
+ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew);
+
+ if (ratio_scalable == 0)
+ return ratio_scalable;
- sw = (sw + 1) >> 1;
- sh = (sh + 1) >> 1;
- dw = (dw + 1) >> 1;
- dh = (dh + 1) >> 1;
+ sw = (sw + 1) >> 1;
+ sh = (sh + 1) >> 1;
+ dw = (dw + 1) >> 1;
+ dh = (dh + 1) >> 1;
- any_ratio_2d_scale(scale_vars,
- (const unsigned char *)src_yuv_config->u_buffer,
- src_yuv_config->y_stride / 2, sw, sh,
- (unsigned char *)dst_yuv_config->u_buffer + UVOffset,
- dst_yuv_config->uv_stride, dw, dh);
+ any_ratio_2d_scale(scale_vars,
+ (const unsigned char *)src_yuv_config->u_buffer,
+ src_yuv_config->y_stride / 2, sw, sh,
+ (unsigned char *)dst_yuv_config->u_buffer + UVOffset,
+ dst_yuv_config->uv_stride, dw, dh);
- any_ratio_2d_scale(scale_vars,
- (const unsigned char *)src_yuv_config->v_buffer,
- src_yuv_config->y_stride / 2, sw, sh,
- (unsigned char *)dst_yuv_config->v_buffer + UVOffset,
- dst_yuv_config->uv_stride, dw, dh);
+ any_ratio_2d_scale(scale_vars,
+ (const unsigned char *)src_yuv_config->v_buffer,
+ src_yuv_config->y_stride / 2, sw, sh,
+ (unsigned char *)dst_yuv_config->v_buffer + UVOffset,
+ dst_yuv_config->uv_stride, dw, dh);
- return ratio_scalable;
+ return ratio_scalable;
}
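
The ew/eh values pad the scaled size up to a multiple of 8 source pixels (3 when the ratio is 3) mapped back through the scale factor, and the duck_memset passes then zero those padding columns and rows. Tracing the width arithmetic for hs:hr = 5:4 and an expanded frame width of 176:

#include <stdio.h>

int main(void) {
  int hs = 5, hr = 4;
  int dw = 176;                          /* expanded_frame_width */
  int sw = (dw * hr + hs - 1) / hs;      /* source width: 141 */
  int ew = (sw + 7) / 8 * 8 * hs / hr;   /* padded width: 144 * 5/4 = 180 */
  printf("sw=%d ew=%d pad=%d\n", sw, ew, ew - dw);
  return 0;
}
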
/****************************************************************************
@@ -931,52 +889,48 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset)
*
****************************************************************************/
static void
-center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config)
-{
- int i;
- int row_offset, col_offset;
- unsigned char *src_data_pointer;
- unsigned char *dst_data_pointer;
-
- /* center values */
- row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2;
- col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2;
-
- /* Y's */
- src_data_pointer = src_yuv_config->y_buffer;
- dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset;
-
- for (i = 0; i < src_yuv_config->y_height; i++)
- {
- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width);
- dst_data_pointer += dst_yuv_config->y_stride;
- src_data_pointer += src_yuv_config->y_stride;
- }
-
- row_offset /= 2;
- col_offset /= 2;
-
- /* U's */
- src_data_pointer = src_yuv_config->u_buffer;
- dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
-
- for (i = 0; i < src_yuv_config->uv_height; i++)
- {
- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width);
- dst_data_pointer += dst_yuv_config->uv_stride;
- src_data_pointer += src_yuv_config->uv_stride;
- }
-
- /* V's */
- src_data_pointer = src_yuv_config->v_buffer;
- dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
-
- for (i = 0; i < src_yuv_config->uv_height; i++)
- {
- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width);
- dst_data_pointer += dst_yuv_config->uv_stride;
- src_data_pointer += src_yuv_config->uv_stride;
- }
+center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) {
+ int i;
+ int row_offset, col_offset;
+ unsigned char *src_data_pointer;
+ unsigned char *dst_data_pointer;
+
+ /* center values */
+ row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2;
+ col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2;
+
+ /* Y's */
+ src_data_pointer = src_yuv_config->y_buffer;
+ dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset;
+
+ for (i = 0; i < src_yuv_config->y_height; i++) {
+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width);
+ dst_data_pointer += dst_yuv_config->y_stride;
+ src_data_pointer += src_yuv_config->y_stride;
+ }
+
+ row_offset /= 2;
+ col_offset /= 2;
+
+ /* U's */
+ src_data_pointer = src_yuv_config->u_buffer;
+ dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
+
+ for (i = 0; i < src_yuv_config->uv_height; i++) {
+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width);
+ dst_data_pointer += dst_yuv_config->uv_stride;
+ src_data_pointer += src_yuv_config->uv_stride;
+ }
+
+ /* V's */
+ src_data_pointer = src_yuv_config->v_buffer;
+ dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
+
+ for (i = 0; i < src_yuv_config->uv_height; i++) {
+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width);
+ dst_data_pointer += dst_yuv_config->uv_stride;
+ src_data_pointer += src_yuv_config->uv_stride;
+ }
}
/****************************************************************************
@@ -999,61 +953,58 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
void
vp8_yv12_scale_or_center
(
- YV12_BUFFER_CONFIG *src_yuv_config,
- YV12_BUFFER_CONFIG *dst_yuv_config,
- int expanded_frame_width,
- int expanded_frame_height,
- int scaling_mode,
- int HScale,
- int HRatio,
- int VScale,
- int VRatio
-)
-{
- /*if ( ppi->post_processing_level )
- update_umvborder ( ppi, frame_buffer );*/
-
-
- switch (scaling_mode)
- {
+ YV12_BUFFER_CONFIG *src_yuv_config,
+ YV12_BUFFER_CONFIG *dst_yuv_config,
+ int expanded_frame_width,
+ int expanded_frame_height,
+ int scaling_mode,
+ int HScale,
+ int HRatio,
+ int VScale,
+ int VRatio
+) {
+ /*if ( ppi->post_processing_level )
+ update_umvborder ( ppi, frame_buffer );*/
+
+
+ switch (scaling_mode) {
case SCALE_TO_FIT:
- case MAINTAIN_ASPECT_RATIO:
- {
- SCALE_VARS scale_vars;
- /* center values */
+ case MAINTAIN_ASPECT_RATIO: {
+ SCALE_VARS scale_vars;
+ /* center values */
#if 1
- int row = (dst_yuv_config->y_height - expanded_frame_height) / 2;
- int col = (dst_yuv_config->y_width - expanded_frame_width) / 2;
- /*int YOffset = row * dst_yuv_config->y_width + col;
- int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/
- int YOffset = row * dst_yuv_config->y_stride + col;
- int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1);
+ int row = (dst_yuv_config->y_height - expanded_frame_height) / 2;
+ int col = (dst_yuv_config->y_width - expanded_frame_width) / 2;
+ /*int YOffset = row * dst_yuv_config->y_width + col;
+ int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/
+ int YOffset = row * dst_yuv_config->y_stride + col;
+ int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1);
#else
- int row = (src_yuv_config->y_height - expanded_frame_height) / 2;
- int col = (src_yuv_config->y_width - expanded_frame_width) / 2;
- int YOffset = row * src_yuv_config->y_width + col;
- int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1);
+ int row = (src_yuv_config->y_height - expanded_frame_height) / 2;
+ int col = (src_yuv_config->y_width - expanded_frame_width) / 2;
+ int YOffset = row * src_yuv_config->y_width + col;
+ int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1);
#endif
- scale_vars.dst_yuv_config = dst_yuv_config;
- scale_vars.src_yuv_config = src_yuv_config;
- scale_vars.HScale = HScale;
- scale_vars.HRatio = HRatio;
- scale_vars.VScale = VScale;
- scale_vars.VRatio = VRatio;
- scale_vars.expanded_frame_width = expanded_frame_width;
- scale_vars.expanded_frame_height = expanded_frame_height;
+ scale_vars.dst_yuv_config = dst_yuv_config;
+ scale_vars.src_yuv_config = src_yuv_config;
+ scale_vars.HScale = HScale;
+ scale_vars.HRatio = HRatio;
+ scale_vars.VScale = VScale;
+ scale_vars.VRatio = VRatio;
+ scale_vars.expanded_frame_width = expanded_frame_width;
+ scale_vars.expanded_frame_height = expanded_frame_height;
- /* perform center and scale */
- any_ratio_frame_scale(&scale_vars, YOffset, UVOffset);
+ /* perform center and scale */
+ any_ratio_frame_scale(&scale_vars, YOffset, UVOffset);
- break;
+ break;
}
case CENTER:
- center_image(src_yuv_config, dst_yuv_config);
- break;
+ center_image(src_yuv_config, dst_yuv_config);
+ break;
default:
- break;
- }
+ break;
+ }
}
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index eff594e..4cb2a41 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -20,81 +20,73 @@
*
****************************************************************************/
int
-vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
-{
- if (ybf)
- {
- vpx_free(ybf->buffer_alloc);
-
- /* buffer_alloc isn't accessed by most functions. Rather y_buffer,
- u_buffer and v_buffer point to buffer_alloc and are used. Clear out
- all of this so that a freed pointer isn't inadvertently used */
- vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG));
- }
- else
- {
- return -1;
- }
-
- return 0;
+vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
+ if (ybf) {
+ vpx_free(ybf->buffer_alloc);
+
+ /* buffer_alloc isn't accessed by most functions. Rather y_buffer,
+ u_buffer and v_buffer point to buffer_alloc and are used. Clear out
+ all of this so that a freed pointer isn't inadvertently used */
+ vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG));
+ } else {
+ return -1;
+ }
+
+ return 0;
}
/****************************************************************************
*
****************************************************************************/
int
-vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border)
-{
-/*NOTE:*/
-
- if (ybf)
- {
- int y_stride = ((width + 2 * border) + 31) & ~31;
- int yplane_size = (height + 2 * border) * y_stride;
- int uv_width = width >> 1;
- int uv_height = height >> 1;
- /** There is currently a bunch of code which assumes
- * uv_stride == y_stride/2, so enforce this here. */
- int uv_stride = y_stride >> 1;
- int uvplane_size = (uv_height + border) * uv_stride;
-
- vp8_yv12_de_alloc_frame_buffer(ybf);
-
- /** Only support allocating buffers that have a height and width that
- * are multiples of 16, and a border that's a multiple of 32.
- * The border restriction is required to get 16-byte alignment of the
- * start of the chroma rows without intoducing an arbitrary gap
- * between planes, which would break the semantics of things like
- * vpx_img_set_rect(). */
- if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
- return -3;
-
- ybf->y_width = width;
- ybf->y_height = height;
- ybf->y_stride = y_stride;
-
- ybf->uv_width = uv_width;
- ybf->uv_height = uv_height;
- ybf->uv_stride = uv_stride;
-
- ybf->border = border;
- ybf->frame_size = yplane_size + 2 * uvplane_size;
-
- ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
-
- if (ybf->buffer_alloc == NULL)
- return -1;
-
- ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
- ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2;
- ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2;
-
- ybf->corrupted = 0; /* assume not currupted by errors */
- }
- else
- {
- return -2;
- }
-
- return 0;
+vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) {
+ /*NOTE:*/
+
+ if (ybf) {
+ int y_stride = ((width + 2 * border) + 31) & ~31;
+ int yplane_size = (height + 2 * border) * y_stride;
+ int uv_width = width >> 1;
+ int uv_height = height >> 1;
+ /** There is currently a bunch of code which assumes
+ * uv_stride == y_stride/2, so enforce this here. */
+ int uv_stride = y_stride >> 1;
+ int uvplane_size = (uv_height + border) * uv_stride;
+
+ vp8_yv12_de_alloc_frame_buffer(ybf);
+
+ /** Only support allocating buffers that have a height and width that
+ * are multiples of 16, and a border that's a multiple of 32.
+ * The border restriction is required to get 16-byte alignment of the
+     * start of the chroma rows without introducing an arbitrary gap
+ * between planes, which would break the semantics of things like
+ * vpx_img_set_rect(). */
+ if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
+ return -3;
+
+ ybf->y_width = width;
+ ybf->y_height = height;
+ ybf->y_stride = y_stride;
+
+ ybf->uv_width = uv_width;
+ ybf->uv_height = uv_height;
+ ybf->uv_stride = uv_stride;
+
+ ybf->border = border;
+ ybf->frame_size = yplane_size + 2 * uvplane_size;
+
+ ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
+
+ if (ybf->buffer_alloc == NULL)
+ return -1;
+
+ ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
+ ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2;
+ ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2;
+
+    ybf->corrupted = 0; /* assume not corrupted by errors */
+ } else {
+ return -2;
+ }
+
+ return 0;
}
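
For orientation: the allocator above packs all three planes into a single 32-byte-aligned block, rounding the luma stride up to a multiple of 32 and fixing the chroma stride at exactly half of it. The standalone sketch below (illustrative only; print_yv12_layout is not a library function) reproduces that arithmetic so a layout can be checked by hand.

#include <stdio.h>

/* Recompute the layout chosen by vp8_yv12_alloc_frame_buffer() for a
   given frame size, mirroring the expressions in the function above. */
static void print_yv12_layout(int width, int height, int border) {
  int y_stride = ((width + 2 * border) + 31) & ~31;  /* pad to 32 */
  int yplane_size = (height + 2 * border) * y_stride;
  int uv_stride = y_stride >> 1;                     /* enforced: y_stride/2 */
  int uvplane_size = ((height >> 1) + border) * uv_stride;

  printf("y_stride=%d yplane=%d uv_stride=%d uvplane=%d frame_size=%d\n",
         y_stride, yplane_size, uv_stride, uvplane_size,
         yplane_size + 2 * uvplane_size);
}

int main(void) {
  print_yv12_layout(640, 480, 32);  /* e.g. VGA with a 32-pel border */
  return 0;
}

For 640x480 with a 32-pixel border this prints y_stride=704 and frame_size=574464, i.e. a 704x544 luma plane plus two 352x272 chroma planes.
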
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 638633b..247078c 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -21,184 +21,174 @@
*
****************************************************************************/
void
-vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf)
-{
- int i;
- unsigned char *src_ptr1, *src_ptr2;
- unsigned char *dest_ptr1, *dest_ptr2;
-
- unsigned int Border;
- int plane_stride;
- int plane_height;
- int plane_width;
-
- /***********/
- /* Y Plane */
- /***********/
- Border = ybf->border;
- plane_stride = ybf->y_stride;
- plane_height = ybf->y_height;
- plane_width = ybf->y_width;
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->y_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->y_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)Border; i++)
- {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
-
- /***********/
- /* U Plane */
- /***********/
- plane_stride = ybf->uv_stride;
- plane_height = ybf->uv_height;
- plane_width = ybf->uv_width;
- Border /= 2;
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->u_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->u_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)(Border); i++)
- {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /***********/
- /* V Plane */
- /***********/
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->v_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->v_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)(Border); i++)
- {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
+vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
+ int i;
+ unsigned char *src_ptr1, *src_ptr2;
+ unsigned char *dest_ptr1, *dest_ptr2;
+
+ unsigned int Border;
+ int plane_stride;
+ int plane_height;
+ int plane_width;
+
+ /***********/
+ /* Y Plane */
+ /***********/
+ Border = ybf->border;
+ plane_stride = ybf->y_stride;
+ plane_height = ybf->y_height;
+ plane_width = ybf->y_width;
+
+ /* copy the left and right most columns out */
+ src_ptr1 = ybf->y_buffer;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++) {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /* Now copy the top and bottom source lines into each line of the respective borders */
+ src_ptr1 = ybf->y_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)Border; i++) {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+
+ /***********/
+ /* U Plane */
+ /***********/
+ plane_stride = ybf->uv_stride;
+ plane_height = ybf->uv_height;
+ plane_width = ybf->uv_width;
+ Border /= 2;
+
+ /* copy the left and right most columns out */
+ src_ptr1 = ybf->u_buffer;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++) {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /* Now copy the top and bottom source lines into each line of the respective borders */
+ src_ptr1 = ybf->u_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)(Border); i++) {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /***********/
+ /* V Plane */
+ /***********/
+
+ /* copy the left and right most columns out */
+ src_ptr1 = ybf->v_buffer;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++) {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /* Now copy the top and bottom source lines into each line of the respective borders */
+ src_ptr1 = ybf->v_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)(Border); i++) {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
}
static void
-extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
-{
- int i;
- unsigned char *src_ptr1, *src_ptr2;
- unsigned char *dest_ptr1, *dest_ptr2;
-
- unsigned int Border;
- int plane_stride;
- int plane_height;
- int plane_width;
-
- /***********/
- /* Y Plane */
- /***********/
- Border = ybf->border;
- plane_stride = ybf->y_stride;
- plane_height = ybf->y_height;
- plane_width = ybf->y_width;
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->y_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->y_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)Border; i++)
- {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- plane_stride /= 2;
- plane_height /= 2;
- plane_width /= 2;
- Border /= 2;
+extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) {
+ int i;
+ unsigned char *src_ptr1, *src_ptr2;
+ unsigned char *dest_ptr1, *dest_ptr2;
+
+ unsigned int Border;
+ int plane_stride;
+ int plane_height;
+ int plane_width;
+
+ /***********/
+ /* Y Plane */
+ /***********/
+ Border = ybf->border;
+ plane_stride = ybf->y_stride;
+ plane_height = ybf->y_height;
+ plane_width = ybf->y_width;
+
+ /* copy the left and right most columns out */
+ src_ptr1 = ybf->y_buffer;
+ src_ptr2 = src_ptr1 + plane_width - 1;
+ dest_ptr1 = src_ptr1 - Border;
+ dest_ptr2 = src_ptr2 + 1;
+
+ for (i = 0; i < plane_height; i++) {
+ vpx_memset(dest_ptr1, src_ptr1[0], Border);
+ vpx_memset(dest_ptr2, src_ptr2[0], Border);
+ src_ptr1 += plane_stride;
+ src_ptr2 += plane_stride;
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ /* Now copy the top and bottom source lines into each line of the respective borders */
+ src_ptr1 = ybf->y_buffer - Border;
+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+ dest_ptr1 = src_ptr1 - (Border * plane_stride);
+ dest_ptr2 = src_ptr2 + plane_stride;
+
+ for (i = 0; i < (int)Border; i++) {
+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+ dest_ptr1 += plane_stride;
+ dest_ptr2 += plane_stride;
+ }
+
+ plane_stride /= 2;
+ plane_height /= 2;
+ plane_width /= 2;
+ Border /= 2;
}
@@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
*
****************************************************************************/
void
-vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
-{
- int row;
- unsigned char *source, *dest;
-
- source = src_ybc->y_buffer;
- dest = dst_ybc->y_buffer;
-
- for (row = 0; row < src_ybc->y_height; row++)
- {
- vpx_memcpy(dest, source, src_ybc->y_width);
- source += src_ybc->y_stride;
- dest += dst_ybc->y_stride;
- }
-
- source = src_ybc->u_buffer;
- dest = dst_ybc->u_buffer;
-
- for (row = 0; row < src_ybc->uv_height; row++)
- {
- vpx_memcpy(dest, source, src_ybc->uv_width);
- source += src_ybc->uv_stride;
- dest += dst_ybc->uv_stride;
- }
-
- source = src_ybc->v_buffer;
- dest = dst_ybc->v_buffer;
-
- for (row = 0; row < src_ybc->uv_height; row++)
- {
- vpx_memcpy(dest, source, src_ybc->uv_width);
- source += src_ybc->uv_stride;
- dest += dst_ybc->uv_stride;
- }
-
- vp8_yv12_extend_frame_borders_c(dst_ybc);
+vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc) {
+ int row;
+ unsigned char *source, *dest;
+
+ source = src_ybc->y_buffer;
+ dest = dst_ybc->y_buffer;
+
+ for (row = 0; row < src_ybc->y_height; row++) {
+ vpx_memcpy(dest, source, src_ybc->y_width);
+ source += src_ybc->y_stride;
+ dest += dst_ybc->y_stride;
+ }
+
+ source = src_ybc->u_buffer;
+ dest = dst_ybc->u_buffer;
+
+ for (row = 0; row < src_ybc->uv_height; row++) {
+ vpx_memcpy(dest, source, src_ybc->uv_width);
+ source += src_ybc->uv_stride;
+ dest += dst_ybc->uv_stride;
+ }
+
+ source = src_ybc->v_buffer;
+ dest = dst_ybc->v_buffer;
+
+ for (row = 0; row < src_ybc->uv_height; row++) {
+ vpx_memcpy(dest, source, src_ybc->uv_width);
+ source += src_ybc->uv_stride;
+ dest += dst_ybc->uv_stride;
+ }
+
+ vp8_yv12_extend_frame_borders_c(dst_ybc);
}
-void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
-{
- int row;
- unsigned char *source, *dest;
+void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc) {
+ int row;
+ unsigned char *source, *dest;
- source = src_ybc->y_buffer;
- dest = dst_ybc->y_buffer;
+ source = src_ybc->y_buffer;
+ dest = dst_ybc->y_buffer;
- for (row = 0; row < src_ybc->y_height; row++)
- {
- vpx_memcpy(dest, source, src_ybc->y_width);
- source += src_ybc->y_stride;
- dest += dst_ybc->y_stride;
- }
+ for (row = 0; row < src_ybc->y_height; row++) {
+ vpx_memcpy(dest, source, src_ybc->y_width);
+ source += src_ybc->y_stride;
+ dest += dst_ybc->y_stride;
+ }
}
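
Taken together, the routines above and the yv12config.c functions earlier in this diff form the basic frame-buffer lifecycle: allocate, fill, copy (which also re-extends the destination's borders), free. A hedged usage sketch follows; copy_frame_example is hypothetical, and it assumes the declarations for these functions are in scope via vpx_scale/yv12config.h.

#include "vpx_scale/yv12config.h"

int copy_frame_example(void) {
  YV12_BUFFER_CONFIG src = {0}, dst = {0};
  int ret = -1;

  /* Width/height must be multiples of 16 and the border a multiple of
     32, per the checks in vp8_yv12_alloc_frame_buffer(). */
  if (vp8_yv12_alloc_frame_buffer(&src, 320, 240, 32) == 0 &&
      vp8_yv12_alloc_frame_buffer(&dst, 320, 240, 32) == 0) {
    /* ... write pixels through src.y_buffer / src.u_buffer / src.v_buffer ... */
    vp8_yv12_copy_frame_c(&src, &dst);  /* copies planes, extends borders */
    ret = 0;
  }

  /* Safe even if an allocation failed: the de-alloc tolerates a zeroed
     config and clears it again after freeing. */
  vp8_yv12_de_alloc_frame_buffer(&src);
  vp8_yv12_de_alloc_frame_buffer(&dst);
  return ret;
}
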
diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h
index 39de181..c535252 100644
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h
@@ -14,33 +14,32 @@
#include "vpx_scale/yv12config.h"
-typedef struct
-{
- int in_width;
- int in_height;
-
- int out_width;
- int out_height;
- int max_usable_out_width;
-
- // numerator for the width and height
- int nw;
- int nh;
- int nh_uv;
-
- // output to input correspondance array
- short *l_w;
- short *l_h;
- short *l_h_uv;
-
- // polyphase coefficients
- short *c_w;
- short *c_h;
- short *c_h_uv;
-
- // buffer for horizontal filtering.
- unsigned char *hbuf;
- unsigned char *hbuf_uv;
+typedef struct {
+ int in_width;
+ int in_height;
+
+ int out_width;
+ int out_height;
+ int max_usable_out_width;
+
+ // numerator for the width and height
+ int nw;
+ int nh;
+ int nh_uv;
+
+  // output to input correspondence array
+ short *l_w;
+ short *l_h;
+ short *l_h_uv;
+
+ // polyphase coefficients
+ short *c_w;
+ short *c_h;
+ short *c_h_uv;
+
+ // buffer for horizontal filtering.
+ unsigned char *hbuf;
+ unsigned char *hbuf_uv;
} BICUBIC_SCALER_STRUCT;
int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height);
diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h
index 1476e64..5581385 100644
--- a/vpx_scale/scale_mode.h
+++ b/vpx_scale/scale_mode.h
@@ -17,12 +17,11 @@
#ifndef SCALE_MODE_H
#define SCALE_MODE_H
-typedef enum
-{
- MAINTAIN_ASPECT_RATIO = 0x0,
- SCALE_TO_FIT = 0x1,
- CENTER = 0x2,
- OTHER = 0x3
+typedef enum {
+ MAINTAIN_ASPECT_RATIO = 0x0,
+ SCALE_TO_FIT = 0x1,
+ CENTER = 0x2,
+ OTHER = 0x3
} SCALE_MODE;
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index 8919a24..3c2194d 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -14,29 +14,24 @@
#include "vpx_scale/yv12config.h"
-extern void vp8_yv12_scale_or_center
-(
- YV12_BUFFER_CONFIG *src_yuv_config,
- YV12_BUFFER_CONFIG *dst_yuv_config,
- int expanded_frame_width,
- int expanded_frame_height,
- int scaling_mode,
- int HScale,
- int HRatio,
- int VScale,
- int VRatio
-);
-extern void vp8_scale_frame
-(
- YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst,
- unsigned char *temp_area,
- unsigned char temp_height,
- unsigned int hscale,
- unsigned int hratio,
- unsigned int vscale,
- unsigned int vratio,
- unsigned int interlaced
-);
+extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config,
+ YV12_BUFFER_CONFIG *dst_yuv_config,
+ int expanded_frame_width,
+ int expanded_frame_height,
+ int scaling_mode,
+ int HScale,
+ int HRatio,
+ int VScale,
+ int VRatio);
+
+extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst,
+ unsigned char *temp_area,
+ unsigned char temp_height,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced);
#endif
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index 3711fe5..2d96cc7 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51,
static
void horizontal_line_3_5_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
- __asm
- {
+ __asm {
- push ebx
+ push ebx
- mov esi, source
- mov edi, dest
+ mov esi, source
+ mov edi, dest
- mov ecx, source_width
- lea edx, [esi+ecx-3];
+ mov ecx, source_width
+ lea edx, [esi+ecx-3];
- movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
+ movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
+ movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
+ pxor mm7, mm7 // clear mm7
- horiz_line_3_5_loop:
+ horiz_line_3_5_loop:
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03
+ mov ebx, eax
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
+ and ebx, 0xffff00 // ebx = xx 01 02 xx
+ mov ecx, eax // ecx = 00 01 02 03
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
+ and eax, 0xffff0000 // eax = xx xx 02 03
+ xor ecx, eax // ecx = 00 01 xx xx
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
+ shr ebx, 8 // ebx = 01 02 xx xx
+ or eax, ebx // eax = 01 02 02 03
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
+ shl ebx, 16 // ebx = xx xx 01 02
+ movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
+ or ebx, ecx // ebx = 00 01 01 02
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 //
+ movd mm0, ebx // mm0 = 00 01 01 02
+ pmullw mm1, mm6 //
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 //
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
+ pmullw mm0, mm5 //
- mov [edi], ebx // writeoutput 00 xx xx xx
- add esi, 3
+    mov         [edi], ebx              // write output 00 xx xx xx
+ add esi, 3
- add edi, 5
- paddw mm0, mm1
+ add edi, 5
+ paddw mm0, mm1
- paddw mm0, mm4
- psrlw mm0, 8
+ paddw mm0, mm4
+ psrlw mm0, 8
- cmp esi, edx
- packuswb mm0, mm7
+ cmp esi, edx
+ packuswb mm0, mm7
- movd DWORD Ptr [edi-4], mm0
- jl horiz_line_3_5_loop
+ movd DWORD Ptr [edi-4], mm0
+ jl horiz_line_3_5_loop
-//Exit:
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
+// Exit:
+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03
+ mov ebx, eax
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
+ and ebx, 0xffff00 // ebx = xx 01 02 xx
+ mov ecx, eax // ecx = 00 01 02 03
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
+ and eax, 0xffff0000 // eax = xx xx 02 03
+ xor ecx, eax // ecx = 00 01 xx xx
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
+ shr ebx, 8 // ebx = 01 02 xx xx
+ or eax, ebx // eax = 01 02 02 03
- shl eax, 8 // eax = xx 01 02 02
- and eax, 0xffff0000 // eax = xx xx 02 02
+ shl eax, 8 // eax = xx 01 02 02
+ and eax, 0xffff0000 // eax = xx xx 02 02
- or eax, ebx // eax = 01 02 02 02
+ or eax, ebx // eax = 01 02 02 02
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
+ shl ebx, 16 // ebx = xx xx 01 02
+ movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
+ or ebx, ecx // ebx = 00 01 01 02
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 //
+ movd mm0, ebx // mm0 = 00 01 01 02
+ pmullw mm1, mm6 //
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 //
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
+ pmullw mm0, mm5 //
- mov [edi], ebx // writeoutput 00 xx xx xx
- paddw mm0, mm1
+    mov         [edi], ebx              // write output 00 xx xx xx
+ paddw mm0, mm1
- paddw mm0, mm4
- psrlw mm0, 8
+ paddw mm0, mm4
+ psrlw mm0, 8
- packuswb mm0, mm7
- movd DWORD Ptr [edi+1], mm0
+ packuswb mm0, mm7
+ movd DWORD Ptr [edi+1], mm0
- pop ebx
+ pop ebx
- }
+ }
}
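
To make the MMX routine above easier to follow, here is a plain-C restatement (a sketch; horizontal_line_3_5_scale_c is hypothetical). Each input triple a,b,c expands to five outputs using the /256 fixed-point weights from const35_1/const35_2 (51, 102, 154, 205 for 1/5, 2/5, 3/5, 4/5); d is the first pixel of the next triple.

static void horizontal_line_3_5_scale_c(const unsigned char *src,
                                        unsigned int src_width,
                                        unsigned char *des) {
  unsigned int i;
  for (i = 0; i + 3 < src_width; i += 3) {
    unsigned int a = src[0], b = src[1], c = src[2], d = src[3];
    des[0] = (unsigned char)a;
    des[1] = (unsigned char)((a * 102 + b * 154 + 128) >> 8);  /* 2/5 a + 3/5 b */
    des[2] = (unsigned char)((b * 205 + c *  51 + 128) >> 8);  /* 4/5 b + 1/5 c */
    des[3] = (unsigned char)((b *  51 + c * 205 + 128) >> 8);  /* 1/5 b + 4/5 c */
    des[4] = (unsigned char)((c * 154 + d * 102 + 128) >> 8);  /* 3/5 c + 2/5 d */
    src += 3;
    des += 5;
  }
  /* The MMX exit block handles the final triple the same way, but with
     the last pixel duplicated in place of d, since src[3] would read
     past the end of the line. */
}
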
@@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx
static
void horizontal_line_4_5_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void)dest_width;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void)dest_width;
- __asm
- {
+ __asm {
- mov esi, source
- mov edi, dest
+ mov esi, source
+ mov edi, dest
- mov ecx, source_width
- lea edx, [esi+ecx-8];
+ mov ecx, source_width
+ lea edx, [esi+ecx-8];
- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
+ movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
+ movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
+ pxor mm7, mm7 // clear mm7
- horiz_line_4_5_loop:
+ horiz_line_4_5_loop:
- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
+ movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
+ movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
+ movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
+    movd        DWORD PTR [edi+5], mm2  // write output 05 xx xx xx
+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
+ pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
- paddw mm0, mm1 // added round values
- paddw mm0, mm4
+ paddw mm0, mm1 // added round values
+ paddw mm0, mm4
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7
+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
+ packuswb mm0, mm7
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- add edi, 10
+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
+ add edi, 10
- add esi, 8
- paddw mm2, mm3 //
+ add esi, 8
+ paddw mm2, mm3 //
- paddw mm2, mm4 // added round values
- cmp esi, edx
+ paddw mm2, mm4 // added round values
+ cmp esi, edx
- psrlw mm2, 8
- packuswb mm2, mm7
+ psrlw mm2, 8
+ packuswb mm2, mm7
- movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09
- jl horiz_line_4_5_loop
+    movd        DWORD PTR [edi-4], mm2  // write output 06 07 08 09
+ jl horiz_line_4_5_loop
-//Exit:
- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
+// Exit:
+ movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
+ movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
+ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
+ movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
+ pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
+ psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
+ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
- movq mm3, mm1
+ movq mm3, mm1
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
+    movd        DWORD PTR [edi+5], mm2  // write output 05 xx xx xx
+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
+ pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
- paddw mm0, mm1 // added round values
- paddw mm0, mm4
+ paddw mm0, mm1 // added round values
+ paddw mm0, mm4
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
+ packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- paddw mm2, mm3 //
+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
+ paddw mm2, mm3 //
- paddw mm2, mm4 // added round values
- psrlw mm2, 8
+ paddw mm2, mm4 // added round values
+ psrlw mm2, 8
- packuswb mm2, mm7
- movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09
+ packuswb mm2, mm7
+    movd        DWORD PTR [edi+6], mm2  // write output 06 07 08 09
- }
+ }
}
/****************************************************************************
@@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx
static
void vertical_band_4_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+    lea         edi, [esi+ecx*2]        // two lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- vs_4_5_loop:
+ vs_4_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, one_fifth
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, one_fifth
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 1/5
+ pmullw mm0, mm5 // a * 1/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 1/5
- movq mm6, four_fifths // constan
+ pmullw mm2, mm5 // a * 1/5
+    movq        mm6, four_fifths        // constant
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 4/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
+ pmullw mm5, mm6 // b * 4/5
+ paddw mm0, mm4 // a * 1/5 + b * 4/5
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 1/5 + b * 4/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm5, two_fifths
- movq mm2, mm0 // make a copy
+ movq mm5, two_fifths
+ movq mm2, mm0 // make a copy
- pmullw mm1, mm5 // b * 2/5
- movq mm6, three_fifths
+ pmullw mm1, mm5 // b * 2/5
+ movq mm6, three_fifths
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
+ punpcklbw mm0, mm7 // unpack low to word
+ pmullw mm3, mm5 // b * 2/5
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm4, mm0 // make copy of c
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
+ pmullw mm4, mm6 // c * 3/5
+ movq mm5, mm2
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
+ pmullw mm5, mm6 // c * 3/5
+ paddw mm1, mm4 // b * 2/5 + c * 3/5
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, round_values // + 128
+ paddw mm3, mm5 // b * 2/5 + c * 3/5
+ paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
- psrlw mm1, 8
+ paddw mm3, round_values // + 128
+ psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ movq mm1, [edi] // mm1=Src[3];
- // mm0, mm2 --- Src[2]
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
+ // mm0, mm2 --- Src[2]
+ // mm1 --- Src[3]
+ // mm6 --- 3/5
+ // mm7 for unpacking
- pmullw mm0, mm6 // c * 3/5
- movq mm5, two_fifths // mm5 = 2/5
+ pmullw mm0, mm6 // c * 3/5
+ movq mm5, two_fifths // mm5 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
+ movq mm3, mm1 // make a copy
+ pmullw mm2, mm6 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low
+ movq mm4, mm1 // make a copy
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
+ punpckhbw mm3, mm7 // unpack high
+ pmullw mm4, mm5 // d * 2/5
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
+ movq mm6, mm3 // make a copy
+ pmullw mm6, mm5 // d * 2/5
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
+ paddw mm0, mm4 // c * 3/5 + d * 2/5
+ paddw mm2, mm6 // c * 3/5 + d * 2/5
- paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
+ paddw mm0, round_values // + 128
+ paddw mm2, round_values // + 128
- psrlw mm0, 8
- psrlw mm2, 8
+ psrlw mm0, 8
+ psrlw mm2, 8
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
+ packuswb mm0, mm2 // des[3]
+ movq QWORD ptr [edi], mm0 // write des[3]
- // mm1, mm3 --- Src[3]
- // mm7 -- cleared for unpacking
+ // mm1, mm3 --- Src[3]
+ // mm7 -- cleared for unpacking
- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group
+ movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group
- movq mm5, four_fifths // mm5 = 4/5
- pmullw mm1, mm5 // d * 4/5
+ movq mm5, four_fifths // mm5 = 4/5
+ pmullw mm1, mm5 // d * 4/5
- movq mm6, one_fifth // mm6 = 1/5
- movq mm2, mm0 // make a copy
+ movq mm6, one_fifth // mm6 = 1/5
+ movq mm2, mm0 // make a copy
- pmullw mm3, mm5 // d * 4/5
- punpcklbw mm0, mm7 // unpack low
+ pmullw mm3, mm5 // d * 4/5
+ punpcklbw mm0, mm7 // unpack low
- pmullw mm0, mm6 // an * 1/5
- punpckhbw mm2, mm7 // unpack high
+ pmullw mm0, mm6 // an * 1/5
+ punpckhbw mm2, mm7 // unpack high
- paddw mm1, mm0 // d * 4/5 + an * 1/5
- pmullw mm2, mm6 // an * 1/5
+ paddw mm1, mm0 // d * 4/5 + an * 1/5
+ pmullw mm2, mm6 // an * 1/5
- paddw mm3, mm2 // d * 4/5 + an * 1/5
- paddw mm1, round_values // + 128
+ paddw mm3, mm2 // d * 4/5 + an * 1/5
+ paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
- psrlw mm1, 8
+ paddw mm3, round_values // + 128
+ psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[4]
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[4]
- movq QWORD ptr [edi+ecx], mm1 // write des[4]
+ movq QWORD ptr [edi+ecx], mm1 // write des[4]
- add edi, 8
- add esi, 8
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg vs_4_5_loop
- }
+ sub edx, 8
+ jg vs_4_5_loop
+ }
}
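
The same pass in plain C (a sketch; vertical_band_4_5_scale_c is hypothetical). Rows a..d of each band sit in place at dest rows 0..3 and the first row of the next band at row 5; the weights assume the one_fifth..four_fifths tables are the /256 constants 51, 102, 154 and 205, an assumption consistent with the const35 tables above and the one_fourths family defined later in this file.

static void vertical_band_4_5_scale_c(unsigned char *dest,
                                      unsigned int dest_pitch,
                                      unsigned int dest_width) {
  unsigned int i;
  for (i = 0; i < dest_width; i++) {
    unsigned char *p = dest + i;
    unsigned int a = p[0 * dest_pitch];
    unsigned int b = p[1 * dest_pitch];
    unsigned int c = p[2 * dest_pitch];
    unsigned int d = p[3 * dest_pitch];
    unsigned int an = p[5 * dest_pitch];  /* Src[0] of the next band */

    /* des[0] is row a itself and is left untouched. */
    p[1 * dest_pitch] = (unsigned char)((a *  51 + b  * 205 + 128) >> 8);
    p[2 * dest_pitch] = (unsigned char)((b * 102 + c  * 154 + 128) >> 8);
    p[3 * dest_pitch] = (unsigned char)((c * 154 + d  * 102 + 128) >> 8);
    p[4 * dest_pitch] = (unsigned char)((d * 205 + an *  51 + 128) >> 8);
  }
}

Note the in-place trick: the source rows were pre-placed at the output spacing, so each band only rewrites rows 1..4.
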
/****************************************************************************
@@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx
static
void last_vertical_band_4_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+    lea         edi, [esi+ecx*2]        // two lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- last_vs_4_5_loop:
+ last_vs_4_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, one_fifth
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, one_fifth
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 1/5
+ pmullw mm0, mm5 // a * 1/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 1/5
- movq mm6, four_fifths // constan
+ pmullw mm2, mm5 // a * 1/5
+    movq        mm6, four_fifths        // constant
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 4/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
+ pmullw mm5, mm6 // b * 4/5
+ paddw mm0, mm4 // a * 1/5 + b * 4/5
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 1/5 + b * 4/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm5, two_fifths
- movq mm2, mm0 // make a copy
+ movq mm5, two_fifths
+ movq mm2, mm0 // make a copy
- pmullw mm1, mm5 // b * 2/5
- movq mm6, three_fifths
+ pmullw mm1, mm5 // b * 2/5
+ movq mm6, three_fifths
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
+ punpcklbw mm0, mm7 // unpack low to word
+ pmullw mm3, mm5 // b * 2/5
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm4, mm0 // make copy of c
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
+ pmullw mm4, mm6 // c * 3/5
+ movq mm5, mm2
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
+ pmullw mm5, mm6 // c * 3/5
+ paddw mm1, mm4 // b * 2/5 + c * 3/5
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, round_values // + 128
+ paddw mm3, mm5 // b * 2/5 + c * 3/5
+ paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
- psrlw mm1, 8
+ paddw mm3, round_values // + 128
+ psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ movq mm1, [edi] // mm1=Src[3];
- movq QWORD ptr [edi+ecx], mm1 // write des[4];
+ movq QWORD ptr [edi+ecx], mm1 // write des[4];
- // mm0, mm2 --- Src[2]
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
+ // mm0, mm2 --- Src[2]
+ // mm1 --- Src[3]
+ // mm6 --- 3/5
+ // mm7 for unpacking
- pmullw mm0, mm6 // c * 3/5
- movq mm5, two_fifths // mm5 = 2/5
+ pmullw mm0, mm6 // c * 3/5
+ movq mm5, two_fifths // mm5 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
+ movq mm3, mm1 // make a copy
+ pmullw mm2, mm6 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low
+ movq mm4, mm1 // make a copy
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
+ punpckhbw mm3, mm7 // unpack high
+ pmullw mm4, mm5 // d * 2/5
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
+ movq mm6, mm3 // make a copy
+ pmullw mm6, mm5 // d * 2/5
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
+ paddw mm0, mm4 // c * 3/5 + d * 2/5
+ paddw mm2, mm6 // c * 3/5 + d * 2/5
- paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
+ paddw mm0, round_values // + 128
+ paddw mm2, round_values // + 128
- psrlw mm0, 8
- psrlw mm2, 8
+ psrlw mm0, 8
+ psrlw mm2, 8
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
+ packuswb mm0, mm2 // des[3]
+ movq QWORD ptr [edi], mm0 // write des[3]
- // mm1, mm3 --- Src[3]
- // mm7 -- cleared for unpacking
- add edi, 8
- add esi, 8
+ // mm1, mm3 --- Src[3]
+ // mm7 -- cleared for unpacking
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg last_vs_4_5_loop
- }
+ sub edx, 8
+ jg last_vs_4_5_loop
+ }
}
/****************************************************************************
@@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx
static
void vertical_band_3_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+    lea         edi, [esi+ecx*2]        // two lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- vs_3_5_loop:
+ vs_3_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, two_fifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, two_fifths // mm5 = 2/5
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 2/5
+ pmullw mm0, mm5 // a * 2/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 2/5
- movq mm6, three_fifths // mm6 = 3/5
+ pmullw mm2, mm5 // a * 2/5
+ movq mm6, three_fifths // mm6 = 3/5
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 3/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
+ pmullw mm5, mm6 // b * 3/5
+ paddw mm0, mm4 // a * 2/5 + b * 3/5
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 2/5 + b * 3/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm4, mm1 // b low
- pmullw mm1, four_fifths // b * 4/5 low
+ movq mm4, mm1 // b low
+ pmullw mm1, four_fifths // b * 4/5 low
- movq mm5, mm3 // b high
- pmullw mm3, four_fifths // b * 4/5 high
+ movq mm5, mm3 // b high
+ pmullw mm3, four_fifths // b * 4/5 high
- movq mm2, mm0 // c
- pmullw mm4, one_fifth // b * 1/5
+ movq mm2, mm0 // c
+ pmullw mm4, one_fifth // b * 1/5
- punpcklbw mm0, mm7 // c low
- pmullw mm5, one_fifth // b * 1/5
+ punpcklbw mm0, mm7 // c low
+ pmullw mm5, one_fifth // b * 1/5
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
+ movq mm6, mm0 // make copy of c low
+ punpckhbw mm2, mm7 // c high
- pmullw mm6, one_fifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, one_fifth // c * 1/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, one_fifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
+ pmullw mm7, one_fifth // c * 1/5 high
+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high
+ movq mm6, mm0 // make copy of c low
- pmullw mm6, four_fifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, four_fifths // c * 4/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, four_fifths // c * 4/5 high
+ pmullw mm7, four_fifths // c * 4/5 high
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low
+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high
- paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
+ paddw mm1, round_values // + 128
+ paddw mm3, round_values // + 128
- psrlw mm1, 8
- psrlw mm3, 8
+ psrlw mm1, 8
+ psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ packuswb mm1, mm3 // des[2]
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- paddw mm4, round_values // + 128
- paddw mm5, round_values // + 128
+ paddw mm4, round_values // + 128
+ paddw mm5, round_values // + 128
- psrlw mm4, 8
- psrlw mm5, 8
+ psrlw mm4, 8
+ psrlw mm5, 8
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
+ packuswb mm4, mm5 // des[3]
+ movq QWORD ptr [edi], mm4 // write des[3]
- // mm0, mm2 --- Src[3]
+ // mm0, mm2 --- Src[3]
- pxor mm7, mm7 // clear mm7 for unpacking
- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group
+ pxor mm7, mm7 // clear mm7 for unpacking
+ movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group
- movq mm5, three_fifths // mm5 = 3/5
- pmullw mm0, mm5 // d * 3/5
+ movq mm5, three_fifths // mm5 = 3/5
+ pmullw mm0, mm5 // d * 3/5
- movq mm6, two_fifths // mm6 = 2/5
- movq mm3, mm1 // make a copy
+ movq mm6, two_fifths // mm6 = 2/5
+ movq mm3, mm1 // make a copy
- pmullw mm2, mm5 // d * 3/5
- punpcklbw mm1, mm7 // unpack low
+ pmullw mm2, mm5 // d * 3/5
+ punpcklbw mm1, mm7 // unpack low
- pmullw mm1, mm6 // an * 2/5
- punpckhbw mm3, mm7 // unpack high
+ pmullw mm1, mm6 // an * 2/5
+ punpckhbw mm3, mm7 // unpack high
- paddw mm0, mm1 // d * 3/5 + an * 2/5
- pmullw mm3, mm6 // an * 2/5
+ paddw mm0, mm1 // d * 3/5 + an * 2/5
+ pmullw mm3, mm6 // an * 2/5
- paddw mm2, mm3 // d * 3/5 + an * 2/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm3 // d * 3/5 + an * 2/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des[4]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des[4]
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
+ movq QWORD ptr [edi+ecx], mm0 // write des[4]
- add edi, 8
- add esi, 8
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg vs_3_5_loop
- }
+ sub edx, 8
+ jg vs_3_5_loop
+ }
}
/****************************************************************************
@@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx
static
void last_vertical_band_3_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+    lea         edi, [esi+ecx*2]        // two lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- last_vs_3_5_loop:
+ last_vs_3_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, two_fifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, two_fifths // mm5 = 2/5
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 2/5
+ pmullw mm0, mm5 // a * 2/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 2/5
- movq mm6, three_fifths // mm6 = 3/5
+ pmullw mm2, mm5 // a * 2/5
+ movq mm6, three_fifths // mm6 = 3/5
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 3/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
+ pmullw mm5, mm6 // b * 3/5
+ paddw mm0, mm4 // a * 2/5 + b * 3/5
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 2/5 + b * 3/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm4, mm1 // b low
- pmullw mm1, four_fifths // b * 4/5 low
+ movq mm4, mm1 // b low
+ pmullw mm1, four_fifths // b * 4/5 low
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
+ movq QWORD ptr [edi+ecx], mm0 // write des[4]
- movq mm5, mm3 // b high
- pmullw mm3, four_fifths // b * 4/5 high
+ movq mm5, mm3 // b high
+ pmullw mm3, four_fifths // b * 4/5 high
- movq mm2, mm0 // c
- pmullw mm4, one_fifth // b * 1/5
+ movq mm2, mm0 // c
+ pmullw mm4, one_fifth // b * 1/5
- punpcklbw mm0, mm7 // c low
- pmullw mm5, one_fifth // b * 1/5
+ punpcklbw mm0, mm7 // c low
+ pmullw mm5, one_fifth // b * 1/5
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
+ movq mm6, mm0 // make copy of c low
+ punpckhbw mm2, mm7 // c high
- pmullw mm6, one_fifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, one_fifth // c * 1/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, one_fifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
+ pmullw mm7, one_fifth // c * 1/5 high
+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high
+ movq mm6, mm0 // make copy of c low
- pmullw mm6, four_fifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, four_fifths // c * 4/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, four_fifths // c * 4/5 high
+ pmullw mm7, four_fifths // c * 4/5 high
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low
+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high
- paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
+ paddw mm1, round_values // + 128
+ paddw mm3, round_values // + 128
- psrlw mm1, 8
- psrlw mm3, 8
+ psrlw mm1, 8
+ psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ packuswb mm1, mm3 // des[2]
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- paddw mm4, round_values // + 128
- paddw mm5, round_values // + 128
+ paddw mm4, round_values // + 128
+ paddw mm5, round_values // + 128
- psrlw mm4, 8
- psrlw mm5, 8
+ psrlw mm4, 8
+ psrlw mm5, 8
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
+ packuswb mm4, mm5 // des[3]
+ movq QWORD ptr [edi], mm4 // write des[3]
- // mm0, mm2 --- Src[3]
+ // mm0, mm2 --- Src[3]
- add edi, 8
- add esi, 8
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg last_vs_3_5_loop
- }
+ sub edx, 8
+ jg last_vs_3_5_loop
+ }
}
/****************************************************************************
@@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx
static
void vertical_band_1_2_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- vs_1_2_loop:
+ vs_1_2_loop:
- movq mm0, [esi] // get Src[0]
- movq mm1, [esi + ecx * 2] // get Src[1]
+ movq mm0, [esi] // get Src[0]
+ movq mm1, [esi + ecx * 2] // get Src[1]
- movq mm2, mm0 // make copy before unpack
- movq mm3, mm1 // make copy before unpack
+ movq mm2, mm0 // make copy before unpack
+ movq mm3, mm1 // make copy before unpack
- punpcklbw mm0, mm7 // low Src[0]
- movq mm6, four_ones // mm6= 1, 1, 1, 1
+ punpcklbw mm0, mm7 // low Src[0]
+ movq mm6, four_ones // mm6= 1, 1, 1, 1
- punpcklbw mm1, mm7 // low Src[1]
- paddw mm0, mm1 // low (a + b)
+ punpcklbw mm1, mm7 // low Src[1]
+ paddw mm0, mm1 // low (a + b)
- punpckhbw mm2, mm7 // high Src[0]
- paddw mm0, mm6 // low (a + b + 1)
+ punpckhbw mm2, mm7 // high Src[0]
+ paddw mm0, mm6 // low (a + b + 1)
- punpckhbw mm3, mm7
- paddw mm2, mm3 // high (a + b )
+ punpckhbw mm3, mm7
+ paddw mm2, mm3 // high (a + b )
- psraw mm0, 1 // low (a + b +1 )/2
- paddw mm2, mm6 // high (a + b + 1)
+ psraw mm0, 1 // low (a + b +1 )/2
+ paddw mm2, mm6 // high (a + b + 1)
- psraw mm2, 1 // high (a + b + 1)/2
- packuswb mm0, mm2 // pack results
+ psraw mm2, 1 // high (a + b + 1)/2
+ packuswb mm0, mm2 // pack results
- movq [esi+ecx], mm0 // write out eight bytes
- add esi, 8
+ movq [esi+ecx], mm0 // write out eight bytes
+ add esi, 8
- sub edx, 8
- jg vs_1_2_loop
- }
+ sub edx, 8
+ jg vs_1_2_loop
+ }
}
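
In plain C the band above reduces to a rounded average (a sketch; vertical_band_1_2_scale_c is hypothetical): the band already holds source rows at dest[0] and dest[2*pitch], and the new row between them is their midpoint.

static void vertical_band_1_2_scale_c(unsigned char *dest,
                                      unsigned int dest_pitch,
                                      unsigned int dest_width) {
  unsigned int i;
  for (i = 0; i < dest_width; i++) {
    unsigned int a = dest[i];                   /* Src[0] */
    unsigned int b = dest[i + 2 * dest_pitch];  /* Src[1] */
    dest[i + dest_pitch] = (unsigned char)((a + b + 1) >> 1);
  }
}

The four_ones constant plus the arithmetic shift in the MMX code implement exactly the (a + b + 1) >> 1 rounding shown here.
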
@@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx
static
void last_vertical_band_1_2_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- mov edx, dest_width // Loop counter
+ mov edx, dest_width // Loop counter
- last_vs_1_2_loop:
+ last_vs_1_2_loop:
- movq mm0, [esi] // get Src[0]
- movq [esi+ecx], mm0 // write out eight bytes
+ movq mm0, [esi] // get Src[0]
+ movq [esi+ecx], mm0 // write out eight bytes
- add esi, 8
- sub edx, 8
+ add esi, 8
+ sub edx, 8
- jg last_vs_1_2_loop
- }
+ jg last_vs_1_2_loop
+ }
}
/****************************************************************************
@@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx
static
void horizontal_line_1_2_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
- __asm
- {
- mov esi, source
- mov edi, dest
+ __asm {
+ mov esi, source
+ mov edi, dest
- pxor mm7, mm7
- movq mm6, four_ones
+ pxor mm7, mm7
+ movq mm6, four_ones
- mov ecx, source_width
+ mov ecx, source_width
- hs_1_2_loop:
+ hs_1_2_loop:
- movq mm0, [esi]
- movq mm1, [esi+1]
+ movq mm0, [esi]
+ movq mm1, [esi+1]
- movq mm2, mm0
- movq mm3, mm1
+ movq mm2, mm0
+ movq mm3, mm1
- movq mm4, mm0
- punpcklbw mm0, mm7
+ movq mm4, mm0
+ punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- paddw mm0, mm1
+ punpcklbw mm1, mm7
+ paddw mm0, mm1
- paddw mm0, mm6
- punpckhbw mm2, mm7
+ paddw mm0, mm6
+ punpckhbw mm2, mm7
- punpckhbw mm3, mm7
- paddw mm2, mm3
+ punpckhbw mm3, mm7
+ paddw mm2, mm3
- paddw mm2, mm6
- psraw mm0, 1
+ paddw mm2, mm6
+ psraw mm0, 1
- psraw mm2, 1
- packuswb mm0, mm2
+ psraw mm2, 1
+ packuswb mm0, mm2
- movq mm2, mm4
- punpcklbw mm2, mm0
+ movq mm2, mm4
+ punpcklbw mm2, mm0
- movq [edi], mm2
- punpckhbw mm4, mm0
+ movq [edi], mm2
+ punpckhbw mm4, mm0
- movq [edi+8], mm4
- add esi, 8
+ movq [edi+8], mm4
+ add esi, 8
- add edi, 16
- sub ecx, 8
+ add edi, 16
+ sub ecx, 8
- cmp ecx, 8
- jg hs_1_2_loop
+ cmp ecx, 8
+ jg hs_1_2_loop
// last eight pixel
- movq mm0, [esi]
- movq mm1, mm0
+ movq mm0, [esi]
+ movq mm1, mm0
- movq mm2, mm0
- movq mm3, mm1
+ movq mm2, mm0
+ movq mm3, mm1
- psrlq mm1, 8
- psrlq mm3, 56
+ psrlq mm1, 8
+ psrlq mm3, 56
- psllq mm3, 56
- por mm1, mm3
+ psllq mm3, 56
+ por mm1, mm3
- movq mm3, mm1
- movq mm4, mm0
+ movq mm3, mm1
+ movq mm4, mm0
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
- paddw mm0, mm1
- paddw mm0, mm6
+ paddw mm0, mm1
+ paddw mm0, mm6
- punpckhbw mm2, mm7
- punpckhbw mm3, mm7
+ punpckhbw mm2, mm7
+ punpckhbw mm3, mm7
- paddw mm2, mm3
- paddw mm2, mm6
+ paddw mm2, mm3
+ paddw mm2, mm6
- psraw mm0, 1
- psraw mm2, 1
+ psraw mm0, 1
+ psraw mm2, 1
- packuswb mm0, mm2
- movq mm2, mm4
+ packuswb mm0, mm2
+ movq mm2, mm4
- punpcklbw mm2, mm0
- movq [edi], mm2
+ punpcklbw mm2, mm0
+ movq [edi], mm2
- punpckhbw mm4, mm0
- movq [edi+8], mm4
- }
+ punpckhbw mm4, mm0
+ movq [edi+8], mm4
+ }
}
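
And the matching horizontal pass (a sketch; horizontal_line_1_2_scale_c is hypothetical): even outputs copy the source pixel, odd outputs average it with its right neighbour, and the last pixel is paired with itself, which is what the "last eight pixel" block above arranges with its shift/or sequence.

static void horizontal_line_1_2_scale_c(const unsigned char *src,
                                        unsigned int src_width,
                                        unsigned char *des) {
  unsigned int i;
  for (i = 0; i < src_width; i++) {
    unsigned int a = src[i];
    unsigned int b = (i + 1 < src_width) ? src[i + 1] : a;
    des[2 * i]     = (unsigned char)a;
    des[2 * i + 1] = (unsigned char)((a + b + 1) >> 1);
  }
}
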
@@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128,
static
void horizontal_line_5_4_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- /*
- unsigned i;
- unsigned int a, b, c, d, e;
- unsigned char *des = dest;
- const unsigned char *src = source;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ /*
+ unsigned i;
+ unsigned int a, b, c, d, e;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
- (void) dest_width;
+ (void) dest_width;
- for ( i=0; i<source_width; i+=5 )
- {
- a = src[0];
- b = src[1];
- c = src[2];
- d = src[3];
- e = src[4];
+ for ( i=0; i<source_width; i+=5 )
+ {
+ a = src[0];
+ b = src[1];
+ c = src[2];
+ d = src[3];
+ e = src[4];
- des[0] = a;
- des[1] = ((b*192 + c* 64 + 128)>>8);
- des[2] = ((c*128 + d*128 + 128)>>8);
- des[3] = ((d* 64 + e*192 + 128)>>8);
+ des[0] = a;
+ des[1] = ((b*192 + c* 64 + 128)>>8);
+ des[2] = ((c*128 + d*128 + 128)>>8);
+ des[3] = ((d* 64 + e*192 + 128)>>8);
- src += 5;
- des += 4;
- }
- */
- (void) dest_width;
+ src += 5;
+ des += 4;
+ }
+ */
+ (void) dest_width;
- __asm
- {
+ __asm {
- mov esi, source ;
- mov edi, dest ;
+ mov esi, source;
+ mov edi, dest;
- mov ecx, source_width ;
- movq mm5, const54_1 ;
+ mov ecx, source_width;
+ movq mm5, const54_1;
- pxor mm7, mm7 ;
- movq mm6, const54_2 ;
+ pxor mm7, mm7;
+ movq mm6, const54_2;
- movq mm4, round_values ;
- lea edx, [esi+ecx] ;
- horizontal_line_5_4_loop:
+ movq mm4, round_values;
+ lea edx, [esi+ecx];
+ horizontal_line_5_4_loop:
- movq mm0, QWORD PTR [esi] ;
- 00 01 02 03 04 05 06 07
- movq mm1, mm0 ;
- 00 01 02 03 04 05 06 07
+ movq mm0, QWORD PTR [esi];
+ 00 01 02 03 04 05 06 07
+ movq mm1, mm0;
+ 00 01 02 03 04 05 06 07
- psrlq mm0, 8 ;
- 01 02 03 04 05 06 07 xx
- punpcklbw mm1, mm7 ;
- xx 00 xx 01 xx 02 xx 03
+ psrlq mm0, 8;
+ 01 02 03 04 05 06 07 xx
+ punpcklbw mm1, mm7;
+ xx 00 xx 01 xx 02 xx 03
- punpcklbw mm0, mm7 ;
- xx 01 xx 02 xx 03 xx 04
- pmullw mm1, mm5
+ punpcklbw mm0, mm7;
+ xx 01 xx 02 xx 03 xx 04
+ pmullw mm1, mm5
- pmullw mm0, mm6
- add esi, 5
+ pmullw mm0, mm6
+ add esi, 5
- add edi, 4
- paddw mm1, mm0
+ add edi, 4
+ paddw mm1, mm0
- paddw mm1, mm4
- psrlw mm1, 8
+ paddw mm1, mm4
+ psrlw mm1, 8
- cmp esi, edx
- packuswb mm1, mm7
+ cmp esi, edx
+ packuswb mm1, mm7
- movd DWORD PTR [edi-4], mm1
+ movd DWORD PTR [edi-4], mm1
- jl horizontal_line_5_4_loop
+ jl horizontal_line_5_4_loop
- }
+ }
}
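
The commented-out C above documents the 5:4 kernel the MMX body implements: per group of five inputs, 8.8 fixed-point weights (each pair summing to 256) produce four outputs. As a worked example, with b = 100 and c = 200, des[1] = (100*192 + 200*64 + 128) >> 8 = 32128 >> 8 = 125, i.e. the rounded blend 0.75*b + 0.25*c.
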
__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 };
@@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128,
__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 };
static
-void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
+void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- __asm
- {
- push ebx
+ __asm {
+ push ebx
- mov esi, source // Get the source and destination pointer
- mov ecx, src_pitch // Get the pitch size
+ mov esi, source // Get the source and destination pointer
+ mov ecx, src_pitch // Get the pitch size
- mov edi, dest // tow lines below
- pxor mm7, mm7 // clear out mm7
+ mov edi, dest // two lines below
+ pxor mm7, mm7 // clear out mm7
- mov edx, dest_pitch // Loop counter
- mov ebx, dest_width
+ mov edx, dest_pitch // Loop counter
+ mov ebx, dest_width
- vs_5_4_loop:
+ vs_5_4_loop:
- movd mm0, DWORD ptr [esi] // src[0];
- movd mm1, DWORD ptr [esi+ecx] // src[1];
+ movd mm0, DWORD ptr [esi] // src[0];
+ movd mm1, DWORD ptr [esi+ecx] // src[1];
- movd mm2, DWORD ptr [esi+ecx*2]
- lea eax, [esi+ecx*2] //
+ movd mm2, DWORD ptr [esi+ecx*2]
+ lea eax, [esi+ecx*2] //
- punpcklbw mm1, mm7
- punpcklbw mm2, mm7
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
- movq mm3, mm2
- pmullw mm1, three_fourths
+ movq mm3, mm2
+ pmullw mm1, three_fourths
- pmullw mm2, one_fourths
- movd mm4, [eax+ecx]
+ pmullw mm2, one_fourths
+ movd mm4, [eax+ecx]
- pmullw mm3, two_fourths
- punpcklbw mm4, mm7
+ pmullw mm3, two_fourths
+ punpcklbw mm4, mm7
- movq mm5, mm4
- pmullw mm4, two_fourths
+ movq mm5, mm4
+ pmullw mm4, two_fourths
- paddw mm1, mm2
- movd mm6, [eax+ecx*2]
+ paddw mm1, mm2
+ movd mm6, [eax+ecx*2]
- pmullw mm5, one_fourths
- paddw mm1, round_values;
+ pmullw mm5, one_fourths
+ paddw mm1, round_values;
- paddw mm3, mm4
- psrlw mm1, 8
+ paddw mm3, mm4
+ psrlw mm1, 8
- punpcklbw mm6, mm7
- paddw mm3, round_values
+ punpcklbw mm6, mm7
+ paddw mm3, round_values
- pmullw mm6, three_fourths
- psrlw mm3, 8
+ pmullw mm6, three_fourths
+ psrlw mm3, 8
- packuswb mm1, mm7
- packuswb mm3, mm7
+ packuswb mm1, mm7
+ packuswb mm3, mm7
- movd DWORD PTR [edi], mm0
- movd DWORD PTR [edi+edx], mm1
+ movd DWORD PTR [edi], mm0
+ movd DWORD PTR [edi+edx], mm1
- paddw mm5, mm6
- movd DWORD PTR [edi+edx*2], mm3
+ paddw mm5, mm6
+ movd DWORD PTR [edi+edx*2], mm3
- lea eax, [edi+edx*2]
- paddw mm5, round_values
+ lea eax, [edi+edx*2]
+ paddw mm5, round_values
- psrlw mm5, 8
- add edi, 4
+ psrlw mm5, 8
+ add edi, 4
- packuswb mm5, mm7
- movd DWORD PTR [eax+edx], mm5
+ packuswb mm5, mm7
+ movd DWORD PTR [eax+edx], mm5
- add esi, 4
- sub ebx, 4
+ add esi, 4
+ sub ebx, 4
- jg vs_5_4_loop
+ jg vs_5_4_loop
- pop ebx
- }
+ pop ebx
+ }
}
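
The vertical 5:4 band scaler mirrors the horizontal kernel: it emits four destination rows from five source rows, copying row 0 and blending the rest with 8.8 fixed-point weights. A plain-C sketch inferred from the constants (one_fourths = 64, two_fourths = 128, three_fourths = 192); the _sketch name is hypothetical:

    static void vertical_band_5_4_scale_sketch(const unsigned char *src,
                                               unsigned int src_pitch,
                                               unsigned char *dst,
                                               unsigned int dst_pitch,
                                               unsigned int width) {
      unsigned int i;
      for (i = 0; i < width; i++) {
        unsigned int s0 = src[i];
        unsigned int s1 = src[i + src_pitch];
        unsigned int s2 = src[i + 2 * src_pitch];
        unsigned int s3 = src[i + 3 * src_pitch];
        unsigned int s4 = src[i + 4 * src_pitch];
        dst[i] = (unsigned char)s0;                          /* row 0 copied */
        dst[i + dst_pitch] =
            (unsigned char)((s1 * 192 + s2 * 64 + 128) >> 8);    /* 3/4,1/4 */
        dst[i + 2 * dst_pitch] =
            (unsigned char)((s2 * 128 + s3 * 128 + 128) >> 8);   /* 1/2,1/2 */
        dst[i + 3 * dst_pitch] =
            (unsigned char)((s3 * 64 + s4 * 192 + 128) >> 8);    /* 1/4,3/4 */
      }
    }
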
@@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85,
static
void horizontal_line_5_3_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
- (void) dest_width;
- __asm
- {
+ (void) dest_width;
+ __asm {
- mov esi, source ;
- mov edi, dest ;
+ mov esi, source;
+ mov edi, dest;
- mov ecx, source_width ;
- movq mm5, const53_1 ;
+ mov ecx, source_width;
+ movq mm5, const53_1;
- pxor mm7, mm7 ;
- movq mm6, const53_2 ;
+ pxor mm7, mm7;
+ movq mm6, const53_2;
- movq mm4, round_values ;
- lea edx, [esi+ecx-5] ;
- horizontal_line_5_3_loop:
+ movq mm4, round_values;
+ lea edx, [esi+ecx-5];
+ horizontal_line_5_3_loop:
- movq mm0, QWORD PTR [esi] ;
- 00 01 02 03 04 05 06 07
- movq mm1, mm0 ;
- 00 01 02 03 04 05 06 07
+ movq mm0, QWORD PTR [esi];
+ 00 01 02 03 04 05 06 07
+ movq mm1, mm0;
+ 00 01 02 03 04 05 06 07
- psllw mm0, 8 ;
- xx 00 xx 02 xx 04 xx 06
- psrlw mm1, 8 ;
- 01 xx 03 xx 05 xx 07 xx
+ psllw mm0, 8;
+ xx 00 xx 02 xx 04 xx 06
+ psrlw mm1, 8;
+ 01 xx 03 xx 05 xx 07 xx
- psrlw mm0, 8 ;
- 00 xx 02 xx 04 xx 06 xx
- psllq mm1, 16 ;
- xx xx 01 xx 03 xx 05 xx
+ psrlw mm0, 8;
+ 00 xx 02 xx 04 xx 06 xx
+ psllq mm1, 16;
+ xx xx 01 xx 03 xx 05 xx
- pmullw mm0, mm6
+ pmullw mm0, mm6
- pmullw mm1, mm5
- add esi, 5
+ pmullw mm1, mm5
+ add esi, 5
- add edi, 3
- paddw mm1, mm0
+ add edi, 3
+ paddw mm1, mm0
- paddw mm1, mm4
- psrlw mm1, 8
+ paddw mm1, mm4
+ psrlw mm1, 8
- cmp esi, edx
- packuswb mm1, mm7
+ cmp esi, edx
+ packuswb mm1, mm7
- movd DWORD PTR [edi-3], mm1
- jl horizontal_line_5_3_loop
+ movd DWORD PTR [edi-3], mm1
+ jl horizontal_line_5_3_loop
-//exit condition
- movq mm0, QWORD PTR [esi] ;
- 00 01 02 03 04 05 06 07
- movq mm1, mm0 ;
- 00 01 02 03 04 05 06 07
+// exit condition
+ movq mm0, QWORD PTR [esi];
+ 00 01 02 03 04 05 06 07
+ movq mm1, mm0;
+ 00 01 02 03 04 05 06 07
- psllw mm0, 8 ;
- xx 00 xx 02 xx 04 xx 06
- psrlw mm1, 8 ;
- 01 xx 03 xx 05 xx 07 xx
+ psllw mm0, 8;
+ xx 00 xx 02 xx 04 xx 06
+ psrlw mm1, 8;
+ 01 xx 03 xx 05 xx 07 xx
- psrlw mm0, 8 ;
- 00 xx 02 xx 04 xx 06 xx
- psllq mm1, 16 ;
- xx xx 01 xx 03 xx 05 xx
+ psrlw mm0, 8;
+ 00 xx 02 xx 04 xx 06 xx
+ psllq mm1, 16;
+ xx xx 01 xx 03 xx 05 xx
- pmullw mm0, mm6
+ pmullw mm0, mm6
- pmullw mm1, mm5
- paddw mm1, mm0
+ pmullw mm1, mm5
+ paddw mm1, mm0
- paddw mm1, mm4
- psrlw mm1, 8
+ paddw mm1, mm4
+ psrlw mm1, 8
- packuswb mm1, mm7
- movd eax, mm1
+ packuswb mm1, mm7
+ movd eax, mm1
- mov edx, eax
- shr edx, 16
+ mov edx, eax
+ shr edx, 16
- mov WORD PTR[edi], ax
- mov BYTE PTR[edi+2], dl
+ mov WORD PTR[edi], ax
+ mov BYTE PTR[edi+2], dl
- }
+ }
}
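
Here the weights are thirds in 8.8 fixed point: 85 is roughly 256/3 and 171 roughly 2*256/3 (const53_1/const53_2), and the psllw/psrlw shuffling merely separates even and odd source bytes so each output lane sees the right pair. A plain-C sketch of the per-group result, inferred from the constants and the 5/3 sample positions (hypothetical _sketch name):

    /* Three outputs per five inputs: out1 sits at source position 5/3,
     * out2 at 10/3, hence the 1/3-2/3 blends. */
    static void horizontal_line_5_3_scale_sketch(const unsigned char *src,
                                                 unsigned int source_width,
                                                 unsigned char *dst) {
      unsigned int i;
      for (i = 0; i + 5 <= source_width; i += 5, src += 5, dst += 3) {
        dst[0] = src[0];
        dst[1] = (unsigned char)((src[1] * 85 + src[2] * 171 + 128) >> 8);
        dst[2] = (unsigned char)((src[3] * 171 + src[4] * 85 + 128) >> 8);
      }
    }
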
@@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85
__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 };
static
-void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
+void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- __asm
- {
- push ebx
+ __asm {
+ push ebx
- mov esi, source // Get the source and destination pointer
- mov ecx, src_pitch // Get the pitch size
+ mov esi, source // Get the source and destination pointer
+ mov ecx, src_pitch // Get the pitch size
- mov edi, dest // tow lines below
- pxor mm7, mm7 // clear out mm7
+ mov edi, dest // two lines below
+ pxor mm7, mm7 // clear out mm7
- mov edx, dest_pitch // Loop counter
- movq mm5, one_thirds
+ mov edx, dest_pitch // Loop counter
+ movq mm5, one_thirds
- movq mm6, two_thirds
- mov ebx, dest_width;
+ movq mm6, two_thirds
+ mov ebx, dest_width;
- vs_5_3_loop:
+ vs_5_3_loop:
- movd mm0, DWORD ptr [esi] // src[0];
- movd mm1, DWORD ptr [esi+ecx] // src[1];
+ movd mm0, DWORD ptr [esi] // src[0];
+ movd mm1, DWORD ptr [esi+ecx] // src[1];
- movd mm2, DWORD ptr [esi+ecx*2]
- lea eax, [esi+ecx*2] //
+ movd mm2, DWORD ptr [esi+ecx*2]
+ lea eax, [esi+ecx*2] //
- punpcklbw mm1, mm7
- punpcklbw mm2, mm7
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
- pmullw mm1, mm5
- pmullw mm2, mm6
+ pmullw mm1, mm5
+ pmullw mm2, mm6
- movd mm3, DWORD ptr [eax+ecx]
- movd mm4, DWORD ptr [eax+ecx*2]
+ movd mm3, DWORD ptr [eax+ecx]
+ movd mm4, DWORD ptr [eax+ecx*2]
- punpcklbw mm3, mm7
- punpcklbw mm4, mm7
+ punpcklbw mm3, mm7
+ punpcklbw mm4, mm7
- pmullw mm3, mm6
- pmullw mm4, mm5
+ pmullw mm3, mm6
+ pmullw mm4, mm5
- movd DWORD PTR [edi], mm0
- paddw mm1, mm2
+ movd DWORD PTR [edi], mm0
+ paddw mm1, mm2
- paddw mm1, round_values
- psrlw mm1, 8
+ paddw mm1, round_values
+ psrlw mm1, 8
- packuswb mm1, mm7
- paddw mm3, mm4
+ packuswb mm1, mm7
+ paddw mm3, mm4
- paddw mm3, round_values
- movd DWORD PTR [edi+edx], mm1
+ paddw mm3, round_values
+ movd DWORD PTR [edi+edx], mm1
- psrlw mm3, 8
- packuswb mm3, mm7
+ psrlw mm3, 8
+ packuswb mm3, mm7
- movd DWORD PTR [edi+edx*2], mm3
+ movd DWORD PTR [edi+edx*2], mm3
- add edi, 4
- add esi, 4
+ add edi, 4
+ add esi, 4
- sub ebx, 4
- jg vs_5_3_loop
+ sub ebx, 4
+ jg vs_5_3_loop
- pop ebx
- }
+ pop ebx
+ }
}
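
The vertical 5:3 band applies the same thirds: row 0 is copied, row 1 = (85*s1 + 171*s2 + 128) >> 8 and row 2 = (171*s3 + 85*s4 + 128) >> 8. For instance, with s1 = 90 and s2 = 180 the middle row comes out as (7650 + 30780 + 128) >> 8 = 150, the rounded value of s1/3 + 2*s2/3.
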
@@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch,
static
void horizontal_line_2_1_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
- (void) source_width;
- __asm
- {
- mov esi, source
- mov edi, dest
-
- pxor mm7, mm7
- mov ecx, dest_width
-
- xor edx, edx
- hs_2_1_loop:
-
- movq mm0, [esi+edx*2]
- psllw mm0, 8
-
- psrlw mm0, 8
- packuswb mm0, mm7
-
- movd DWORD Ptr [edi+edx], mm0;
- add edx, 4
-
- cmp edx, ecx
- jl hs_2_1_loop
-
- }
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
+ (void) source_width;
+ __asm {
+ mov esi, source
+ mov edi, dest
+
+ pxor mm7, mm7
+ mov ecx, dest_width
+
+ xor edx, edx
+ hs_2_1_loop:
+
+ movq mm0, [esi+edx*2]
+ psllw mm0, 8
+
+ psrlw mm0, 8
+ packuswb mm0, mm7
+
+ movd DWORD Ptr [edi+edx], mm0;
+ add edx, 4
+
+ cmp edx, ecx
+ jl hs_2_1_loop
+
+ }
}
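
The 2:1 horizontal path is pure decimation: the psllw/psrlw pair masks each 16-bit lane down to its low byte, which is every even-indexed pixel, and packuswb narrows the lanes back to bytes. A plain-C sketch (hypothetical _sketch name):

    static void horizontal_line_2_1_scale_sketch(const unsigned char *source,
                                                 unsigned char *dest,
                                                 unsigned int dest_width) {
      unsigned int i;
      for (i = 0; i < dest_width; i++)
        dest[i] = source[2 * i];  /* keep even pixels, drop odd ones */
    }
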
static
-void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- (void) dest_pitch;
- (void) src_pitch;
- vpx_memcpy(dest, source, dest_width);
+void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
+ (void) dest_pitch;
+ (void) src_pitch;
+ vpx_memcpy(dest, source, dest_width);
}
@@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4
__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 };
static
-void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
+void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- (void) dest_pitch;
- __asm
- {
- mov esi, source
- mov edi, dest
+ (void) dest_pitch;
+ __asm {
+ mov esi, source
+ mov edi, dest
- mov eax, src_pitch
- mov edx, dest_width
+ mov eax, src_pitch
+ mov edx, dest_width
- pxor mm7, mm7
- sub esi, eax //back one line
+ pxor mm7, mm7
+ sub esi, eax // back one line
- lea ecx, [esi+edx];
- movq mm6, round_values;
+ lea ecx, [esi+edx];
+ movq mm6, round_values;
- movq mm5, three_sixteenths;
- movq mm4, ten_sixteenths;
+ movq mm5, three_sixteenths;
+ movq mm4, ten_sixteenths;
- vs_2_1_i_loop:
- movd mm0, [esi] //
- movd mm1, [esi+eax] //
+ vs_2_1_i_loop:
+ movd mm0, [esi] //
+ movd mm1, [esi+eax] //
- movd mm2, [esi+eax*2] //
- punpcklbw mm0, mm7
+ movd mm2, [esi+eax*2] //
+ punpcklbw mm0, mm7
- pmullw mm0, mm5
- punpcklbw mm1, mm7
+ pmullw mm0, mm5
+ punpcklbw mm1, mm7
- pmullw mm1, mm4
- punpcklbw mm2, mm7
+ pmullw mm1, mm4
+ punpcklbw mm2, mm7
- pmullw mm2, mm5
- paddw mm0, round_values
+ pmullw mm2, mm5
+ paddw mm0, round_values
- paddw mm1, mm2
- paddw mm0, mm1
+ paddw mm1, mm2
+ paddw mm0, mm1
- psrlw mm0, 8
- packuswb mm0, mm7
+ psrlw mm0, 8
+ packuswb mm0, mm7
- movd DWORD PTR [edi], mm0
- add esi, 4
+ movd DWORD PTR [edi], mm0
+ add esi, 4
- add edi, 4;
- cmp esi, ecx
- jl vs_2_1_i_loop
+ add edi, 4;
+ cmp esi, ecx
+ jl vs_2_1_i_loop
- }
+ }
}
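
The interpolating 2:1 vertical scaler blends three source rows with weights 3/16, 10/16, 3/16 (48 + 160 + 48 = 256 in 8.8 fixed point); the sub esi, eax at the top is what backs the source pointer up to the previous line. A plain-C sketch of one output row, inferred from the constants (hypothetical _sketch name):

    static void vertical_band_2_1_scale_i_sketch(const unsigned char *prev,
                                                 const unsigned char *cur,
                                                 const unsigned char *next,
                                                 unsigned char *dest,
                                                 unsigned int dest_width) {
      unsigned int i;
      for (i = 0; i < dest_width; i++)
        dest[i] = (unsigned char)
            ((prev[i] * 48 + cur[i] * 160 + next[i] * 48 + 128) >> 8);
    }
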
void
-register_mmxscalers(void)
-{
- vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx;
- vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx;
- vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx;
- vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx;
- vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx;
- vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx;
- vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx;
- vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx;
- vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx;
-
- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
-
-
-
- vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx;
- vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx;
- vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx;
- vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx;
- vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;
- vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
- vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;
+register_mmxscalers(void) {
+ vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx;
+ vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx;
+ vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx;
+ vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx;
+ vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx;
+ vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx;
+ vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx;
+ vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx;
+ vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx;
+
+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
+
+
+
+ vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx;
+ vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx;
+ vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx;
+ vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx;
+ vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;
+ vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
+ vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;
diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c
index 19e61c3..98913d1 100644
--- a/vpx_scale/win32/scalesystemdependent.c
+++ b/vpx_scale/win32/scalesystemdependent.c
@@ -46,46 +46,42 @@ extern void register_mmxscalers(void);
*
****************************************************************************/
void
-vp8_scale_machine_specific_config(void)
-{
- // If MMX supported then set to use MMX versions of functions else
- // use original 'C' versions.
- int mmx_enabled;
- int xmm_enabled;
- int wmt_enabled;
+vp8_scale_machine_specific_config(void) {
+ // If MMX supported then set to use MMX versions of functions else
+ // use original 'C' versions.
+ int mmx_enabled;
+ int xmm_enabled;
+ int wmt_enabled;
- vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
+ vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
- if (mmx_enabled || xmm_enabled || wmt_enabled)
- {
- register_mmxscalers();
- }
- else
- {
- vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c;
- vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c;
- vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c;
- vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c;
- vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c;
- vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c;
- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
- vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
- vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
- vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
+ if (mmx_enabled || xmm_enabled || wmt_enabled) {
+ register_mmxscalers();
+ } else {
+ vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c;
+ vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c;
+ vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c;
+ vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c;
+ vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c;
+ vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c;
+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
+ vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
+ vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
+ vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
- vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c;
- vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c;
- vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c;
- vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c;
- vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c;
- vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c;
- vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
+ vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c;
+ vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c;
+ vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c;
+ vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c;
+ vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c;
+ vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c;
+ vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
- }
+ }
}
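
Both register_mmxscalers() and the fallback branch above are instances of the same pattern: every scaler entry point is a global function pointer bound exactly once at startup, after probing the CPU. A minimal self-contained sketch of the pattern, with hypothetical names (band_copy_*, machine_config):

    #include <string.h>

    typedef void (*band_copy_fn)(const unsigned char *src,
                                 unsigned char *dst, unsigned int width);

    static void band_copy_c(const unsigned char *src, unsigned char *dst,
                            unsigned int width) {
      unsigned int i;
      for (i = 0; i < width; i++) dst[i] = src[i];  /* portable fallback */
    }

    static void band_copy_fast(const unsigned char *src, unsigned char *dst,
                               unsigned int width) {
      memcpy(dst, src, width);  /* stand-in for a SIMD-accelerated body */
    }

    static band_copy_fn band_copy;  /* bound once, called everywhere */

    static void machine_config(int have_simd) {
      band_copy = have_simd ? band_copy_fast : band_copy_c;
    }
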
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 800f700..6a8a1fc 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -16,54 +16,54 @@ extern "C"
{
#endif
-#define VP7BORDERINPIXELS 48
#define VP8BORDERINPIXELS 32
+#define VP9BORDERINPIXELS 64
+#define VP9_INTERP_EXTEND 4
- /*************************************
- For INT_YUV:
+ /*************************************
+ For INT_YUV:
- Y = (R+G*2+B)/4;
- U = (R-B)/2;
- V = (G*2 - R - B)/4;
- And
- R = Y+U-V;
- G = Y+V;
- B = Y-U-V;
- ************************************/
- typedef enum
- {
- REG_YUV = 0, /* Regular yuv */
- INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */
- }
- YUV_TYPE;
+ Y = (R+G*2+B)/4;
+ U = (R-B)/2;
+ V = (G*2 - R - B)/4;
+ And
+ R = Y+U-V;
+ G = Y+V;
+ B = Y-U-V;
+ ************************************/
+ typedef enum
+ {
+ REG_YUV = 0, /* Regular yuv */
+ INT_YUV = 1 /* The type of yuv that can be transferred to and from RGB through an integer transform */
+ }
+ YUV_TYPE;
- typedef struct yv12_buffer_config
- {
- int y_width;
- int y_height;
- int y_stride;
-/* int yinternal_width; */
+ typedef struct yv12_buffer_config {
+ int y_width;
+ int y_height;
+ int y_stride;
+ /* int yinternal_width; */
- int uv_width;
- int uv_height;
- int uv_stride;
-/* int uvinternal_width; */
+ int uv_width;
+ int uv_height;
+ int uv_stride;
+ /* int uvinternal_width; */
- unsigned char *y_buffer;
- unsigned char *u_buffer;
- unsigned char *v_buffer;
+ unsigned char *y_buffer;
+ unsigned char *u_buffer;
+ unsigned char *v_buffer;
- unsigned char *buffer_alloc;
- int border;
- int frame_size;
- YUV_TYPE clrtype;
+ unsigned char *buffer_alloc;
+ int border;
+ int frame_size;
+ YUV_TYPE clrtype;
- int corrupted;
- int flags;
- } YV12_BUFFER_CONFIG;
+ int corrupted;
+ int flags;
+ } YV12_BUFFER_CONFIG;
- int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
- int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
+ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
+ int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
#ifdef __cplusplus
}
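
For orientation, y_buffer/u_buffer/v_buffer point inside the single buffer_alloc block, offset past the border pixels, and the stride is the padded row length rather than the visible width. A hedged sketch of one consistent layout for the luma plane only (the real vp8_yv12_alloc_frame_buffer additionally aligns sizes and pointers; everything below is an assumption, not this tree's allocator):

    #include <stdlib.h>

    static int alloc_bordered_luma_sketch(int width, int height, int border,
                                          unsigned char **buffer_alloc,
                                          unsigned char **y_buffer,
                                          int *y_stride) {
      size_t sz;
      unsigned char *buf;
      *y_stride = width + 2 * border;               /* padded row length */
      sz = (size_t)(*y_stride) * (height + 2 * border);
      buf = (unsigned char *)malloc(sz);
      if (!buf) return -1;
      *buffer_alloc = buf;                          /* whole allocation  */
      *y_buffer = buf + border * (*y_stride) + border; /* visible corner */
      return 0;
    }
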
diff --git a/vpxdec.c b/vpxdec.c
index 4482f3d..9b728bf 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -52,7 +52,7 @@ static const char *exec_name;
static const struct
{
char const *name;
- const vpx_codec_iface_t *iface;
+ vpx_codec_iface_t *iface;
unsigned int fourcc;
unsigned int fourcc_mask;
} ifaces[] =
@@ -152,7 +152,8 @@ static void usage_exit()
"write to. If the\n argument does not include any escape "
"characters, the output will be\n written to a single file. "
"Otherwise, the filename will be calculated by\n expanding "
- "the following escape characters:\n"
+ "the following escape characters:\n");
+ fprintf(stderr,
"\n\t%%w - Frame width"
"\n\t%%h - Frame height"
"\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
@@ -356,7 +357,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
}
else
{
- if(fwrite(buf, 1, len, out));
+ (void) fwrite(buf, 1, len, out);
}
}
@@ -502,7 +503,7 @@ nestegg_seek_cb(int64_t offset, int whence, void * userdata)
case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break;
case NESTEGG_SEEK_END: whence = SEEK_END; break;
};
- return fseek(userdata, offset, whence)? -1 : 0;
+ return fseek(userdata, (long)offset, whence)? -1 : 0;
}
@@ -559,7 +560,7 @@ webm_guess_framerate(struct input_ctx *input,
goto fail;
*fps_num = (i - 1) * 1000000;
- *fps_den = tstamp / 1000;
+ *fps_den = (unsigned int)(tstamp / 1000);
return 0;
fail:
nestegg_destroy(input->nestegg_ctx);
@@ -580,10 +581,10 @@ file_is_webm(struct input_ctx *input,
unsigned int i, n;
int track_type = -1;
- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
- input->infile};
+ nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
nestegg_video_params params;
+ io.userdata = input->infile;
if(nestegg_init(&input->nestegg_ctx, io, NULL))
goto fail;
@@ -647,7 +648,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len,
{
size_t pat_len;
- // parse the pattern
+ /* parse the pattern */
q[q_len - 1] = '\0';
switch(p[1])
{
@@ -677,7 +678,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len,
{
size_t copy_len;
- // copy the next segment
+ /* copy the next segment */
if(!next_pat)
copy_len = strlen(p);
else
@@ -922,7 +923,7 @@ int main(int argc, const char **argv_)
p = strchr(p, '%');
if(p && p[1] >= '1' && p[1] <= '9')
{
- // pattern contains sequence number, so it's not unique.
+ /* pattern contains sequence number, so it's not unique. */
single_file = 0;
break;
}
@@ -962,7 +963,8 @@ int main(int argc, const char **argv_)
That will have to wait until these tools support WebM natively.*/
sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
"420jpeg", width, height, fps_num, fps_den, 'p');
- out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
+ out_put(out, (unsigned char *)buffer,
+ (unsigned int)strlen(buffer), do_md5);
}
/* Try to determine the codec from the fourcc. */
@@ -1040,7 +1042,7 @@ int main(int argc, const char **argv_)
vpx_usec_timer_start(&timer);
- if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0))
+ if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0))
{
const char *detail = vpx_codec_error_detail(&decoder);
fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
@@ -1052,7 +1054,7 @@ int main(int argc, const char **argv_)
}
vpx_usec_timer_mark(&timer);
- dx_time += vpx_usec_timer_elapsed(&timer);
+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
++frame_in;
@@ -1064,9 +1066,14 @@ int main(int argc, const char **argv_)
}
frames_corrupted += corrupted;
+ vpx_usec_timer_start(&timer);
+
if ((img = vpx_codec_get_frame(&decoder, &iter)))
++frame_out;
+ vpx_usec_timer_mark(&timer);
+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
+
if (progress)
show_progress(frame_in, frame_out, dx_time);
diff --git a/vpxenc.c b/vpxenc.c
index d32b21b..c9547ea 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -54,11 +54,7 @@ typedef __int64 off_t;
#define off_t off64_t
#endif
-#if defined(_MSC_VER)
-#define LITERALU64(n) n
-#else
-#define LITERALU64(n) n##LLU
-#endif
+#define LITERALU64(hi,lo) ((((uint64_t)hi)<<32)|lo)
/* We should use 32-bit file operations in WebM file format
* when building ARM executable file (.axf) with RVCT */
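
The rewritten LITERALU64 builds a 64-bit constant from two 32-bit halves instead of pasting an LLU suffix, which sidesteps compilers that spell 64-bit literal suffixes differently (the old code needed a separate _MSC_VER branch for exactly that reason). A small self-check of the construction:

    #include <assert.h>
    #include <stdint.h>

    #define LITERALU64(hi, lo) ((((uint64_t)hi) << 32) | lo)

    int main(void) {
      /* The EBML "length unknown" key used further down in this file. */
      assert(LITERALU64(0x01FFFFFF, 0xFFFFFFFF) == 0x01ffffffffffffffULL);
      return 0;
    }
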
@@ -68,12 +64,28 @@ typedef long off_t;
#define ftello ftell
#endif
+/* Swallow warnings about unused results of fread/fwrite */
+static size_t wrap_fread(void *ptr, size_t size, size_t nmemb,
+ FILE *stream)
+{
+ return fread(ptr, size, nmemb, stream);
+}
+#define fread wrap_fread
+
+static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
+ FILE *stream)
+{
+ return fwrite(ptr, size, nmemb, stream);
+}
+#define fwrite wrap_fwrite
+
+
static const char *exec_name;
static const struct codec_item
{
char const *name;
- const vpx_codec_iface_t *iface;
+ vpx_codec_iface_t *iface;
unsigned int fourcc;
} codecs[] =
{
@@ -245,7 +257,7 @@ void stats_write(stats_io_t *stats, const void *pkt, size_t len)
{
if (stats->file)
{
- if(fwrite(pkt, 1, len, stats->file));
+ (void) fwrite(pkt, 1, len, stats->file);
}
else
{
@@ -338,7 +350,7 @@ static int read_frame(struct input_state *input, vpx_image_t *img)
* write_ivf_frame_header() for documentation on the frame header
* layout.
*/
- if(fread(junk, 1, IVF_FRAME_HDR_SZ, f));
+ (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f);
}
for (plane = 0; plane < 3; plane++)
@@ -468,7 +480,7 @@ static void write_ivf_file_header(FILE *outfile,
mem_put_le32(header + 24, frame_cnt); /* length */
mem_put_le32(header + 28, 0); /* unused */
- if(fwrite(header, 1, 32, outfile));
+ (void) fwrite(header, 1, 32, outfile);
}
@@ -482,18 +494,18 @@ static void write_ivf_frame_header(FILE *outfile,
return;
pts = pkt->data.frame.pts;
- mem_put_le32(header, pkt->data.frame.sz);
+ mem_put_le32(header, (int)pkt->data.frame.sz);
mem_put_le32(header + 4, pts & 0xFFFFFFFF);
mem_put_le32(header + 8, pts >> 32);
- if(fwrite(header, 1, 12, outfile));
+ (void) fwrite(header, 1, 12, outfile);
}
static void write_ivf_frame_size(FILE *outfile, size_t size)
{
char header[4];
- mem_put_le32(header, size);
- fwrite(header, 1, 4, outfile);
+ mem_put_le32(header, (int)size);
+ (void) fwrite(header, 1, 4, outfile);
}
@@ -541,13 +553,13 @@ struct EbmlGlobal
void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
{
- if(fwrite(buffer_in, 1, len, glob->stream));
+ (void) fwrite(buffer_in, 1, len, glob->stream);
}
#define WRITE_BUFFER(s) \
for(i = len-1; i>=0; i--)\
{ \
- x = *(const s *)buffer_in >> (i * CHAR_BIT); \
+ x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \
Ebml_Write(glob, &x, 1); \
}
void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len)
@@ -597,9 +609,9 @@ static void
Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
unsigned long class_id)
{
- //todo this is always taking 8 bytes, this may need later optimization
- //this is a key that says length unknown
- uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF);
+ /* todo this is always taking 8 bytes, this may need later optimization */
+ /* this is a key that says length unknown */
+ uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
Ebml_WriteID(glob, class_id);
*ebmlLoc = ftello(glob->stream);
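
Ebml_StartSubElement writes the 8-byte "length unknown" key and remembers where it was written; Ebml_EndSubElement (next hunk) later computes the real size, ORs in the top 0x01 byte that marks an 8-byte size field, and seeks back to patch it. A hedged, self-contained sketch of that patch-up pattern with hypothetical names (write_be64, start_element, end_element):

    #include <stdint.h>
    #include <stdio.h>

    static void write_be64(FILE *f, uint64_t v) {
      int i;
      for (i = 7; i >= 0; i--) fputc((int)((v >> (i * 8)) & 0xff), f);
    }

    static long start_element(FILE *f) {
      long loc = ftell(f);                   /* offset of the size field */
      write_be64(f, 0x01ffffffffffffffULL);  /* length unknown, 8 bytes  */
      return loc;
    }

    static void end_element(FILE *f, long loc) {
      long pos = ftell(f);
      uint64_t size = (uint64_t)(pos - loc - 8);    /* body length       */
      fseek(f, loc, SEEK_SET);
      write_be64(f, size | 0x0100000000000000ULL);  /* keep width marker */
      fseek(f, pos, SEEK_SET);
    }
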
@@ -617,7 +629,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
/* Calculate the size of this element */
size = pos - *ebmlLoc - 8;
- size |= LITERALU64(0x0100000000000000);
+ size |= LITERALU64(0x01000000,0x00000000);
/* Seek back to the beginning of the element and write the new size */
fseeko(glob->stream, *ebmlLoc, SEEK_SET);
@@ -664,7 +676,7 @@ write_webm_seek_info(EbmlGlobal *ebml)
Ebml_EndSubElement(ebml, &start);
}
{
- //segment info
+ /* segment info */
EbmlLoc startInfo;
uint64_t frame_time;
char version_string[64];
@@ -686,7 +698,7 @@ write_webm_seek_info(EbmlGlobal *ebml)
Ebml_StartSubElement(ebml, &startInfo, Info);
Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
Ebml_SerializeFloat(ebml, Segment_Duration,
- ebml->last_pts_ms + frame_time);
+ (double)(ebml->last_pts_ms + frame_time));
Ebml_SerializeString(ebml, 0x4D80, version_string);
Ebml_SerializeString(ebml, 0x5741, version_string);
Ebml_EndSubElement(ebml, &startInfo);
@@ -704,16 +716,16 @@ write_webm_file_header(EbmlGlobal *glob,
EbmlLoc start;
Ebml_StartSubElement(glob, &start, EBML);
Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
- Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version
- Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length
- Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length
- Ebml_SerializeString(glob, DocType, "webm"); //Doc Type
- Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version
- Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
+ Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);
+ Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);
+ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);
+ Ebml_SerializeString(glob, DocType, "webm");
+ Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);
+ Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2);
Ebml_EndSubElement(glob, &start);
}
{
- Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment
+ Ebml_StartSubElement(glob, &glob->startSegment, Segment);
glob->position_reference = ftello(glob->stream);
glob->framerate = *fps;
write_webm_seek_info(glob);
@@ -731,7 +743,7 @@ write_webm_file_header(EbmlGlobal *glob,
Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
glob->track_id_pos = ftello(glob->stream);
Ebml_SerializeUnsigned32(glob, TrackUID, trackID);
- Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
+ Ebml_SerializeUnsigned(glob, TrackType, 1);
Ebml_SerializeString(glob, CodecID, "V_VP8");
{
unsigned int pixelWidth = cfg->g_w;
@@ -744,13 +756,13 @@ write_webm_file_header(EbmlGlobal *glob,
Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
Ebml_SerializeFloat(glob, FrameRate, frameRate);
- Ebml_EndSubElement(glob, &videoStart); //Video
+ Ebml_EndSubElement(glob, &videoStart);
}
- Ebml_EndSubElement(glob, &start); //Track Entry
+ Ebml_EndSubElement(glob, &start); /* Track Entry */
}
Ebml_EndSubElement(glob, &trackStart);
}
- // segment element is open
+ /* segment element is open */
}
}
@@ -778,7 +790,7 @@ write_webm_block(EbmlGlobal *glob,
if(pts_ms - glob->cluster_timecode > SHRT_MAX)
start_cluster = 1;
else
- block_timecode = pts_ms - glob->cluster_timecode;
+ block_timecode = (unsigned short)pts_ms - glob->cluster_timecode;
is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
if(start_cluster || is_keyframe)
@@ -789,9 +801,9 @@ write_webm_block(EbmlGlobal *glob,
/* Open the new cluster */
block_timecode = 0;
glob->cluster_open = 1;
- glob->cluster_timecode = pts_ms;
+ glob->cluster_timecode = (uint32_t)pts_ms;
glob->cluster_pos = ftello(glob->stream);
- Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster
+ Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */
Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
/* Save a cue point if this is a keyframe. */
@@ -816,7 +828,7 @@ write_webm_block(EbmlGlobal *glob,
/* Write the Simple Block */
Ebml_WriteID(glob, SimpleBlock);
- block_length = pkt->data.frame.sz + 4;
+ block_length = (unsigned long)pkt->data.frame.sz + 4;
block_length |= 0x10000000;
Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
@@ -833,7 +845,7 @@ write_webm_block(EbmlGlobal *glob,
flags |= 0x08;
Ebml_Write(glob, &flags, 1);
- Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz);
+ Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz);
}
@@ -865,7 +877,6 @@ write_webm_file_footer(EbmlGlobal *glob, long hash)
Ebml_SerializeUnsigned(glob, CueTrack, 1);
Ebml_SerializeUnsigned64(glob, CueClusterPosition,
cue->loc - glob->position_reference);
- //Ebml_SerializeUnsigned(glob, CueBlockNumber, cue->blockNumber);
Ebml_EndSubElement(glob, &start);
}
Ebml_EndSubElement(glob, &start);
@@ -942,7 +953,7 @@ static double vp8_mse2psnr(double Samples, double Peak, double Mse)
if ((double)Mse > 0.0)
psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
else
- psnr = 60; // Limit to prevent / 0
+ psnr = 60; /* Limit to prevent / 0 */
if (psnr > 60)
psnr = 60;
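
Note the Mse argument here is really the accumulated sum of squared error, which is why Samples appears in the numerator: psnr = 10 * log10(255^2 * Samples / Sse). With a per-sample squared error of 1.0 that gives 10 * log10(65025), about 48.13 dB, and the 60 dB clamp covers the near-lossless case where Sse approaches zero.
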
@@ -978,6 +989,8 @@ static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0,
"Use Good Quality Deadline");
static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0,
"Use Realtime Quality Deadline");
+static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
+ "Do not print encode progress");
static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
"Show encoder parameters");
static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0,
@@ -997,7 +1010,7 @@ static const arg_def_t *main_args[] =
&debugmode,
&outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline,
&best_dl, &good_dl, &rt_dl,
- &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n,
+ &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n,
NULL
};
@@ -1225,7 +1238,7 @@ static int merge_hist_buckets(struct hist_bucket *bucket,
{
int last_bucket = buckets - 1;
- // merge the small bucket with an adjacent one.
+ /* merge the small bucket with an adjacent one. */
if(small_bucket == 0)
merge_bucket = 1;
else if(small_bucket == last_bucket)
@@ -1325,7 +1338,7 @@ static void show_histogram(const struct hist_bucket *bucket,
int j;
float pct;
- pct = 100.0 * (float)bucket[i].count / (float)total;
+ pct = (float)(100.0 * bucket[i].count / total);
len = HIST_BAR_MAX * bucket[i].count / scale;
if(len < 1)
len = 1;
@@ -1393,7 +1406,7 @@ static void init_rate_histogram(struct rate_hist *hist,
*/
hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
- // prevent division by zero
+ /* prevent division by zero */
if (hist->samples == 0)
hist->samples=1;
@@ -1427,7 +1440,7 @@ static void update_rate_histogram(struct rate_hist *hist,
idx = hist->frames++ % hist->samples;
hist->pts[idx] = now;
- hist->sz[idx] = pkt->data.frame.sz;
+ hist->sz[idx] = (int)pkt->data.frame.sz;
if(now < cfg->rc_buf_initial_sz)
return;
@@ -1449,15 +1462,15 @@ static void update_rate_histogram(struct rate_hist *hist,
return;
avg_bitrate = sum_sz * 8 * 1000 / (now - then);
- idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000);
+ idx = (int)(avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000));
if(idx < 0)
idx = 0;
if(idx > RATE_BINS-1)
idx = RATE_BINS-1;
if(hist->bucket[idx].low > avg_bitrate)
- hist->bucket[idx].low = avg_bitrate;
+ hist->bucket[idx].low = (int)avg_bitrate;
if(hist->bucket[idx].high < avg_bitrate)
- hist->bucket[idx].high = avg_bitrate;
+ hist->bucket[idx].high = (int)avg_bitrate;
hist->bucket[idx].count++;
hist->total++;
}
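
The arithmetic above converts a sliding window of frame sizes to an average bitrate: sum_sz * 8 turns bytes into bits, and * 1000 turns the millisecond span (now - then) into seconds. For example, 50000 bytes over a 1000 ms window gives 50000*8*1000/1000 = 400000 b/s; with rc_target_bitrate = 400 (kb/s), the bucket index idx = avg * (RATE_BINS/2) / (target * 1000) lands exactly at RATE_BINS/2, so streams hitting their target sit in the middle of the histogram.
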
@@ -1495,6 +1508,7 @@ struct global_config
int usage;
int deadline;
int use_i420;
+ int quiet;
int verbose;
int limit;
int show_psnr;
@@ -1619,6 +1633,8 @@ static void parse_global_config(struct global_config *global, char **argv)
global->use_i420 = 0;
else if (arg_match(&arg, &use_i420, argi))
global->use_i420 = 1;
+ else if (arg_match(&arg, &quietarg, argi))
+ global->quiet = 1;
else if (arg_match(&arg, &verbosearg, argi))
global->verbose = 1;
else if (arg_match(&arg, &limit, argi))
@@ -2000,7 +2016,7 @@ static void set_default_kf_interval(struct stream_state *stream,
{
double framerate = (double)global->framerate.num/global->framerate.den;
if (framerate > 0.0)
- stream->config.cfg.kf_max_dist = 5.0*framerate;
+ stream->config.cfg.kf_max_dist = (unsigned int)(5.0*framerate);
}
}
@@ -2180,7 +2196,7 @@ static void encode_frame(struct stream_state *stream,
/ cfg->g_timebase.num / global->framerate.num;
vpx_usec_timer_start(&timer);
vpx_codec_encode(&stream->encoder, img, frame_start,
- next_frame_start - frame_start,
+ (unsigned long)(next_frame_start - frame_start),
0, global->deadline);
vpx_usec_timer_mark(&timer);
stream->cx_time += vpx_usec_timer_elapsed(&timer);
@@ -2224,8 +2240,9 @@ static void get_cx_data(struct stream_state *stream,
{
stream->frames_out++;
}
- fprintf(stderr, " %6luF",
- (unsigned long)pkt->data.frame.sz);
+ if (!global->quiet)
+ fprintf(stderr, " %6luF",
+ (unsigned long)pkt->data.frame.sz);
update_rate_histogram(&stream->rate_hist, cfg, pkt);
if(stream->config.write_webm)
@@ -2233,7 +2250,8 @@ static void get_cx_data(struct stream_state *stream,
/* Update the hash */
if(!stream->ebml.debug)
stream->hash = murmur(pkt->data.frame.buf,
- pkt->data.frame.sz, stream->hash);
+ (int)pkt->data.frame.sz,
+ stream->hash);
write_webm_block(&stream->ebml, cfg, pkt);
}
@@ -2259,15 +2277,16 @@ static void get_cx_data(struct stream_state *stream,
}
}
- fwrite(pkt->data.frame.buf, 1,
- pkt->data.frame.sz, stream->file);
+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+ stream->file);
}
stream->nbytes += pkt->data.raw.sz;
break;
case VPX_CODEC_STATS_PKT:
stream->frames_out++;
- fprintf(stderr, " %6luS",
- (unsigned long)pkt->data.twopass_stats.sz);
+ if (!global->quiet)
+ fprintf(stderr, " %6luS",
+ (unsigned long)pkt->data.twopass_stats.sz);
stats_write(&stream->stats,
pkt->data.twopass_stats.buf,
pkt->data.twopass_stats.sz);
@@ -2283,7 +2302,8 @@ static void get_cx_data(struct stream_state *stream,
stream->psnr_samples_total += pkt->data.psnr.samples[0];
for (i = 0; i < 4; i++)
{
- fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]);
+ if (!global->quiet)
+ fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]);
stream->psnr_totals[i] += pkt->data.psnr.psnr[i];
}
stream->psnr_count++;
@@ -2306,13 +2326,13 @@ static void show_psnr(struct stream_state *stream)
return;
fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);
- ovpsnr = vp8_mse2psnr(stream->psnr_samples_total, 255.0,
- stream->psnr_sse_total);
- fprintf(stderr, " %.3lf", ovpsnr);
+ ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0,
+ (double)stream->psnr_sse_total);
+ fprintf(stderr, " %.3f", ovpsnr);
for (i = 0; i < 4; i++)
{
- fprintf(stderr, " %.3lf", stream->psnr_totals[i]/stream->psnr_count);
+ fprintf(stderr, " %.3f", stream->psnr_totals[i]/stream->psnr_count);
}
fprintf(stderr, "\n");
}
@@ -2320,7 +2340,7 @@ static void show_psnr(struct stream_state *stream)
float usec_to_fps(uint64_t usec, unsigned int frames)
{
- return usec > 0 ? (float)frames * 1000000.0 / (float)usec : 0;
+ return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0);
}
@@ -2437,7 +2457,7 @@ int main(int argc, const char **argv_)
vpx_img_alloc(&raw,
input.use_i420 ? VPX_IMG_FMT_I420
: VPX_IMG_FMT_YV12,
- input.w, input.h, 1);
+ input.w, input.h, 32);
FOREACH_STREAM(init_rate_histogram(&stream->rate_hist,
&stream->config.cfg,
@@ -2462,18 +2482,21 @@ int main(int argc, const char **argv_)
if (frame_avail)
frames_in++;
- if(stream_cnt == 1)
- fprintf(stderr,
- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K",
- pass + 1, global.passes, frames_in,
- streams->frames_out, (int64_t)streams->nbytes);
- else
- fprintf(stderr,
- "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K",
- pass + 1, global.passes, frames_in,
- cx_time > 9999999 ? cx_time / 1000 : cx_time,
- cx_time > 9999999 ? "ms" : "us",
- usec_to_fps(cx_time, frames_in));
+ if (!global.quiet)
+ {
+ if(stream_cnt == 1)
+ fprintf(stderr,
+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K",
+ pass + 1, global.passes, frames_in,
+ streams->frames_out, (int64_t)streams->nbytes);
+ else
+ fprintf(stderr,
+ "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K",
+ pass + 1, global.passes, frames_in,
+ cx_time > 9999999 ? cx_time / 1000 : cx_time,
+ cx_time > 9999999 ? "ms" : "us",
+ usec_to_fps(cx_time, frames_in));
+ }
}
else
@@ -2484,7 +2507,7 @@ int main(int argc, const char **argv_)
frame_avail ? &raw : NULL,
frames_in));
vpx_usec_timer_mark(&timer);
- cx_time += vpx_usec_timer_elapsed(&timer);
+ cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer);
FOREACH_STREAM(update_quantizer_histogram(stream));
@@ -2497,20 +2520,21 @@ int main(int argc, const char **argv_)
if(stream_cnt > 1)
fprintf(stderr, "\n");
- FOREACH_STREAM(fprintf(
- stderr,
- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"
- " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,
- global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
- frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0,
- frames_in ? (int64_t)stream->nbytes * 8
- * (int64_t)global.framerate.num / global.framerate.den
- / frames_in
- : 0,
- stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
- stream->cx_time > 9999999 ? "ms" : "us",
- usec_to_fps(stream->cx_time, frames_in));
- );
+ if (!global.quiet)
+ FOREACH_STREAM(fprintf(
+ stderr,
+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"
+ " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,
+ global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
+ frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0,
+ frames_in ? (int64_t)stream->nbytes * 8
+ * (int64_t)global.framerate.num / global.framerate.den
+ / frames_in
+ : 0,
+ stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
+ stream->cx_time > 9999999 ? "ms" : "us",
+ usec_to_fps(stream->cx_time, frames_in));
+ );
if (global.show_psnr)
FOREACH_STREAM(show_psnr(stream));
diff --git a/y4minput.c b/y4minput.c
index dd51421..ff9ffbc 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -662,7 +662,7 @@ int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){
_nskip--;
}
else{
- ret=fread(buffer+i,1,1,_fin);
+ ret=(int)fread(buffer+i,1,1,_fin);
if(ret<1)return -1;
}
if(buffer[i]=='\n')break;
@@ -818,7 +818,7 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
int c_sz;
int ret;
/*Read and skip the frame header.*/
- ret=fread(frame,1,6,_fin);
+ ret=(int)fread(frame,1,6,_fin);
if(ret<6)return 0;
if(memcmp(frame,"FRAME",5)){
fprintf(stderr,"Loss of framing in Y4M input data\n");