From 44502fcc3e45f23a8a6b7627ffaba0538f0197cd8876125cd3b65f7b636ea9a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ismail=20D=C3=B6nmez?= Date: Tue, 10 Dec 2013 19:38:49 +0000 Subject: [PATCH] Accepting request 210407 from home:sumski:branches:multimedia:libs Update to 1.3.0 OBS-URL: https://build.opensuse.org/request/show/210407 OBS-URL: https://build.opensuse.org/package/show/multimedia:libs/libvpx?expand=0&rev=47 --- libvpx-1.3.0.tar.xz | 3 + libvpx-armv7-use-hard-float.patch | 20 +- libvpx-configure-add-s390.patch | 22 +- libvpx-v1.1.0.tar.bz2 | 3 - libvpx.changes | 48 + libvpx.spec | 14 +- version_1.1.0_to_1.2.0.diff | 45439 ---------------------------- 7 files changed, 78 insertions(+), 45471 deletions(-) create mode 100644 libvpx-1.3.0.tar.xz delete mode 100644 libvpx-v1.1.0.tar.bz2 delete mode 100644 version_1.1.0_to_1.2.0.diff diff --git a/libvpx-1.3.0.tar.xz b/libvpx-1.3.0.tar.xz new file mode 100644 index 0000000..969a450 --- /dev/null +++ b/libvpx-1.3.0.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4087c5195c35ff1de6d9449919c35207822c1b5dc78a6684d88d64f93ec7c6a +size 1752356 diff --git a/libvpx-armv7-use-hard-float.patch b/libvpx-armv7-use-hard-float.patch index 8fb72a5..da417e1 100644 --- a/libvpx-armv7-use-hard-float.patch +++ b/libvpx-armv7-use-hard-float.patch @@ -1,13 +1,13 @@ -Index: build/make/configure.sh -=================================================================== ---- build/make/configure.sh.orig -+++ build/make/configure.sh -@@ -789,8 +789,8 @@ process_common_toolchain() { - check_add_asflags --defsym ARCHITECTURE=${arch_int} - tune_cflags="-mtune=" - if [ ${tgt_isa} == "armv7" ]; then -- check_add_cflags -march=armv7-a -mfloat-abi=softfp -- check_add_asflags -march=armv7-a -mfloat-abi=softfp +diff --git a/build/make/configure.sh b/build/make/configure.sh +index cf78764..3213d97 100755 +--- a/build/make/configure.sh ++++ b/build/make/configure.sh +@@ -820,8 +820,8 @@ process_common_toolchain() { + #endif + EOF + fi +- check_add_cflags -march=armv7-a -mfloat-abi=${float_abi} +- check_add_asflags -march=armv7-a -mfloat-abi=${float_abi} + check_add_cflags -march=armv7-a -mfloat-abi=hard + check_add_asflags -march=armv7-a -mfloat-abi=hard diff --git a/libvpx-configure-add-s390.patch b/libvpx-configure-add-s390.patch index ca07651..3ec702e 100644 --- a/libvpx-configure-add-s390.patch +++ b/libvpx-configure-add-s390.patch @@ -1,8 +1,8 @@ -Index: build/make/configure.sh -=================================================================== ---- build/make/configure.sh.orig -+++ build/make/configure.sh -@@ -614,6 +614,12 @@ process_common_toolchain() { +diff --git a/build/make/configure.sh b/build/make/configure.sh +index 8dcb9bb..cf78764 100755 +--- a/build/make/configure.sh ++++ b/build/make/configure.sh +@@ -627,6 +627,12 @@ process_common_toolchain() { *powerpc*) tgt_isa=ppc32 ;; @@ -15,16 +15,16 @@ Index: build/make/configure.sh *sparc*) tgt_isa=sparc ;; -Index: configure -=================================================================== ---- configure.orig -+++ configure -@@ -104,6 +104,8 @@ all_platforms="${all_platforms} ppc32-li +diff --git a/configure b/configure +index 4ff3fc7..c590057 100755 +--- a/configure ++++ b/configure +@@ -108,6 +108,8 @@ all_platforms="${all_platforms} ppc32-linux-gcc" all_platforms="${all_platforms} ppc64-darwin8-gcc" all_platforms="${all_platforms} ppc64-darwin9-gcc" all_platforms="${all_platforms} ppc64-linux-gcc" +all_platforms="${all_platforms} s390-linux-gcc" +all_platforms="${all_platforms} 
s390x-linux-gcc" all_platforms="${all_platforms} sparc-solaris-gcc" + all_platforms="${all_platforms} x86-android-gcc" all_platforms="${all_platforms} x86-darwin8-gcc" - all_platforms="${all_platforms} x86-darwin8-icc" diff --git a/libvpx-v1.1.0.tar.bz2 b/libvpx-v1.1.0.tar.bz2 deleted file mode 100644 index 42c2e70..0000000 --- a/libvpx-v1.1.0.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ce074cf4b3bcd9a49ff93e05485b71c273bfc3685a305e55a0e7fa51beb72c5 -size 1653485 diff --git a/libvpx.changes b/libvpx.changes index 51d3aee..f14d91d 100644 --- a/libvpx.changes +++ b/libvpx.changes @@ -1,3 +1,51 @@ +------------------------------------------------------------------- +Tue Dec 10 16:26:06 UTC 2013 - hrvoje.senjan@gmail.com + +- Update to version 1.3.0 + + This release is ABI and API compatible with Duclair (v1.0.0). + + This release introduces the VP9 codec in a backward-compatible + way. All existing users of VP8 can continue to use the library + without modification. However, some VP8 options do not map to + VP9 in the same manner. + + Enhancements: + - Get rid of bashisms in the main build scripts + - Added usage info on command line options + - Add lossless compression mode + - Dll build of libvpx + - Add additional Mac OS X targets: 10.7, 10.8 and 10.9 + - Add option to disable documentation + - configure: add --enable-external-build support + - make: support V=1 as short form of verbose=yes + - configure: support mingw-w64 + - configure: support hardfloat armv7 CHOSTS + - configure: add support for android x86 + - Add estimated completion time to vpxenc + - Don't exit on decode errors in vpxenc + - vpxenc: support scaling prior to encoding + - vpxdec: support scaling output + - vpxenc: improve progress indicators with --skip + - msvs: Don't link to winmm.lib + - Add a new script for producing vcxproj files + - Produce Visual Studio 10 and 11 project files + - Produce Windows Phone project files + - msvs-build: use msbuild for vs >= 2005 + - configure: default configure log to config.log + - Add encoding option --static-thresh + + Speed: + - Miscellaneous speed optimizations for VP8 and VP9. + + Quality: + - In general, quality is consistent with the Eider release. + + Bug Fixes: + - This release represents approximately a year of engineering + effort, and contains multiple bug fixes. Please refer to git + history for details. +- Rebase libvpx-armv7-use-hard-float.patch and + libvpx-configure-add-s390.patch to this release +- Droped version_1.1.0_to_1.2.0.diff +- Added xz BuildRequires +- Remove Source URL, as the tarball is not available at that place +- Tarball is generated from v1.3.0 tag + ------------------------------------------------------------------- Mon Jan 28 17:07:51 UTC 2013 - adrian@suse.de diff --git a/libvpx.spec b/libvpx.spec index b6d5720..6ee54ab 100644 --- a/libvpx.spec +++ b/libvpx.spec @@ -17,15 +17,13 @@ Name: libvpx -Version: 1.2.0 +Version: 1.3.0 Release: 0 Summary: VP8 codec library License: BSD-3-Clause and GPL-2.0+ Group: Productivity/Multimedia/Other Url: http://www.webmproject.org/ -Source0: http://webm.googlecode.com/files/%{name}-v1.1.0.tar.bz2 -# The upstream project did not release a tar ball, just a git tag of version 1.2.0 :/ -Patch0: version_1.1.0_to_1.2.0.diff +Source0: %{name}-%{version}.tar.xz # PATCH-FIX-UPSTREAM libvpx-define-config_pic.patch dimstar@opensuse.org -- For older compilers, CONFIG_PIC need to be defined. 
Patch1: libvpx-define-config_pic.patch Patch2: libvpx-configure-add-s390.patch @@ -33,6 +31,7 @@ Patch3: libvpx-disable-cross-for-arm.patch Patch4: libvpx-armv7-use-hard-float.patch # Needed to be able to create pkgconfig() provides. BuildRequires: pkg-config +BuildRequires: xz BuildRequires: yasm BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -90,12 +89,11 @@ and audio streams compressed with the Vorbis audio codec. The WebM file structure is based on the Matroska container. %prep -%setup -q -n %name-v1.1.0 -%patch0 -p1 +%setup -q %patch1 -p1 -%patch2 +%patch2 -p1 %patch3 -%patch4 +%patch4 -p1 %build cd build diff --git a/version_1.1.0_to_1.2.0.diff b/version_1.1.0_to_1.2.0.diff deleted file mode 100644 index 1e40126..0000000 --- a/version_1.1.0_to_1.2.0.diff +++ /dev/null @@ -1,45439 +0,0 @@ -diff --git a/.gitignore b/.gitignore -index 110146d..4074b0b 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -32,6 +32,8 @@ - /ivfdec.dox - /ivfenc - /ivfenc.dox -+/libvpx.so* -+/libvpx.ver - /obj_int_extract - /postproc - /postproc.c -@@ -43,6 +45,7 @@ - /simple_encoder - /simple_encoder.c - /simple_encoder.dox -+/test_libvpx - /twopass_encoder - /twopass_encoder.c - /twopass_encoder.dox -@@ -55,7 +58,14 @@ - /vp8cx_set_ref - /vp8cx_set_ref.c - /vp8cx_set_ref.dox -+/vpx.pc - /vpx_config.c - /vpx_config.h -+/vpx_rtcd.h - /vpx_version.h -+/vpxdec -+/vpxenc - TAGS -+.cproject -+.project -+.settings -diff --git a/CHANGELOG b/CHANGELOG -index dcb9f73..ef64a96 100644 ---- a/CHANGELOG -+++ b/CHANGELOG -@@ -1,3 +1,32 @@ -+2012-12-21 v1.2.0 -+ This release acts as a checkpoint for a large amount of internal refactoring -+ and testing. It also contains a number of small bugfixes, so all users are -+ encouraged to upgrade. -+ -+ - Upgrading: -+ This release is ABI and API compatible with Duclair (v1.0.0). Users -+ of older releases should refer to the Upgrading notes in this -+ document for that release. -+ -+ - Enhancements: -+ VP8 optimizations for MIPS dspr2 -+ vpxenc: add -quiet option -+ -+ - Speed: -+ Encoder and decoder speed is consistent with the Eider release. -+ -+ - Quality: -+ In general, quality is consistent with the Eider release. -+ -+ Minor tweaks to ARNR filtering -+ Minor improvements to real time encoding with multiple temporal layers -+ -+ - Bug Fixes: -+ Fixes multithreaded encoder race condition in loopfilter -+ Fixes multi-resolution threaded encoding -+ Fix potential encoder dead-lock after picture resize -+ -+ - 2012-05-09 v1.1.0 "Eider" - This introduces a number of enhancements, mostly focused on real-time - encoding. In addition, it fixes a decoder bug (first introduced in -diff --git a/README b/README -index 0dfb0fe..0475dad 100644 ---- a/README -+++ b/README -@@ -1,5 +1,5 @@ - vpx Multi-Format Codec SDK --README - 19 May 2010 -+README - 21 June 2012 - - Welcome to the WebM VP8 Codec SDK! - -@@ -15,11 +15,19 @@ COMPILING THE APPLICATIONS/LIBRARIES: - * Building the documentation requires PHP[3] and Doxygen[4]. If you do not - have these packages, you must pass --disable-install-docs to the - configure script. -+ * Downloading the data for the unit tests requires curl[5] and sha1sum. -+ sha1sum is provided via the GNU coreutils, installed by default on -+ many *nix platforms, as well as MinGW and Cygwin. If coreutils is not -+ available, a compatible version of sha1sum can be built from -+ source[6]. These requirements are optional if not running the unit -+ tests. 
- - [1]: http://www.tortall.net/projects/yasm - [2]: http://www.cygwin.com - [3]: http://php.net - [4]: http://www.doxygen.org -+ [5]: http://curl.haxx.se -+ [6]: http://www.microbrew.org/tools/md5sha1sum/ - - 2. Out-of-tree builds - Out of tree builds are a supported method of building the application. For -@@ -94,5 +102,5 @@ COMPILING THE APPLICATIONS/LIBRARIES: - - SUPPORT - This library is an open source project supported by its community. Please -- please email webm-users@webmproject.org for help. -+ please email webm-discuss@webmproject.org for help. - -diff --git a/build/make/Android.mk b/build/make/Android.mk -index 6fcd4ae..c6b9cf9 100644 ---- a/build/make/Android.mk -+++ b/build/make/Android.mk -@@ -27,15 +27,22 @@ - # Android.mk file in the libvpx directory: - # LOCAL_PATH := $(call my-dir) - # include $(CLEAR_VARS) --# include libvpx/build/make/Android.mk -+# include jni/libvpx/build/make/Android.mk - # - # There are currently two TARGET_ARCH_ABI targets for ARM. - # armeabi and armeabi-v7a. armeabi-v7a is selected by creating an - # Application.mk in the jni directory that contains: - # APP_ABI := armeabi-v7a - # -+# By default libvpx will detect at runtime the existance of NEON extension. -+# For this we import the 'cpufeatures' module from the NDK sources. -+# libvpx can also be configured without this runtime detection method. -+# Configuring with --disable-runtime-cpu-detect will assume presence of NEON. -+# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any -+# NEON dependency. -+ - # To change to building armeabi, run ./libvpx/configure again, but with --# --target=arm5te-android-gcc and and modify the Application.mk file to -+# --target=arm5te-android-gcc and modify the Application.mk file to - # set APP_ABI := armeabi - # - # Running ndk-build will build libvpx and include it in your project. -@@ -166,7 +173,9 @@ LOCAL_MODULE := libvpx - - LOCAL_LDLIBS := -llog - --LOCAL_STATIC_LIBRARIES := cpufeatures -+ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) -+ LOCAL_STATIC_LIBRARIES := cpufeatures -+endif - - $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h - -@@ -196,4 +205,7 @@ ifeq ($(CONFIG_VP8_ENCODER), yes) - $(LIBVPX_PATH)/vp8/encoder/asm_enc_offsets.c)) - endif - -+ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) - $(call import-module,cpufeatures) -+endif -+ -diff --git a/build/make/Makefile b/build/make/Makefile -index b6cf320..1088c84 100644 ---- a/build/make/Makefile -+++ b/build/make/Makefile -@@ -21,6 +21,7 @@ all: .DEFAULT - clean:: .DEFAULT - install:: .DEFAULT - test:: .DEFAULT -+testdata:: .DEFAULT - - - # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be -@@ -66,6 +67,7 @@ endif - BUILD_ROOT?=. 
- VPATH=$(SRC_PATH_BARE) - CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) -+CXXFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) - ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/ - DIST_DIR?=dist - HOSTCC?=gcc -@@ -98,6 +100,8 @@ dist: - install:: - .PHONY: test - test:: -+.PHONY: testdata -+testdata:: - - $(BUILD_PFX)%.c.d: %.c - $(if $(quiet),@echo " [DEP] $@") -@@ -111,11 +115,11 @@ $(BUILD_PFX)%.c.o: %.c - $(BUILD_PFX)%.cc.d: %.cc - $(if $(quiet),@echo " [DEP] $@") - $(qexec)mkdir -p $(dir $@) -- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -M $< | $(fmt_deps) > $@ -+ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@ - - $(BUILD_PFX)%.cc.o: %.cc - $(if $(quiet),@echo " [CXX] $@") -- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $< -+ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $< - - $(BUILD_PFX)%.asm.d: %.asm - $(if $(quiet),@echo " [DEP] $@") -@@ -213,7 +217,7 @@ define linkerxx_template - $(1): $(filter-out -%,$(2)) - $(1): - $(if $(quiet),@echo " [LD] $$@") -- $(qexec)g++ $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) -+ $(qexec)$$(CXX) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) - endef - # make-3.80 has a bug with expanding large input strings to the eval function, - # which was triggered in some cases by the following component of -diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl -index c55ed0f..95be467 100755 ---- a/build/make/ads2gas.pl -+++ b/build/make/ads2gas.pl -@@ -26,12 +26,22 @@ print "\t.equ DO1STROUNDING, 0\n"; - - while () - { -+ undef $comment; -+ undef $line; -+ $comment_char = ";"; -+ $comment_sub = "@"; -+ -+ # Handle comments. -+ if (/$comment_char/) -+ { -+ $comment = ""; -+ ($line, $comment) = /(.*?)$comment_char(.*)/; -+ $_ = $line; -+ } -+ - # Load and store alignment - s/@/,:/g; - -- # Comment character -- s/;/@/g; -- - # Hexadecimal constants prefaced by 0x - s/#&/#0x/g; - -@@ -51,16 +61,27 @@ while () - s/:SHR:/ >> /g; - - # Convert ELSE to .else -- s/ELSE/.else/g; -+ s/\bELSE\b/.else/g; - - # Convert ENDIF to .endif -- s/ENDIF/.endif/g; -+ s/\bENDIF\b/.endif/g; - - # Convert ELSEIF to .elseif -- s/ELSEIF/.elseif/g; -+ s/\bELSEIF\b/.elseif/g; - - # Convert LTORG to .ltorg -- s/LTORG/.ltorg/g; -+ s/\bLTORG\b/.ltorg/g; -+ -+ # Convert endfunc to nothing. -+ s/\bendfunc\b//ig; -+ -+ # Convert FUNCTION to nothing. -+ s/\bFUNCTION\b//g; -+ s/\bfunction\b//g; -+ -+ s/\bENTRY\b//g; -+ s/\bMSARMASM\b/0/g; -+ s/^\s+end\s+$//g; - - # Convert IF :DEF:to .if - # gcc doesn't have the ability to do a conditional -@@ -106,6 +127,7 @@ while () - if (s/RN\s+([Rr]\d+|lr)/.req $1/) - { - print; -+ print "$comment_sub$comment\n" if defined $comment; - next; - } - -@@ -114,6 +136,9 @@ while () - s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/; - s/IMPORT\s+\|([\$\w]*)\|/.global $1/; - -+ s/EXPORT\s+([\$\w]*)/.global $1/; -+ s/export\s+([\$\w]*)/.global $1/; -+ - # No vertical bars required; make additional symbol with prepended - # underscore - s/^\|(\$?\w+)\|/_$1\n\t$1:/g; -@@ -124,11 +149,19 @@ while () - s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/; - - # ALIGN directive -- s/ALIGN/.balign/g; -+ s/\bALIGN\b/.balign/g; - - # ARM code - s/\sARM/.arm/g; - -+ # push/pop -+ s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g; -+ s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g; -+ -+ # NEON code -+ s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g; -+ s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g; -+ - # eabi_attributes numerical equivalents can be found in the - # "ARM IHI 0045C" document. 
- -@@ -157,10 +190,10 @@ while () - } - - # EQU directive -- s/(.*)EQU(.*)/.equ $1, $2/; -+ s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/; - - # Begin macro definition -- if (/MACRO/) { -+ if (/\bMACRO\b/) { - $_ = ; - s/^/.macro/; - s/\$//g; # remove formal param reference -@@ -169,9 +202,10 @@ while () - - # For macros, use \ to reference formal params - s/\$/\\/g; # End macro definition -- s/MEND/.endm/; # No need to tell it where to stop assembling -+ s/\bMEND\b/.endm/; # No need to tell it where to stop assembling - next if /^\s*END\s*$/; - print; -+ print "$comment_sub$comment\n" if defined $comment; - } - - # Mark that this object doesn't need an executable stack. -diff --git a/build/make/configure.sh b/build/make/configure.sh -index 3c772e5..c99a01c 100755 ---- a/build/make/configure.sh -+++ b/build/make/configure.sh -@@ -166,6 +166,17 @@ is_in(){ - - add_cflags() { - CFLAGS="${CFLAGS} $@" -+ CXXFLAGS="${CXXFLAGS} $@" -+} -+ -+ -+add_cflags_only() { -+ CFLAGS="${CFLAGS} $@" -+} -+ -+ -+add_cxxflags_only() { -+ CXXFLAGS="${CXXFLAGS} $@" - } - - -@@ -277,6 +288,13 @@ check_cc() { - check_cmd ${CC} ${CFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} - } - -+check_cxx() { -+ log check_cxx "$@" -+ cat >${TMP_C} -+ log_file ${TMP_C} -+ check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} -+} -+ - check_cpp() { - log check_cpp "$@" - cat > ${TMP_C} -@@ -310,8 +328,25 @@ int x; - EOF - } - -+check_cxxflags() { -+ log check_cxxflags "$@" -+ -+ # Catch CFLAGS that trigger CXX warnings -+ case "$CXX" in -+ *g++*) check_cxx -Werror "$@" <> $1 << EOF -@@ -379,6 +416,7 @@ TOOLCHAIN=${toolchain} - ASM_CONVERSION=${asm_conversion_cmd:-${source_path}/build/make/ads2gas.pl} - - CC=${CC} -+CXX=${CXX} - AR=${AR} - LD=${LD} - AS=${AS} -@@ -386,6 +424,7 @@ STRIP=${STRIP} - NM=${NM} - - CFLAGS = ${CFLAGS} -+CXXFLAGS = ${CXXFLAGS} - ARFLAGS = -rus\$(if \$(quiet),c,v) - LDFLAGS = ${LDFLAGS} - ASFLAGS = ${ASFLAGS} -@@ -538,6 +577,7 @@ post_process_cmdline() { - - setup_gnu_toolchain() { - CC=${CC:-${CROSS}gcc} -+ CXX=${CXX:-${CROSS}g++} - AR=${AR:-${CROSS}ar} - LD=${LD:-${CROSS}${link_with_cc:-ld}} - AS=${AS:-${CROSS}as} -@@ -549,10 +589,19 @@ setup_gnu_toolchain() { - - process_common_toolchain() { - if [ -z "$toolchain" ]; then -- gcctarget="$(gcc -dumpmachine 2> /dev/null)" -+ gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" - - # detect tgt_isa - case "$gcctarget" in -+ armv6*) -+ tgt_isa=armv6 -+ ;; -+ armv7*) -+ tgt_isa=armv7 -+ ;; -+ armv5te*) -+ tgt_isa=armv5te -+ ;; - *x86_64*|*amd64*) - tgt_isa=x86_64 - ;; -@@ -718,6 +767,7 @@ process_common_toolchain() { - ;; - armv5te) - soft_enable edsp -+ disable fast_unaligned - ;; - esac - -@@ -733,17 +783,23 @@ process_common_toolchain() { - check_add_asflags --defsym ARCHITECTURE=${arch_int} - tune_cflags="-mtune=" - if [ ${tgt_isa} == "armv7" ]; then -+ check_add_cflags -march=armv7-a -mfloat-abi=softfp -+ check_add_asflags -march=armv7-a -mfloat-abi=softfp -+ - if enabled neon - then - check_add_cflags -mfpu=neon #-ftree-vectorize - check_add_asflags -mfpu=neon - fi -- check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -- check_add_asflags -mcpu=cortex-a8 -mfloat-abi=softfp #-march=armv7-a -+ -+ if [ -z "${tune_cpu}" ]; then -+ tune_cpu=cortex-a8 -+ fi - else - check_add_cflags -march=${tgt_isa} - check_add_asflags -march=${tgt_isa} - fi -+ - enabled debug && add_asflags -g - asm_conversion_cmd="${source_path}/build/make/ads2gas.pl" - ;; -@@ -792,6 +848,7 @@ process_common_toolchain() { - -name "arm-linux-androideabi-gcc*" -print -quit` - 
TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi- - CC=${TOOLCHAIN_PATH}gcc -+ CXX=${TOOLCHAIN_PATH}g++ - AR=${TOOLCHAIN_PATH}ar - LD=${TOOLCHAIN_PATH}gcc - AS=${TOOLCHAIN_PATH}as -@@ -810,12 +867,17 @@ process_common_toolchain() { - add_cflags "--sysroot=${alt_libc}" - add_ldflags "--sysroot=${alt_libc}" - -- add_cflags "-I${SDK_PATH}/sources/android/cpufeatures/" -+ # linker flag that routes around a CPU bug in some -+ # Cortex-A8 implementations (NDK Dev Guide) -+ add_ldflags "-Wl,--fix-cortex-a8" - - enable pic - soft_enable realtime_only - if [ ${tgt_isa} == "armv7" ]; then -- enable runtime_cpu_detect -+ soft_enable runtime_cpu_detect -+ fi -+ if enabled runtime_cpu_detect; then -+ add_cflags "-I${SDK_PATH}/sources/android/cpufeatures" - fi - ;; - -@@ -827,6 +889,7 @@ process_common_toolchain() { - SDK_PATH=${sdk_path} - fi - TOOLCHAIN_PATH=${SDK_PATH}/usr/bin -+ CXX=${TOOLCHAIN_PATH}/g++ - CC=${TOOLCHAIN_PATH}/gcc - AR=${TOOLCHAIN_PATH}/ar - LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2 -@@ -890,13 +953,16 @@ process_common_toolchain() { - esac - ;; - mips*) -- CROSS=${CROSS:-mipsel-linux-uclibc-} - link_with_cc=gcc - setup_gnu_toolchain - tune_cflags="-mtune=" -+ if enabled dspr2; then -+ check_add_cflags -mips32r2 -mdspr2 -+ disable fast_unaligned -+ fi - check_add_cflags -march=${tgt_isa} -- check_add_asflags -march=${tgt_isa} -- check_add_asflags -KPIC -+ check_add_asflags -march=${tgt_isa} -+ check_add_asflags -KPIC - ;; - ppc*) - enable ppc -@@ -924,6 +990,11 @@ process_common_toolchain() { - x86*) - bits=32 - enabled x86_64 && bits=64 -+ check_cpp </dev/null 2>&1 && AS=yasm - [ "${AS}" = auto -o -z "${AS}" ] \ - && die "Neither yasm nor nasm have been found" -- ;; -+ ;; - esac - log_echo " using $AS" - [ "${AS##*/}" = nasm ] && add_asflags -Ox -@@ -1065,7 +1143,7 @@ process_common_toolchain() { - - # Work around longjmp interception on glibc >= 2.11, to improve binary - # compatibility. See http://code.google.com/p/webm/issues/detail?id=166 -- enabled linux && check_add_cflags -D_FORTIFY_SOURCE=0 -+ enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 - - # Check for strip utility variant - ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip -@@ -1080,12 +1158,24 @@ EOF - # Almost every platform uses pthreads. - if enabled multithread; then - case ${toolchain} in -- *-win*);; -+ *-win*-vs*);; - *-android-gcc);; - *) check_header pthread.h && add_extralibs -lpthread - esac - fi - -+ # only for MIPS platforms -+ case ${toolchain} in -+ mips*) -+ if enabled dspr2; then -+ if enabled big_endian; then -+ echo "dspr2 optimizations are available only for little endian platforms" -+ disable dspr2 -+ fi -+ fi -+ ;; -+ esac -+ - # for sysconf(3) and friends. 
- check_header unistd.h - -diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh -index 717f870..0b4e3aa 100755 ---- a/build/make/gen_asm_deps.sh -+++ b/build/make/gen_asm_deps.sh -@@ -42,7 +42,7 @@ done - - [ -n "$srcfile" ] || show_help - sfx=${sfx:-asm} --includes=$(LC_ALL=C egrep -i "include +\"?+[a-z0-9_/]+\.${sfx}" $srcfile | -+includes=$(LC_ALL=C egrep -i "include +\"?[a-z0-9_/]+\.${sfx}" $srcfile | - perl -p -e "s;.*?([a-z0-9_/]+.${sfx}).*;\1;") - #" restore editor state - for inc in ${includes}; do -diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c -index 04e14a6..bf317bd 100644 ---- a/build/make/obj_int_extract.c -+++ b/build/make/obj_int_extract.c -@@ -680,7 +680,7 @@ int parse_coff(uint8_t *buf, size_t sz) - uint32_t symoffset; - - char **sectionlist; //this array holds all section names in their correct order. -- //it is used to check if the symbol is in .bss or .data section. -+ //it is used to check if the symbol is in .bss or .rdata section. - - nsections = get_le16(buf + 2); - symtab_ptr = get_le32(buf + 8); -@@ -725,15 +725,15 @@ int parse_coff(uint8_t *buf, size_t sz) - } - strcpy(sectionlist[i], sectionname); - -- if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20); -+ if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20); - - ptr += 40; - } - - //log_msg("COFF: Symbol table at offset %u\n", symtab_ptr); -- //log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr); -+ //log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr); - -- /* The compiler puts the data with non-zero offset in .data section, but puts the data with -+ /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with - zero offset in .bss section. So, if the data in in .bss section, set offset=0. - Note from Wiki: In an object module compiled from C, the bss section contains - the local variables (but not functions) that were declared with the static keyword, -diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh -index 1dffde5..ddf9e09 100755 ---- a/build/make/rtcd.sh -+++ b/build/make/rtcd.sh -@@ -211,6 +211,8 @@ common_top() { - $(process_forward_decls) - - $(declare_function_pointers c $ALL_ARCHS) -+ -+void ${symbol:-rtcd}(void); - EOF - } - -@@ -231,11 +233,10 @@ x86() { - - cat <planes[plane]; - - for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) { -- if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w), -- outfile)); -+ (void) fwrite(buf, 1, (plane ? 
(img->d_w + 1) >> 1 : img->d_w), -+ outfile); - buf += img->stride[plane]; - } - } -diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c -index cc70b00..e2b65ec 100644 ---- a/examples/encoder_tmpl.c -+++ b/examples/encoder_tmpl.c -@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - -@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile, - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - - int main(int argc, char **argv) { -diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt -index 0042071..1afbd8b 100644 ---- a/examples/encoder_tmpl.txt -+++ b/examples/encoder_tmpl.txt -@@ -61,13 +61,14 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile, pkt); -- if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -- outfile)); -+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -+ outfile); - break; - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -+vpx_img_free(&raw); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -diff --git a/examples/twopass_encoder.txt b/examples/twopass_encoder.txt -index 4683bc7..2f81a90 100644 ---- a/examples/twopass_encoder.txt -+++ b/examples/twopass_encoder.txt -@@ -71,5 +71,17 @@ Pass Progress Reporting - It's sometimes helpful to see when each pass completes. - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END - printf("Pass %d complete.\n", pass+1); -+ if(vpx_codec_destroy(&codec)) -+ die_codec(&codec, "Failed to destroy codec"); - } - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END -+ -+ -+Clean-up -+----------------------------- -+Destruction of the encoder instance must be done on each pass. The -+raw image should be destroyed at the end as usual. -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -+vpx_img_free(&raw); -+free(stats.buf); -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h -index 3418e36..e3ce585 100644 ---- a/libmkv/EbmlIDs.h -+++ b/libmkv/EbmlIDs.h -@@ -1,16 +1,16 @@ --// Copyright (c) 2010 The WebM project authors. All Rights Reserved. --// --// Use of this source code is governed by a BSD-style license --// that can be found in the LICENSE file in the root of the source --// tree. An additional intellectual property rights grant can be found --// in the file PATENTS. All contributing project authors may --// be found in the AUTHORS file in the root of the source tree. -- -- -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ - #ifndef MKV_DEFS_HPP - #define MKV_DEFS_HPP 1 - --//Commenting out values not available in webm, but available in matroska -+/* Commenting out values not available in webm, but available in matroska */ - - enum mkv - { -@@ -22,7 +22,7 @@ enum mkv - DocType = 0x4282, - DocTypeVersion = 0x4287, - DocTypeReadVersion = 0x4285, --// CRC_32 = 0xBF, -+/* CRC_32 = 0xBF, */ - Void = 0xEC, - SignatureSlot = 0x1B538667, - SignatureAlgo = 0x7E8A, -@@ -32,61 +32,61 @@ enum mkv - SignatureElements = 0x7E5B, - SignatureElementList = 0x7E7B, - SignedElement = 0x6532, -- //segment -+ /* segment */ - Segment = 0x18538067, -- //Meta Seek Information -+ /* Meta Seek Information */ - SeekHead = 0x114D9B74, - Seek = 0x4DBB, - SeekID = 0x53AB, - SeekPosition = 0x53AC, -- //Segment Information -+ /* Segment Information */ - Info = 0x1549A966, --// SegmentUID = 0x73A4, --// SegmentFilename = 0x7384, --// PrevUID = 0x3CB923, --// PrevFilename = 0x3C83AB, --// NextUID = 0x3EB923, --// NextFilename = 0x3E83BB, --// SegmentFamily = 0x4444, --// ChapterTranslate = 0x6924, --// ChapterTranslateEditionUID = 0x69FC, --// ChapterTranslateCodec = 0x69BF, --// ChapterTranslateID = 0x69A5, -+/* SegmentUID = 0x73A4, */ -+/* SegmentFilename = 0x7384, */ -+/* PrevUID = 0x3CB923, */ -+/* PrevFilename = 0x3C83AB, */ -+/* NextUID = 0x3EB923, */ -+/* NextFilename = 0x3E83BB, */ -+/* SegmentFamily = 0x4444, */ -+/* ChapterTranslate = 0x6924, */ -+/* ChapterTranslateEditionUID = 0x69FC, */ -+/* ChapterTranslateCodec = 0x69BF, */ -+/* ChapterTranslateID = 0x69A5, */ - TimecodeScale = 0x2AD7B1, - Segment_Duration = 0x4489, - DateUTC = 0x4461, --// Title = 0x7BA9, -+/* Title = 0x7BA9, */ - MuxingApp = 0x4D80, - WritingApp = 0x5741, -- //Cluster -+ /* Cluster */ - Cluster = 0x1F43B675, - Timecode = 0xE7, --// SilentTracks = 0x5854, --// SilentTrackNumber = 0x58D7, --// Position = 0xA7, -+/* SilentTracks = 0x5854, */ -+/* SilentTrackNumber = 0x58D7, */ -+/* Position = 0xA7, */ - PrevSize = 0xAB, - BlockGroup = 0xA0, - Block = 0xA1, --// BlockVirtual = 0xA2, --// BlockAdditions = 0x75A1, --// BlockMore = 0xA6, --// BlockAddID = 0xEE, --// BlockAdditional = 0xA5, -+/* BlockVirtual = 0xA2, */ -+/* BlockAdditions = 0x75A1, */ -+/* BlockMore = 0xA6, */ -+/* BlockAddID = 0xEE, */ -+/* BlockAdditional = 0xA5, */ - BlockDuration = 0x9B, --// ReferencePriority = 0xFA, -+/* ReferencePriority = 0xFA, */ - ReferenceBlock = 0xFB, --// ReferenceVirtual = 0xFD, --// CodecState = 0xA4, --// Slices = 0x8E, --// TimeSlice = 0xE8, -+/* ReferenceVirtual = 0xFD, */ -+/* CodecState = 0xA4, */ -+/* Slices = 0x8E, */ -+/* TimeSlice = 0xE8, */ - LaceNumber = 0xCC, --// FrameNumber = 0xCD, --// BlockAdditionID = 0xCB, --// MkvDelay = 0xCE, --// Cluster_Duration = 0xCF, -+/* FrameNumber = 0xCD, */ -+/* BlockAdditionID = 0xCB, */ -+/* MkvDelay = 0xCE, */ -+/* Cluster_Duration = 0xCF, */ - SimpleBlock = 0xA3, --// EncryptedBlock = 0xAF, -- //Track -+/* EncryptedBlock = 0xAF, */ -+ /* Track */ - Tracks = 0x1654AE6B, - TrackEntry = 0xAE, - TrackNumber = 0xD7, -@@ -96,28 +96,28 @@ enum mkv - FlagDefault = 0x88, - FlagForced = 0x55AA, - FlagLacing = 0x9C, --// MinCache = 0x6DE7, --// MaxCache = 0x6DF8, -+/* MinCache = 0x6DE7, */ -+/* MaxCache = 0x6DF8, */ - DefaultDuration = 0x23E383, --// TrackTimecodeScale = 0x23314F, --// TrackOffset = 0x537F, --// MaxBlockAdditionID = 0x55EE, -+/* TrackTimecodeScale = 0x23314F, */ -+/* TrackOffset = 0x537F, */ -+/* MaxBlockAdditionID = 0x55EE, */ - Name = 0x536E, - Language = 0x22B59C, - CodecID = 0x86, - CodecPrivate = 
0x63A2, - CodecName = 0x258688, --// AttachmentLink = 0x7446, --// CodecSettings = 0x3A9697, --// CodecInfoURL = 0x3B4040, --// CodecDownloadURL = 0x26B240, --// CodecDecodeAll = 0xAA, --// TrackOverlay = 0x6FAB, --// TrackTranslate = 0x6624, --// TrackTranslateEditionUID = 0x66FC, --// TrackTranslateCodec = 0x66BF, --// TrackTranslateTrackID = 0x66A5, -- //video -+/* AttachmentLink = 0x7446, */ -+/* CodecSettings = 0x3A9697, */ -+/* CodecInfoURL = 0x3B4040, */ -+/* CodecDownloadURL = 0x26B240, */ -+/* CodecDecodeAll = 0xAA, */ -+/* TrackOverlay = 0x6FAB, */ -+/* TrackTranslate = 0x6624, */ -+/* TrackTranslateEditionUID = 0x66FC, */ -+/* TrackTranslateCodec = 0x66BF, */ -+/* TrackTranslateTrackID = 0x66A5, */ -+ /* video */ - Video = 0xE0, - FlagInterlaced = 0x9A, - StereoMode = 0x53B8, -@@ -131,101 +131,101 @@ enum mkv - DisplayHeight = 0x54BA, - DisplayUnit = 0x54B2, - AspectRatioType = 0x54B3, --// ColourSpace = 0x2EB524, --// GammaValue = 0x2FB523, -+/* ColourSpace = 0x2EB524, */ -+/* GammaValue = 0x2FB523, */ - FrameRate = 0x2383E3, -- //end video -- //audio -+ /* end video */ -+ /* audio */ - Audio = 0xE1, - SamplingFrequency = 0xB5, - OutputSamplingFrequency = 0x78B5, - Channels = 0x9F, --// ChannelPositions = 0x7D7B, -+/* ChannelPositions = 0x7D7B, */ - BitDepth = 0x6264, -- //end audio -- //content encoding --// ContentEncodings = 0x6d80, --// ContentEncoding = 0x6240, --// ContentEncodingOrder = 0x5031, --// ContentEncodingScope = 0x5032, --// ContentEncodingType = 0x5033, --// ContentCompression = 0x5034, --// ContentCompAlgo = 0x4254, --// ContentCompSettings = 0x4255, --// ContentEncryption = 0x5035, --// ContentEncAlgo = 0x47e1, --// ContentEncKeyID = 0x47e2, --// ContentSignature = 0x47e3, --// ContentSigKeyID = 0x47e4, --// ContentSigAlgo = 0x47e5, --// ContentSigHashAlgo = 0x47e6, -- //end content encoding -- //Cueing Data -+ /* end audio */ -+ /* content encoding */ -+/* ContentEncodings = 0x6d80, */ -+/* ContentEncoding = 0x6240, */ -+/* ContentEncodingOrder = 0x5031, */ -+/* ContentEncodingScope = 0x5032, */ -+/* ContentEncodingType = 0x5033, */ -+/* ContentCompression = 0x5034, */ -+/* ContentCompAlgo = 0x4254, */ -+/* ContentCompSettings = 0x4255, */ -+/* ContentEncryption = 0x5035, */ -+/* ContentEncAlgo = 0x47e1, */ -+/* ContentEncKeyID = 0x47e2, */ -+/* ContentSignature = 0x47e3, */ -+/* ContentSigKeyID = 0x47e4, */ -+/* ContentSigAlgo = 0x47e5, */ -+/* ContentSigHashAlgo = 0x47e6, */ -+ /* end content encoding */ -+ /* Cueing Data */ - Cues = 0x1C53BB6B, - CuePoint = 0xBB, - CueTime = 0xB3, - CueTrackPositions = 0xB7, - CueTrack = 0xF7, - CueClusterPosition = 0xF1, -- CueBlockNumber = 0x5378, --// CueCodecState = 0xEA, --// CueReference = 0xDB, --// CueRefTime = 0x96, --// CueRefCluster = 0x97, --// CueRefNumber = 0x535F, --// CueRefCodecState = 0xEB, -- //Attachment --// Attachments = 0x1941A469, --// AttachedFile = 0x61A7, --// FileDescription = 0x467E, --// FileName = 0x466E, --// FileMimeType = 0x4660, --// FileData = 0x465C, --// FileUID = 0x46AE, --// FileReferral = 0x4675, -- //Chapters --// Chapters = 0x1043A770, --// EditionEntry = 0x45B9, --// EditionUID = 0x45BC, --// EditionFlagHidden = 0x45BD, --// EditionFlagDefault = 0x45DB, --// EditionFlagOrdered = 0x45DD, --// ChapterAtom = 0xB6, --// ChapterUID = 0x73C4, --// ChapterTimeStart = 0x91, --// ChapterTimeEnd = 0x92, --// ChapterFlagHidden = 0x98, --// ChapterFlagEnabled = 0x4598, --// ChapterSegmentUID = 0x6E67, --// ChapterSegmentEditionUID = 0x6EBC, --// ChapterPhysicalEquiv = 0x63C3, --// 
ChapterTrack = 0x8F, --// ChapterTrackNumber = 0x89, --// ChapterDisplay = 0x80, --// ChapString = 0x85, --// ChapLanguage = 0x437C, --// ChapCountry = 0x437E, --// ChapProcess = 0x6944, --// ChapProcessCodecID = 0x6955, --// ChapProcessPrivate = 0x450D, --// ChapProcessCommand = 0x6911, --// ChapProcessTime = 0x6922, --// ChapProcessData = 0x6933, -- //Tagging --// Tags = 0x1254C367, --// Tag = 0x7373, --// Targets = 0x63C0, --// TargetTypeValue = 0x68CA, --// TargetType = 0x63CA, --// Tagging_TrackUID = 0x63C5, --// Tagging_EditionUID = 0x63C9, --// Tagging_ChapterUID = 0x63C4, --// AttachmentUID = 0x63C6, --// SimpleTag = 0x67C8, --// TagName = 0x45A3, --// TagLanguage = 0x447A, --// TagDefault = 0x4484, --// TagString = 0x4487, --// TagBinary = 0x4485, -+ CueBlockNumber = 0x5378 -+/* CueCodecState = 0xEA, */ -+/* CueReference = 0xDB, */ -+/* CueRefTime = 0x96, */ -+/* CueRefCluster = 0x97, */ -+/* CueRefNumber = 0x535F, */ -+/* CueRefCodecState = 0xEB, */ -+ /* Attachment */ -+/* Attachments = 0x1941A469, */ -+/* AttachedFile = 0x61A7, */ -+/* FileDescription = 0x467E, */ -+/* FileName = 0x466E, */ -+/* FileMimeType = 0x4660, */ -+/* FileData = 0x465C, */ -+/* FileUID = 0x46AE, */ -+/* FileReferral = 0x4675, */ -+ /* Chapters */ -+/* Chapters = 0x1043A770, */ -+/* EditionEntry = 0x45B9, */ -+/* EditionUID = 0x45BC, */ -+/* EditionFlagHidden = 0x45BD, */ -+/* EditionFlagDefault = 0x45DB, */ -+/* EditionFlagOrdered = 0x45DD, */ -+/* ChapterAtom = 0xB6, */ -+/* ChapterUID = 0x73C4, */ -+/* ChapterTimeStart = 0x91, */ -+/* ChapterTimeEnd = 0x92, */ -+/* ChapterFlagHidden = 0x98, */ -+/* ChapterFlagEnabled = 0x4598, */ -+/* ChapterSegmentUID = 0x6E67, */ -+/* ChapterSegmentEditionUID = 0x6EBC, */ -+/* ChapterPhysicalEquiv = 0x63C3, */ -+/* ChapterTrack = 0x8F, */ -+/* ChapterTrackNumber = 0x89, */ -+/* ChapterDisplay = 0x80, */ -+/* ChapString = 0x85, */ -+/* ChapLanguage = 0x437C, */ -+/* ChapCountry = 0x437E, */ -+/* ChapProcess = 0x6944, */ -+/* ChapProcessCodecID = 0x6955, */ -+/* ChapProcessPrivate = 0x450D, */ -+/* ChapProcessCommand = 0x6911, */ -+/* ChapProcessTime = 0x6922, */ -+/* ChapProcessData = 0x6933, */ -+ /* Tagging */ -+/* Tags = 0x1254C367, */ -+/* Tag = 0x7373, */ -+/* Targets = 0x63C0, */ -+/* TargetTypeValue = 0x68CA, */ -+/* TargetType = 0x63CA, */ -+/* Tagging_TrackUID = 0x63C5, */ -+/* Tagging_EditionUID = 0x63C9, */ -+/* Tagging_ChapterUID = 0x63C4, */ -+/* AttachmentUID = 0x63C6, */ -+/* SimpleTag = 0x67C8, */ -+/* TagName = 0x45A3, */ -+/* TagLanguage = 0x447A, */ -+/* TagDefault = 0x4484, */ -+/* TagString = 0x4487, */ -+/* TagBinary = 0x4485, */ - }; - #endif -diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c -index fbf2c66..d70f06e 100644 ---- a/libmkv/EbmlWriter.c -+++ b/libmkv/EbmlWriter.c -@@ -1,12 +1,12 @@ --// Copyright (c) 2010 The WebM project authors. All Rights Reserved. --// --// Use of this source code is governed by a BSD-style license --// that can be found in the LICENSE file in the root of the source --// tree. An additional intellectual property rights grant can be found --// in the file PATENTS. All contributing project authors may --// be found in the AUTHORS file in the root of the source tree. -- -- -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. 
All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ - #include "EbmlWriter.h" - #include - #include -@@ -18,11 +18,13 @@ - #define LITERALU64(n) n##LLU - #endif - --void Ebml_WriteLen(EbmlGlobal *glob, long long val) -+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) - { -- //TODO check and make sure we are not > than 0x0100000000000000LLU -- unsigned char size = 8; //size in bytes to output -- unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size -+ /* TODO check and make sure we are not > than 0x0100000000000000LLU */ -+ unsigned char size = 8; /* size in bytes to output */ -+ -+ /* mask to compare for byte size */ -+ int64_t minVal = 0xff; - - for (size = 1; size < 8; size ++) - { -@@ -32,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) - minVal = (minVal << 7); - } - -- val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); -+ val |= (((uint64_t)0x80) << ((size - 1) * 7)); - - Ebml_Serialize(glob, (void *) &val, sizeof(val), size); - } -@@ -40,23 +42,25 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) - void Ebml_WriteString(EbmlGlobal *glob, const char *str) - { - const size_t size_ = strlen(str); -- const unsigned long long size = size_; -+ const uint64_t size = size_; - Ebml_WriteLen(glob, size); -- //TODO: it's not clear from the spec whether the nul terminator -- //should be serialized too. For now we omit the null terminator. -- Ebml_Write(glob, str, size); -+ /* TODO: it's not clear from the spec whether the nul terminator -+ * should be serialized too. For now we omit the null terminator. -+ */ -+ Ebml_Write(glob, str, (unsigned long)size); - } - - void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) - { - const size_t strlen = wcslen(wstr); - -- //TODO: it's not clear from the spec whether the nul terminator -- //should be serialized too. For now we include it. -- const unsigned long long size = strlen; -+ /* TODO: it's not clear from the spec whether the nul terminator -+ * should be serialized too. For now we include it. 
-+ */ -+ const uint64_t size = strlen; - - Ebml_WriteLen(glob, size); -- Ebml_Write(glob, wstr, size); -+ Ebml_Write(glob, wstr, (unsigned long)size); - } - - void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) -@@ -85,12 +89,12 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t - - void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) - { -- unsigned char size = 8; //size in bytes to output -+ unsigned char size = 8; /* size in bytes to output */ - unsigned char sizeSerialized = 0; - unsigned long minVal; - - Ebml_WriteID(glob, class_id); -- minVal = 0x7fLU; //mask to compare for byte size -+ minVal = 0x7fLU; /* mask to compare for byte size */ - - for (size = 1; size < 4; size ++) - { -@@ -106,7 +110,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), size); - } --//TODO: perhaps this is a poor name for this id serializer helper function -+/* TODO: perhaps this is a poor name for this id serializer helper function */ - void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) - { - int size; -@@ -168,4 +172,4 @@ void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) - } - } - --//TODO Serialize Date -+/* TODO Serialize Date */ -diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h -index 324c9bc..b94f757 100644 ---- a/libmkv/EbmlWriter.h -+++ b/libmkv/EbmlWriter.h -@@ -1,26 +1,30 @@ -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ - #ifndef EBMLWRITER_HPP - #define EBMLWRITER_HPP -- --// Copyright (c) 2010 The WebM project authors. All Rights Reserved. --// --// Use of this source code is governed by a BSD-style license --// that can be found in the LICENSE file in the root of the source --// tree. An additional intellectual property rights grant can be found --// in the file PATENTS. All contributing project authors may --// be found in the AUTHORS file in the root of the source tree. 
-- --//note: you must define write and serialize functions as well as your own EBML_GLOBAL --//These functions MUST be implemented - #include - #include "vpx/vpx_integer.h" - -+/* note: you must define write and serialize functions as well as your own -+ * EBML_GLOBAL -+ * -+ * These functions MUST be implemented -+ */ -+ - typedef struct EbmlGlobal EbmlGlobal; - void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); - void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); --///// - -+/*****/ - --void Ebml_WriteLen(EbmlGlobal *glob, long long val); -+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val); - void Ebml_WriteString(EbmlGlobal *glob, const char *str); - void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); - void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); -@@ -28,11 +32,11 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t - void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); - void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); - void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); --//TODO make this more generic to signed -+/* TODO make this more generic to signed */ - void Ebml_WriteSigned16(EbmlGlobal *glob, short val); - void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); - void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); - void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); - void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); --//TODO need date function -+/* TODO need date function */ - #endif -diff --git a/libs.mk b/libs.mk -index e2ba737..4115dd8 100644 ---- a/libs.mk -+++ b/libs.mk -@@ -20,8 +20,16 @@ endif - CODEC_SRCS-yes += CHANGELOG - CODEC_SRCS-yes += libs.mk - -+# If this is a universal (fat) binary, then all the subarchitectures have -+# already been built and our job is to stitch them together. The -+# BUILD_LIBVPX variable indicates whether we should be building -+# (compiling, linking) the library. The LIPO_LIBVPX variable indicates -+# that we're stitching. 
-+$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) -+ - include $(SRC_PATH_BARE)/vpx/vpx_codec.mk - CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) -+CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS)) - - include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk - CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) -@@ -29,17 +37,17 @@ CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) - include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk - CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS)) - -+include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk -+CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS)) -+ - - ifeq ($(CONFIG_VP8_ENCODER),yes) - VP8_PREFIX=vp8/ - include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk - CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) - CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) -- CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h -- CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h - INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% -- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h - CODEC_DOC_SECTIONS += vp8 vp8_encoder - endif - -@@ -48,10 +56,8 @@ ifeq ($(CONFIG_VP8_DECODER),yes) - include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk - CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS)) - CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS)) -- CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h - INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% -- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h - CODEC_DOC_SECTIONS += vp8 vp8_decoder - endif - -@@ -66,6 +72,7 @@ endif - - ifeq ($(CONFIG_MSVS),yes) - CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) -+GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) - # This variable uses deferred expansion intentionally, since the results of - # $(wildcard) may change during the course of the Make. - VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) -@@ -82,29 +89,10 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%) - INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) - endif - --# If this is a universal (fat) binary, then all the subarchitectures have --# already been built and our job is to stitch them together. The --# BUILD_LIBVPX variable indicates whether we should be building --# (compiling, linking) the library. The LIPO_LIBVPX variable indicates --# that we're stitching. 
--$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) -- - CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh - CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh --CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h - CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c - INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c --ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c --endif --CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c --CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h - CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com - CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc - CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec -@@ -146,7 +134,7 @@ ifeq ($(CONFIG_MSVS),yes) - obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c - @cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat . - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --exe \ - --target=$(TOOLCHAIN) \ - --name=obj_int_extract \ -@@ -162,14 +150,14 @@ PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat - - vpx.def: $(call enabled,CODEC_EXPORTS) - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ - --name=vpx\ - --out=$@ $^ - CLEAN-OBJS += vpx.def - - vpx.vcproj: $(CODEC_SRCS) vpx.def - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --lib \ - --target=$(TOOLCHAIN) \ - $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ -@@ -242,6 +230,7 @@ vpx.pc: config.mk libs.mk - $(qexec)echo 'Requires:' >> $@ - $(qexec)echo 'Conflicts:' >> $@ - $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@ -+ $(qexec)echo 'Libs.private: -lm -lpthread' >> $@ - $(qexec)echo 'Cflags: -I$${includedir}' >> $@ - INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc - INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc -@@ -284,38 +273,44 @@ OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' - - ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) - $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S -- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S - - $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S -- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S - - $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S -- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d 
'$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S - else - ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) - asm_com_offsets.asm: obj_int_extract - asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o -- ./obj_int_extract rvds $< $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o - CLEAN-OBJS += asm_com_offsets.asm - $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm - - asm_enc_offsets.asm: obj_int_extract - asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o -- ./obj_int_extract rvds $< $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o - CLEAN-OBJS += asm_enc_offsets.asm - $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm - - asm_dec_offsets.asm: obj_int_extract - asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o -- ./obj_int_extract rvds $< $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o - CLEAN-OBJS += asm_dec_offsets.asm - $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm -@@ -328,7 +323,6 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_version.h - # - # Rule to generate runtime cpu detection files - # --$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h - $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS))) - @echo " [CREATE] $@" - $(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \ -@@ -337,25 +331,43 @@ $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_S - $(RTCD_OPTIONS) $^ > $@ - CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h - --CODEC_DOC_SRCS += vpx/vpx_codec.h \ -- vpx/vpx_decoder.h \ -- vpx/vpx_encoder.h \ -- vpx/vpx_image.h -- - ## - ## libvpx test directives - ## -- - ifeq ($(CONFIG_UNIT_TESTS),yes) -+LIBVPX_TEST_DATA_PATH ?= . 
-+ -+include $(SRC_PATH_BARE)/test/test.mk -+LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS)) -+LIBVPX_TEST_BINS=./test_libvpx -+LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\ -+ $(call enabled,LIBVPX_TEST_DATA)) -+libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1) -+ -+$(LIBVPX_TEST_DATA): -+ @echo " [DOWNLOAD] $@" -+ $(qexec)trap 'rm -f $@' INT TERM &&\ -+ curl -L -o $@ $(call libvpx_test_data_url,$(@F)) -+ -+testdata:: $(LIBVPX_TEST_DATA) -+ $(qexec)if [ -x "$$(which sha1sum)" ]; then\ -+ echo "Checking test data:";\ -+ if [ -n "$(LIBVPX_TEST_DATA)" ]; then\ -+ for f in $(call enabled,LIBVPX_TEST_DATA); do\ -+ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ -+ (cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c);\ -+ done; \ -+ fi; \ -+ else\ -+ echo "Skipping test data integrity check, sha1sum not found.";\ -+ fi -+ - ifeq ($(CONFIG_EXTERNAL_BUILD),yes) - ifeq ($(CONFIG_MSVS),yes) - --LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) --LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_SRCS:.cc.o=)) -- - gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --lib \ - --target=$(TOOLCHAIN) \ - $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ -@@ -368,27 +380,22 @@ gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc - - PROJECTS-$(CONFIG_MSVS) += gtest.vcproj - --define unit_test_vcproj_template --$(notdir $(1:.cc=.vcproj)): $(SRC_PATH_BARE)/$(1) -- @echo " [vcproj] $$@" -- $$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\ -- --exe\ -- --target=$$(TOOLCHAIN)\ -- --name=$(notdir $(1:.cc=))\ -- --ver=$$(CONFIG_VS_VERSION)\ -- $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \ -- --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \ -+test_libvpx.vcproj: $(LIBVPX_TEST_SRCS) -+ @echo " [CREATE] $@" -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ --exe \ -+ --target=$(TOOLCHAIN) \ -+ --name=test_libvpx \ -+ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ -+ --ver=$(CONFIG_VS_VERSION) \ -+ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ -+ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ - -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ -- -L. -lvpxmt -lwinmm -lgtestmt $$^ --endef -+ -L. 
-l$(CODEC_LIB) -lwinmm -l$(GTEST_LIB) $^ - --$(foreach proj,$(LIBVPX_TEST_BINS),\ -- $(eval $(call unit_test_vcproj_template,$(proj)))) -+PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj - --PROJECTS-$(CONFIG_MSVS) += $(foreach proj,$(LIBVPX_TEST_BINS),\ -- $(notdir $(proj:.cc=.vcproj))) -- --test:: -+test:: testdata - @set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done - endif - else -@@ -396,28 +403,35 @@ else - include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk - GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS)) - GTEST_OBJS=$(call objs,$(GTEST_SRCS)) --$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src --$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include -+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include - OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS) - LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a - $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS) - --LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) --LIBVPX_TEST_OBJS=$(call objs,$(LIBVPX_TEST_SRCS)) --$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src --$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include --LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_OBJS:.cc.o=)) -+LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS))) -+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include - OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS) -+BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS) -+ -+# Install test sources only if codec source is included -+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\ -+ $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f)) -+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS) - -+CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) -+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a) - $(foreach bin,$(LIBVPX_TEST_BINS),\ -- $(if $(BUILD_LIBVPX),$(eval $(bin): libvpx.a libgtest.a ))\ -+ $(if $(BUILD_LIBVPX),$(eval $(bin): \ -+ lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\ - $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\ -- $(bin).cc.o \ -+ $(LIBVPX_TEST_OBJS) \ - -L. -lvpx -lgtest -lpthread -lm)\ - )))\ - $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\ - --test:: $(LIBVPX_TEST_BINS) -+test:: $(LIBVPX_TEST_BINS) testdata - @set -e; for t in $(LIBVPX_TEST_BINS); do $$t; done - - endif -@@ -435,3 +449,6 @@ libs.doxy: $(CODEC_DOC_SRCS) - @echo "PREDEFINED = VPX_CODEC_DISABLE_COMPAT" >> $@ - @echo "INCLUDE_PATH += ." 
>> $@; - @echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@ -+ -+## Generate vpx_rtcd.h for all objects -+$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h -diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c -index 63a0e83..cc87788 100644 ---- a/nestegg/src/nestegg.c -+++ b/nestegg/src/nestegg.c -@@ -1272,7 +1272,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac - if (total > block_size) - return -1; - -- entry = ne_find_track_entry(ctx, track - 1); -+ entry = ne_find_track_entry(ctx, (unsigned int)(track - 1)); - if (!entry) - return -1; - -@@ -1291,7 +1291,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac - - pkt = ne_alloc(sizeof(*pkt)); - pkt->track = track - 1; -- pkt->timecode = abs_timecode * tc_scale * track_scale; -+ pkt->timecode = (uint64_t)(abs_timecode * tc_scale * track_scale); - - ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu", - block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames); -@@ -1774,35 +1774,35 @@ nestegg_track_video_params(nestegg * ctx, unsigned int track, - - if (ne_get_uint(entry->video.pixel_width, &value) != 0) - return -1; -- params->width = value; -+ params->width = (unsigned int)value; - - if (ne_get_uint(entry->video.pixel_height, &value) != 0) - return -1; -- params->height = value; -+ params->height = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_bottom, &value); -- params->crop_bottom = value; -+ params->crop_bottom = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_top, &value); -- params->crop_top = value; -+ params->crop_top = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_left, &value); -- params->crop_left = value; -+ params->crop_left = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_right, &value); -- params->crop_right = value; -+ params->crop_right = (unsigned int)value; - - value = params->width; - ne_get_uint(entry->video.display_width, &value); -- params->display_width = value; -+ params->display_width = (unsigned int)value; - - value = params->height; - ne_get_uint(entry->video.display_height, &value); -- params->display_height = value; -+ params->display_height = (unsigned int)value; - - return 0; - } -@@ -1828,11 +1828,11 @@ nestegg_track_audio_params(nestegg * ctx, unsigned int track, - - value = 1; - ne_get_uint(entry->audio.channels, &value); -- params->channels = value; -+ params->channels = (unsigned int)value; - - value = 16; - ne_get_uint(entry->audio.bit_depth, &value); -- params->depth = value; -+ params->depth = (unsigned int)value; - - return 0; - } -@@ -1888,7 +1888,7 @@ nestegg_free_packet(nestegg_packet * pkt) - int - nestegg_packet_track(nestegg_packet * pkt, unsigned int * track) - { -- *track = pkt->track; -+ *track = (unsigned int)pkt->track; - return 0; - } - -diff --git a/solution.mk b/solution.mk -index 2de1d8d..948305f 100644 ---- a/solution.mk -+++ b/solution.mk -@@ -8,18 +8,19 @@ - ## be found in the AUTHORS file in the root of the source tree. 
- ## - -+# libvpx reverse dependencies (targets that depend on libvpx) -+VPX_NONDEPS=$(addsuffix .vcproj,vpx gtest obj_int_extract) -+VPX_RDEPS=$(foreach vcp,\ -+ $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.vcproj=):vpx) - - vpx.sln: $(wildcard *.vcproj) - @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ -- $(if $(filter %vpx.vcproj,$^),\ -- $(foreach vcp,$(filter-out %vpx.vcproj %gtest.vcproj %obj_int_extract.vcproj,$^),\ -- --dep=$(vcp:.vcproj=):vpx) \ -- $(foreach vcp,$(filter %_test.vcproj,$^),\ -- --dep=$(vcp:.vcproj=):gtest)) \ -- --dep=vpx:obj_int_extract \ -- --ver=$(CONFIG_VS_VERSION)\ -- --out=$@ $^ -+ $(if $(filter vpx.vcproj,$^),$(VPX_RDEPS)) \ -+ --dep=vpx:obj_int_extract \ -+ --dep=test_libvpx:gtest \ -+ --ver=$(CONFIG_VS_VERSION)\ -+ --out=$@ $^ - vpx.sln.mk: vpx.sln - @true - -diff --git a/test/acm_random.h b/test/acm_random.h -new file mode 100644 -index 0000000..514894e ---- /dev/null -+++ b/test/acm_random.h -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef LIBVPX_TEST_ACM_RANDOM_H_ -+#define LIBVPX_TEST_ACM_RANDOM_H_ -+ -+#include -+ -+#include "vpx/vpx_integer.h" -+ -+namespace libvpx_test { -+ -+class ACMRandom { -+ public: -+ ACMRandom() { -+ Reset(DeterministicSeed()); -+ } -+ -+ explicit ACMRandom(int seed) { -+ Reset(seed); -+ } -+ -+ void Reset(int seed) { -+ srand(seed); -+ } -+ -+ uint8_t Rand8(void) { -+ return (rand() >> 8) & 0xff; -+ } -+ -+ int PseudoUniform(int range) { -+ return (rand() >> 8) % range; -+ } -+ -+ int operator()(int n) { -+ return PseudoUniform(n); -+ } -+ -+ static int DeterministicSeed(void) { -+ return 0xbaba; -+ } -+}; -+ -+} // namespace libvpx_test -+ -+#endif // LIBVPX_TEST_ACM_RANDOM_H_ -diff --git a/test/altref_test.cc b/test/altref_test.cc -new file mode 100644 -index 0000000..ca05577 ---- /dev/null -+++ b/test/altref_test.cc -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+ -+namespace { -+ -+// lookahead range: [kLookAheadMin, kLookAheadMax). 
-+const int kLookAheadMin = 5; -+const int kLookAheadMax = 26; -+ -+class AltRefTest : public libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ AltRefTest() : altref_count_(0) {} -+ virtual ~AltRefTest() {} -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(libvpx_test::kTwoPassGood); -+ } -+ -+ virtual void BeginPassHook(unsigned int pass) { -+ altref_count_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, -+ libvpx_test::Encoder *encoder) { -+ if (video->frame() == 1) { -+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); -+ encoder->Control(VP8E_SET_CPUUSED, 3); -+ } -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_; -+ } -+ -+ int altref_count() const { return altref_count_; } -+ -+ private: -+ int altref_count_; -+}; -+ -+TEST_P(AltRefTest, MonotonicTimestamps) { -+ const vpx_rational timebase = { 33333333, 1000000000 }; -+ cfg_.g_timebase = timebase; -+ cfg_.rc_target_bitrate = 1000; -+ cfg_.g_lag_in_frames = GetParam(); -+ -+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ timebase.den, timebase.num, 0, 30); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ EXPECT_GE(altref_count(), 1); -+} -+ -+INSTANTIATE_TEST_CASE_P(NonZeroLag, AltRefTest, -+ ::testing::Range(kLookAheadMin, kLookAheadMax)); -+} // namespace -diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc -new file mode 100644 -index 0000000..4e21be8 ---- /dev/null -+++ b/test/boolcoder_test.cc -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+extern "C" { -+#include "vp8/encoder/boolhuff.h" -+#include "vp8/decoder/dboolhuff.h" -+} -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "test/acm_random.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_integer.h" -+ -+namespace { -+const int num_tests = 10; -+} // namespace -+ -+using libvpx_test::ACMRandom; -+ -+TEST(VP8, TestBitIO) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ for (int n = 0; n < num_tests; ++n) { -+ for (int method = 0; method <= 7; ++method) { // we generate various proba -+ const int bits_to_test = 1000; -+ uint8_t probas[bits_to_test]; -+ -+ for (int i = 0; i < bits_to_test; ++i) { -+ const int parity = i & 1; -+ probas[i] = -+ (method == 0) ? 0 : (method == 1) ? 255 : -+ (method == 2) ? 128 : -+ (method == 3) ? rnd.Rand8() : -+ (method == 4) ? (parity ? 0 : 255) : -+ // alternate between low and high proba: -+ (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : -+ (method == 6) ? -+ (parity ? rnd(64) : 255 - rnd(64)) : -+ (parity ? rnd(32) : 255 - rnd(32)); -+ } -+ for (int bit_method = 0; bit_method <= 3; ++bit_method) { -+ const int random_seed = 6432; -+ const int buffer_size = 10000; -+ ACMRandom bit_rnd(random_seed); -+ BOOL_CODER bw; -+ uint8_t bw_buffer[buffer_size]; -+ vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size); -+ -+ int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 
1 : 0; -+ for (int i = 0; i < bits_to_test; ++i) { -+ if (bit_method == 2) { -+ bit = (i & 1); -+ } else if (bit_method == 3) { -+ bit = bit_rnd(2); -+ } -+ vp8_encode_bool(&bw, bit, static_cast(probas[i])); -+ } -+ -+ vp8_stop_encode(&bw); -+ -+ BOOL_DECODER br; -+ vp8dx_start_decode(&br, bw_buffer, buffer_size); -+ bit_rnd.Reset(random_seed); -+ for (int i = 0; i < bits_to_test; ++i) { -+ if (bit_method == 2) { -+ bit = (i & 1); -+ } else if (bit_method == 3) { -+ bit = bit_rnd(2); -+ } -+ GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit) -+ << "pos: "<< i << " / " << bits_to_test -+ << " bit_method: " << bit_method -+ << " method: " << method; -+ } -+ } -+ } -+ } -+} -diff --git a/test/config_test.cc b/test/config_test.cc -new file mode 100644 -index 0000000..c4da46e ---- /dev/null -+++ b/test/config_test.cc -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/video_source.h" -+ -+namespace { -+ -+class ConfigTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ public: -+ ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {} -+ -+ protected: -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ } -+ -+ virtual void BeginPassHook(unsigned int /*pass*/) { -+ frame_count_in_ = 0; -+ frame_count_out_ = 0; -+ } -+ -+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) { -+ ++frame_count_in_; -+ abort_ |= (frame_count_in_ >= frame_count_max_); -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) { -+ ++frame_count_out_; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ unsigned int frame_count_in_; -+ unsigned int frame_count_out_; -+ unsigned int frame_count_max_; -+}; -+ -+TEST_P(ConfigTest, LagIsDisabled) { -+ frame_count_max_ = 2; -+ cfg_.g_lag_in_frames = 15; -+ -+ libvpx_test::DummyVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ EXPECT_EQ(frame_count_in_, frame_count_out_); -+} -+ -+INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES); -+} // namespace -diff --git a/test/cq_test.cc b/test/cq_test.cc -new file mode 100644 -index 0000000..42ee2a2 ---- /dev/null -+++ b/test/cq_test.cc -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+ -+// CQ level range: [kCQLevelMin, kCQLevelMax). 
-+const int kCQLevelMin = 4; -+const int kCQLevelMax = 63; -+const int kCQLevelStep = 8; -+const int kCQTargetBitrate = 2000; -+ -+namespace { -+ -+class CQTest : public libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; } -+ virtual ~CQTest() {} -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(libvpx_test::kTwoPassGood); -+ } -+ -+ virtual void BeginPassHook(unsigned int /*pass*/) { -+ file_size_ = 0; -+ psnr_ = 0.0; -+ n_frames_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, -+ libvpx_test::Encoder *encoder) { -+ if (video->frame() == 1) { -+ if (cfg_.rc_end_usage == VPX_CQ) { -+ encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_); -+ } -+ encoder->Control(VP8E_SET_CPUUSED, 3); -+ } -+ } -+ -+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { -+ psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0); -+ n_frames_++; -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ file_size_ += pkt->data.frame.sz; -+ } -+ -+ double GetLinearPSNROverBitrate() const { -+ double avg_psnr = log10(psnr_ / n_frames_) * 10.0; -+ return pow(10.0, avg_psnr / 10.0) / file_size_; -+ } -+ -+ int file_size() const { return file_size_; } -+ int n_frames() const { return n_frames_; } -+ -+ private: -+ int cq_level_; -+ int file_size_; -+ double psnr_; -+ int n_frames_; -+}; -+ -+int prev_actual_bitrate = kCQTargetBitrate; -+TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) { -+ const vpx_rational timebase = { 33333333, 1000000000 }; -+ cfg_.g_timebase = timebase; -+ cfg_.rc_target_bitrate = kCQTargetBitrate; -+ cfg_.g_lag_in_frames = 25; -+ -+ cfg_.rc_end_usage = VPX_CQ; -+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ timebase.den, timebase.num, 0, 30); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double cq_psnr_lin = GetLinearPSNROverBitrate(); -+ const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000); -+ EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate); -+ EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate); -+ prev_actual_bitrate = cq_actual_bitrate; -+ -+ // try targeting the approximate same bitrate with VBR mode -+ cfg_.rc_end_usage = VPX_VBR; -+ cfg_.rc_target_bitrate = cq_actual_bitrate; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double vbr_psnr_lin = GetLinearPSNROverBitrate(); -+ EXPECT_GE(cq_psnr_lin, vbr_psnr_lin); -+} -+ -+INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest, -+ ::testing::Range(kCQLevelMin, kCQLevelMax, -+ kCQLevelStep)); -+} // namespace -diff --git a/test/datarate_test.cc b/test/datarate_test.cc -new file mode 100644 -index 0000000..6fbcb64 ---- /dev/null -+++ b/test/datarate_test.cc -@@ -0,0 +1,178 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+namespace { -+ -+class DatarateTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ ResetModel(); -+ } -+ -+ virtual void ResetModel() { -+ last_pts_ = 0; -+ bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; -+ frame_number_ = 0; -+ first_drop_ = 0; -+ bits_total_ = 0; -+ duration_ = 0.0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, -+ ::libvpx_test::Encoder *encoder) { -+ const vpx_rational_t tb = video->timebase(); -+ timebase_ = static_cast(tb.num) / tb.den; -+ duration_ = 0; -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ // Time since last timestamp = duration. -+ vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; -+ -+ // TODO(jimbankoski): Remove these lines when the issue: -+ // http://code.google.com/p/webm/issues/detail?id=496 is fixed. -+ // For now the codec assumes buffer starts at starting buffer rate -+ // plus one frame's time. -+ if (last_pts_ == 0) -+ duration = 1; -+ -+ // Add to the buffer the bits we'd expect from a constant bitrate server. -+ bits_in_buffer_model_ += duration * timebase_ * cfg_.rc_target_bitrate -+ * 1000; -+ -+ /* Test the buffer model here before subtracting the frame. Do so because -+ * the way the leaky bucket model works in libvpx is to allow the buffer to -+ * empty - and then stop showing frames until we've got enough bits to -+ * show one. As noted in comment below (issue 495), this does not currently -+ * apply to key frames. For now exclude key frames in condition below. */ -+ bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true: false; -+ if (!key_frame) { -+ ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " -+ << pkt->data.frame.pts; -+ } -+ -+ const int frame_size_in_bits = pkt->data.frame.sz * 8; -+ -+ // Subtract from the buffer the bits associated with a played back frame. -+ bits_in_buffer_model_ -= frame_size_in_bits; -+ -+ // Update the running total of bits for end of test datarate checks. -+ bits_total_ += frame_size_in_bits ; -+ -+ // If first drop not set and we have a drop set it to this time. -+ if (!first_drop_ && duration > 1) -+ first_drop_ = last_pts_ + 1; -+ -+ // Update the most recent pts. -+ last_pts_ = pkt->data.frame.pts; -+ -+ // We update this so that we can calculate the datarate minus the last -+ // frame encoded in the file. -+ bits_in_last_frame_ = frame_size_in_bits; -+ -+ ++frame_number_; -+ } -+ -+ virtual void EndPassHook(void) { -+ if (bits_total_) { -+ const double file_size_in_kb = bits_total_ / 1000; /* bits per kilobit */ -+ -+ duration_ = (last_pts_ + 1) * timebase_; -+ -+ // Effective file datarate includes the time spent prebuffering. 
-+ effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 -+ / (cfg_.rc_buf_initial_sz / 1000.0 + duration_); -+ -+ file_datarate_ = file_size_in_kb / duration_; -+ } -+ } -+ -+ vpx_codec_pts_t last_pts_; -+ int bits_in_buffer_model_; -+ double timebase_; -+ int frame_number_; -+ vpx_codec_pts_t first_drop_; -+ int64_t bits_total_; -+ double duration_; -+ double file_datarate_; -+ double effective_datarate_; -+ int bits_in_last_frame_; -+}; -+ -+TEST_P(DatarateTest, BasicBufferModel) { -+ cfg_.rc_buf_initial_sz = 500; -+ cfg_.rc_dropframe_thresh = 1; -+ cfg_.rc_max_quantizer = 56; -+ cfg_.rc_end_usage = VPX_CBR; -+ // 2 pass cbr datarate control has a bug hidden by the small # of -+ // frames selected in this encode. The problem is that even if the buffer is -+ // negative we produce a keyframe on a cutscene. Ignoring datarate -+ // constraints -+ // TODO(jimbankoski): ( Fix when issue -+ // http://code.google.com/p/webm/issues/detail?id=495 is addressed. ) -+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ 30, 1, 0, 140); -+ -+ // There is an issue for low bitrates in real-time mode, where the -+ // effective_datarate slightly overshoots the target bitrate. -+ // This is same the issue as noted about (#495). -+ // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), -+ // when the issue is resolved. -+ for (int i = 100; i < 800; i += 200) { -+ cfg_.rc_target_bitrate = i; -+ ResetModel(); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_) -+ << " The datarate for the file exceeds the target!"; -+ -+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3) -+ << " The datarate for the file missed the target!"; -+ } -+} -+ -+TEST_P(DatarateTest, ChangingDropFrameThresh) { -+ cfg_.rc_buf_initial_sz = 500; -+ cfg_.rc_max_quantizer = 36; -+ cfg_.rc_end_usage = VPX_CBR; -+ cfg_.rc_target_bitrate = 200; -+ cfg_.kf_mode = VPX_KF_DISABLED; -+ -+ const int frame_count = 40; -+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ 30, 1, 0, frame_count); -+ -+ // Here we check that the first dropped frame gets earlier and earlier -+ // as the drop frame threshold is increased. -+ -+ const int kDropFrameThreshTestStep = 30; -+ vpx_codec_pts_t last_drop = frame_count; -+ for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { -+ cfg_.rc_dropframe_thresh = i; -+ ResetModel(); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ ASSERT_LE(first_drop_, last_drop) -+ << " The first dropped frame for drop_thresh " << i -+ << " > first dropped frame for drop_thresh " -+ << i - kDropFrameThreshTestStep; -+ last_drop = first_drop_; -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES); -+} // namespace -diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc -new file mode 100644 -index 0000000..84afe7f ---- /dev/null -+++ b/test/decode_test_driver.cc -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+#include "test/decode_test_driver.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/register_state_check.h" -+#include "test/video_source.h" -+ -+namespace libvpx_test { -+#if CONFIG_VP8_DECODER -+void Decoder::DecodeFrame(const uint8_t *cxdata, int size) { -+ if (!decoder_.priv) { -+ const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_, -+ &vpx_codec_vp8_dx_algo, -+ &cfg_, 0); -+ ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError(); -+ } -+ -+ vpx_codec_err_t res_dec; -+ REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_, -+ cxdata, size, NULL, 0)); -+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError(); -+} -+ -+void DecoderTest::RunLoop(CompressedVideoSource *video) { -+ vpx_codec_dec_cfg_t dec_cfg = {0}; -+ Decoder decoder(dec_cfg, 0); -+ -+ // Decode frames. -+ for (video->Begin(); video->cxdata(); video->Next()) { -+ decoder.DecodeFrame(video->cxdata(), video->frame_size()); -+ -+ DxDataIterator dec_iter = decoder.GetDxData(); -+ const vpx_image_t *img = NULL; -+ -+ // Get decompressed data -+ while ((img = dec_iter.Next())) -+ DecompressedFrameHook(*img, video->frame_number()); -+ } -+} -+#endif -+} // namespace libvpx_test -diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h -new file mode 100644 -index 0000000..6408bee ---- /dev/null -+++ b/test/decode_test_driver.h -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef TEST_DECODE_TEST_DRIVER_H_ -+#define TEST_DECODE_TEST_DRIVER_H_ -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx_config.h" -+#include "vpx/vpx_decoder.h" -+#include "vpx/vp8dx.h" -+ -+namespace libvpx_test { -+ -+class CompressedVideoSource; -+ -+// Provides an object to handle decoding output -+class DxDataIterator { -+ public: -+ explicit DxDataIterator(vpx_codec_ctx_t *decoder) -+ : decoder_(decoder), iter_(NULL) {} -+ -+ const vpx_image_t *Next() { -+ return vpx_codec_get_frame(decoder_, &iter_); -+ } -+ -+ private: -+ vpx_codec_ctx_t *decoder_; -+ vpx_codec_iter_t iter_; -+}; -+ -+// Provides a simplified interface to manage one video decoding. -+// -+// TODO: similar to Encoder class, the exact services should be -+// added as more tests are added. -+class Decoder { -+ public: -+ Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) -+ : cfg_(cfg), deadline_(deadline) { -+ memset(&decoder_, 0, sizeof(decoder_)); -+ } -+ -+ ~Decoder() { -+ vpx_codec_destroy(&decoder_); -+ } -+ -+ void DecodeFrame(const uint8_t *cxdata, int size); -+ -+ DxDataIterator GetDxData() { -+ return DxDataIterator(&decoder_); -+ } -+ -+ void set_deadline(unsigned long deadline) { -+ deadline_ = deadline; -+ } -+ -+ void Control(int ctrl_id, int arg) { -+ const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg); -+ ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); -+ } -+ -+ protected: -+ const char *DecodeError() { -+ const char *detail = vpx_codec_error_detail(&decoder_); -+ return detail ? 
detail : vpx_codec_error(&decoder_); -+ } -+ -+ vpx_codec_ctx_t decoder_; -+ vpx_codec_dec_cfg_t cfg_; -+ unsigned int deadline_; -+}; -+ -+// Common test functionality for all Decoder tests. -+class DecoderTest { -+ public: -+ // Main loop. -+ virtual void RunLoop(CompressedVideoSource *video); -+ -+ // Hook to be called on every decompressed frame. -+ virtual void DecompressedFrameHook(const vpx_image_t& img, -+ const unsigned int frame_number) {} -+ -+ protected: -+ DecoderTest() {} -+ -+ virtual ~DecoderTest() {} -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_DECODE_TEST_DRIVER_H_ -diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc -new file mode 100644 -index 0000000..56339ca ---- /dev/null -+++ b/test/encode_test_driver.cc -@@ -0,0 +1,206 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include "vpx_config.h" -+#include "test/encode_test_driver.h" -+#if CONFIG_VP8_DECODER -+#include "test/decode_test_driver.h" -+#endif -+#include "test/register_state_check.h" -+#include "test/video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace libvpx_test { -+void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) { -+ if (video->img()) -+ EncodeFrameInternal(*video, frame_flags); -+ else -+ Flush(); -+ -+ // Handle twopass stats -+ CxDataIterator iter = GetCxData(); -+ -+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { -+ if (pkt->kind != VPX_CODEC_STATS_PKT) -+ continue; -+ -+ stats_->Append(*pkt); -+ } -+} -+ -+void Encoder::EncodeFrameInternal(const VideoSource &video, -+ const unsigned long frame_flags) { -+ vpx_codec_err_t res; -+ const vpx_image_t *img = video.img(); -+ -+ // Handle first frame initialization -+ if (!encoder_.priv) { -+ cfg_.g_w = img->d_w; -+ cfg_.g_h = img->d_h; -+ cfg_.g_timebase = video.timebase(); -+ cfg_.rc_twopass_stats_in = stats_->buf(); -+ res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_, -+ init_flags_); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+ } -+ -+ // Handle frame resizing -+ if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { -+ cfg_.g_w = img->d_w; -+ cfg_.g_h = img->d_h; -+ res = vpx_codec_enc_config_set(&encoder_, &cfg_); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+ } -+ -+ // Encode the frame -+ REGISTER_STATE_CHECK( -+ res = vpx_codec_encode(&encoder_, -+ video.img(), video.pts(), video.duration(), -+ frame_flags, deadline_)); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+} -+ -+void Encoder::Flush() { -+ const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0, -+ deadline_); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+} -+ -+void EncoderTest::SetMode(TestMode mode) { -+ switch (mode) { -+ case kRealTime: -+ deadline_ = VPX_DL_REALTIME; -+ break; -+ -+ case kOnePassGood: -+ case kTwoPassGood: -+ deadline_ = VPX_DL_GOOD_QUALITY; -+ break; -+ -+ case kOnePassBest: -+ case kTwoPassBest: -+ deadline_ = VPX_DL_BEST_QUALITY; -+ break; -+ -+ default: -+ ASSERT_TRUE(false) << "Unexpected mode " << mode; -+ } -+ -+ if (mode == kTwoPassGood || mode == kTwoPassBest) -+ passes_ = 2; -+ else -+ passes_ = 1; -+} -+// The function 
should return "true" most of the time, therefore no early -+// break-out is implemented within the match checking process. -+static bool compare_img(const vpx_image_t *img1, -+ const vpx_image_t *img2) { -+ bool match = (img1->fmt == img2->fmt) && -+ (img1->d_w == img2->d_w) && -+ (img1->d_h == img2->d_h); -+ -+ const unsigned int width_y = img1->d_w; -+ const unsigned int height_y = img1->d_h; -+ unsigned int i; -+ for (i = 0; i < height_y; ++i) -+ match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], -+ img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], -+ width_y) == 0) && match; -+ const unsigned int width_uv = (img1->d_w + 1) >> 1; -+ const unsigned int height_uv = (img1->d_h + 1) >> 1; -+ for (i = 0; i < height_uv; ++i) -+ match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], -+ img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], -+ width_uv) == 0) && match; -+ for (i = 0; i < height_uv; ++i) -+ match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], -+ img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], -+ width_uv) == 0) && match; -+ return match; -+} -+ -+void EncoderTest::RunLoop(VideoSource *video) { -+#if CONFIG_VP8_DECODER -+ vpx_codec_dec_cfg_t dec_cfg = {0}; -+#endif -+ -+ stats_.Reset(); -+ -+ for (unsigned int pass = 0; pass < passes_; pass++) { -+ last_pts_ = 0; -+ -+ if (passes_ == 1) -+ cfg_.g_pass = VPX_RC_ONE_PASS; -+ else if (pass == 0) -+ cfg_.g_pass = VPX_RC_FIRST_PASS; -+ else -+ cfg_.g_pass = VPX_RC_LAST_PASS; -+ -+ BeginPassHook(pass); -+ Encoder encoder(cfg_, deadline_, init_flags_, &stats_); -+#if CONFIG_VP8_DECODER -+ Decoder decoder(dec_cfg, 0); -+ bool has_cxdata = false; -+#endif -+ bool again; -+ for (again = true, video->Begin(); again; video->Next()) { -+ again = video->img() != NULL; -+ -+ PreEncodeFrameHook(video); -+ PreEncodeFrameHook(video, &encoder); -+ encoder.EncodeFrame(video, frame_flags_); -+ -+ CxDataIterator iter = encoder.GetCxData(); -+ -+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { -+ again = true; -+ -+ switch (pkt->kind) { -+ case VPX_CODEC_CX_FRAME_PKT: -+#if CONFIG_VP8_DECODER -+ has_cxdata = true; -+ decoder.DecodeFrame((const uint8_t*)pkt->data.frame.buf, -+ pkt->data.frame.sz); -+#endif -+ ASSERT_GE(pkt->data.frame.pts, last_pts_); -+ last_pts_ = pkt->data.frame.pts; -+ FramePktHook(pkt); -+ break; -+ -+ case VPX_CODEC_PSNR_PKT: -+ PSNRPktHook(pkt); -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+#if CONFIG_VP8_DECODER -+ if (has_cxdata) { -+ const vpx_image_t *img_enc = encoder.GetPreviewFrame(); -+ DxDataIterator dec_iter = decoder.GetDxData(); -+ const vpx_image_t *img_dec = dec_iter.Next(); -+ if(img_enc && img_dec) { -+ const bool res = compare_img(img_enc, img_dec); -+ ASSERT_TRUE(res)<< "Encoder/Decoder mismatch found."; -+ } -+ } -+#endif -+ if (!Continue()) -+ break; -+ } -+ -+ EndPassHook(); -+ -+ if (!Continue()) -+ break; -+ } -+} -+} // namespace libvpx_test -diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h -new file mode 100644 -index 0000000..0141fa9 ---- /dev/null -+++ b/test/encode_test_driver.h -@@ -0,0 +1,197 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. 
All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_ENCODE_TEST_DRIVER_H_ -+#define TEST_ENCODE_TEST_DRIVER_H_ -+#include -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_encoder.h" -+#include "vpx/vp8cx.h" -+ -+namespace libvpx_test { -+ -+class VideoSource; -+ -+enum TestMode { -+ kRealTime, -+ kOnePassGood, -+ kOnePassBest, -+ kTwoPassGood, -+ kTwoPassBest -+}; -+#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ -+ ::libvpx_test::kOnePassGood, \ -+ ::libvpx_test::kOnePassBest, \ -+ ::libvpx_test::kTwoPassGood, \ -+ ::libvpx_test::kTwoPassBest) -+ -+#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ -+ ::libvpx_test::kOnePassGood, \ -+ ::libvpx_test::kOnePassBest) -+ -+ -+// Provides an object to handle the libvpx get_cx_data() iteration pattern -+class CxDataIterator { -+ public: -+ explicit CxDataIterator(vpx_codec_ctx_t *encoder) -+ : encoder_(encoder), iter_(NULL) {} -+ -+ const vpx_codec_cx_pkt_t *Next() { -+ return vpx_codec_get_cx_data(encoder_, &iter_); -+ } -+ -+ private: -+ vpx_codec_ctx_t *encoder_; -+ vpx_codec_iter_t iter_; -+}; -+ -+// Implements an in-memory store for libvpx twopass statistics -+class TwopassStatsStore { -+ public: -+ void Append(const vpx_codec_cx_pkt_t &pkt) { -+ buffer_.append(reinterpret_cast(pkt.data.twopass_stats.buf), -+ pkt.data.twopass_stats.sz); -+ } -+ -+ vpx_fixed_buf_t buf() { -+ const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; -+ return buf; -+ } -+ -+ void Reset() { -+ buffer_.clear(); -+ } -+ -+ protected: -+ std::string buffer_; -+}; -+ -+ -+// Provides a simplified interface to manage one video encoding pass, given -+// a configuration and video source. -+// -+// TODO(jkoleszar): The exact services it provides and the appropriate -+// level of abstraction will be fleshed out as more tests are written. -+class Encoder { -+ public: -+ Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, -+ const unsigned long init_flags, TwopassStatsStore *stats) -+ : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) { -+ memset(&encoder_, 0, sizeof(encoder_)); -+ } -+ -+ ~Encoder() { -+ vpx_codec_destroy(&encoder_); -+ } -+ -+ CxDataIterator GetCxData() { -+ return CxDataIterator(&encoder_); -+ } -+ -+ const vpx_image_t *GetPreviewFrame() { -+ return vpx_codec_get_preview_frame(&encoder_); -+ } -+ // This is a thin wrapper around vpx_codec_encode(), so refer to -+ // vpx_encoder.h for its semantics. -+ void EncodeFrame(VideoSource *video, const unsigned long frame_flags); -+ -+ // Convenience wrapper for EncodeFrame() -+ void EncodeFrame(VideoSource *video) { -+ EncodeFrame(video, 0); -+ } -+ -+ void Control(int ctrl_id, int arg) { -+ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+ } -+ -+ void set_deadline(unsigned long deadline) { -+ deadline_ = deadline; -+ } -+ -+ protected: -+ const char *EncoderError() { -+ const char *detail = vpx_codec_error_detail(&encoder_); -+ return detail ? 
detail : vpx_codec_error(&encoder_); -+ } -+ -+ // Encode an image -+ void EncodeFrameInternal(const VideoSource &video, -+ const unsigned long frame_flags); -+ -+ // Flush the encoder on EOS -+ void Flush(); -+ -+ vpx_codec_ctx_t encoder_; -+ vpx_codec_enc_cfg_t cfg_; -+ unsigned long deadline_; -+ unsigned long init_flags_; -+ TwopassStatsStore *stats_; -+}; -+ -+// Common test functionality for all Encoder tests. -+// -+// This class is a mixin which provides the main loop common to all -+// encoder tests. It provides hooks which can be overridden by subclasses -+// to implement each test's specific behavior, while centralizing the bulk -+// of the boilerplate. Note that it doesn't inherit the gtest testing -+// classes directly, so that tests can be parameterized differently. -+class EncoderTest { -+ protected: -+ EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0), -+ last_pts_(0) {} -+ -+ virtual ~EncoderTest() {} -+ -+ // Initialize the cfg_ member with the default configuration. -+ void InitializeConfig() { -+ const vpx_codec_err_t res = vpx_codec_enc_config_default( -+ &vpx_codec_vp8_cx_algo, &cfg_, 0); -+ ASSERT_EQ(VPX_CODEC_OK, res); -+ } -+ -+ // Map the TestMode enum to the deadline_ and passes_ variables. -+ void SetMode(TestMode mode); -+ -+ // Main loop. -+ virtual void RunLoop(VideoSource *video); -+ -+ // Hook to be called at the beginning of a pass. -+ virtual void BeginPassHook(unsigned int pass) {} -+ -+ // Hook to be called at the end of a pass. -+ virtual void EndPassHook() {} -+ -+ // Hook to be called before encoding a frame. -+ virtual void PreEncodeFrameHook(VideoSource *video) {} -+ virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {} -+ -+ // Hook to be called on every compressed data packet. -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {} -+ -+ // Hook to be called on every PSNR packet. -+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {} -+ -+ // Hook to determine whether the encode loop should continue. -+ virtual bool Continue() const { return !abort_; } -+ -+ bool abort_; -+ vpx_codec_enc_cfg_t cfg_; -+ unsigned int passes_; -+ unsigned long deadline_; -+ TwopassStatsStore stats_; -+ unsigned long init_flags_; -+ unsigned long frame_flags_; -+ vpx_codec_pts_t last_pts_; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_ENCODE_TEST_DRIVER_H_ -diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc -new file mode 100644 -index 0000000..25c6731 ---- /dev/null -+++ b/test/error_resilience_test.cc -@@ -0,0 +1,90 @@ -+/* -+ Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ -+ Use of this source code is governed by a BSD-style license -+ that can be found in the LICENSE file in the root of the source -+ tree. An additional intellectual property rights grant can be found -+ in the file PATENTS. All contributing project authors may -+ be found in the AUTHORS file in the root of the source tree. 
-+*/ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+ -+namespace { -+ -+class ErrorResilienceTest : public libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ ErrorResilienceTest() { -+ psnr_ = 0.0; -+ nframes_ = 0; -+ encoding_mode_ = static_cast(GetParam()); -+ } -+ virtual ~ErrorResilienceTest() {} -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(encoding_mode_); -+ } -+ -+ virtual void BeginPassHook(unsigned int /*pass*/) { -+ psnr_ = 0.0; -+ nframes_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { -+ psnr_ += pkt->data.psnr.psnr[0]; -+ nframes_++; -+ } -+ -+ double GetAveragePsnr() const { -+ if (nframes_) -+ return psnr_ / nframes_; -+ return 0.0; -+ } -+ -+ private: -+ double psnr_; -+ unsigned int nframes_; -+ libvpx_test::TestMode encoding_mode_; -+}; -+ -+TEST_P(ErrorResilienceTest, OnVersusOff) { -+ const vpx_rational timebase = { 33333333, 1000000000 }; -+ cfg_.g_timebase = timebase; -+ cfg_.rc_target_bitrate = 2000; -+ cfg_.g_lag_in_frames = 25; -+ -+ init_flags_ = VPX_CODEC_USE_PSNR; -+ -+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ timebase.den, timebase.num, 0, 30); -+ -+ // Error resilient mode OFF. -+ cfg_.g_error_resilient = 0; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double psnr_resilience_off = GetAveragePsnr(); -+ EXPECT_GT(psnr_resilience_off, 25.0); -+ -+ // Error resilient mode ON. -+ cfg_.g_error_resilient = 1; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double psnr_resilience_on = GetAveragePsnr(); -+ EXPECT_GT(psnr_resilience_on, 25.0); -+ -+ // Test that turning on error resilient mode hurts by 10% at most. -+ if (psnr_resilience_off > 0.0) { -+ const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; -+ EXPECT_GE(psnr_ratio, 0.9); -+ EXPECT_LE(psnr_ratio, 1.1); -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest, -+ ONE_PASS_TEST_MODES); -+} // namespace -diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc -new file mode 100644 -index 0000000..619b23d ---- /dev/null -+++ b/test/fdct4x4_test.cc -@@ -0,0 +1,169 @@ -+/* -+* Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+* -+* Use of this source code is governed by a BSD-style license -+* that can be found in the LICENSE file in the root of the source -+* tree. An additional intellectual property rights grant can be found -+* in the file PATENTS. All contributing project authors may -+* be found in the AUTHORS file in the root of the source tree. 
-+*/ -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+extern "C" { -+#include "vpx_rtcd.h" -+} -+ -+#include "test/acm_random.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_integer.h" -+ -+ -+namespace { -+ -+const int cospi8sqrt2minus1 = 20091; -+const int sinpi8sqrt2 = 35468; -+ -+void reference_idct4x4(const int16_t *input, int16_t *output) { -+ const int16_t *ip = input; -+ int16_t *op = output; -+ -+ for (int i = 0; i < 4; ++i) { -+ const int a1 = ip[0] + ip[8]; -+ const int b1 = ip[0] - ip[8]; -+ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16; -+ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); -+ const int c1 = temp1 - temp2; -+ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); -+ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16; -+ const int d1 = temp3 + temp4; -+ op[0] = a1 + d1; -+ op[12] = a1 - d1; -+ op[4] = b1 + c1; -+ op[8] = b1 - c1; -+ ++ip; -+ ++op; -+ } -+ ip = output; -+ op = output; -+ for (int i = 0; i < 4; ++i) { -+ const int a1 = ip[0] + ip[2]; -+ const int b1 = ip[0] - ip[2]; -+ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16; -+ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); -+ const int c1 = temp1 - temp2; -+ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); -+ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16; -+ const int d1 = temp3 + temp4; -+ op[0] = (a1 + d1 + 4) >> 3; -+ op[3] = (a1 - d1 + 4) >> 3; -+ op[1] = (b1 + c1 + 4) >> 3; -+ op[2] = (b1 - c1 + 4) >> 3; -+ ip += 4; -+ op += 4; -+ } -+} -+ -+using libvpx_test::ACMRandom; -+ -+TEST(Vp8FdctTest, SignBiasCheck) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ int16_t test_input_block[16]; -+ int16_t test_output_block[16]; -+ const int pitch = 8; -+ int count_sign_block[16][2]; -+ const int count_test_block = 1000000; -+ -+ memset(count_sign_block, 0, sizeof(count_sign_block)); -+ -+ for (int i = 0; i < count_test_block; ++i) { -+ // Initialize a test block with input range [-255, 255]. -+ for (int j = 0; j < 16; ++j) -+ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); -+ -+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); -+ -+ for (int j = 0; j < 16; ++j) { -+ if (test_output_block[j] < 0) -+ ++count_sign_block[j][0]; -+ else if (test_output_block[j] > 0) -+ ++count_sign_block[j][1]; -+ } -+ } -+ -+ bool bias_acceptable = true; -+ for (int j = 0; j < 16; ++j) -+ bias_acceptable = bias_acceptable && -+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000); -+ -+ EXPECT_EQ(true, bias_acceptable) -+ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]"; -+ -+ memset(count_sign_block, 0, sizeof(count_sign_block)); -+ -+ for (int i = 0; i < count_test_block; ++i) { -+ // Initialize a test block with input range [-15, 15]. 
-+ for (int j = 0; j < 16; ++j) -+ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4); -+ -+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); -+ -+ for (int j = 0; j < 16; ++j) { -+ if (test_output_block[j] < 0) -+ ++count_sign_block[j][0]; -+ else if (test_output_block[j] > 0) -+ ++count_sign_block[j][1]; -+ } -+ } -+ -+ bias_acceptable = true; -+ for (int j = 0; j < 16; ++j) -+ bias_acceptable = bias_acceptable && -+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000); -+ -+ EXPECT_EQ(true, bias_acceptable) -+ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; -+}; -+ -+TEST(Vp8FdctTest, RoundTripErrorCheck) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ int max_error = 0; -+ double total_error = 0; -+ const int count_test_block = 1000000; -+ for (int i = 0; i < count_test_block; ++i) { -+ int16_t test_input_block[16]; -+ int16_t test_temp_block[16]; -+ int16_t test_output_block[16]; -+ -+ // Initialize a test block with input range [-255, 255]. -+ for (int j = 0; j < 16; ++j) -+ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); -+ -+ const int pitch = 8; -+ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch); -+ reference_idct4x4(test_temp_block, test_output_block); -+ -+ for (int j = 0; j < 16; ++j) { -+ const int diff = test_input_block[j] - test_output_block[j]; -+ const int error = diff * diff; -+ if (max_error < error) -+ max_error = error; -+ total_error += error; -+ } -+ } -+ -+ EXPECT_GE(1, max_error ) -+ << "Error: FDCT/IDCT has an individual roundtrip error > 1"; -+ -+ EXPECT_GE(count_test_block, total_error) -+ << "Error: FDCT/IDCT has average roundtrip error > 1 per block"; -+}; -+ -+} // namespace -diff --git a/test/i420_video_source.h b/test/i420_video_source.h -new file mode 100644 -index 0000000..219bd33 ---- /dev/null -+++ b/test/i420_video_source.h -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_I420_VIDEO_SOURCE_H_ -+#define TEST_I420_VIDEO_SOURCE_H_ -+#include -+#include -+ -+#include "test/video_source.h" -+ -+namespace libvpx_test { -+ -+// This class extends VideoSource to allow parsing of raw yv12 -+// so that we can do actual file encodes. -+class I420VideoSource : public VideoSource { -+ public: -+ I420VideoSource(const std::string &file_name, -+ unsigned int width, unsigned int height, -+ int rate_numerator, int rate_denominator, -+ unsigned int start, int limit) -+ : file_name_(file_name), -+ input_file_(NULL), -+ img_(NULL), -+ start_(start), -+ limit_(limit), -+ frame_(0), -+ width_(0), -+ height_(0), -+ framerate_numerator_(rate_numerator), -+ framerate_denominator_(rate_denominator) { -+ -+ // This initializes raw_sz_, width_, height_ and allocates an img. -+ SetSize(width, height); -+ } -+ -+ virtual ~I420VideoSource() { -+ vpx_img_free(img_); -+ if (input_file_) -+ fclose(input_file_); -+ } -+ -+ virtual void Begin() { -+ if (input_file_) -+ fclose(input_file_); -+ input_file_ = OpenTestDataFile(file_name_); -+ ASSERT_TRUE(input_file_) << "Input file open failed. 
Filename: " -+ << file_name_; -+ if (start_) { -+ fseek(input_file_, raw_sz_ * start_, SEEK_SET); -+ } -+ -+ frame_ = start_; -+ FillFrame(); -+ } -+ -+ virtual void Next() { -+ ++frame_; -+ FillFrame(); -+ } -+ -+ virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } -+ -+ // Models a stream where Timebase = 1/FPS, so pts == frame. -+ virtual vpx_codec_pts_t pts() const { return frame_; } -+ -+ virtual unsigned long duration() const { return 1; } -+ -+ virtual vpx_rational_t timebase() const { -+ const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; -+ return t; -+ } -+ -+ virtual unsigned int frame() const { return frame_; } -+ -+ virtual unsigned int limit() const { return limit_; } -+ -+ void SetSize(unsigned int width, unsigned int height) { -+ if (width != width_ || height != height_) { -+ vpx_img_free(img_); -+ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1); -+ ASSERT_TRUE(img_ != NULL); -+ width_ = width; -+ height_ = height; -+ raw_sz_ = width * height * 3 / 2; -+ } -+ } -+ -+ virtual void FillFrame() { -+ // Read a frame from input_file. -+ if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { -+ limit_ = frame_; -+ } -+ } -+ -+ protected: -+ std::string file_name_; -+ FILE *input_file_; -+ vpx_image_t *img_; -+ size_t raw_sz_; -+ unsigned int start_; -+ unsigned int limit_; -+ unsigned int frame_; -+ unsigned int width_; -+ unsigned int height_; -+ unsigned int framerate_numerator_; -+ unsigned int framerate_denominator_; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_I420_VIDEO_SOURCE_H_ -diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc -new file mode 100644 -index 0000000..1be5fa0 ---- /dev/null -+++ b/test/idctllm_test.cc -@@ -0,0 +1,126 @@ -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+ -+ -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+} -+#include "test/register_state_check.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, -+ int pred_stride, unsigned char *dst_ptr, -+ int dst_stride); -+namespace { -+class IDCTTest : public ::testing::TestWithParam -+{ -+ protected: -+ virtual void SetUp() -+ { -+ int i; -+ -+ UUT = GetParam(); -+ memset(input, 0, sizeof(input)); -+ /* Set up guard blocks */ -+ for(i=0; i<256; i++) -+ output[i] = ((i&0xF)<4&&(i<64))?0:-1; -+ } -+ -+ idct_fn_t UUT; -+ short input[16]; -+ unsigned char output[256]; -+ unsigned char predict[256]; -+}; -+ -+TEST_P(IDCTTest, TestGuardBlocks) -+{ -+ int i; -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(0, output[i]) << i; -+ else -+ EXPECT_EQ(255, output[i]); -+} -+ -+TEST_P(IDCTTest, TestAllZeros) -+{ -+ int i; -+ -+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(0, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+} -+ -+TEST_P(IDCTTest, TestAllOnes) -+{ -+ int i; -+ -+ input[0] = 4; -+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(1, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+} -+ -+TEST_P(IDCTTest, TestAddOne) -+{ -+ int i; -+ -+ for(i=0; i<256; i++) -+ predict[i] = i; -+ -+ input[0] = 4; -+ REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(i+1, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+} -+ -+TEST_P(IDCTTest, TestWithData) -+{ -+ int i; -+ -+ for(i=0; i<16; i++) -+ input[i] = i; -+ -+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) > 3 || i>63) -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+ else if(i == 0) -+ EXPECT_EQ(11, output[i]) << "i==" << i; -+ else if(i == 34) -+ EXPECT_EQ(1, output[i]) << "i==" << i; -+ else if(i == 2 || i == 17 || i == 32) -+ EXPECT_EQ(3, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(0, output[i]) << "i==" << i; -+} -+ -+INSTANTIATE_TEST_CASE_P(C, IDCTTest, -+ ::testing::Values(vp8_short_idct4x4llm_c)); -+#if HAVE_MMX -+INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, -+ ::testing::Values(vp8_short_idct4x4llm_mmx)); -+#endif -+} -diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc -new file mode 100644 -index 0000000..4c16c3f ---- /dev/null -+++ b/test/intrapred_test.cc -@@ -0,0 +1,357 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+ -+ -+#include -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vp8/common/blockd.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+namespace { -+ -+using libvpx_test::ACMRandom; -+ -+class IntraPredBase { -+ protected: -+ void SetupMacroblock(uint8_t *data, int block_size, int stride, -+ int num_planes) { -+ memset(&mb_, 0, sizeof(mb_)); -+ memset(&mi_, 0, sizeof(mi_)); -+ mb_.up_available = 1; -+ mb_.left_available = 1; -+ mb_.mode_info_context = &mi_; -+ stride_ = stride; -+ block_size_ = block_size; -+ num_planes_ = num_planes; -+ for (int p = 0; p < num_planes; p++) -+ data_ptr_[p] = data + stride * (block_size + 1) * p + -+ stride + block_size; -+ } -+ -+ void FillRandom() { -+ // Fill edges with random data -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ for (int p = 0; p < num_planes_; p++) { -+ for (int x = -1 ; x <= block_size_; x++) -+ data_ptr_[p][x - stride_] = rnd.Rand8(); -+ for (int y = 0; y < block_size_; y++) -+ data_ptr_[p][y * stride_ - 1] = rnd.Rand8(); -+ } -+ } -+ -+ virtual void Predict(MB_PREDICTION_MODE mode) = 0; -+ -+ void SetLeftUnavailable() { -+ mb_.left_available = 0; -+ for (int p = 0; p < num_planes_; p++) -+ for (int i = -1; i < block_size_; ++i) -+ data_ptr_[p][stride_ * i - 1] = 129; -+ } -+ -+ void SetTopUnavailable() { -+ mb_.up_available = 0; -+ for (int p = 0; p < num_planes_; p++) -+ memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2); -+ } -+ -+ void SetTopLeftUnavailable() { -+ SetLeftUnavailable(); -+ SetTopUnavailable(); -+ } -+ -+ int BlockSizeLog2Min1() const { -+ switch (block_size_) { -+ case 16: -+ return 3; -+ case 8: -+ return 2; -+ default: -+ return 0; -+ } -+ } -+ -+ // check DC prediction output against a reference -+ void CheckDCPrediction() const { -+ for (int p = 0; p < num_planes_; p++) { -+ // calculate expected DC -+ int expected; -+ if (mb_.up_available || mb_.left_available) { -+ int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available + -+ mb_.left_available; -+ if (mb_.up_available) -+ for (int x = 0; x < block_size_; x++) -+ sum += data_ptr_[p][x - stride_]; -+ if (mb_.left_available) -+ for (int y = 0; y < block_size_; y++) -+ sum += data_ptr_[p][y * stride_ - 1]; -+ expected = (sum + (1 << (shift - 1))) >> shift; -+ } else -+ expected = 0x80; -+ -+ // check that all subsequent lines are equal to the first -+ for (int y = 1; y < block_size_; ++y) -+ ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_], -+ block_size_)); -+ // within the first line, ensure that each pixel has the same value -+ for (int x = 1; x < block_size_; ++x) -+ ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]); -+ // now ensure that that pixel has the expected (DC) value -+ ASSERT_EQ(expected, data_ptr_[p][0]); -+ } -+ } -+ -+ // check V prediction output against a reference -+ void CheckVPrediction() const { -+ // check that all lines equal the top border -+ for (int p = 0; p < num_planes_; p++) -+ for (int y = 0; y < block_size_; y++) -+ ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_], -+ &data_ptr_[p][y * stride_], block_size_)); -+ } -+ -+ // check H prediction output against a reference -+ void CheckHPrediction() const { -+ // for each line, ensure that each pixel is equal to the left border -+ for (int p = 0; p < num_planes_; p++) -+ for (int y = 0; y < block_size_; y++) -+ for (int x = 0; x < block_size_; x++) -+ ASSERT_EQ(data_ptr_[p][-1 + y * stride_], -+ data_ptr_[p][x 
+ y * stride_]); -+ } -+ -+ static int ClipByte(int value) { -+ if (value > 255) -+ return 255; -+ else if (value < 0) -+ return 0; -+ return value; -+ } -+ -+ // check TM prediction output against a reference -+ void CheckTMPrediction() const { -+ for (int p = 0; p < num_planes_; p++) -+ for (int y = 0; y < block_size_; y++) -+ for (int x = 0; x < block_size_; x++) { -+ const int expected = ClipByte(data_ptr_[p][x - stride_] -+ + data_ptr_[p][stride_ * y - 1] -+ - data_ptr_[p][-1 - stride_]); -+ ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]); -+ } -+ } -+ -+ // Actual test -+ void RunTest() { -+ { -+ SCOPED_TRACE("DC_PRED"); -+ FillRandom(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("DC_PRED LEFT"); -+ FillRandom(); -+ SetLeftUnavailable(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("DC_PRED TOP"); -+ FillRandom(); -+ SetTopUnavailable(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("DC_PRED TOP_LEFT"); -+ FillRandom(); -+ SetTopLeftUnavailable(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("H_PRED"); -+ FillRandom(); -+ Predict(H_PRED); -+ CheckHPrediction(); -+ } -+ { -+ SCOPED_TRACE("V_PRED"); -+ FillRandom(); -+ Predict(V_PRED); -+ CheckVPrediction(); -+ } -+ { -+ SCOPED_TRACE("TM_PRED"); -+ FillRandom(); -+ Predict(TM_PRED); -+ CheckTMPrediction(); -+ } -+ } -+ -+ MACROBLOCKD mb_; -+ MODE_INFO mi_; -+ uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used -+ int stride_; -+ int block_size_; -+ int num_planes_; -+}; -+ -+typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x, -+ uint8_t *yabove_row, -+ uint8_t *yleft, -+ int left_stride, -+ uint8_t *ypred_ptr, -+ int y_stride); -+ -+class IntraPredYTest : public ::testing::TestWithParam, -+ protected IntraPredBase { -+ public: -+ static void SetUpTestCase() { -+ data_array_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(data_array_); -+ data_array_ = NULL; -+ } -+ -+ protected: -+ static const int kBlockSize = 16; -+ static const int kDataAlignment = 16; -+ static const int kStride = kBlockSize * 3; -+ // We use 48 so that the data pointer of the first pixel in each row of -+ // each macroblock is 16-byte aligned, and this gives us access to the -+ // top-left and top-right corner pixels belonging to the top-left/right -+ // macroblocks. -+ // We use 17 lines so we have one line above us for top-prediction. 
-+ static const int kDataBufferSize = kStride * (kBlockSize + 1); -+ -+ virtual void SetUp() { -+ pred_fn_ = GetParam(); -+ SetupMacroblock(data_array_, kBlockSize, kStride, 1); -+ } -+ -+ virtual void Predict(MB_PREDICTION_MODE mode) { -+ mb_.mode_info_context->mbmi.mode = mode; -+ REGISTER_STATE_CHECK(pred_fn_(&mb_, -+ data_ptr_[0] - kStride, -+ data_ptr_[0] - 1, kStride, -+ data_ptr_[0], kStride)); -+ } -+ -+ intra_pred_y_fn_t pred_fn_; -+ static uint8_t* data_array_; -+}; -+ -+uint8_t* IntraPredYTest::data_array_ = NULL; -+ -+TEST_P(IntraPredYTest, IntraPredTests) { -+ RunTest(); -+} -+ -+INSTANTIATE_TEST_CASE_P(C, IntraPredYTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mby_s_c)); -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mby_s_sse2)); -+#endif -+#if HAVE_SSSE3 -+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mby_s_ssse3)); -+#endif -+ -+typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x, -+ uint8_t *uabove_row, -+ uint8_t *vabove_row, -+ uint8_t *uleft, -+ uint8_t *vleft, -+ int left_stride, -+ uint8_t *upred_ptr, -+ uint8_t *vpred_ptr, -+ int pred_stride); -+ -+class IntraPredUVTest : public ::testing::TestWithParam, -+ protected IntraPredBase { -+ public: -+ static void SetUpTestCase() { -+ data_array_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(data_array_); -+ data_array_ = NULL; -+ } -+ -+ protected: -+ static const int kBlockSize = 8; -+ static const int kDataAlignment = 8; -+ static const int kStride = kBlockSize * 3; -+ // We use 24 so that the data pointer of the first pixel in each row of -+ // each macroblock is 8-byte aligned, and this gives us access to the -+ // top-left and top-right corner pixels belonging to the top-left/right -+ // macroblocks. -+ // We use 9 lines so we have one line above us for top-prediction. -+ // [0] = U, [1] = V -+ static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1); -+ -+ virtual void SetUp() { -+ pred_fn_ = GetParam(); -+ SetupMacroblock(data_array_, kBlockSize, kStride, 2); -+ } -+ -+ virtual void Predict(MB_PREDICTION_MODE mode) { -+ mb_.mode_info_context->mbmi.uv_mode = mode; -+ pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride, -+ data_ptr_[0] - 1, data_ptr_[1] - 1, kStride, -+ data_ptr_[0], data_ptr_[1], kStride); -+ } -+ -+ intra_pred_uv_fn_t pred_fn_; -+ // We use 24 so that the data pointer of the first pixel in each row of -+ // each macroblock is 8-byte aligned, and this gives us access to the -+ // top-left and top-right corner pixels belonging to the top-left/right -+ // macroblocks. -+ // We use 9 lines so we have one line above us for top-prediction. 
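The typedef/INSTANTIATE_TEST_CASE_P pairs above all express the same idea: one correctness check, parameterized over a function pointer, so the C routine and every SIMD variant enabled by the build run through identical test code. A rough gtest-free sketch of that dispatch pattern, with a deliberately trivial kernel and made-up names:

#include <cstdint>
#include <cstdio>

// Any implementation under test must match this signature.
typedef int (*row_sum_fn_t)(const uint8_t *row, int width);

static int RowSumC(const uint8_t *row, int width) {  // portable reference
  int sum = 0;
  for (int i = 0; i < width; ++i) sum += row[i];
  return sum;
}

// A SIMD build would append e.g. a RowSumSse2 entry here under #if HAVE_SSE2.
static const row_sum_fn_t kImplementations[] = { RowSumC };

int main() {
  uint8_t row[16];
  for (int i = 0; i < 16; ++i) row[i] = static_cast<uint8_t>(i + 1);
  const int expected = RowSumC(row, 16);  // reference result
  for (row_sum_fn_t fn : kImplementations)
    printf("%s\n", fn(row, 16) == expected ? "match" : "MISMATCH");
  return 0;
}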
-+ // [0] = U, [1] = V -+ static uint8_t* data_array_; -+}; -+ -+uint8_t* IntraPredUVTest::data_array_ = NULL; -+ -+TEST_P(IntraPredUVTest, IntraPredTests) { -+ RunTest(); -+} -+ -+INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mbuv_s_c)); -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mbuv_s_sse2)); -+#endif -+#if HAVE_SSSE3 -+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mbuv_s_ssse3)); -+#endif -+ -+} // namespace -diff --git a/test/ivf_video_source.h b/test/ivf_video_source.h -new file mode 100644 -index 0000000..48c3a7d ---- /dev/null -+++ b/test/ivf_video_source.h -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_IVF_VIDEO_SOURCE_H_ -+#define TEST_IVF_VIDEO_SOURCE_H_ -+#include -+#include -+#include -+#include -+#include "test/video_source.h" -+ -+namespace libvpx_test { -+const unsigned int kCodeBufferSize = 256 * 1024; -+const unsigned int kIvfFileHdrSize = 32; -+const unsigned int kIvfFrameHdrSize = 12; -+ -+static unsigned int MemGetLe32(const uint8_t *mem) { -+ return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]); -+} -+ -+// This class extends VideoSource to allow parsing of ivf files, -+// so that we can do actual file decodes. -+class IVFVideoSource : public CompressedVideoSource { -+ public: -+ IVFVideoSource(const std::string &file_name) -+ : file_name_(file_name), -+ input_file_(NULL), -+ compressed_frame_buf_(NULL), -+ frame_sz_(0), -+ frame_(0), -+ end_of_file_(false) { -+ } -+ -+ virtual ~IVFVideoSource() { -+ delete[] compressed_frame_buf_; -+ -+ if (input_file_) -+ fclose(input_file_); -+ } -+ -+ virtual void Init() { -+ // Allocate a buffer for read in the compressed video frame. -+ compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize]; -+ ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed"; -+ } -+ -+ virtual void Begin() { -+ input_file_ = OpenTestDataFile(file_name_); -+ ASSERT_TRUE(input_file_) << "Input file open failed. Filename: " -+ << file_name_; -+ -+ // Read file header -+ uint8_t file_hdr[kIvfFileHdrSize]; -+ ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_)) -+ << "File header read failed."; -+ // Check file header -+ ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I' -+ && file_hdr[3] == 'F') << "Input is not an IVF file."; -+ -+ FillFrame(); -+ } -+ -+ virtual void Next() { -+ ++frame_; -+ FillFrame(); -+ } -+ -+ void FillFrame() { -+ uint8_t frame_hdr[kIvfFrameHdrSize]; -+ // Check frame header and read a frame from input_file. 
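MemGetLe32() above assembles the little-endian frame size from the first four bytes of each 12-byte IVF frame header (the remaining eight bytes hold the timestamp). The same helper in isolation, fed a fabricated header:

#include <cstdint>
#include <cstdio>

static unsigned int MemGetLe32(const uint8_t *mem) {
  return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | mem[0];
}

int main() {
  // A made-up 12-byte IVF frame header: frame size (LE32), then a 64-bit pts.
  const uint8_t frame_hdr[12] = { 0x10, 0x27, 0x00, 0x00,  // 0x2710 = 10000
                                  0, 0, 0, 0, 0, 0, 0, 0 };
  printf("frame size = %u bytes\n", MemGetLe32(frame_hdr));
  return 0;
}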
-+ if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) -+ != kIvfFrameHdrSize) { -+ end_of_file_ = true; -+ } else { -+ end_of_file_ = false; -+ -+ frame_sz_ = MemGetLe32(frame_hdr); -+ ASSERT_LE(frame_sz_, kCodeBufferSize) -+ << "Frame is too big for allocated code buffer"; -+ ASSERT_EQ(frame_sz_, -+ fread(compressed_frame_buf_, 1, frame_sz_, input_file_)) -+ << "Failed to read complete frame"; -+ } -+ } -+ -+ virtual const uint8_t *cxdata() const { -+ return end_of_file_ ? NULL : compressed_frame_buf_; -+ } -+ virtual const unsigned int frame_size() const { return frame_sz_; } -+ virtual const unsigned int frame_number() const { return frame_; } -+ -+ protected: -+ std::string file_name_; -+ FILE *input_file_; -+ uint8_t *compressed_frame_buf_; -+ unsigned int frame_sz_; -+ unsigned int frame_; -+ bool end_of_file_; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_IVF_VIDEO_SOURCE_H_ -diff --git a/test/keyframe_test.cc b/test/keyframe_test.cc -new file mode 100644 -index 0000000..d0c81df ---- /dev/null -+++ b/test/keyframe_test.cc -@@ -0,0 +1,145 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include -+#include -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace { -+ -+class KeyframeTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ kf_count_ = 0; -+ kf_count_max_ = INT_MAX; -+ kf_do_force_kf_ = false; -+ set_cpu_used_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, -+ ::libvpx_test::Encoder *encoder) { -+ if (kf_do_force_kf_) -+ frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF; -+ if (set_cpu_used_ && video->frame() == 1) -+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { -+ kf_pts_list_.push_back(pkt->data.frame.pts); -+ kf_count_++; -+ abort_ |= kf_count_ > kf_count_max_; -+ } -+ } -+ -+ bool kf_do_force_kf_; -+ int kf_count_; -+ int kf_count_max_; -+ std::vector kf_pts_list_; -+ int set_cpu_used_; -+}; -+ -+TEST_P(KeyframeTest, TestRandomVideoSource) { -+ // Validate that encoding the RandomVideoSource produces multiple keyframes. -+ // This validates the results of the TestDisableKeyframes test. -+ kf_count_max_ = 2; // early exit successful tests. -+ -+ ::libvpx_test::RandomVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // In realtime mode - auto placed keyframes are exceedingly rare, don't -+ // bother with this check if(GetParam() > 0) -+ if(GetParam() > 0) -+ EXPECT_GT(kf_count_, 1); -+} -+ -+TEST_P(KeyframeTest, TestDisableKeyframes) { -+ cfg_.kf_mode = VPX_KF_DISABLED; -+ kf_count_max_ = 1; // early exit failed tests. 
-+ -+ ::libvpx_test::RandomVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ EXPECT_EQ(1, kf_count_); -+} -+ -+TEST_P(KeyframeTest, TestForceKeyframe) { -+ cfg_.kf_mode = VPX_KF_DISABLED; -+ kf_do_force_kf_ = true; -+ -+ ::libvpx_test::DummyVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // verify that every third frame is a keyframe. -+ for (std::vector::const_iterator iter = kf_pts_list_.begin(); -+ iter != kf_pts_list_.end(); ++iter) { -+ ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter; -+ } -+} -+ -+TEST_P(KeyframeTest, TestKeyframeMaxDistance) { -+ cfg_.kf_max_dist = 25; -+ -+ ::libvpx_test::DummyVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // verify that keyframe interval matches kf_max_dist -+ for (std::vector::const_iterator iter = kf_pts_list_.begin(); -+ iter != kf_pts_list_.end(); ++iter) { -+ ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter; -+ } -+} -+ -+TEST_P(KeyframeTest, TestAutoKeyframe) { -+ cfg_.kf_mode = VPX_KF_AUTO; -+ kf_do_force_kf_ = false; -+ -+ // Force a deterministic speed step in Real Time mode, as the faster modes -+ // may not produce a keyframe like we expect. This is necessary when running -+ // on very slow environments (like Valgrind). The step -11 was determined -+ // experimentally as the fastest mode that still throws the keyframe. -+ if (deadline_ == VPX_DL_REALTIME) -+ set_cpu_used_ = -11; -+ -+ // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120. -+ // I check only the first 40 frames to make sure there's a keyframe at frame -+ // 0 and 30. -+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ 30, 1, 0, 40); -+ -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // In realtime mode - auto placed keyframes are exceedingly rare, don't -+ // bother with this check -+ if(GetParam() > 0) -+ EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes "; -+ -+ // Verify that keyframes match the file keyframes in the file. -+ for (std::vector::const_iterator iter = kf_pts_list_.begin(); -+ iter != kf_pts_list_.end(); ++iter) { -+ -+ if (deadline_ == VPX_DL_REALTIME && *iter > 0) -+ EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame " -+ << *iter; -+ else -+ EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter; -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES); -+} // namespace -diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc -new file mode 100644 -index 0000000..9227449 ---- /dev/null -+++ b/test/pp_filter_test.cc -@@ -0,0 +1,107 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
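All of the keyframe tests above reduce to the same bookkeeping: record the pts of every packet flagged VPX_FRAME_IS_KEY, then check the list against an expected cadence (every third frame when forcing, multiples of kf_max_dist, or the 30-frame scene cuts). A tiny sketch of that final check, using a hand-written pts list instead of a real encoder run:

#include <cstdio>
#include <vector>

int main() {
  // Pretend key-frame pts values collected by a FramePktHook-style callback.
  const std::vector<long> kf_pts = { 0, 25, 50, 75 };
  const long kf_max_dist = 25;  // the cadence the encoder was configured for
  bool ok = true;
  for (long pts : kf_pts) {
    if (pts % kf_max_dist != 0) {
      printf("Unexpected keyframe at frame %ld\n", pts);
      ok = false;
    }
  }
  printf("%s\n", ok ? "cadence ok" : "cadence violated");
  return 0;
}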
-+ */ -+#include "test/register_state_check.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+typedef void (*post_proc_func_t)(unsigned char *src_ptr, -+ unsigned char *dst_ptr, -+ int src_pixels_per_line, -+ int dst_pixels_per_line, -+ int cols, -+ unsigned char *flimit, -+ int size); -+ -+namespace { -+ -+class Vp8PostProcessingFilterTest -+ : public ::testing::TestWithParam {}; -+ -+// Test routine for the VP8 post-processing function -+// vp8_post_proc_down_and_across_mb_row_c. -+ -+TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { -+ // Size of the underlying data block that will be filtered. -+ const int block_width = 16; -+ const int block_height = 16; -+ -+ // 5-tap filter needs 2 padding rows above and below the block in the input. -+ const int input_width = block_width; -+ const int input_height = block_height + 4; -+ const int input_stride = input_width; -+ const int input_size = input_width * input_height; -+ -+ // Filter extends output block by 8 samples at left and right edges. -+ const int output_width = block_width + 16; -+ const int output_height = block_height; -+ const int output_stride = output_width; -+ const int output_size = output_width * output_height; -+ -+ uint8_t *const src_image = -+ reinterpret_cast(vpx_calloc(input_size, 1)); -+ uint8_t *const dst_image = -+ reinterpret_cast(vpx_calloc(output_size, 1)); -+ -+ // Pointers to top-left pixel of block in the input and output images. -+ uint8_t *const src_image_ptr = src_image + (input_stride << 1); -+ uint8_t *const dst_image_ptr = dst_image + 8; -+ uint8_t *const flimits = reinterpret_cast(vpx_memalign(16, block_width)); -+ (void)vpx_memset(flimits, 255, block_width); -+ -+ // Initialize pixels in the input: -+ // block pixels to value 1, -+ // border pixels to value 10. -+ (void)vpx_memset(src_image, 10, input_size); -+ uint8_t *pixel_ptr = src_image_ptr; -+ for (int i = 0; i < block_height; ++i) { -+ for (int j = 0; j < block_width; ++j) { -+ pixel_ptr[j] = 1; -+ } -+ pixel_ptr += input_stride; -+ } -+ -+ // Initialize pixels in the output to 99. -+ (void)vpx_memset(dst_image, 99, output_size); -+ -+ REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride, -+ output_stride, block_width, flimits, 16)); -+ -+ static const uint8_t expected_data[block_height] = { -+ 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4 -+ }; -+ -+ pixel_ptr = dst_image_ptr; -+ for (int i = 0; i < block_height; ++i) { -+ for (int j = 0; j < block_width; ++j) { -+ EXPECT_EQ(expected_data[i], pixel_ptr[j]) -+ << "Vp8PostProcessingFilterTest failed with invalid filter output"; -+ } -+ pixel_ptr += output_stride; -+ } -+ -+ vpx_free(src_image); -+ vpx_free(dst_image); -+ vpx_free(flimits); -+}; -+ -+INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest, -+ ::testing::Values(vp8_post_proc_down_and_across_mb_row_c)); -+ -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest, -+ ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2)); -+#endif -+ -+} // namespace -diff --git a/test/register_state_check.h b/test/register_state_check.h -new file mode 100644 -index 0000000..fb3f53b ---- /dev/null -+++ b/test/register_state_check.h -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
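Much of the post-processing test above is buffer geometry: the 5-tap down-and-across filter needs two padding rows above and below the 16x16 block, and the output block is widened by 8 samples on each side. The pointer arithmetic it depends on, pulled out on its own (sizes only, no filtering):

#include <cstdio>

int main() {
  const int block_width = 16, block_height = 16;
  const int input_stride = block_width;                       // no horizontal padding
  const int input_size = input_stride * (block_height + 4);   // 2 padding rows each side
  const int output_stride = block_width + 16;                 // 8 extra samples each side
  const int output_size = output_stride * block_height;
  // Top-left pixel of the block inside each buffer:
  const int src_offset = input_stride * 2;                    // skip the 2 padding rows
  const int dst_offset = 8;                                   // skip the left extension
  printf("in %d bytes (block at %d), out %d bytes (block at %d)\n",
         input_size, src_offset, output_size, dst_offset);
  return 0;
}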
-+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_ -+#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_ -+ -+#ifdef _WIN64 -+ -+#define _WIN32_LEAN_AND_MEAN -+#include -+#include -+ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace testing { -+namespace internal { -+ -+inline bool operator==(const M128A& lhs, const M128A& rhs) { -+ return (lhs.Low == rhs.Low && lhs.High == rhs.High); -+} -+ -+} // namespace internal -+} // namespace testing -+ -+namespace libvpx_test { -+ -+// Compares the state of xmm[6-15] at construction with their state at -+// destruction. These registers should be preserved by the callee on -+// Windows x64. -+// Usage: -+// { -+// RegisterStateCheck reg_check; -+// FunctionToVerify(); -+// } -+class RegisterStateCheck { -+ public: -+ RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); } -+ ~RegisterStateCheck() { EXPECT_TRUE(Check()); } -+ -+ private: -+ static bool StoreRegisters(CONTEXT* const context) { -+ const HANDLE this_thread = GetCurrentThread(); -+ EXPECT_TRUE(this_thread != NULL); -+ context->ContextFlags = CONTEXT_FLOATING_POINT; -+ const bool context_saved = GetThreadContext(this_thread, context) == TRUE; -+ EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError(); -+ return context_saved; -+ } -+ -+ // Compares the register state. Returns true if the states match. -+ bool Check() const { -+ if (!initialized_) return false; -+ CONTEXT post_context; -+ if (!StoreRegisters(&post_context)) return false; -+ -+ const M128A* xmm_pre = &pre_context_.Xmm6; -+ const M128A* xmm_post = &post_context.Xmm6; -+ for (int i = 6; i <= 15; ++i) { -+ EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; -+ ++xmm_pre; -+ ++xmm_post; -+ } -+ return !testing::Test::HasNonfatalFailure(); -+ } -+ -+ bool initialized_; -+ CONTEXT pre_context_; -+}; -+ -+#define REGISTER_STATE_CHECK(statement) do { \ -+ libvpx_test::RegisterStateCheck reg_check; \ -+ statement; \ -+} while (false) -+ -+} // namespace libvpx_test -+ -+#else // !_WIN64 -+ -+namespace libvpx_test { -+ -+class RegisterStateCheck {}; -+#define REGISTER_STATE_CHECK(statement) statement -+ -+} // namespace libvpx_test -+ -+#endif // _WIN64 -+ -+#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_ -diff --git a/test/resize_test.cc b/test/resize_test.cc -new file mode 100644 -index 0000000..c846157 ---- /dev/null -+++ b/test/resize_test.cc -@@ -0,0 +1,104 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
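RegisterStateCheck above is a scope guard: the constructor snapshots callee-saved state (xmm6 through xmm15 on Win64) and the destructor verifies it when the REGISTER_STATE_CHECK block ends. The same shape with the Win64 CONTEXT plumbing stripped out, guarding an ordinary variable instead of registers (illustrative only):

#include <cassert>
#include <cstdio>

// Stand-in for state a callee is required to preserve.
static int g_preserved_state = 42;

class StateCheck {            // snapshot on entry, verify on scope exit
 public:
  StateCheck() : saved_(g_preserved_state) {}
  ~StateCheck() { assert(g_preserved_state == saved_ && "state clobbered"); }
 private:
  int saved_;
};

#define STATE_CHECK(statement) do { StateCheck check; statement; } while (false)

static void WellBehaved() { /* leaves g_preserved_state alone */ }

int main() {
  STATE_CHECK(WellBehaved());  // passes; a clobbering callee would assert
  printf("state preserved\n");
  return 0;
}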
-+ */ -+#include -+#include -+#include "test/encode_test_driver.h" -+#include "test/video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace { -+ -+const unsigned int kInitialWidth = 320; -+const unsigned int kInitialHeight = 240; -+ -+unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { -+ if (frame < 10) -+ return val; -+ if (frame < 20) -+ return val / 2; -+ if (frame < 30) -+ return val * 2 / 3; -+ if (frame < 40) -+ return val / 4; -+ if (frame < 50) -+ return val * 7 / 8; -+ return val; -+} -+ -+class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { -+ public: -+ ResizingVideoSource() { -+ SetSize(kInitialWidth, kInitialHeight); -+ limit_ = 60; -+ } -+ -+ protected: -+ virtual void Next() { -+ ++frame_; -+ SetSize(ScaleForFrameNumber(frame_, kInitialWidth), -+ ScaleForFrameNumber(frame_, kInitialHeight)); -+ FillFrame(); -+ } -+}; -+ -+class ResizeTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ struct FrameInfo { -+ FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h) -+ : pts(_pts), w(_w), h(_h) {} -+ -+ vpx_codec_pts_t pts; -+ unsigned int w; -+ unsigned int h; -+ }; -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { -+ const unsigned char *buf = -+ reinterpret_cast(pkt->data.frame.buf); -+ const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff; -+ const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff; -+ -+ frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h)); -+ } -+ } -+ -+ std::vector< FrameInfo > frame_info_list_; -+}; -+ -+TEST_P(ResizeTest, TestExternalResizeWorks) { -+ ResizingVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ for (std::vector::iterator info = frame_info_list_.begin(); -+ info != frame_info_list_.end(); ++info) { -+ const vpx_codec_pts_t pts = info->pts; -+ const unsigned int expected_w = ScaleForFrameNumber(pts, kInitialWidth); -+ const unsigned int expected_h = ScaleForFrameNumber(pts, kInitialHeight); -+ -+ EXPECT_EQ(expected_w, info->w) -+ << "Frame " << pts << "had unexpected width"; -+ EXPECT_EQ(expected_h, info->h) -+ << "Frame " << pts << "had unexpected height"; -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES); -+} // namespace -diff --git a/test/sad_test.cc b/test/sad_test.cc -new file mode 100644 -index 0000000..5a0653b ---- /dev/null -+++ b/test/sad_test.cc -@@ -0,0 +1,253 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
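FramePktHook in the resize test above reads the coded dimensions straight out of the VP8 keyframe payload: after the 3-byte frame tag and the 9d 01 2a start code, bytes 6-9 carry 14-bit little-endian width and height (the top two bits hold scaling codes, hence the & 0x3fff). The same decode on a fabricated header:

#include <cstdint>
#include <cstdio>

int main() {
  // Made-up start of a VP8 keyframe payload: frame tag, start code, dimensions.
  const uint8_t buf[10] = { 0x00, 0x00, 0x00, 0x9d, 0x01, 0x2a,
                            0x40, 0x01,    // width  = 0x0140 = 320
                            0xf0, 0x00 };  // height = 0x00f0 = 240
  const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff;
  const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff;
  printf("%ux%u\n", w, h);                 // prints 320x240
  return 0;
}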
-+ */ -+ -+ -+#include -+#include -+#include -+ -+extern "C" { -+#include "./vpx_config.h" -+#include "./vpx_rtcd.h" -+#include "vp8/common/blockd.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+#include "test/util.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+ -+typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr, -+ int source_stride, -+ const unsigned char *reference_ptr, -+ int reference_stride, -+ unsigned int max_sad); -+ -+using libvpx_test::ACMRandom; -+ -+namespace { -+class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) { -+ public: -+ static void SetUpTestCase() { -+ source_data_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ reference_data_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(source_data_); -+ source_data_ = NULL; -+ vpx_free(reference_data_); -+ reference_data_ = NULL; -+ } -+ -+ protected: -+ static const int kDataAlignment = 16; -+ static const int kDataBufferSize = 16 * 32; -+ -+ virtual void SetUp() { -+ sad_fn_ = GET_PARAM(2); -+ height_ = GET_PARAM(1); -+ width_ = GET_PARAM(0); -+ source_stride_ = width_ * 2; -+ reference_stride_ = width_ * 2; -+ rnd_.Reset(ACMRandom::DeterministicSeed()); -+ } -+ -+ sad_m_by_n_fn_t sad_fn_; -+ virtual unsigned int SAD(unsigned int max_sad) { -+ unsigned int ret; -+ REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_, -+ reference_data_, reference_stride_, -+ max_sad)); -+ return ret; -+ } -+ -+ // Sum of Absolute Differences. Given two blocks, calculate the absolute -+ // difference between two pixels in the same relative location; accumulate. -+ unsigned int ReferenceSAD(unsigned int max_sad) { -+ unsigned int sad = 0; -+ -+ for (int h = 0; h < height_; ++h) { -+ for (int w = 0; w < width_; ++w) { -+ sad += abs(source_data_[h * source_stride_ + w] -+ - reference_data_[h * reference_stride_ + w]); -+ } -+ if (sad > max_sad) { -+ break; -+ } -+ } -+ return sad; -+ } -+ -+ void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) { -+ for (int h = 0; h < height_; ++h) { -+ for (int w = 0; w < width_; ++w) { -+ data[h * stride + w] = fill_constant; -+ } -+ } -+ } -+ -+ void FillRandom(uint8_t *data, int stride) { -+ for (int h = 0; h < height_; ++h) { -+ for (int w = 0; w < width_; ++w) { -+ data[h * stride + w] = rnd_.Rand8(); -+ } -+ } -+ } -+ -+ void CheckSad(unsigned int max_sad) { -+ unsigned int reference_sad, exp_sad; -+ -+ reference_sad = ReferenceSAD(max_sad); -+ exp_sad = SAD(max_sad); -+ -+ if (reference_sad <= max_sad) { -+ ASSERT_EQ(exp_sad, reference_sad); -+ } else { -+ // Alternative implementations are not required to check max_sad -+ ASSERT_GE(exp_sad, reference_sad); -+ } -+ } -+ -+ // Handle blocks up to 16x16 with stride up to 32 -+ int height_, width_; -+ static uint8_t* source_data_; -+ int source_stride_; -+ static uint8_t* reference_data_; -+ int reference_stride_; -+ -+ ACMRandom rnd_; -+}; -+ -+uint8_t* SADTest::source_data_ = NULL; -+uint8_t* SADTest::reference_data_ = NULL; -+ -+TEST_P(SADTest, MaxRef) { -+ FillConstant(source_data_, source_stride_, 0); -+ FillConstant(reference_data_, reference_stride_, 255); -+ CheckSad(UINT_MAX); -+} -+ -+TEST_P(SADTest, MaxSrc) { -+ FillConstant(source_data_, source_stride_, 255); -+ FillConstant(reference_data_, reference_stride_, 0); -+ CheckSad(UINT_MAX); -+} -+ -+TEST_P(SADTest, ShortRef) { -+ int tmp_stride = 
reference_stride_; -+ reference_stride_ >>= 1; -+ FillRandom(source_data_, source_stride_); -+ FillRandom(reference_data_, reference_stride_); -+ CheckSad(UINT_MAX); -+ reference_stride_ = tmp_stride; -+} -+ -+TEST_P(SADTest, UnalignedRef) { -+ // The reference frame, but not the source frame, may be unaligned for -+ // certain types of searches. -+ int tmp_stride = reference_stride_; -+ reference_stride_ -= 1; -+ FillRandom(source_data_, source_stride_); -+ FillRandom(reference_data_, reference_stride_); -+ CheckSad(UINT_MAX); -+ reference_stride_ = tmp_stride; -+} -+ -+TEST_P(SADTest, ShortSrc) { -+ int tmp_stride = source_stride_; -+ source_stride_ >>= 1; -+ FillRandom(source_data_, source_stride_); -+ FillRandom(reference_data_, reference_stride_); -+ CheckSad(UINT_MAX); -+ source_stride_ = tmp_stride; -+} -+ -+TEST_P(SADTest, MaxSAD) { -+ // Verify that, when max_sad is set, the implementation does not return a -+ // value lower than the reference. -+ FillConstant(source_data_, source_stride_, 255); -+ FillConstant(reference_data_, reference_stride_, 0); -+ CheckSad(128); -+} -+ -+using std::tr1::make_tuple; -+ -+const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c; -+const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c; -+const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c; -+const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c; -+const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c; -+INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_c), -+ make_tuple(8, 16, sad_8x16_c), -+ make_tuple(16, 8, sad_16x8_c), -+ make_tuple(8, 8, sad_8x8_c), -+ make_tuple(4, 4, sad_4x4_c))); -+ -+// ARM tests -+#if HAVE_MEDIA -+const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6; -+INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_armv6))); -+ -+#endif -+#if HAVE_NEON -+const sad_m_by_n_fn_t sad_16x16_neon = vp8_sad16x16_neon; -+const sad_m_by_n_fn_t sad_8x16_neon = vp8_sad8x16_neon; -+const sad_m_by_n_fn_t sad_16x8_neon = vp8_sad16x8_neon; -+const sad_m_by_n_fn_t sad_8x8_neon = vp8_sad8x8_neon; -+const sad_m_by_n_fn_t sad_4x4_neon = vp8_sad4x4_neon; -+INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_neon), -+ make_tuple(8, 16, sad_8x16_neon), -+ make_tuple(16, 8, sad_16x8_neon), -+ make_tuple(8, 8, sad_8x8_neon), -+ make_tuple(4, 4, sad_4x4_neon))); -+#endif -+ -+// X86 tests -+#if HAVE_MMX -+const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx; -+const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx; -+const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx; -+const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx; -+const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx; -+INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_mmx), -+ make_tuple(8, 16, sad_8x16_mmx), -+ make_tuple(16, 8, sad_16x8_mmx), -+ make_tuple(8, 8, sad_8x8_mmx), -+ make_tuple(4, 4, sad_4x4_mmx))); -+#endif -+#if HAVE_SSE2 -+const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt; -+const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt; -+const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt; -+const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt; -+const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt; -+INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_wmt), -+ make_tuple(8, 16, sad_8x16_wmt), -+ make_tuple(16, 8, sad_16x8_wmt), -+ make_tuple(8, 8, sad_8x8_wmt), -+ make_tuple(4, 4, sad_4x4_wmt))); -+#endif -+#if HAVE_SSSE3 -+const sad_m_by_n_fn_t sad_16x16_sse3 = 
vp8_sad16x16_sse3; -+INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_sse3))); -+#endif -+ -+} // namespace -diff --git a/test/set_roi.cc b/test/set_roi.cc -new file mode 100644 -index 0000000..3b6112e ---- /dev/null -+++ b/test/set_roi.cc -@@ -0,0 +1,182 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+extern "C" { -+#include "vp8/encoder/onyx_int.h" -+} -+ -+namespace { -+ -+TEST(Vp8RoiMapTest, ParameterCheck) { -+ int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; -+ int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; -+ unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 }; -+ -+ const int internalq_trans[] = { -+ 0, 1, 2, 3, 4, 5, 7, 8, -+ 9, 10, 12, 13, 15, 17, 18, 19, -+ 20, 21, 23, 24, 25, 26, 27, 28, -+ 29, 30, 31, 33, 35, 37, 39, 41, -+ 43, 45, 47, 49, 51, 53, 55, 57, -+ 59, 61, 64, 67, 70, 73, 76, 79, -+ 82, 85, 88, 91, 94, 97, 100, 103, -+ 106, 109, 112, 115, 118, 121, 124, 127, -+ }; -+ -+ // Initialize elements of cpi with valid defaults. -+ VP8_COMP cpi; -+ cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA; -+ cpi.cyclic_refresh_mode_enabled = 0; -+ cpi.mb.e_mbd.segmentation_enabled = 0; -+ cpi.mb.e_mbd.update_mb_segmentation_map = 0; -+ cpi.mb.e_mbd.update_mb_segmentation_data = 0; -+ cpi.common.mb_rows = 240 >> 4; -+ cpi.common.mb_cols = 320 >> 4; -+ const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols); -+ vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data)); -+ -+ // Segment map -+ cpi.segmentation_map = reinterpret_cast(vpx_calloc(mbs, 1)); -+ -+ // Allocate memory for the source memory map. -+ unsigned char *roi_map = -+ reinterpret_cast(vpx_calloc(mbs, 1)); -+ vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2)); -+ vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2)); -+ vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2)); -+ -+ // Do a test call with valid parameters. -+ int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, delta_q, delta_lf, -+ threshold); -+ EXPECT_EQ(0, roi_retval) -+ << "vp8_set_roimap roi failed with default test parameters"; -+ -+ // Check that the values in the cpi structure get set as expected. -+ if (roi_retval == 0) { -+ // Check that the segment map got set. -+ const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs); -+ EXPECT_EQ(0, mapcompare) << "segment map error"; -+ -+ // Check the q deltas (note the need to translate into -+ // the interanl range of 0-127. 
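The SAD tests a little further up validate every optimized kernel against ReferenceSAD(): a plain double loop over absolute pixel differences that may stop at a row boundary once the running total passes max_sad. Exact agreement is therefore only demanded when the true SAD is within max_sad; otherwise any value at least as large as the reference is accepted. A stand-alone copy of that reference loop:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static unsigned int ReferenceSad(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 int width, int height, unsigned int max_sad) {
  unsigned int sad = 0;
  for (int h = 0; h < height; ++h) {
    for (int w = 0; w < width; ++w)
      sad += abs(src[h * src_stride + w] - ref[h * ref_stride + w]);
    if (sad > max_sad) break;   // early per-row bail-out
  }
  return sad;
}

int main() {
  uint8_t src[16 * 16], ref[16 * 16];
  for (int i = 0; i < 16 * 16; ++i) { src[i] = 0; ref[i] = 255; }
  // Worst-case 16x16 SAD: 256 * 255 = 65280.
  printf("%u\n", ReferenceSad(src, 16, ref, 16, 16, 16, ~0u));
  return 0;
}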
-+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { -+ const int transq = internalq_trans[abs(delta_q[i])]; -+ if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) { -+ EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i]) -+ << "segment delta_q error"; -+ break; -+ } -+ } -+ -+ // Check the loop filter deltas -+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { -+ if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) { -+ EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i]) -+ << "segment delta_lf error"; -+ break; -+ } -+ } -+ -+ // Check the breakout thresholds -+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { -+ unsigned int breakout = -+ static_cast(cpi.segment_encode_breakout[i]); -+ -+ if (threshold[i] != breakout) { -+ EXPECT_EQ(threshold[i], breakout) -+ << "breakout threshold error"; -+ break; -+ } -+ } -+ -+ // Segmentation, and segmentation update flages should be set. -+ EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled) -+ << "segmentation_enabled error"; -+ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map) -+ << "update_mb_segmentation_map error"; -+ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data) -+ << "update_mb_segmentation_data error"; -+ -+ -+ // Try a range of delta q and lf parameters (some legal, some not) -+ for (int i = 0; i < 1000; ++i) { -+ int rand_deltas[4]; -+ int deltas_valid; -+ rand_deltas[0] = (rand() % 160) - 80; -+ rand_deltas[1] = (rand() % 160) - 80; -+ rand_deltas[2] = (rand() % 160) - 80; -+ rand_deltas[3] = (rand() % 160) - 80; -+ -+ deltas_valid = ((abs(rand_deltas[0]) <= 63) && -+ (abs(rand_deltas[1]) <= 63) && -+ (abs(rand_deltas[2]) <= 63) && -+ (abs(rand_deltas[3]) <= 63)) ? 0 : -1; -+ -+ // Test with random delta q values. -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, rand_deltas, -+ delta_lf, threshold); -+ EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error"; -+ -+ // One delta_q error shown at a time -+ if (deltas_valid != roi_retval) -+ break; -+ -+ // Test with random loop filter values. -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, delta_q, -+ rand_deltas, threshold); -+ EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error"; -+ -+ // One delta loop filter error shown at a time -+ if (deltas_valid != roi_retval) -+ break; -+ } -+ -+ // Test that we report and error if cyclic refresh is enabled. -+ cpi.cyclic_refresh_mode_enabled = 1; -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, delta_q, -+ delta_lf, threshold); -+ EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error"; -+ cpi.cyclic_refresh_mode_enabled = 0; -+ -+ // Test invalid number of rows or colums. -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1, -+ cpi.common.mb_cols, delta_q, -+ delta_lf, threshold); -+ EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error"; -+ -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols - 1, delta_q, -+ delta_lf, threshold); -+ EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error"; -+ } -+ -+ // Free allocated memory -+ if (cpi.segmentation_map) -+ vpx_free(cpi.segmentation_map); -+ if (roi_map) -+ vpx_free(roi_map); -+}; -+ -+} // namespace -diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc -new file mode 100644 -index 0000000..c9dcceb ---- /dev/null -+++ b/test/sixtap_predict_test.cc -@@ -0,0 +1,224 @@ -+/* -+* Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
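The ROI-map test above hands vp8_set_roimap() randomly generated segment deltas and expects the call to fail whenever any |delta| exceeds 63, the range the test treats as legal for quantizer and loop-filter adjustments. The validity predicate that expectation is built from is just (hypothetical helper name):

#include <cstdio>
#include <cstdlib>

// A segment delta set is acceptable only if every entry fits in [-63, 63],
// matching the range check exercised by the random-delta loop above.
static bool DeltasValid(const int deltas[4]) {
  for (int i = 0; i < 4; ++i)
    if (abs(deltas[i]) > 63) return false;
  return true;
}

int main() {
  const int legal[4] = { -2, -25, 0, 31 };
  const int illegal[4] = { -2, -25, 0, 80 };
  printf("legal:   %s\n", DeltasValid(legal) ? "accepted" : "rejected");
  printf("illegal: %s\n", DeltasValid(illegal) ? "accepted" : "rejected");
  return 0;
}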
-+* -+* Use of this source code is governed by a BSD-style license -+* that can be found in the LICENSE file in the root of the source -+* tree. An additional intellectual property rights grant can be found -+* in the file PATENTS. All contributing project authors may -+* be found in the AUTHORS file in the root of the source tree. -+*/ -+ -+#include -+#include -+#include -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+#include "test/util.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+extern "C" { -+#include "./vpx_config.h" -+#include "./vpx_rtcd.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+namespace { -+ -+typedef void (*sixtap_predict_fn_t)(uint8_t *src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ uint8_t *dst_ptr, -+ int dst_pitch); -+ -+class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) { -+ public: -+ static void SetUpTestCase() { -+ src_ = reinterpret_cast(vpx_memalign(kDataAlignment, kSrcSize)); -+ dst_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); -+ dst_c_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(src_); -+ src_ = NULL; -+ vpx_free(dst_); -+ dst_ = NULL; -+ vpx_free(dst_c_); -+ dst_c_ = NULL; -+ } -+ -+ protected: -+ // Make test arrays big enough for 16x16 functions. Six-tap filters -+ // need 5 extra pixels outside of the macroblock. -+ static const int kSrcStride = 21; -+ static const int kDstStride = 16; -+ static const int kDataAlignment = 16; -+ static const int kSrcSize = kSrcStride * kSrcStride + 1; -+ static const int kDstSize = kDstStride * kDstStride; -+ -+ virtual void SetUp() { -+ width_ = GET_PARAM(0); -+ height_ = GET_PARAM(1); -+ sixtap_predict_ = GET_PARAM(2); -+ memset(src_, 0, sizeof(src_)); -+ memset(dst_, 0, sizeof(dst_)); -+ memset(dst_c_, 0, sizeof(dst_c_)); -+ } -+ -+ int width_; -+ int height_; -+ sixtap_predict_fn_t sixtap_predict_; -+ // The src stores the macroblock we will filter on, and makes it 1 byte larger -+ // in order to test unaligned access. The result is stored in dst and dst_c(c -+ // reference code result). 
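The 21-pixel source stride above exists because a six-tap interpolation filter reads two pixels before and three after each output position, the "5 extra pixels outside of the macroblock" mentioned in the comment. Below is a rough single-pass horizontal sketch; the kernel is illustrative only (the real VP8 coefficient tables are not reproduced here), and the (sum + 64) >> 7 rounding assumes taps that sum to 128:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One horizontal 6-tap interpolation at position x: taps cover x-2 .. x+3.
static uint8_t SixTapAt(const uint8_t *row, int x) {
  static const int kKernel[6] = { 0, -6, 123, 12, -1, 0 };  // sums to 128
  int sum = 0;
  for (int k = 0; k < 6; ++k)
    sum += kKernel[k] * row[x - 2 + k];
  const int rounded = (sum + 64) >> 7;                      // /128 with rounding
  return static_cast<uint8_t>(std::min(255, std::max(0, rounded)));
}

int main() {
  uint8_t row[21];
  for (int i = 0; i < 21; ++i) row[i] = static_cast<uint8_t>(10 * i);
  // Interpolate at x = 5; guard pixels at 3, 4 and 6, 7, 8 are available.
  printf("%d\n", SixTapAt(row, 5));
  return 0;
}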
-+ static uint8_t* src_; -+ static uint8_t* dst_; -+ static uint8_t* dst_c_; -+}; -+ -+uint8_t* SixtapPredictTest::src_ = NULL; -+uint8_t* SixtapPredictTest::dst_ = NULL; -+uint8_t* SixtapPredictTest::dst_c_ = NULL; -+ -+TEST_P(SixtapPredictTest, TestWithPresetData) { -+ // Test input -+ static const uint8_t test_data[kSrcSize] = { -+ 216, 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177, -+ 79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120, -+ 48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32, -+ 182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124, -+ 148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10, -+ 166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153, -+ 167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184, -+ 114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148, -+ 182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128, -+ 224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76, -+ 148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69, -+ 176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 238, 249, 176, 58, 142, -+ 64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181, -+ 235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42, -+ 148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184, -+ 35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77, -+ 209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221, -+ 223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170, -+ 24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13, -+ 93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69, -+ 49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215, -+ 135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36, -+ 119, 206, 231, 231, 11, 32, 209, 83, 27, 229, 204, 149, 155, 83, 109, 35, -+ 93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52, -+ 53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83, -+ 198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52, -+ 134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202, -+ 75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16 -+ }; -+ -+ // Expected result -+ static const uint8_t expected_dst[kDstSize] = { -+ 117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38, -+ 105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79, -+ 208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102, -+ 159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231, -+ 208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184, -+ 100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133, -+ 113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240, -+ 255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185, -+ 200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189, -+ 76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191, -+ 185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193, -+ 161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95, -+ 15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196, -+ 84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 
12, 131, 166, -+ 189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180, -+ 224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75, -+ 136, 85, 189, 46, 71, 200, 90 -+ }; -+ -+ uint8_t *src = const_cast(test_data); -+ -+ REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride, -+ 2, 2, dst_, kDstStride)); -+ -+ for (int i = 0; i < height_; ++i) -+ for (int j = 0; j < width_; ++j) -+ ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j]) -+ << "i==" << (i * width_ + j); -+} -+ -+using libvpx_test::ACMRandom; -+ -+TEST_P(SixtapPredictTest, TestWithRandomData) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ for (int i = 0; i < kSrcSize; ++i) -+ src_[i] = rnd.Rand8(); -+ -+ // Run tests for all possible offsets. -+ for (int xoffset = 0; xoffset < 8; ++xoffset) { -+ for (int yoffset = 0; yoffset < 8; ++yoffset) { -+ // Call c reference function. -+ // Move start point to next pixel to test if the function reads -+ // unaligned data correctly. -+ vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, -+ xoffset, yoffset, dst_c_, kDstStride); -+ -+ // Run test. -+ REGISTER_STATE_CHECK( -+ sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, -+ xoffset, yoffset, dst_, kDstStride)); -+ -+ for (int i = 0; i < height_; ++i) -+ for (int j = 0; j < width_; ++j) -+ ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j]) -+ << "i==" << (i * width_ + j); -+ } -+ } -+} -+ -+using std::tr1::make_tuple; -+ -+const sixtap_predict_fn_t sixtap_16x16_c = vp8_sixtap_predict16x16_c; -+const sixtap_predict_fn_t sixtap_8x8_c = vp8_sixtap_predict8x8_c; -+const sixtap_predict_fn_t sixtap_8x4_c = vp8_sixtap_predict8x4_c; -+const sixtap_predict_fn_t sixtap_4x4_c = vp8_sixtap_predict4x4_c; -+INSTANTIATE_TEST_CASE_P( -+ C, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_c), -+ make_tuple(8, 8, sixtap_8x8_c), -+ make_tuple(8, 4, sixtap_8x4_c), -+ make_tuple(4, 4, sixtap_4x4_c))); -+#if HAVE_MMX -+const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx; -+const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx; -+const sixtap_predict_fn_t sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx; -+const sixtap_predict_fn_t sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx; -+INSTANTIATE_TEST_CASE_P( -+ MMX, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_mmx), -+ make_tuple(8, 8, sixtap_8x8_mmx), -+ make_tuple(8, 4, sixtap_8x4_mmx), -+ make_tuple(4, 4, sixtap_4x4_mmx))); -+#endif -+#if HAVE_SSE2 -+const sixtap_predict_fn_t sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2; -+const sixtap_predict_fn_t sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2; -+const sixtap_predict_fn_t sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2; -+INSTANTIATE_TEST_CASE_P( -+ SSE2, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_sse2), -+ make_tuple(8, 8, sixtap_8x8_sse2), -+ make_tuple(8, 4, sixtap_8x4_sse2))); -+#endif -+#if HAVE_SSSE3 -+const sixtap_predict_fn_t sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3; -+const sixtap_predict_fn_t sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3; -+const sixtap_predict_fn_t sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3; -+const sixtap_predict_fn_t sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3; -+INSTANTIATE_TEST_CASE_P( -+ SSSE3, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_ssse3), -+ make_tuple(8, 8, sixtap_8x8_ssse3), -+ make_tuple(8, 4, sixtap_8x4_ssse3), -+ make_tuple(4, 4, sixtap_4x4_ssse3))); 
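The subtract test that follows a little further down checks a simple round trip: vp8_subtract_b stores source minus predictor into src_diff, and adding the predictor back must reproduce every source pixel exactly. A toy version of that round trip on a 4x4 block (plain loops, no libvpx types):

#include <cstdint>
#include <cstdio>

int main() {
  const int kSize = 4, kStride = 4;
  uint8_t source[16], predictor[16];
  int16_t diff[16];
  for (int i = 0; i < 16; ++i) { source[i] = 100 + i; predictor[i] = 90; }

  // Residual: what the encoder would go on to transform and code.
  for (int r = 0; r < kSize; ++r)
    for (int c = 0; c < kSize; ++c)
      diff[r * kStride + c] =
          source[r * kStride + c] - predictor[r * kStride + c];

  // Round trip: predictor + residual must give the source back exactly.
  bool ok = true;
  for (int i = 0; i < 16; ++i)
    if (source[i] != predictor[i] + diff[i]) ok = false;
  printf("%s\n", ok ? "round trip ok" : "round trip FAILED");
  return 0;
}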
-+#endif -+} // namespace -diff --git a/test/subtract_test.cc b/test/subtract_test.cc -new file mode 100644 -index 0000000..60acf81 ---- /dev/null -+++ b/test/subtract_test.cc -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vp8/common/blockd.h" -+#include "vp8/encoder/block.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch); -+ -+namespace { -+ -+class SubtractBlockTest : public ::testing::TestWithParam {}; -+ -+using libvpx_test::ACMRandom; -+ -+TEST_P(SubtractBlockTest, SimpleSubtract) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ BLOCK be; -+ BLOCKD bd; -+ // in libvpx, this stride is always 16 -+ const int kDiffPredStride = 16; -+ const int kSrcStride[] = {32, 16, 8, 4, 0}; -+ const int kBlockWidth = 4; -+ const int kBlockHeight = 4; -+ -+ // Allocate... align to 16 for mmx/sse tests -+ uint8_t *source = reinterpret_cast( -+ vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source))); -+ be.src_diff = reinterpret_cast( -+ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff))); -+ bd.predictor = reinterpret_cast( -+ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor))); -+ -+ for(int i = 0; kSrcStride[i] > 0; ++i) { -+ // start at block0 -+ be.src = 0; -+ be.base_src = &source; -+ be.src_stride = kSrcStride[i]; -+ -+ // set difference -+ int16_t *src_diff = be.src_diff; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ src_diff[c] = 0xa5a5; -+ } -+ src_diff += kDiffPredStride; -+ } -+ -+ // set destination -+ uint8_t *base_src = *be.base_src; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ base_src[c] = rnd.Rand8(); -+ } -+ base_src += be.src_stride; -+ } -+ -+ // set predictor -+ uint8_t *predictor = bd.predictor; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ predictor[c] = rnd.Rand8(); -+ } -+ predictor += kDiffPredStride; -+ } -+ -+ REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride)); -+ -+ base_src = *be.base_src; -+ src_diff = be.src_diff; -+ predictor = bd.predictor; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r -+ << ", c = " << c; -+ } -+ src_diff += kDiffPredStride; -+ predictor += kDiffPredStride; -+ base_src += be.src_stride; -+ } -+ } -+ vpx_free(be.src_diff); -+ vpx_free(source); -+ vpx_free(bd.predictor); -+} -+ -+INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest, -+ ::testing::Values(vp8_subtract_b_c)); -+ -+#if HAVE_MMX -+INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest, -+ ::testing::Values(vp8_subtract_b_mmx)); -+#endif -+ -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest, -+ ::testing::Values(vp8_subtract_b_sse2)); -+#endif -+ -+} // namespace -diff --git a/test/test-data.sha1 b/test/test-data.sha1 -new file mode 
100644 -index 0000000..c1b6a83 ---- /dev/null -+++ b/test/test-data.sha1 -@@ -0,0 +1,123 @@ -+d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv -+5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf -+65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf -+906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf -+ec144b1af53af895db78355785650b96dd3f0ade vp80-00-comprehensive-004.ivf -+afc7091785c62f1c121c4554a2830c30704587d9 vp80-00-comprehensive-005.ivf -+42ea9d55c818145d06a9b633b8e85c6a6164fd3e vp80-00-comprehensive-006.ivf -+e5b3a73ab79fe024c14309d653d6bed92902ee3b vp80-00-comprehensive-007.ivf -+f3c50a58875930adfb84525c0ef59d7e4c08540c vp80-00-comprehensive-008.ivf -+4b2841fdb83db51ae322096ae468bbb9dc2c8362 vp80-00-comprehensive-009.ivf -+efbff736e3a91ab6a98c5bc2dce65d645944c7b1 vp80-00-comprehensive-010.ivf -+6b315102cae008d22a3d2c231be92cb704a222f8 vp80-00-comprehensive-011.ivf -+f3214a4fea14c2d5ec689936c1613f274c859ee8 vp80-00-comprehensive-012.ivf -+e4094e96d308c8a35b74c480a43d853c5294cd34 vp80-00-comprehensive-013.ivf -+5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a vp80-00-comprehensive-014.ivf -+e8467688ddf26b5000664f904faf0d70506aa653 vp80-00-comprehensive-015.ivf -+aab55582337dfd2a39ff54fb2576a91910d49337 vp80-00-comprehensive-016.ivf -+1ba24724f80203c9bae4f1d0f99d534721980016 vp80-00-comprehensive-017.ivf -+143a15512b46f436280ddb4d0e6411eb4af434f2 vp80-00-comprehensive-018.ivf -+c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b vp80-01-intra-1400.ivf -+f383955229afe3408453e316d11553d923ca60d5 vp80-01-intra-1411.ivf -+84e1f4343f174c9f3c83f834bac3196fb325bf2c vp80-01-intra-1416.ivf -+fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 vp80-01-intra-1417.ivf -+71ea772d3e9d315b8cbecf41207b8a237c34853b vp80-02-inter-1402.ivf -+d85dbc4271525dcd128c503f936fe69091d1f8d0 vp80-02-inter-1412.ivf -+d4e5d3ad56511867d025f93724d090f92ba6ec3d vp80-02-inter-1418.ivf -+91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa vp80-02-inter-1424.ivf -+17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 vp80-03-segmentation-01.ivf -+3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb vp80-03-segmentation-02.ivf -+c156778d5340967d4b369c490848076e92f1f875 vp80-03-segmentation-03.ivf -+d25dcff6c60e87a1af70945b8911b6b4998533b0 vp80-03-segmentation-04.ivf -+362baba2ce454c9db21218f35e81c27a5ed0b730 vp80-03-segmentation-1401.ivf -+d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 vp80-03-segmentation-1403.ivf -+033adf7f3a13836a3f1cffcb87c1972900f2b5c6 vp80-03-segmentation-1407.ivf -+4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f vp80-03-segmentation-1408.ivf -+f37a62b197c2600d75e0ccfbb31b60efdedac251 vp80-03-segmentation-1409.ivf -+eb25bd7bfba5b2f6935018a930f42d123b1e7fcd vp80-03-segmentation-1410.ivf -+b9d5c436663a30c27cfff84b53a002e501258843 vp80-03-segmentation-1413.ivf -+6da92b9d1a180cc3a8afe348ab12258f5a37be1a vp80-03-segmentation-1414.ivf -+a4f5842602886bd669f115f93d8a35c035cb0948 vp80-03-segmentation-1415.ivf -+f295dceb8ef278b77251b3f9df8aee22e161d547 vp80-03-segmentation-1425.ivf -+198dbf9f36f733200e432664cc8c5752d59779de vp80-03-segmentation-1426.ivf -+7704804e32f5de976803929934a7fafe101ac7b0 vp80-03-segmentation-1427.ivf -+831ccd862ea95ca025d2f3bd8b88678752f5416d vp80-03-segmentation-1432.ivf -+b3c11978529289f9109f2766fcaba3ebc40e11ef vp80-03-segmentation-1435.ivf -+a835a731f5520ebfc1002c40121264d0020559ac vp80-03-segmentation-1436.ivf -+1d1732942f773bb2a5775fcb9689b1579ce28eab vp80-03-segmentation-1437.ivf -+db04799adfe089dfdf74dbd43cc05ede7161f99e vp80-03-segmentation-1441.ivf 
-+7caf39b3f20cfd52b998210878062e52a5edf1e6 vp80-03-segmentation-1442.ivf -+3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 vp80-04-partitions-1404.ivf -+93cc323b6b6867f1b12dd48773424549c6960a6b vp80-04-partitions-1405.ivf -+047eedb14b865bdac8a3538e63801054e0295e9c vp80-04-partitions-1406.ivf -+0f1233bd2bc33f56ce5e495dbd455d122339f384 vp80-05-sharpness-1428.ivf -+51767fc136488a9535c2a4c38067c542ee2048df vp80-05-sharpness-1429.ivf -+9805aa107672de25d6fb8c35e20d06deca5efe18 vp80-05-sharpness-1430.ivf -+61db6b965f9c27aebe71b85bf2d5877e58e4bbdf vp80-05-sharpness-1431.ivf -+10420d266290d2923555f84af38eeb96edbd3ae8 vp80-05-sharpness-1433.ivf -+3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 vp80-05-sharpness-1434.ivf -+c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 vp80-05-sharpness-1438.ivf -+aff51d865c2621b60510459244ea83e958e4baed vp80-05-sharpness-1439.ivf -+da386e72b19b5485a6af199c5eb60ef25e510dd1 vp80-05-sharpness-1440.ivf -+6759a095203d96ccd267ce09b1b050b8cc4c2f1f vp80-05-sharpness-1443.ivf -+db55ec7fd02c864ba996ff060b25b1e08611330b vp80-00-comprehensive-001.ivf.md5 -+29db0ad011cba1e45f856d5623cd38dac3e3bf19 vp80-00-comprehensive-002.ivf.md5 -+e84f258f69e173e7d68f8f8c037a0a3766902182 vp80-00-comprehensive-003.ivf.md5 -+eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 vp80-00-comprehensive-004.ivf.md5 -+4206f71c94894bd5b5b376f6c09b3817dbc65206 vp80-00-comprehensive-005.ivf.md5 -+4f89b356f6f2fecb928f330a10f804f00f5325f5 vp80-00-comprehensive-006.ivf.md5 -+2813236a32964dd8007e17648bcf035a20fcda6c vp80-00-comprehensive-007.ivf.md5 -+10746c72098f872803c900e17c5680e451f5f498 vp80-00-comprehensive-008.ivf.md5 -+39a23d0692ce64421a7bb7cdf6ccec5928d37fff vp80-00-comprehensive-009.ivf.md5 -+f6e3de8931a0cc659bda8fbc14050346955e72d4 vp80-00-comprehensive-010.ivf.md5 -+101683ec195b6e944f7cd1e468fc8921439363e6 vp80-00-comprehensive-011.ivf.md5 -+1f592751ce46d8688998fa0fa4fbdcda0fd4058c vp80-00-comprehensive-012.ivf.md5 -+6066176f90ca790251e795fca1a5797d59999841 vp80-00-comprehensive-013.ivf.md5 -+2656da94ba93691f23edc4d60b3a09e2be46c217 vp80-00-comprehensive-014.ivf.md5 -+c6e0d5f5d61460c8ac8edfa4e701f10312c03133 vp80-00-comprehensive-015.ivf.md5 -+ee60fee501d8493e34e8d6a1fe315b51ed09b24a vp80-00-comprehensive-016.ivf.md5 -+9f1914ceffcad4546c0a29de3ef591d8bea304dc vp80-00-comprehensive-017.ivf.md5 -+e0305178fe288a9fd8082b39e2d03181edb19054 vp80-00-comprehensive-018.ivf.md5 -+612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 vp80-01-intra-1400.ivf.md5 -+48ea06097ac8269c5e8c2131d3d0639f431fcf0e vp80-01-intra-1411.ivf.md5 -+6e2ab4e7677ad0ba868083ca6bc387ee922b400c vp80-01-intra-1416.ivf.md5 -+eca0a90348959ce3854142f8d8641b13050e8349 vp80-01-intra-1417.ivf.md5 -+920feea203145d5c2258a91c4e6991934a79a99e vp80-02-inter-1402.ivf.md5 -+f71d97909fe2b3dd65be7e1f56c72237f0cef200 vp80-02-inter-1412.ivf.md5 -+e911254569a30bbb2a237ff8b79f69ed9da0672d vp80-02-inter-1418.ivf.md5 -+58c789c50c9bb9cc90580bed291164a0939d28ba vp80-02-inter-1424.ivf.md5 -+ff3e2f441327b9c20a0b37c524e0f5a48a36de7b vp80-03-segmentation-01.ivf.md5 -+0791f417f076a542ae66fbc3426ab4d94cbd6c75 vp80-03-segmentation-02.ivf.md5 -+722e50f1a6a91c34302d68681faffc1c26d1cc57 vp80-03-segmentation-03.ivf.md5 -+c701f1885bcfb27fb8e70cc65606b289172ef889 vp80-03-segmentation-04.ivf.md5 -+f79bc9ec189a2b4807632a3d0c5bf04a178b5300 vp80-03-segmentation-1401.ivf.md5 -+b9aa4c74c0219b639811c44760d0b24cd8bb436a vp80-03-segmentation-1403.ivf.md5 -+70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 vp80-03-segmentation-1407.ivf.md5 -+265f962ee781531f9a93b9309461316fd32b2a1d vp80-03-segmentation-1408.ivf.md5 
-+0c4ecbbd6dc042d30e626d951b65f460dd6cd563 vp80-03-segmentation-1409.ivf.md5 -+cf779af36a937f06570a0fca9db64ba133451dee vp80-03-segmentation-1410.ivf.md5 -+0e6c5036d51ab078842f133934926c598a9cff02 vp80-03-segmentation-1413.ivf.md5 -+eb3930aaf229116c80d507516c34759c3f6cdf69 vp80-03-segmentation-1414.ivf.md5 -+123d6c0f72ee87911c4ae7538e87b7d163b22d6c vp80-03-segmentation-1415.ivf.md5 -+e70551d1a38920e097a5d8782390b79ecaeb7505 vp80-03-segmentation-1425.ivf.md5 -+44e8f4117e46dbb302b2cfd81171cc1a1846e431 vp80-03-segmentation-1426.ivf.md5 -+52636e54aee5f95bbace37021bd67de5db767e9a vp80-03-segmentation-1427.ivf.md5 -+b1ad3eff20215c28e295b15ef3636ed926d59cba vp80-03-segmentation-1432.ivf.md5 -+24c22a552fa28a90e5978f67f57181cc2d7546d7 vp80-03-segmentation-1435.ivf.md5 -+96c49c390abfced18a7a8c9b9ea10af778e10edb vp80-03-segmentation-1436.ivf.md5 -+f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5 -+1c0700ca27c9b0090a7747a4b0b4dc21d1843181 vp80-03-segmentation-1441.ivf.md5 -+81d4f23ca32667ee958bae579c8f5e97ba72eb97 vp80-03-segmentation-1442.ivf.md5 -+272efcef07a3a30fbca51bfd566063d8258ec0be vp80-04-partitions-1404.ivf.md5 -+66ed219ab812ac801b256d35cf495d193d4cf478 vp80-04-partitions-1405.ivf.md5 -+36083f37f56f502bd60ec5e07502ee9e6b8699b0 vp80-04-partitions-1406.ivf.md5 -+6ca909bf168a64c09415626294665dc1be3d1973 vp80-05-sharpness-1428.ivf.md5 -+1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a vp80-05-sharpness-1429.ivf.md5 -+71bcbe5357d36a19df5b07fbe3e27bffa8893f0a vp80-05-sharpness-1430.ivf.md5 -+89a09b1dffce2d55770a89e58d9925c70ef79bf8 vp80-05-sharpness-1431.ivf.md5 -+08444a18b4e6ba3450c0796dd728d48c399a2dc9 vp80-05-sharpness-1433.ivf.md5 -+6d6223719a90c13e848aa2a8a6642098cdb5977a vp80-05-sharpness-1434.ivf.md5 -+41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5 -+086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5 -+d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5 -+8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5 -\ No newline at end of file -diff --git a/test/test.mk b/test/test.mk -new file mode 100644 -index 0000000..982be5b ---- /dev/null -+++ b/test/test.mk -@@ -0,0 +1,179 @@ -+LIBVPX_TEST_SRCS-yes += acm_random.h -+LIBVPX_TEST_SRCS-yes += register_state_check.h -+LIBVPX_TEST_SRCS-yes += test.mk -+LIBVPX_TEST_SRCS-yes += test_libvpx.cc -+LIBVPX_TEST_SRCS-yes += util.h -+LIBVPX_TEST_SRCS-yes += video_source.h -+ -+## -+## BLACK BOX TESTS -+## -+## Black box tests only use the public API. 
-+## -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc -+ -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc -+## -+## WHITE BOX TESTS -+## -+## Whitebox tests invoke functions not exposed via the public API. Certain -+## shared library builds don't make these functions accessible. -+## -+ifeq ($(CONFIG_SHARED),) -+ -+# These tests require both the encoder and decoder to be built. -+ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) -+LIBVPX_TEST_SRCS-yes += boolcoder_test.cc -+endif -+ -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc -+LIBVPX_TEST_SRCS-yes += idctllm_test.cc -+LIBVPX_TEST_SRCS-yes += intrapred_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc -+LIBVPX_TEST_SRCS-yes += sad_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc -+LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc -+ -+endif -+ -+ -+## -+## TEST DATA -+## -+LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf 
-+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5 
-+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += 
vp80-05-sharpness-1434.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5 -diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc -new file mode 100644 -index 0000000..cfd5d28 ---- /dev/null -+++ b/test/test_libvpx.cc -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include -+#include "vpx_config.h" -+#if ARCH_X86 || ARCH_X86_64 -+extern "C" { -+#include "vpx_ports/x86.h" -+} -+#endif -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+static void append_gtest_filter(const char *str) { -+ std::string filter = ::testing::FLAGS_gtest_filter; -+ filter += str; -+ ::testing::FLAGS_gtest_filter = filter; -+} -+ -+int main(int argc, char **argv) { -+ ::testing::InitGoogleTest(&argc, argv); -+ -+#if ARCH_X86 || ARCH_X86_64 -+ const int simd_caps = x86_simd_caps(); -+ if(!(simd_caps & HAS_MMX)) -+ append_gtest_filter(":-MMX/*"); -+ if(!(simd_caps & HAS_SSE)) -+ append_gtest_filter(":-SSE/*"); -+ if(!(simd_caps & HAS_SSE2)) -+ append_gtest_filter(":-SSE2/*"); -+ if(!(simd_caps & HAS_SSE3)) -+ append_gtest_filter(":-SSE3/*"); -+ if(!(simd_caps & HAS_SSSE3)) -+ append_gtest_filter(":-SSSE3/*"); -+ if(!(simd_caps & HAS_SSE4_1)) -+ append_gtest_filter(":-SSE4_1/*"); -+#endif -+ -+ return RUN_ALL_TESTS(); -+} -diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc -new file mode 100644 -index 0000000..938457b ---- /dev/null -+++ b/test/test_vector_test.cc -@@ -0,0 +1,144 @@ -+/* -+ Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ -+ Use of this source code is governed by a BSD-style license -+ that can be found in the LICENSE file in the root of the source -+ tree. An additional intellectual property rights grant can be found -+ in the file PATENTS. All contributing project authors may -+ be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include -+#include -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/decode_test_driver.h" -+#include "test/ivf_video_source.h" -+extern "C" { -+#include "./md5_utils.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+#if defined(_MSC_VER) -+#define snprintf sprintf_s -+#endif -+ -+namespace { -+// There are 61 test vectors in total. 
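The SIMD gating in test_libvpx.cc above works through GoogleTest's filter string: tests are instantiated under a prefix naming the instruction set, and appending an exclusion such as ":-SSE2/*" hides every test in that instantiation when the CPU lacks the feature. A minimal, self-contained sketch of the same mechanism follows; the ExampleTest name and the parameter values are made up for illustration, and only the filter handling mirrors the harness above.

    #include <string>
    #include "gtest/gtest.h"

    // Stands in for a SIMD-specific, value-parameterized libvpx test.
    class ExampleTest : public ::testing::TestWithParam<int> {};

    TEST_P(ExampleTest, IsNonNegative) { EXPECT_GE(GetParam(), 0); }

    // Instantiated under the prefix "SSE2", the way libvpx names
    // instruction-set specific instantiations.
    INSTANTIATE_TEST_CASE_P(SSE2, ExampleTest, ::testing::Values(1, 2, 3));

    int main(int argc, char **argv) {
      ::testing::InitGoogleTest(&argc, argv);
      // Pretend SSE2 is unavailable: exclude everything named "SSE2/...".
      std::string filter = ::testing::FLAGS_gtest_filter;
      filter += ":-SSE2/*";
      ::testing::FLAGS_gtest_filter = filter;
      return RUN_ALL_TESTS();
    }

With the exclusion in place the SSE2 instantiation never runs; remove the three filter lines and all three parameterized tests execute normally.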
-+const char *kTestVectors[] = { -+ "vp80-00-comprehensive-001.ivf", -+ "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", -+ "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", -+ "vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf", -+ "vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf", -+ "vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf", -+ "vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf", -+ "vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf", -+ "vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf", -+ "vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf", -+ "vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf", -+ "vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf", -+ "vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf", -+ "vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf", -+ "vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf", -+ "vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf", -+ "vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf", -+ "vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf", -+ "vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf", -+ "vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf", -+ "vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf", -+ "vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf", -+ "vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf", -+ "vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf", -+ "vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf", -+ "vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf", -+ "vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf", -+ "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf", -+ "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf", -+ "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf", -+ "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf" -+}; -+ -+class TestVectorTest : public libvpx_test::DecoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ TestVectorTest() : md5_file_(NULL) {} -+ -+ virtual ~TestVectorTest() { -+ if (md5_file_) -+ fclose(md5_file_); -+ } -+ -+ void OpenMD5File(const std::string& md5_file_name_) { -+ md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_); -+ ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: " -+ << md5_file_name_; -+ } -+ -+ virtual void DecompressedFrameHook(const vpx_image_t& img, -+ const unsigned int frame_number) { -+ char expected_md5[33]; -+ char junk[128]; -+ -+ // Read correct md5 checksums. -+ const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); -+ ASSERT_NE(res, EOF) << "Read md5 data failed"; -+ expected_md5[32] = '\0'; -+ -+ MD5Context md5; -+ MD5Init(&md5); -+ -+ // Compute and update md5 for each raw in decompressed data. -+ for (int plane = 0; plane < 3; ++plane) { -+ uint8_t *buf = img.planes[plane]; -+ -+ for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h); -+ ++y) { -+ MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w)); -+ buf += img.stride[plane]; -+ } -+ } -+ -+ uint8_t md5_sum[16]; -+ MD5Final(md5_sum, &md5); -+ -+ char actual_md5[33]; -+ // Convert to get the actual md5. -+ for (int i = 0; i < 16; i++) { -+ snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x", -+ md5_sum[i]); -+ } -+ actual_md5[32] = '\0'; -+ -+ // Check md5 match. 
-+ ASSERT_STREQ(expected_md5, actual_md5) -+ << "Md5 checksums don't match: frame number = " << frame_number; -+ } -+ -+ private: -+ FILE *md5_file_; -+}; -+ -+// This test runs through the whole set of test vectors, and decodes them. -+// The md5 checksums are computed for each frame in the video file. If md5 -+// checksums match the correct md5 data, then the test is passed. Otherwise, -+// the test failed. -+TEST_P(TestVectorTest, MD5Match) { -+ const std::string filename = GetParam(); -+ // Open compressed video file. -+ libvpx_test::IVFVideoSource video(filename); -+ -+ video.Init(); -+ -+ // Construct md5 file name. -+ const std::string md5_filename = filename + ".md5"; -+ OpenMD5File(md5_filename); -+ -+ // Decode frame, and check the md5 matching. -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+} -+ -+INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest, -+ ::testing::ValuesIn(kTestVectors)); -+ -+} // namespace -diff --git a/test/util.h b/test/util.h -new file mode 100644 -index 0000000..06a70cc ---- /dev/null -+++ b/test/util.h -@@ -0,0 +1,18 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef TEST_UTIL_H_ -+#define TEST_UTIL_H_ -+ -+// Macros -+#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > -+#define GET_PARAM(k) std::tr1::get< k >(GetParam()) -+ -+#endif // TEST_UTIL_H_ -diff --git a/test/video_source.h b/test/video_source.h -new file mode 100644 -index 0000000..9772657 ---- /dev/null -+++ b/test/video_source.h -@@ -0,0 +1,175 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_VIDEO_SOURCE_H_ -+#define TEST_VIDEO_SOURCE_H_ -+ -+#include -+#include -+#include -+#include "test/acm_random.h" -+#include "vpx/vpx_encoder.h" -+ -+namespace libvpx_test { -+ -+static FILE *OpenTestDataFile(const std::string& file_name) { -+ std::string path_to_source = file_name; -+ const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH"); -+ -+ if (kDataPath) { -+ path_to_source = kDataPath; -+ path_to_source += "/"; -+ path_to_source += file_name; -+ } -+ -+ return fopen(path_to_source.c_str(), "rb"); -+} -+ -+// Abstract base class for test video sources, which provide a stream of -+// vpx_image_t images with associated timestamps and duration. -+class VideoSource { -+ public: -+ virtual ~VideoSource() {} -+ -+ // Prepare the stream for reading, rewind/open as necessary. -+ virtual void Begin() = 0; -+ -+ // Advance the cursor to the next frame -+ virtual void Next() = 0; -+ -+ // Get the current video frame, or NULL on End-Of-Stream. -+ virtual vpx_image_t *img() const = 0; -+ -+ // Get the presentation timestamp of the current frame. 
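The MD5Match test above recomputes each decoded frame's checksum from the decoder's vpx_image_t, hashing every plane row by row and halving both dimensions for the chroma planes, then compares the hex digest against the matching .ivf.md5 file. Pulled out of the test fixture, the per-frame hash might look like the sketch below; it assumes the md5_utils.h interface quoted in the hunk above and an I420 image, and is only an illustration of the loop, not code from the tree.

    #include <cstdio>
    #include <string>
    #include "vpx/vpx_image.h"
    extern "C" {
    #include "./md5_utils.h"  // MD5Context, MD5Init, MD5Update, MD5Final
    }

    // Hex MD5 of the visible Y, U and V samples of an I420 frame.
    static std::string FrameMD5(const vpx_image_t &img) {
      MD5Context md5;
      MD5Init(&md5);
      for (int plane = 0; plane < 3; ++plane) {
        unsigned char *buf = img.planes[plane];
        // Chroma planes are subsampled by two in each direction.
        const unsigned int h = plane ? (img.d_h + 1) >> 1 : img.d_h;
        const unsigned int w = plane ? (img.d_w + 1) >> 1 : img.d_w;
        for (unsigned int y = 0; y < h; ++y) {
          MD5Update(&md5, buf, w);
          buf += img.stride[plane];
        }
      }
      unsigned char sum[16];
      MD5Final(sum, &md5);
      char hex[33];
      for (int i = 0; i < 16; ++i)
        snprintf(&hex[i * 2], sizeof(hex) - i * 2, "%02x", sum[i]);
      return std::string(hex, 32);
    }

The stride can exceed the visible width, which is why the row loop advances by img.stride[plane] but hashes only w bytes per row.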
-+ virtual vpx_codec_pts_t pts() const = 0; -+ -+ // Get the current frame's duration -+ virtual unsigned long duration() const = 0; -+ -+ // Get the timebase for the stream -+ virtual vpx_rational_t timebase() const = 0; -+ -+ // Get the current frame counter, starting at 0. -+ virtual unsigned int frame() const = 0; -+ -+ // Get the current file limit. -+ virtual unsigned int limit() const = 0; -+}; -+ -+ -+class DummyVideoSource : public VideoSource { -+ public: -+ DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) { -+ SetSize(80, 64); -+ } -+ -+ virtual ~DummyVideoSource() { vpx_img_free(img_); } -+ -+ virtual void Begin() { -+ frame_ = 0; -+ FillFrame(); -+ } -+ -+ virtual void Next() { -+ ++frame_; -+ FillFrame(); -+ } -+ -+ virtual vpx_image_t *img() const { -+ return (frame_ < limit_) ? img_ : NULL; -+ } -+ -+ // Models a stream where Timebase = 1/FPS, so pts == frame. -+ virtual vpx_codec_pts_t pts() const { return frame_; } -+ -+ virtual unsigned long duration() const { return 1; } -+ -+ virtual vpx_rational_t timebase() const { -+ const vpx_rational_t t = {1, 30}; -+ return t; -+ } -+ -+ virtual unsigned int frame() const { return frame_; } -+ -+ virtual unsigned int limit() const { return limit_; } -+ -+ void SetSize(unsigned int width, unsigned int height) { -+ if (width != width_ || height != height_) { -+ vpx_img_free(img_); -+ raw_sz_ = ((width + 31)&~31) * height * 3 / 2; -+ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32); -+ width_ = width; -+ height_ = height; -+ } -+ } -+ -+ protected: -+ virtual void FillFrame() { memset(img_->img_data, 0, raw_sz_); } -+ -+ vpx_image_t *img_; -+ size_t raw_sz_; -+ unsigned int limit_; -+ unsigned int frame_; -+ unsigned int width_; -+ unsigned int height_; -+}; -+ -+ -+class RandomVideoSource : public DummyVideoSource { -+ public: -+ RandomVideoSource(int seed = ACMRandom::DeterministicSeed()) -+ : rnd_(seed), -+ seed_(seed) { } -+ -+ protected: -+ // Reset the RNG to get a matching stream for the second pass -+ virtual void Begin() { -+ frame_ = 0; -+ rnd_.Reset(seed_); -+ FillFrame(); -+ } -+ -+ // 15 frames of noise, followed by 15 static frames. Reset to 0 rather -+ // than holding previous frames to encourage keyframes to be thrown. -+ virtual void FillFrame() { -+ if (frame_ % 30 < 15) -+ for (size_t i = 0; i < raw_sz_; ++i) -+ img_->img_data[i] = rnd_.Rand8(); -+ else -+ memset(img_->img_data, 0, raw_sz_); -+ } -+ -+ ACMRandom rnd_; -+ int seed_; -+}; -+ -+// Abstract base class for test video sources, which provide a stream of -+// decompressed images to the decoder. -+class CompressedVideoSource { -+ public: -+ virtual ~CompressedVideoSource() {} -+ -+ virtual void Init() = 0; -+ -+ // Prepare the stream for reading, rewind/open as necessary. 
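VideoSource above is a pull-style iterator: Begin() rewinds the stream, img() returns the current frame or NULL at end of stream, and Next() advances, with pts(), duration() and timebase() describing the timing. A consumer loop over one of the concrete sources might look like the sketch below; EncodeFrame is a placeholder for whatever the caller does with each frame (it is not a libvpx or test-harness function), so treat this as an assumed usage pattern rather than code from the tree.

    #include "test/video_source.h"

    // Placeholder consumer; a real test would feed these frames to an encoder.
    static void EncodeFrame(const vpx_image_t *img, vpx_codec_pts_t pts,
                            unsigned long duration) {
      (void)img;
      (void)pts;
      (void)duration;
    }

    // Walks a source from its first frame until img() returns NULL.
    static void DriveSource(libvpx_test::VideoSource *video) {
      for (video->Begin(); video->img() != NULL; video->Next())
        EncodeFrame(video->img(), video->pts(), video->duration());
    }

    // Example: 100 frames of 80x64 synthetic content, alternating noise
    // and static runs to provoke keyframes.
    //   libvpx_test::RandomVideoSource source;
    //   DriveSource(&source);

RandomVideoSource reseeds its RNG in Begin(), so driving the same source twice produces an identical frame sequence, which is what a matching second encode pass relies on.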
-+ virtual void Begin() = 0; -+ -+ // Advance the cursor to the next frame -+ virtual void Next() = 0; -+ -+ virtual const uint8_t *cxdata() const = 0; -+ -+ virtual const unsigned int frame_size() const = 0; -+ -+ virtual const unsigned int frame_number() const = 0; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_VIDEO_SOURCE_H_ -diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c -index 930a7ae..c142a17 100644 ---- a/third_party/libyuv/source/scale.c -+++ b/third_party/libyuv/source/scale.c -@@ -60,7 +60,7 @@ void SetUseReferenceImpl(int use) { - - #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) - #define HAS_SCALEROWDOWN2_NEON --void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, -+void ScaleRowDown2_NEON(const uint8* src_ptr, int src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" -@@ -102,7 +102,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, - } - - #define HAS_SCALEROWDOWN4_NEON --static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, -+static void ScaleRowDown4_NEON(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" -@@ -160,7 +160,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, - // Down scale from 4 to 3 pixels. Use the neon multilane read/write - // to load up the every 4th pixel into a 4 different registers. - // Point samples 32 pixels to 24 pixels. --static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, -+static void ScaleRowDown34_NEON(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" -@@ -284,7 +284,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; - - // 32 -> 12 --static void ScaleRowDown38_NEON(const uint8* src_ptr, int, -+static void ScaleRowDown38_NEON(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vld1.u8 {q3}, [%3] \n" -diff --git a/tools/ftfy.sh b/tools/ftfy.sh -index 95fd397..c5cfdea 100755 ---- a/tools/ftfy.sh -+++ b/tools/ftfy.sh -@@ -34,7 +34,7 @@ vpx_style() { - --align-pointer=name \ - --indent-preprocessor --convert-tabs --indent-labels \ - --suffix=none --quiet "$@" -- sed -i 's/[[:space:]]\{1,\},/,/g' "$@" -+ sed -i "" 's/[[:space:]]\{1,\},/,/g' "$@" - } - - -diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c -index d58e49c..8af9e90 100644 ---- a/vp8/common/alloccommon.c -+++ b/vp8/common/alloccommon.c -@@ -17,23 +17,6 @@ - #include "entropymode.h" - #include "systemdependent.h" - -- --extern void vp8_init_scan_order_mask(); -- --static void update_mode_info_border(MODE_INFO *mi, int rows, int cols) --{ -- int i; -- vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1)); -- -- for (i = 0; i < rows; i++) -- { -- /* TODO(holmer): Bug? This updates the last element of each row -- * rather than the border element! 
-- */ -- vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO)); -- } --} -- - void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) - { - int i; -@@ -45,16 +28,20 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); - if (oci->post_proc_buffer_int_used) - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); -+ -+ vpx_free(oci->pp_limits_buffer); -+ oci->pp_limits_buffer = NULL; - #endif - - vpx_free(oci->above_context); - vpx_free(oci->mip); -+#if CONFIG_ERROR_CONCEALMENT - vpx_free(oci->prev_mip); -+ oci->prev_mip = NULL; -+#endif - -- oci->above_context = 0; -- oci->mip = 0; -- oci->prev_mip = 0; -- -+ oci->above_context = NULL; -+ oci->mip = NULL; - } - - int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) -@@ -76,10 +63,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) - oci->fb_idx_ref_cnt[i] = 0; - oci->yv12_fb[i].flags = 0; - if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -+ goto allocation_fail; - } - - oci->new_fb_idx = 0; -@@ -93,22 +77,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) - oci->fb_idx_ref_cnt[3] = 1; - - if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -- --#if CONFIG_POSTPROC -- if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -- -- oci->post_proc_buffer_int_used = 0; -- vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); -- vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); --#endif -+ goto allocation_fail; - - oci->mb_rows = height >> 4; - oci->mb_cols = width >> 4; -@@ -117,44 +86,43 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) - oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); - - if (!oci->mip) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -+ goto allocation_fail; - - oci->mi = oci->mip + oci->mode_info_stride + 1; - -- /* allocate memory for last frame MODE_INFO array */ --#if CONFIG_ERROR_CONCEALMENT -- oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); -- -- if (!oci->prev_mip) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -- -- oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1; --#else -- oci->prev_mip = NULL; -- oci->prev_mi = NULL; --#endif -+ /* Allocation of previous mode info will be done in vp8_decode_frame() -+ * as it is a decoder only data */ - - oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); - - if (!oci->above_context) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -+ goto allocation_fail; - -- update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols); --#if CONFIG_ERROR_CONCEALMENT -- update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols); -+#if CONFIG_POSTPROC -+ if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) -+ goto allocation_fail; -+ -+ oci->post_proc_buffer_int_used = 0; -+ vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); -+ vpx_memset(oci->post_proc_buffer.buffer_alloc, 128, -+ oci->post_proc_buffer.frame_size); -+ -+ /* Allocate buffer to store post-processing filter coefficients. 
-+ * -+ * Note: Round up mb_cols to support SIMD reads -+ */ -+ oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); -+ if (!oci->pp_limits_buffer) -+ goto allocation_fail; - #endif - - return 0; -+ -+allocation_fail: -+ vp8_de_alloc_frame_buffers(oci); -+ return 1; - } -+ - void vp8_setup_version(VP8_COMMON *cm) - { - switch (cm->version) -diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm -index a974cd1..c5ec824 100644 ---- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm -+++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm -@@ -18,15 +18,23 @@ - AREA ||.text||, CODE, READONLY, ALIGN=2 - - --;void vp8_intra4x4_predict(unsigned char *src, int src_stride, int b_mode, --; unsigned char *dst, int dst_stride) -- -+;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, -+; B_PREDICTION_MODE left_stride, int b_mode, -+; unsigned char *dst, int dst_stride, -+; unsigned char top_left) -+ -+; r0: *Above -+; r1: *yleft -+; r2: left_stride -+; r3: b_mode -+; sp + #40: dst -+; sp + #44: dst_stride -+; sp + #48: top_left - |vp8_intra4x4_predict_armv6| PROC - push {r4-r12, lr} - -- -- cmp r2, #10 -- addlt pc, pc, r2, lsl #2 ; position independent switch -+ cmp r3, #10 -+ addlt pc, pc, r3, lsl #2 ; position independent switch - pop {r4-r12, pc} ; default - b b_dc_pred - b b_tm_pred -@@ -41,13 +49,13 @@ - - b_dc_pred - ; load values -- ldr r8, [r0, -r1] ; Above -- ldrb r4, [r0, #-1]! ; Left[0] -+ ldr r8, [r0] ; Above -+ ldrb r4, [r1], r2 ; Left[0] - mov r9, #0 -- ldrb r5, [r0, r1] ; Left[1] -- ldrb r6, [r0, r1, lsl #1]! ; Left[2] -+ ldrb r5, [r1], r2 ; Left[1] -+ ldrb r6, [r1], r2 ; Left[2] - usad8 r12, r8, r9 -- ldrb r7, [r0, r1] ; Left[3] -+ ldrb r7, [r1] ; Left[3] - - ; calculate dc - add r4, r4, r5 -@@ -55,31 +63,30 @@ b_dc_pred - add r4, r4, r7 - add r4, r4, r12 - add r4, r4, #4 -- ldr r0, [sp, #40] ; load stride -+ ldr r0, [sp, #44] ; dst_stride - mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 - - add r12, r12, r12, lsl #8 -- add r3, r3, r0 -+ ldr r3, [sp, #40] ; dst - add r12, r12, r12, lsl #16 - - ; store values -- str r12, [r3, -r0] -+ str r12, [r3], r0 -+ str r12, [r3], r0 -+ str r12, [r3], r0 - str r12, [r3] -- str r12, [r3, r0] -- str r12, [r3, r0, lsl #1] - - pop {r4-r12, pc} - - b_tm_pred -- sub r10, r0, #1 ; Left -- ldr r8, [r0, -r1] ; Above -- ldrb r9, [r10, -r1] ; top_left -- ldrb r4, [r0, #-1]! ; Left[0] -- ldrb r5, [r10, r1]! ; Left[1] -- ldrb r6, [r0, r1, lsl #1] ; Left[2] -- ldrb r7, [r10, r1, lsl #1] ; Left[3] -- ldr r0, [sp, #40] ; load stride -- -+ ldr r8, [r0] ; Above -+ ldrb r9, [sp, #48] ; top_left -+ ldrb r4, [r1], r2 ; Left[0] -+ ldrb r5, [r1], r2 ; Left[1] -+ ldrb r6, [r1], r2 ; Left[2] -+ ldrb r7, [r1] ; Left[3] -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - add r9, r9, r9, lsl #16 ; [tl|tl] - uxtb16 r10, r8 ; a[2|0] -@@ -126,25 +133,26 @@ b_tm_pred - str r12, [r3], r0 - - add r12, r4, r5, lsl #8 ; [3|2|1|0] -- str r12, [r3], r0 -+ str r12, [r3] - - pop {r4-r12, pc} - - b_ve_pred -- ldr r8, [r0, -r1]! 
; a[3|2|1|0] -+ ldr r8, [r0] ; a[3|2|1|0] - ldr r11, c00FF00FF -- ldrb r9, [r0, #-1] ; top_left -+ ldrb r9, [sp, #48] ; top_left - ldrb r10, [r0, #4] ; a[4] - - ldr r0, c00020002 - - uxtb16 r4, r8 ; a[2|0] - uxtb16 r5, r8, ror #8 ; a[3|1] -- ldr r2, [sp, #40] ; stride -+ ldr r2, [sp, #44] ; dst_stride - pkhbt r9, r9, r5, lsl #16 ; a[1|-1] - - add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ] - uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ] -+ ldr r3, [sp, #40] ; dst - uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2] - - add r0, r0, r10, lsl #16 ;[a[4]+2 | 2] -@@ -154,25 +162,23 @@ b_ve_pred - - and r9, r11, r9, asr #2 - and r4, r11, r4, asr #2 -- add r3, r3, r2 ; dst + dst_stride - add r9, r9, r4, lsl #8 - - ; store values -- str r9, [r3, -r2] -+ str r9, [r3], r2 -+ str r9, [r3], r2 -+ str r9, [r3], r2 - str r9, [r3] -- str r9, [r3, r2] -- str r9, [r3, r2, lsl #1] - - pop {r4-r12, pc} - - - b_he_pred -- sub r10, r0, #1 ; Left -- ldrb r4, [r0, #-1]! ; Left[0] -- ldrb r8, [r10, -r1] ; top_left -- ldrb r5, [r10, r1]! ; Left[1] -- ldrb r6, [r0, r1, lsl #1] ; Left[2] -- ldrb r7, [r10, r1, lsl #1] ; Left[3] -+ ldrb r4, [r1], r2 ; Left[0] -+ ldrb r8, [sp, #48] ; top_left -+ ldrb r5, [r1], r2 ; Left[1] -+ ldrb r6, [r1], r2 ; Left[2] -+ ldrb r7, [r1] ; Left[3] - - add r8, r8, r4 ; tl + l[0] - add r9, r4, r5 ; l[0] + l[1] -@@ -197,7 +203,8 @@ b_he_pred - pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2] - pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - add r8, r8, r8, lsl #8 ; l[0|0|0|0] - add r9, r9, r9, lsl #8 ; l[1|1|1|1] -@@ -206,16 +213,16 @@ b_he_pred - - ; store values - str r8, [r3], r0 -- str r9, [r3] -- str r10, [r3, r0] -- str r11, [r3, r0, lsl #1] -+ str r9, [r3], r0 -+ str r10, [r3], r0 -+ str r11, [r3] - - pop {r4-r12, pc} - - b_ld_pred -- ldr r4, [r0, -r1]! ; Above -+ ldr r4, [r0] ; Above[0-3] - ldr r12, c00020002 -- ldr r5, [r0, #4] -+ ldr r5, [r0, #4] ; Above[4-7] - ldr lr, c00FF00FF - - uxtb16 r6, r4 ; a[2|0] -@@ -225,7 +232,6 @@ b_ld_pred - pkhtb r10, r6, r8 ; a[2|4] - pkhtb r11, r7, r9 ; a[3|5] - -- - add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1] - add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2] - uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2] -@@ -244,7 +250,8 @@ b_ld_pred - add r7, r7, r9, asr #16 ; [ a5+2*a6+a7] - uxtah r7, r7, r12 ; [ a5+2*a6+a7+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r4, lr, r4, asr #2 -@@ -266,18 +273,17 @@ b_ld_pred - mov r6, r6, lsr #16 - mov r11, r10, lsr #8 - add r11, r11, r6, lsl #24 ; [6|5|4|3] -- str r11, [r3], r0 -+ str r11, [r3] - - pop {r4-r12, pc} - - b_rd_pred -- sub r12, r0, r1 ; Above = src - src_stride -- ldrb r7, [r0, #-1]! ; l[0] = pp[3] -- ldr lr, [r12] ; Above = pp[8|7|6|5] -- ldrb r8, [r12, #-1]! 
; tl = pp[4] -- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] -- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] -- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] -+ ldrb r7, [r1], r2 ; l[0] = pp[3] -+ ldr lr, [r0] ; Above = pp[8|7|6|5] -+ ldrb r8, [sp, #48] ; tl = pp[4] -+ ldrb r6, [r1], r2 ; l[1] = pp[2] -+ ldrb r5, [r1], r2 ; l[2] = pp[1] -+ ldrb r4, [r1], r2 ; l[3] = pp[0] - - - uxtb16 r9, lr ; p[7|5] -@@ -307,7 +313,8 @@ b_rd_pred - add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] - uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r7, lr, r7, asr #2 -@@ -328,18 +335,17 @@ b_rd_pred - - mov r11, r10, lsl #8 ; [3|2|1|-] - uxtab r11, r11, r4 ; [3|2|1|0] -- str r11, [r3], r0 -+ str r11, [r3] - - pop {r4-r12, pc} - - b_vr_pred -- sub r12, r0, r1 ; Above = src - src_stride -- ldrb r7, [r0, #-1]! ; l[0] = pp[3] -- ldr lr, [r12] ; Above = pp[8|7|6|5] -- ldrb r8, [r12, #-1]! ; tl = pp[4] -- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] -- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] -- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] -+ ldrb r7, [r1], r2 ; l[0] = pp[3] -+ ldr lr, [r0] ; Above = pp[8|7|6|5] -+ ldrb r8, [sp, #48] ; tl = pp[4] -+ ldrb r6, [r1], r2 ; l[1] = pp[2] -+ ldrb r5, [r1], r2 ; l[2] = pp[1] -+ ldrb r4, [r1] ; l[3] = pp[0] - - add r5, r5, r7, lsl #16 ; p[3|1] - add r6, r6, r8, lsl #16 ; p[4|2] -@@ -376,7 +382,8 @@ b_vr_pred - add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] - uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r5, lr, r5, asr #2 ; [B|A] -@@ -397,14 +404,14 @@ b_vr_pred - pkhtb r10, r7, r5, asr #16 ; [-|H|-|B] - str r2, [r3], r0 - add r12, r12, r10, lsl #8 ; [H|D|B|A] -- str r12, [r3], r0 -+ str r12, [r3] - - pop {r4-r12, pc} - - b_vl_pred -- ldr r4, [r0, -r1]! ; [3|2|1|0] -+ ldr r4, [r0] ; [3|2|1|0] = Above[0-3] - ldr r12, c00020002 -- ldr r5, [r0, #4] ; [7|6|5|4] -+ ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7] - ldr lr, c00FF00FF - ldr r2, c00010001 - -@@ -441,7 +448,8 @@ b_vl_pred - add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5] - uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r5, lr, r5, asr #2 ; [D|C] -@@ -449,7 +457,6 @@ b_vl_pred - and r8, lr, r8, asr #2 ; [I|D] - and r9, lr, r9, asr #2 ; [J|H] - -- - add r10, r4, r6, lsl #8 ; [F|B|E|A] - str r10, [r3], r0 - -@@ -463,18 +470,17 @@ b_vl_pred - str r12, [r3], r0 - - add r10, r7, r10, lsl #8 ; [J|H|D|G] -- str r10, [r3], r0 -+ str r10, [r3] - - pop {r4-r12, pc} - - b_hd_pred -- sub r12, r0, r1 ; Above = src - src_stride -- ldrb r7, [r0, #-1]! ; l[0] = pp[3] -- ldr lr, [r12] ; Above = pp[8|7|6|5] -- ldrb r8, [r12, #-1]! 
; tl = pp[4] -- ldrb r6, [r0, r1] ; l[1] = pp[2] -- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] -- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] -+ ldrb r7, [r1], r2 ; l[0] = pp[3] -+ ldr lr, [r0] ; Above = pp[8|7|6|5] -+ ldrb r8, [sp, #48] ; tl = pp[4] -+ ldrb r6, [r1], r2 ; l[1] = pp[2] -+ ldrb r5, [r1], r2 ; l[2] = pp[1] -+ ldrb r4, [r1] ; l[3] = pp[0] - - uxtb16 r9, lr ; p[7|5] - uxtb16 r10, lr, ror #8 ; p[8|6] -@@ -492,7 +498,6 @@ b_hd_pred - pkhtb r1, r9, r10 ; p[7|6] - pkhbt r10, r8, r10, lsl #16 ; p[6|5] - -- - uadd16 r11, r4, r5 ; [p1+p2 | p0+p1] - uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1] - ; [B|A] -@@ -518,7 +523,8 @@ b_hd_pred - and r5, lr, r5, asr #2 ; [H|G] - and r6, lr, r6, asr #2 ; [J|I] - -- ldr lr, [sp, #40] ; stride -+ ldr lr, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - pkhtb r2, r0, r6 ; [-|F|-|I] - pkhtb r12, r6, r5, asr #16 ; [-|J|-|H] -@@ -527,7 +533,6 @@ b_hd_pred - mov r12, r12, ror #24 ; [J|I|H|F] - str r12, [r3], lr - -- - mov r7, r11, asr #16 ; [-|-|-|B] - str r2, [r3], lr - add r7, r7, r0, lsl #16 ; [-|E|-|B] -@@ -536,21 +541,20 @@ b_hd_pred - str r7, [r3], lr - - add r5, r11, r4, lsl #8 ; [D|B|C|A] -- str r5, [r3], lr -+ str r5, [r3] - - pop {r4-r12, pc} - - - - b_hu_pred -- ldrb r4, [r0, #-1]! ; Left[0] -+ ldrb r4, [r1], r2 ; Left[0] - ldr r12, c00020002 -- ldrb r5, [r0, r1]! ; Left[1] -+ ldrb r5, [r1], r2 ; Left[1] - ldr lr, c00FF00FF -- ldrb r6, [r0, r1]! ; Left[2] -+ ldrb r6, [r1], r2 ; Left[2] - ldr r2, c00010001 -- ldrb r7, [r0, r1] ; Left[3] -- -+ ldrb r7, [r1] ; Left[3] - - add r4, r4, r5, lsl #16 ; [1|0] - add r5, r5, r6, lsl #16 ; [2|1] -@@ -563,7 +567,8 @@ b_hu_pred - add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1] - add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2] - uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2] -- ldr r2, [sp, #40] ; stride -+ ldr r2, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - and r4, lr, r4, asr #2 ; [D|C] - - add r10, r6, r7 ; [p2+p3] -@@ -587,9 +592,9 @@ b_hu_pred - - add r10, r11, lsl #8 ; [-|-|F|E] - add r10, r10, r9, lsl #16 ; [G|G|F|E] -- str r10, [r3] -+ str r10, [r3], r2 - -- str r7, [r3, r2] -+ str r7, [r3] - - pop {r4-r12, pc} - -diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm -index 65a4680..79ff02c 100644 ---- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm -+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm -@@ -46,7 +46,7 @@ - vst1.32 {d2[1]}, [r3], r12 - vst1.32 {d4[0]}, [r3], r12 - vst1.32 {d4[1]}, [r3] -- -+ - bx lr - - ENDP -diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h -index a4c1d92..f7ff577 100644 ---- a/vp8/common/blockd.h -+++ b/vp8/common/blockd.h -@@ -161,22 +161,32 @@ typedef struct - uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ - } MB_MODE_INFO; - --typedef struct -+typedef struct modeinfo - { - MB_MODE_INFO mbmi; - union b_mode_info bmi[16]; - } MODE_INFO; - - #if CONFIG_MULTI_RES_ENCODING --/* The information needed to be stored for higher-resolution encoder */ -+/* The mb-level information needed to be stored for higher-resolution encoder */ - typedef struct - { - MB_PREDICTION_MODE mode; - MV_REFERENCE_FRAME ref_frame; - int_mv mv; -- //union b_mode_info bmi[16]; -- int dissim; // dissimilarity level of the macroblock --} LOWER_RES_INFO; -+ int dissim; /* dissimilarity level of the macroblock */ -+} LOWER_RES_MB_INFO; -+ -+/* The frame-level information needed to be stored for higher-resolution -+ * encoder */ -+typedef struct -+{ -+ FRAME_TYPE frame_type; -+ int 
is_frame_dropped; -+ /* The frame number of each reference frames */ -+ unsigned int low_res_ref_frames[MAX_REF_FRAMES]; -+ LOWER_RES_MB_INFO *mb_info; -+} LOWER_RES_FRAME_INFO; - #endif - - typedef struct blockd -@@ -216,12 +226,6 @@ typedef struct macroblockd - MODE_INFO *mode_info_context; - int mode_info_stride; - --#if CONFIG_TEMPORAL_DENOISING -- MB_PREDICTION_MODE best_sse_inter_mode; -- int_mv best_sse_mv; -- unsigned char need_to_clamp_best_mvs; --#endif -- - FRAME_TYPE frame_type; - - int up_available; -diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c -index a95a923..8c046a4 100644 ---- a/vp8/common/entropy.c -+++ b/vp8/common/entropy.c -@@ -101,7 +101,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ - /* vp8_coef_encodings generated with: - vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); - */ --const vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = -+vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = - { - {2, 2}, - {6, 3}, -diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c -index de7e828..091e4c7 100644 ---- a/vp8/common/entropymode.c -+++ b/vp8/common/entropymode.c -@@ -160,9 +160,7 @@ const vp8_tree_index vp8_small_mvtree [14] = - void vp8_init_mbmode_probs(VP8_COMMON *x) - { - vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); -- vpx_memcpy(x->kf_ymode_prob, vp8_kf_ymode_prob, sizeof(vp8_kf_ymode_prob)); - vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); -- vpx_memcpy(x->kf_uv_mode_prob, vp8_kf_uv_mode_prob, sizeof(vp8_kf_uv_mode_prob)); - vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); - } - -@@ -171,7 +169,3 @@ void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) - vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); - } - --void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]) --{ -- vpx_memcpy(p, vp8_kf_bmode_prob, sizeof(vp8_kf_bmode_prob)); --} -diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h -index 70200cb..1df0f64 100644 ---- a/vp8/common/entropymode.h -+++ b/vp8/common/entropymode.h -@@ -24,11 +24,11 @@ typedef enum - SUBMVREF_LEFT_ABOVE_ZED - } sumvfref_t; - --typedef const int vp8_mbsplit[16]; -+typedef int vp8_mbsplit[16]; - - #define VP8_NUMMBSPLITS 4 - --extern vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; -+extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; - - extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ - -@@ -67,9 +67,14 @@ extern const vp8_tree_index vp8_small_mvtree[]; - - extern const struct vp8_token_struct vp8_small_mvencodings[8]; - --void vp8_init_mbmode_probs(VP8_COMMON *x); -+/* Key frame default mode probs */ -+extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] -+[VP8_BINTRAMODES-1]; -+extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; -+extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; - --void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); -+void vp8_init_mbmode_probs(VP8_COMMON *x); -+void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); - void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); - - #endif -diff --git a/vp8/common/extend.c b/vp8/common/extend.c -index 9089e16..c9bdd21 100644 ---- a/vp8/common/extend.c -+++ b/vp8/common/extend.c -@@ -116,7 +116,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); 
- int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - -- // If the side is not touching the bounder then don't extend. -+ /* If the side is not touching the bounder then don't extend. */ - if (srcy) - et = 0; - if (srcx) -@@ -157,7 +157,10 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - - - /* note the extension is only for the last row, for intra prediction purpose */ --void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr) -+void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, -+ unsigned char *YPtr, -+ unsigned char *UPtr, -+ unsigned char *VPtr) - { - int i; - -diff --git a/vp8/common/filter.h b/vp8/common/filter.h -index 0f225c2..b7591f2 100644 ---- a/vp8/common/filter.h -+++ b/vp8/common/filter.h -@@ -19,4 +19,4 @@ - extern const short vp8_bilinear_filters[8][2]; - extern const short vp8_sub_pel_filters[8][6]; - --#endif //FILTER_H -+#endif -diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c -index 2a30166..5a6ac7b 100644 ---- a/vp8/common/generic/systemdependent.c -+++ b/vp8/common/generic/systemdependent.c -@@ -83,57 +83,6 @@ static int get_cpu_count() - #endif - - --#if HAVE_PTHREAD_H --#include --static void once(void (*func)(void)) --{ -- static pthread_once_t lock = PTHREAD_ONCE_INIT; -- pthread_once(&lock, func); --} -- -- --#elif defined(_WIN32) --static void once(void (*func)(void)) --{ -- /* Using a static initializer here rather than InitializeCriticalSection() -- * since there's no race-free context in which to execute it. Protecting -- * it with an atomic op like InterlockedCompareExchangePointer introduces -- * an x86 dependency, and InitOnceExecuteOnce requires Vista. -- */ -- static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0}; -- static int done; -- -- EnterCriticalSection(&lock); -- -- if (!done) -- { -- func(); -- done = 1; -- } -- -- LeaveCriticalSection(&lock); --} -- -- --#else --/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, -- * so as long as your platform provides atomic loads/stores of pointers -- * no synchronization is strictly necessary. -- */ -- --static void once(void (*func)(void)) --{ -- static int done; -- -- if(!done) -- { -- func(); -- done = 1; -- } --} --#endif -- -- - void vp8_machine_specific_config(VP8_COMMON *ctx) - { - #if CONFIG_MULTITHREAD -@@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) - #elif ARCH_X86 || ARCH_X86_64 - ctx->cpu_caps = x86_simd_caps(); - #endif -- -- once(vpx_rtcd); - } -diff --git a/vp8/common/idctllm_test.cc b/vp8/common/idctllm_test.cc -deleted file mode 100755 -index 0f6ebe7..0000000 ---- a/vp8/common/idctllm_test.cc -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -- * -- * Use of this source code is governed by a BSD-style license -- * that can be found in the LICENSE file in the root of the source -- * tree. An additional intellectual property rights grant can be found -- * in the file PATENTS. All contributing project authors may -- * be found in the AUTHORS file in the root of the source tree. 
-- */ -- -- -- extern "C" { -- void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, -- int pred_stride, unsigned char *dst_ptr, -- int dst_stride); --} -- --#include "vpx_config.h" --#include "idctllm_test.h" --namespace --{ -- --INSTANTIATE_TEST_CASE_P(C, IDCTTest, -- ::testing::Values(vp8_short_idct4x4llm_c)); -- --} // namespace -- --int main(int argc, char **argv) { -- ::testing::InitGoogleTest(&argc, argv); -- return RUN_ALL_TESTS(); --} -diff --git a/vp8/common/idctllm_test.h b/vp8/common/idctllm_test.h -deleted file mode 100755 -index a6a694b..0000000 ---- a/vp8/common/idctllm_test.h -+++ /dev/null -@@ -1,113 +0,0 @@ --/* -- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -- * -- * Use of this source code is governed by a BSD-style license -- * that can be found in the LICENSE file in the root of the source -- * tree. An additional intellectual property rights grant can be found -- * in the file PATENTS. All contributing project authors may -- * be found in the AUTHORS file in the root of the source tree. -- */ -- -- -- #include "third_party/googletest/src/include/gtest/gtest.h" --typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, -- int pred_stride, unsigned char *dst_ptr, -- int dst_stride); --namespace { --class IDCTTest : public ::testing::TestWithParam --{ -- protected: -- virtual void SetUp() -- { -- int i; -- -- UUT = GetParam(); -- memset(input, 0, sizeof(input)); -- /* Set up guard blocks */ -- for(i=0; i<256; i++) -- output[i] = ((i&0xF)<4&&(i<64))?0:-1; -- } -- -- idct_fn_t UUT; -- short input[16]; -- unsigned char output[256]; -- unsigned char predict[256]; --}; -- --TEST_P(IDCTTest, TestGuardBlocks) --{ -- int i; -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(0, output[i]) << i; -- else -- EXPECT_EQ(255, output[i]); --} -- --TEST_P(IDCTTest, TestAllZeros) --{ -- int i; -- -- UUT(input, output, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(0, output[i]) << "i==" << i; -- else -- EXPECT_EQ(255, output[i]) << "i==" << i; --} -- --TEST_P(IDCTTest, TestAllOnes) --{ -- int i; -- -- input[0] = 4; -- UUT(input, output, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(1, output[i]) << "i==" << i; -- else -- EXPECT_EQ(255, output[i]) << "i==" << i; --} -- --TEST_P(IDCTTest, TestAddOne) --{ -- int i; -- -- for(i=0; i<256; i++) -- predict[i] = i; -- -- input[0] = 4; -- UUT(input, predict, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(i+1, output[i]) << "i==" << i; -- else -- EXPECT_EQ(255, output[i]) << "i==" << i; --} -- --TEST_P(IDCTTest, TestWithData) --{ -- int i; -- -- for(i=0; i<16; i++) -- input[i] = i; -- -- UUT(input, output, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) > 3 || i>63) -- EXPECT_EQ(255, output[i]) << "i==" << i; -- else if(i == 0) -- EXPECT_EQ(11, output[i]) << "i==" << i; -- else if(i == 34) -- EXPECT_EQ(1, output[i]) << "i==" << i; -- else if(i == 2 || i == 17 || i == 32) -- EXPECT_EQ(3, output[i]) << "i==" << i; -- else -- EXPECT_EQ(0, output[i]) << "i==" << i; --} --} -diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c -index 3f05efe..41b4f12 100644 ---- a/vp8/common/loopfilter.c -+++ b/vp8/common/loopfilter.c -@@ -196,18 +196,122 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, - } - } - --void vp8_loop_filter_frame --( -- VP8_COMMON *cm, -- MACROBLOCKD *mbd --) -+ -+void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, -+ int 
mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr) - { -- YV12_BUFFER_CONFIG *post = cm->frame_to_show; -+ int mb_col; -+ int filter_level; - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; -- - FRAME_TYPE frame_type = cm->frame_type; - -+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) -+ { -+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED && -+ mode_info_context->mbmi.mode != SPLITMV && -+ mode_info_context->mbmi.mb_skip_coeff); -+ -+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; -+ const int seg = mode_info_context->mbmi.segment_id; -+ const int ref_frame = mode_info_context->mbmi.ref_frame; -+ -+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; -+ -+ if (filter_level) -+ { -+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; -+ lfi.mblim = lfi_n->mblim[filter_level]; -+ lfi.blim = lfi_n->blim[filter_level]; -+ lfi.lim = lfi_n->lim[filter_level]; -+ lfi.hev_thr = lfi_n->hev_thr[hev_index]; -+ -+ if (mb_col > 0) -+ vp8_loop_filter_mbv -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ -+ if (!skip_lf) -+ vp8_loop_filter_bv -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ -+ /* don't apply across umv border */ -+ if (mb_row > 0) -+ vp8_loop_filter_mbh -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ -+ if (!skip_lf) -+ vp8_loop_filter_bh -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ } -+ -+ y_ptr += 16; -+ u_ptr += 8; -+ v_ptr += 8; -+ -+ mode_info_context++; /* step to next MB */ -+ } -+ -+} -+ -+void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, -+ int mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr) -+{ -+ int mb_col; -+ int filter_level; -+ loop_filter_info_n *lfi_n = &cm->lf_info; -+ -+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) -+ { -+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED && -+ mode_info_context->mbmi.mode != SPLITMV && -+ mode_info_context->mbmi.mb_skip_coeff); -+ -+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; -+ const int seg = mode_info_context->mbmi.segment_id; -+ const int ref_frame = mode_info_context->mbmi.ref_frame; -+ -+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; -+ -+ if (filter_level) -+ { -+ if (mb_col > 0) -+ vp8_loop_filter_simple_mbv -+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]); -+ -+ if (!skip_lf) -+ vp8_loop_filter_simple_bv -+ (y_ptr, post_ystride, lfi_n->blim[filter_level]); -+ -+ /* don't apply across umv border */ -+ if (mb_row > 0) -+ vp8_loop_filter_simple_mbh -+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]); -+ -+ if (!skip_lf) -+ vp8_loop_filter_simple_bh -+ (y_ptr, post_ystride, lfi_n->blim[filter_level]); -+ } -+ -+ y_ptr += 16; -+ u_ptr += 8; -+ v_ptr += 8; -+ -+ mode_info_context++; /* step to next MB */ -+ } -+ -+} -+void vp8_loop_filter_frame(VP8_COMMON *cm, -+ MACROBLOCKD *mbd, -+ int frame_type) -+{ -+ YV12_BUFFER_CONFIG *post = cm->frame_to_show; -+ loop_filter_info_n *lfi_n = &cm->lf_info; -+ loop_filter_info lfi; -+ - int mb_row; - int mb_col; - int mb_rows = cm->mb_rows; -diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h -index 0fa8375..b3af2d6 100644 ---- a/vp8/common/loopfilter.h -+++ b/vp8/common/loopfilter.h -@@ -69,6 +69,7 @@ typedef void loop_filter_uvfunction - /* assorted loopfilter functions which get used elsewhere */ - struct VP8Common; - struct 
macroblockd; -+struct modeinfo; - - void vp8_loop_filter_init(struct VP8Common *cm); - -@@ -76,7 +77,8 @@ void vp8_loop_filter_frame_init(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - --void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd); -+void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, -+ int frame_type); - - void vp8_loop_filter_partial_frame(struct VP8Common *cm, - struct macroblockd *mbd, -@@ -89,4 +91,15 @@ void vp8_loop_filter_frame_yonly(struct VP8Common *cm, - void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, - int sharpness_lvl); - -+void vp8_loop_filter_row_normal(struct VP8Common *cm, -+ struct modeinfo *mode_info_context, -+ int mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr); -+ -+void vp8_loop_filter_row_simple(struct VP8Common *cm, -+ struct modeinfo *mode_info_context, -+ int mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr); - #endif -diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c -index ca67e91..3dff150 100644 ---- a/vp8/common/mfqe.c -+++ b/vp8/common/mfqe.c -@@ -160,9 +160,9 @@ static void multiframe_quality_enhance_block - vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse)); - vsad = (sse + 32)>>6; - #else -- sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8; -- usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, INT_MAX)+32)>>6; -- vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, INT_MAX)+32)>>6; -+ sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8; -+ usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6; -+ vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6; - #endif - } - else /* if (blksize == 8) */ -@@ -177,16 +177,16 @@ static void multiframe_quality_enhance_block - vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse)); - vsad = (sse + 8)>>4; - #else -- sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6; -- usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, INT_MAX)+8)>>4; -- vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, INT_MAX)+8)>>4; -+ sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6; -+ usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4; -+ vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4; - #endif - } - - actrisk = (actd > act * 5); - -- /* thr = qdiff/8 + log2(act) + log4(qprev) */ -- thr = (qdiff >> 3); -+ /* thr = qdiff/16 + log2(act) + log4(qprev) */ -+ thr = (qdiff >> 4); - while (actd >>= 1) thr++; - while (qprev >>= 2) thr++; - -diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c -new file mode 100644 -index 0000000..6823325 ---- /dev/null -+++ b/vp8/common/mips/dspr2/dequantize_dspr2.c -@@ -0,0 +1,33 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+ -+ -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vpx_mem/vpx_mem.h" -+ -+#if HAVE_DSPR2 -+void vp8_dequant_idct_add_dspr2(short *input, short *dq, -+ unsigned char *dest, int stride) -+{ -+ int i; -+ -+ for (i = 0; i < 16; i++) -+ { -+ input[i] = dq[i] * input[i]; -+ } -+ -+ vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride); -+ -+ vpx_memset(input, 0, 32); -+ -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c -new file mode 100644 -index 0000000..71fdcd7 ---- /dev/null -+++ b/vp8/common/mips/dspr2/filter_dspr2.c -@@ -0,0 +1,2823 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include -+#include "vpx_rtcd.h" -+#include "vpx_ports/mem.h" -+ -+#if HAVE_DSPR2 -+#define CROP_WIDTH 256 -+unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; -+ -+static const unsigned short sub_pel_filterss[8][3] = -+{ -+ { 0, 0, 0}, -+ { 0, 0x0601, 0x7b0c}, -+ { 0x0201, 0x0b08, 0x6c24}, -+ { 0, 0x0906, 0x5d32}, -+ { 0x0303, 0x1010, 0x4d4d}, -+ { 0, 0x0609, 0x325d}, -+ { 0x0102, 0x080b, 0x246c}, -+ { 0, 0x0106, 0x0c7b}, -+}; -+ -+ -+static const int sub_pel_filters_int[8][3] = -+{ -+ { 0, 0, 0}, -+ { 0x0000fffa, 0x007b000c, 0xffff0000}, -+ { 0x0002fff5, 0x006c0024, 0xfff80001}, -+ { 0x0000fff7, 0x005d0032, 0xfffa0000}, -+ { 0x0003fff0, 0x004d004d, 0xfff00003}, -+ { 0x0000fffa, 0x0032005d, 0xfff70000}, -+ { 0x0001fff8, 0x0024006c, 0xfff50002}, -+ { 0x0000ffff, 0x000c007b, 0xfffa0000}, -+}; -+ -+ -+static const int sub_pel_filters_inv[8][3] = -+{ -+ { 0, 0, 0}, -+ { 0xfffa0000, 0x000c007b, 0x0000ffff}, -+ { 0xfff50002, 0x0024006c, 0x0001fff8}, -+ { 0xfff70000, 0x0032005d, 0x0000fffa}, -+ { 0xfff00003, 0x004d004d, 0x0003fff0}, -+ { 0xfffa0000, 0x005d0032, 0x0000fff7}, -+ { 0xfff80001, 0x006c0024, 0x0002fff5}, -+ { 0xffff0000, 0x007b000c, 0x0000fffa}, -+}; -+ -+ -+static const int sub_pel_filters_int_tap_4[8][2] = -+{ -+ { 0, 0}, -+ { 0xfffa007b, 0x000cffff}, -+ { 0, 0}, -+ { 0xfff7005d, 0x0032fffa}, -+ { 0, 0}, -+ { 0xfffa0032, 0x005dfff7}, -+ { 0, 0}, -+ { 0xffff000c, 0x007bfffa}, -+}; -+ -+ -+static const int sub_pel_filters_inv_tap_4[8][2] = -+{ -+ { 0, 0}, -+ { 0x007bfffa, 0xffff000c}, -+ { 0, 0}, -+ { 0x005dfff7, 0xfffa0032}, -+ { 0, 0}, -+ { 0x0032fffa, 0xfff7005d}, -+ { 0, 0}, -+ { 0x000cffff, 0xfffa007b}, -+}; -+ -+inline void prefetch_load(unsigned char *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+ -+inline void prefetch_store(unsigned char *dst) -+{ -+ __asm__ __volatile__ ( -+ "pref 1, 0(%[dst]) \n\t" -+ : -+ : [dst] "r" (dst) -+ ); -+} -+ -+void dsputil_static_init(void) -+{ -+ int i; -+ -+ for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i; -+ -+ for (i = 0; i < CROP_WIDTH; i++) -+ { -+ ff_cropTbl[i] = 0; -+ ff_cropTbl[i + CROP_WIDTH + 256] = 255; -+ } -+} -+ -+void vp8_filter_block2d_first_pass_4 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT dst_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_height, -+ int xoffset, -+ int pitch -+) -+{ -+ unsigned int i; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a = 64; -+ int 
vector1b, vector2b, vector3b; -+ unsigned int tp1, tp2, tn1, tn2; -+ unsigned int p1, p2, p3; -+ unsigned int n1, n2, n3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector3b = sub_pel_filters_inv[xoffset][2]; -+ -+ /* if (xoffset == 0) we don't need any filtering */ -+ if (vector3b == 0) -+ { -+ for (i = 0; i < output_height; i++) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ dst_ptr[0] = src_ptr[0]; -+ dst_ptr[1] = src_ptr[1]; -+ dst_ptr[2] = src_ptr[2]; -+ dst_ptr[3] = src_ptr[3]; -+ -+ /* next row... */ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += 4; -+ } -+ } -+ else -+ { -+ if (vector3b > 65536) -+ { -+ /* 6 tap filter */ -+ -+ vector1b = sub_pel_filters_inv[xoffset][0]; -+ vector2b = sub_pel_filters_inv[xoffset][1]; -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ for (i = output_height; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -2(%[src_ptr]) \n\t" -+ "ulw %[tp2], 2(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p1], %[tp2] \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" -+ -+ /* odd 1. pixel */ -+ "ulw %[tn2], 3(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n1], %[tn2] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ /* clamp */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ -+ /* store bytes */ -+ "sb %[tp1], 0(%[dst_ptr]) \n\t" -+ "sb %[tn1], 1(%[dst_ptr]) \n\t" -+ "sb %[tp2], 2(%[dst_ptr]) \n\t" -+ "sb %[n2], 3(%[dst_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), -+ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), -+ [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2), -+ [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), -+ [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* Next row... 
*/ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; -+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; -+ -+ for (i = output_height; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -1(%[src_ptr]) \n\t" -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 1. pixel */ -+ "srl %[tn1], %[tp2], 8 \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn1] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 0(%[dst_ptr]) \n\t" -+ "sb %[tn1], 1(%[dst_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[tp2], 2(%[dst_ptr]) \n\t" -+ "sb %[n2], 3(%[dst_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), -+ [src_ptr] "r" (src_ptr) -+ ); -+ /* Next row... */ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ } -+} -+ -+void vp8_filter_block2d_first_pass_8_all -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT dst_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_height, -+ int xoffset, -+ int pitch -+) -+{ -+ unsigned int i; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a = 64; -+ unsigned int vector1b, vector2b, vector3b; -+ unsigned int tp1, tp2, tn1, tn2; -+ unsigned int p1, p2, p3, p4; -+ unsigned int n1, n2, n3, n4; -+ -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ /* if (xoffset == 0) we don't need any filtering */ -+ if (xoffset == 0) -+ { -+ for (i = 0; i < output_height; i++) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ dst_ptr[0] = src_ptr[0]; -+ dst_ptr[1] = src_ptr[1]; -+ dst_ptr[2] = src_ptr[2]; -+ dst_ptr[3] = src_ptr[3]; -+ dst_ptr[4] = src_ptr[4]; -+ dst_ptr[5] = src_ptr[5]; -+ dst_ptr[6] = src_ptr[6]; -+ dst_ptr[7] = src_ptr[7]; -+ -+ /* next row... 
*/ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += 8; -+ } -+ } -+ else -+ { -+ vector3b = sub_pel_filters_inv[xoffset][2]; -+ -+ if (vector3b > 65536) -+ { -+ /* 6 tap filter */ -+ -+ vector1b = sub_pel_filters_inv[xoffset][0]; -+ vector2b = sub_pel_filters_inv[xoffset][1]; -+ -+ for (i = output_height; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -2(%[src_ptr]) \n\t" -+ "ulw %[tp2], 2(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p1], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" -+ -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ "ulw %[tn2], 3(%[src_ptr]) \n\t" -+ -+ /* odd 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n1], %[tn2] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" -+ "ulw %[tp1], 6(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p2], %[tp1] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[0] = cm[Temp1]; -+ dst_ptr[1] = cm[Temp2]; -+ dst_ptr[2] = cm[Temp3]; -+ dst_ptr[3] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 3. pixel */ -+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" -+ -+ /* even 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p4], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" -+ -+ "ulw %[tn1], 7(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 3. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 4. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n4], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [n2] "=&r" (n2), -+ [p4] "=&r" (p4), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2), -+ [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), -+ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[4] = cm[Temp1]; -+ dst_ptr[5] = cm[Temp2]; -+ dst_ptr[6] = cm[Temp3]; -+ dst_ptr[7] = cm[Temp4]; -+ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; -+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; -+ -+ for (i = output_height; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -1(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "preceu.ph.qbl %[p4], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ "balign %[tp2], %[tp1], 3 \n\t" -+ -+ /* odd 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ "ulw %[tn2], 4(%[src_ptr]) \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "preceu.ph.qbl %[n4], %[tn2] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "ulw %[tp1], 7(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), -+ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), -+ [p3] "=&r" (p3), [p4] "=&r" (p4), [n1] "=&r" (n1), -+ [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[0] = cm[Temp1]; -+ dst_ptr[1] = cm[Temp2]; -+ dst_ptr[2] = cm[Temp3]; -+ dst_ptr[3] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 3. pixel */ -+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" -+ -+ /* even 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[p2], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 3. 
pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" -+ "ulw %[tn1], 8(%[src_ptr]) \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4), -+ [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr), -+ [n3] "r" (n3), [n4] "r" (n4) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[4] = cm[Temp1]; -+ dst_ptr[5] = cm[Temp2]; -+ dst_ptr[6] = cm[Temp3]; -+ dst_ptr[7] = cm[Temp4]; -+ -+ /* next row... */ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_first_pass16_6tap -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT dst_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_height, -+ int xoffset, -+ int pitch -+) -+{ -+ unsigned int i; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a; -+ unsigned int vector1b, vector2b, vector3b; -+ unsigned int tp1, tp2, tn1, tn2; -+ unsigned int p1, p2, p3, p4; -+ unsigned int n1, n2, n3, n4; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector1b = sub_pel_filters_inv[xoffset][0]; -+ vector2b = sub_pel_filters_inv[xoffset][1]; -+ vector3b = sub_pel_filters_inv[xoffset][2]; -+ vector4a = 64; -+ -+ for (i = output_height; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -2(%[src_ptr]) \n\t" -+ "ulw %[tp2], 2(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p1], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" -+ -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "ulw %[tn2], 3(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" -+ -+ /* odd 2. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n1], %[tn2] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" -+ "ulw %[tp1], 6(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p2], %[tp1] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[0] = cm[Temp1]; -+ dst_ptr[1] = cm[Temp2]; -+ dst_ptr[2] = cm[Temp3]; -+ dst_ptr[3] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 3. pixel */ -+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" -+ -+ /* even 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p4], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" -+ "ulw %[tn1], 7(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 3. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n4], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" -+ "ulw %[tp2], 10(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp2] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2), -+ [p4] "=&r" (p4), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1), -+ [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b), -+ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[4] = cm[Temp1]; -+ dst_ptr[5] = cm[Temp2]; -+ dst_ptr[6] = cm[Temp3]; -+ dst_ptr[7] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 5. pixel */ -+ "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" -+ -+ /* even 6. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t" -+ -+ "ulw %[tn1], 11(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 5. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 6. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n3], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t" -+ "ulw %[tp1], 14(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p4], %[tp1] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), -+ [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2), -+ [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[8] = cm[Temp1]; -+ dst_ptr[9] = cm[Temp2]; -+ dst_ptr[10] = cm[Temp3]; -+ dst_ptr[11] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 7. pixel */ -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t" -+ -+ /* even 8. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t" -+ "ulw %[tn1], 15(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 7. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n4], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 8. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[p2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 12(%[dst_ptr]) \n\t" -+ "sb %[tn1], 13(%[dst_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[p2], 14(%[dst_ptr]) \n\t" -+ "sb %[n2], 15(%[dst_ptr]) \n\t" -+ -+ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3), -+ [n3] "r" (n3), [src_ptr] "r" (src_ptr), -+ [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) -+ ); -+ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+} -+ -+ -+void vp8_filter_block2d_first_pass16_0 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ unsigned int src_pixels_per_line -+) -+{ -+ int Temp1, Temp2, Temp3, Temp4; -+ int i; -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_store(output_ptr + 32); -+ -+ /* copy memory from src buffer to dst buffer */ -+ for (i = 0; i < 7; i++) -+ { -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "ulw %[Temp3], 8(%[src_ptr]) \n\t" -+ "ulw %[Temp4], 12(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[output_ptr]) \n\t" -+ "sw %[Temp2], 4(%[output_ptr]) \n\t" -+ "sw 
%[Temp3], 8(%[output_ptr]) \n\t" -+ "sw %[Temp4], 12(%[output_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) -+ : [src_pixels_per_line] "r" (src_pixels_per_line), -+ [output_ptr] "r" (output_ptr) -+ ); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "ulw %[Temp3], 8(%[src_ptr]) \n\t" -+ "ulw %[Temp4], 12(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[output_ptr]) \n\t" -+ "sw %[Temp2], 20(%[output_ptr]) \n\t" -+ "sw %[Temp3], 24(%[output_ptr]) \n\t" -+ "sw %[Temp4], 28(%[output_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) -+ : [src_pixels_per_line] "r" (src_pixels_per_line), -+ [output_ptr] "r" (output_ptr) -+ ); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "ulw %[Temp3], 8(%[src_ptr]) \n\t" -+ "ulw %[Temp4], 12(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[output_ptr]) \n\t" -+ "sw %[Temp2], 36(%[output_ptr]) \n\t" -+ "sw %[Temp3], 40(%[output_ptr]) \n\t" -+ "sw %[Temp4], 44(%[output_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) -+ : [src_pixels_per_line] "r" (src_pixels_per_line), -+ [output_ptr] "r" (output_ptr) -+ ); -+ -+ output_ptr += 48; -+ } -+} -+ -+ -+void vp8_filter_block2d_first_pass16_4tap -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_width, -+ unsigned int output_height, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int pitch -+) -+{ -+ unsigned int i, j; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a; -+ int vector1b, vector2b; -+ unsigned int tp1, tp2, tp3, tn1; -+ unsigned int p1, p2, p3; -+ unsigned int n1, n2, n3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; -+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; -+ -+ /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */ -+ if (yoffset == 0) -+ { -+ output_height -= 5; -+ src_ptr += (src_pixels_per_line + src_pixels_per_line); -+ -+ for (i = output_height; i--;) -+ { -+ __asm__ __volatile__ ( -+ "ulw %[tp3], -1(%[src_ptr]) \n\t" -+ : [tp3] "=&r" (tp3) -+ : [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* processing 4 adjacent pixels */ -+ for (j = 0; j < 16; j += 4) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ "move %[tp1], %[tp3] \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "move %[tp3], %[tp2] \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extr.w %[Temp1], $ac3, 7 \n\t" -+ -+ /* odd 1. 
pixel */ -+ "ulw %[tn1], 4(%[src_ptr]) \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn1] \n\t" -+ "extr.w %[Temp3], $ac2, 7 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "extr.w %[Temp2], $ac3, 7 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "extr.w %[Temp4], $ac2, 7 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 0(%[dst_ptr]) \n\t" -+ "sb %[tn1], 1(%[dst_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[tp2], 2(%[dst_ptr]) \n\t" -+ "sb %[n2], 3(%[dst_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3), -+ [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr += 4; -+ } -+ -+ /* Next row... */ -+ src_ptr += src_pixels_per_line - 16; -+ dst_ptr += pitch; -+ } -+ } -+ else -+ { -+ for (i = output_height; i--;) -+ { -+ /* processing 4 adjacent pixels */ -+ for (j = 0; j < 16; j += 4) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -1(%[src_ptr]) \n\t" -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extr.w %[Temp1], $ac3, 7 \n\t" -+ -+ /* odd 1. pixel */ -+ "ulw %[tn1], 4(%[src_ptr]) \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn1] \n\t" -+ "extr.w %[Temp3], $ac2, 7 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ -+ /* odd 2. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "extr.w %[Temp2], $ac3, 7 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "extr.w %[Temp4], $ac2, 7 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 0(%[output_ptr]) \n\t" -+ "sb %[tn1], 1(%[output_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[tp2], 2(%[output_ptr]) \n\t" -+ "sb %[n2], 3(%[output_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), -+ [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr += 4; -+ } -+ -+ /* next row... */ -+ src_ptr += src_pixels_per_line; -+ output_ptr += output_width; -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_second_pass4 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ int output_pitch, -+ int yoffset -+) -+{ -+ unsigned int i; -+ -+ int Temp1, Temp2, Temp3, Temp4; -+ unsigned int vector1b, vector2b, vector3b, vector4a; -+ -+ unsigned char src_ptr_l2; -+ unsigned char src_ptr_l1; -+ unsigned char src_ptr_0; -+ unsigned char src_ptr_r1; -+ unsigned char src_ptr_r2; -+ unsigned char src_ptr_r3; -+ -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ /* load filter coefficients */ -+ vector1b = sub_pel_filterss[yoffset][0]; -+ vector2b = sub_pel_filterss[yoffset][2]; -+ vector3b = sub_pel_filterss[yoffset][1]; -+ -+ if (vector1b) -+ { -+ /* 6 tap filter */ -+ -+ for (i = 2; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ /* do not allow compiler to reorder instructions */ -+ __asm__ __volatile__ ( -+ ".set noreorder \n\t" -+ : -+ : -+ ); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ 
"lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ output_ptr += output_pitch; -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 
\n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ for (i = 2; i--;) -+ { -+ /* do not allow compiler to reorder instructions */ -+ __asm__ __volatile__ ( -+ ".set noreorder \n\t" -+ : -+ : -+ ); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr 
$ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ output_ptr += output_pitch; -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_second_pass_8 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ int output_pitch, -+ unsigned int output_height, -+ unsigned int output_width, -+ unsigned int yoffset -+) -+{ -+ unsigned int i; -+ -+ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; -+ unsigned int vector1b, vector2b, vector3b, vector4a; -+ -+ unsigned char src_ptr_l2; -+ unsigned char src_ptr_l1; -+ 
unsigned char src_ptr_0; -+ unsigned char src_ptr_r1; -+ unsigned char src_ptr_r2; -+ unsigned char src_ptr_r3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ vector1b = sub_pel_filterss[yoffset][0]; -+ vector2b = sub_pel_filterss[yoffset][2]; -+ vector3b = sub_pel_filterss[yoffset][1]; -+ -+ if (vector1b) -+ { -+ /* 6 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ for (i = output_height; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* apply filter with vectors pairs */ -+ 
__asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp6], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp7], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac1, 9 \n\t" -+ -+ : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5), -+ [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ output_ptr[4] = cm[Temp5]; -+ output_ptr[5] = cm[Temp6]; -+ output_ptr[6] = cm[Temp7]; -+ output_ptr[7] = cm[Temp8]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ 
for (i = output_height; i--;) -+ { -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ : [Temp1] "=r" (Temp1), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr_l1 = src_ptr[-6]; -+ src_ptr_0 = src_ptr[2]; -+ src_ptr_r1 = src_ptr[10]; -+ src_ptr_r2 = src_ptr[18]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac0 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ : [Temp2] "=r" (Temp2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-5]; -+ src_ptr_0 = src_ptr[3]; -+ src_ptr_r1 = src_ptr[11]; -+ src_ptr_r2 = src_ptr[19]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac1 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ : [Temp3] "=r" (Temp3) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-4]; -+ src_ptr_0 = src_ptr[4]; -+ src_ptr_r1 = src_ptr[12]; -+ src_ptr_r2 = src_ptr[20]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp4] "=r" (Temp4) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-3]; -+ src_ptr_0 = src_ptr[5]; -+ 
src_ptr_r1 = src_ptr[13]; -+ src_ptr_r2 = src_ptr[21]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac3 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ : [Temp5] "=&r" (Temp5) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-2]; -+ src_ptr_0 = src_ptr[6]; -+ src_ptr_r1 = src_ptr[14]; -+ src_ptr_r2 = src_ptr[22]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac0 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp6], $ac3, 9 \n\t" -+ -+ : [Temp6] "=r" (Temp6) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-1]; -+ src_ptr_0 = src_ptr[7]; -+ src_ptr_r1 = src_ptr[15]; -+ src_ptr_r2 = src_ptr[23]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac1 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp7], $ac0, 9 \n\t" -+ "extp %[Temp8], $ac1, 9 \n\t" -+ -+ : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ output_ptr[4] = cm[Temp5]; -+ output_ptr[5] = cm[Temp6]; -+ output_ptr[6] = cm[Temp7]; -+ output_ptr[7] = cm[Temp8]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_second_pass161 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ int output_pitch, -+ const unsigned short *vp8_filter -+) -+{ -+ unsigned int i, j; -+ -+ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; -+ unsigned int vector4a; -+ unsigned int vector1b, vector2b, vector3b; -+ -+ unsigned char src_ptr_l2; -+ unsigned char src_ptr_l1; -+ unsigned char src_ptr_0; -+ unsigned char src_ptr_r1; -+ unsigned char src_ptr_r2; -+ unsigned char src_ptr_r3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ vector1b = vp8_filter[0]; -+ vector2b = vp8_filter[2]; -+ vector3b = vp8_filter[1]; -+ -+ if (vector1b == 0) -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 16); -+ -+ for (i = 16; i--;) -+ { -+ /* unrolling for loop */ -+ for (j = 0; j < 16; j += 8) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], 
%[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp3], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp4], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp6], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp7], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac3, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), -+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), -+ [Temp7] "=&r" (Temp7), [Temp8] 
"=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[j] = cm[Temp1]; -+ output_ptr[j + 1] = cm[Temp2]; -+ output_ptr[j + 2] = cm[Temp3]; -+ output_ptr[j + 3] = cm[Temp4]; -+ output_ptr[j + 4] = cm[Temp5]; -+ output_ptr[j + 5] = cm[Temp6]; -+ output_ptr[j + 6] = cm[Temp7]; -+ output_ptr[j + 7] = cm[Temp8]; -+ -+ src_ptr += 8; -+ } -+ -+ output_ptr += output_pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 16); -+ -+ /* unroll for loop */ -+ for (i = 16; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp2], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp3], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ 
"lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp4], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp6], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp7], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac3, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), -+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), -+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ output_ptr[4] = cm[Temp5]; -+ output_ptr[5] = cm[Temp6]; -+ output_ptr[6] = cm[Temp7]; -+ output_ptr[7] = cm[Temp8]; -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t" -+ "lbu 
%[src_ptr_r2], 40(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 42(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp2], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp3], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp4], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, 
%[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 30(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp6], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp7], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac3, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), -+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), -+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr += 16; -+ output_ptr[8] = cm[Temp1]; -+ output_ptr[9] = cm[Temp2]; -+ output_ptr[10] = cm[Temp3]; -+ output_ptr[11] = cm[Temp4]; -+ output_ptr[12] = cm[Temp5]; -+ output_ptr[13] = cm[Temp6]; -+ output_ptr[14] = cm[Temp7]; -+ output_ptr[15] = cm[Temp8]; -+ -+ output_ptr += output_pitch; -+ } -+ } -+} -+ -+ -+void vp8_sixtap_predict4x4_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ unsigned char FData[9 * 4]; /* Temp data bufffer used in filtering */ -+ unsigned int pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ /* First filter 1-D horizontally... */ -+ vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData, -+ src_pixels_per_line, 9, xoffset, 4); -+ /* then filter verticaly... 
*/ -+ vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset); -+ } -+ else -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line, -+ 4, xoffset, dst_pitch); -+} -+ -+ -+void vp8_sixtap_predict8x8_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ -+ unsigned char FData[13 * 8]; /* Temp data bufffer used in filtering */ -+ unsigned int pos, Temp1, Temp2; -+ -+ pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ -+ src_ptr = src_ptr - (2 * src_pixels_per_line); -+ -+ if (xoffset) -+ /* filter 1-D horizontally... */ -+ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, -+ 13, xoffset, 8); -+ -+ else -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 2 * src_pixels_per_line); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[FData]) \n\t" -+ "sw %[Temp2], 4(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[FData]) \n\t" -+ "sw %[Temp2], 12(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[FData]) \n\t" -+ "sw %[Temp2], 20(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[FData]) \n\t" -+ "sw %[Temp2], 28(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[FData]) \n\t" -+ "sw %[Temp2], 36(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 40(%[FData]) \n\t" -+ "sw %[Temp2], 44(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 48(%[FData]) \n\t" -+ "sw %[Temp2], 52(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 56(%[FData]) \n\t" -+ "sw %[Temp2], 60(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 64(%[FData]) \n\t" -+ "sw %[Temp2], 68(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 72(%[FData]) \n\t" -+ "sw %[Temp2], 76(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 80(%[FData]) \n\t" -+ "sw %[Temp2], 84(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 88(%[FData]) 
\n\t" -+ "sw %[Temp2], 92(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 96(%[FData]) \n\t" -+ "sw %[Temp2], 100(%[FData]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [FData] "r" (FData), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ -+ /* filter verticaly... */ -+ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset); -+ } -+ -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ else -+ { -+ if (xoffset) -+ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, -+ 8, xoffset, dst_pitch); -+ -+ else -+ { -+ /* copy from src buffer to dst buffer */ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 4(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 12(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 20(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 28(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 36(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 40(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 44(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 48(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 52(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 56(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 60(%[dst_ptr]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ } -+} -+ -+ -+void vp8_sixtap_predict8x4_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ unsigned char FData[9 * 8]; /* Temp data bufffer used in filtering */ -+ unsigned int pos, Temp1, Temp2; -+ -+ pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ -+ src_ptr = src_ptr - (2 * src_pixels_per_line); -+ -+ if (xoffset) -+ /* filter 1-D horizontally... 
*/ -+ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, -+ 9, xoffset, 8); -+ -+ else -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 2 * src_pixels_per_line); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[FData]) \n\t" -+ "sw %[Temp2], 4(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[FData]) \n\t" -+ "sw %[Temp2], 12(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[FData]) \n\t" -+ "sw %[Temp2], 20(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[FData]) \n\t" -+ "sw %[Temp2], 28(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[FData]) \n\t" -+ "sw %[Temp2], 36(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 40(%[FData]) \n\t" -+ "sw %[Temp2], 44(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 48(%[FData]) \n\t" -+ "sw %[Temp2], 52(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 56(%[FData]) \n\t" -+ "sw %[Temp2], 60(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 64(%[FData]) \n\t" -+ "sw %[Temp2], 68(%[FData]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [FData] "r" (FData), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ -+ /* filter verticaly... 
*/ -+ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset); -+ } -+ -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ else -+ { -+ if (xoffset) -+ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, -+ 4, xoffset, dst_pitch); -+ -+ else -+ { -+ /* copy from src buffer to dst buffer */ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 4(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 12(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 20(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 28(%[dst_ptr]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ } -+} -+ -+ -+void vp8_sixtap_predict16x16_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ const unsigned short *VFilter; -+ unsigned char FData[21 * 16]; /* Temp data bufffer used in filtering */ -+ unsigned int pos; -+ -+ VFilter = sub_pel_filterss[yoffset]; -+ -+ pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ -+ src_ptr = src_ptr - (2 * src_pixels_per_line); -+ -+ switch (xoffset) -+ { -+ /* filter 1-D horizontally... */ -+ case 2: -+ case 4: -+ case 6: -+ /* 6 tap filter */ -+ vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line, -+ 21, xoffset, 16); -+ break; -+ -+ case 0: -+ /* only copy buffer */ -+ vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line); -+ break; -+ -+ case 1: -+ case 3: -+ case 5: -+ case 7: -+ /* 4 tap filter */ -+ vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16, -+ 21, xoffset, yoffset, dst_ptr, dst_pitch); -+ break; -+ } -+ -+ /* filter verticaly... */ -+ vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter); -+ } -+ else -+ { -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ switch (xoffset) -+ { -+ case 2: -+ case 4: -+ case 6: -+ /* 6 tap filter */ -+ vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line, -+ 16, xoffset, dst_pitch); -+ break; -+ -+ case 1: -+ case 3: -+ case 5: -+ case 7: -+ /* 4 tap filter */ -+ vp8_filter_block2d_first_pass16_4tap(src_ptr, dst_ptr, src_pixels_per_line, 16, -+ 21, xoffset, yoffset, dst_ptr, dst_pitch); -+ break; -+ } -+ } -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c -new file mode 100644 -index 0000000..1e0ebd1 ---- /dev/null -+++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
-+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+ -+#if HAVE_DSPR2 -+ -+void vp8_dequant_idct_add_y_block_dspr2 -+(short *q, short *dq, -+ unsigned char *dst, int stride, char *eobs) -+{ -+ int i, j; -+ -+ for (i = 0; i < 4; i++) -+ { -+ for (j = 0; j < 4; j++) -+ { -+ if (*eobs++ > 1) -+ vp8_dequant_idct_add_dspr2(q, dq, dst, stride); -+ else -+ { -+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride); -+ ((int *)q)[0] = 0; -+ } -+ -+ q += 16; -+ dst += 4; -+ } -+ -+ dst += 4 * stride - 16; -+ } -+} -+ -+void vp8_dequant_idct_add_uv_block_dspr2 -+(short *q, short *dq, -+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ { -+ for (j = 0; j < 2; j++) -+ { -+ if (*eobs++ > 1) -+ vp8_dequant_idct_add_dspr2(q, dq, dstu, stride); -+ else -+ { -+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride); -+ ((int *)q)[0] = 0; -+ } -+ -+ q += 16; -+ dstu += 4; -+ } -+ -+ dstu += 4 * stride - 8; -+ } -+ -+ for (i = 0; i < 2; i++) -+ { -+ for (j = 0; j < 2; j++) -+ { -+ if (*eobs++ > 1) -+ vp8_dequant_idct_add_dspr2(q, dq, dstv, stride); -+ else -+ { -+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride); -+ ((int *)q)[0] = 0; -+ } -+ -+ q += 16; -+ dstv += 4; -+ } -+ -+ dstv += 4 * stride - 8; -+ } -+} -+ -+#endif -+ -diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c -new file mode 100644 -index 0000000..25b7936 ---- /dev/null -+++ b/vp8/common/mips/dspr2/idctllm_dspr2.c -@@ -0,0 +1,369 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "vpx_rtcd.h" -+ -+#if HAVE_DSPR2 -+#define CROP_WIDTH 256 -+ -+/****************************************************************************** -+ * Notes: -+ * -+ * This implementation makes use of 16 bit fixed point version of two multiply -+ * constants: -+ * 1. sqrt(2) * cos (pi/8) -+ * 2. sqrt(2) * sin (pi/8) -+ * Since the first constant is bigger than 1, to maintain the same 16 bit -+ * fixed point precision as the second one, we use a trick of -+ * x * a = x + x*(a-1) -+ * so -+ * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). 
-+ ****************************************************************************/ -+extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; -+static const int cospi8sqrt2minus1 = 20091; -+static const int sinpi8sqrt2 = 35468; -+ -+inline void prefetch_load_short(short *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr, -+ int pred_stride, unsigned char *dst_ptr, -+ int dst_stride) -+{ -+ int r, c; -+ int a1, b1, c1, d1; -+ short output[16]; -+ short *ip = input; -+ short *op = output; -+ int temp1, temp2; -+ int shortpitch = 4; -+ -+ int c2, d2; -+ int temp3, temp4; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ /* prepare data for load */ -+ prefetch_load_short(ip + 8); -+ -+ /* first loop is unrolled */ -+ a1 = ip[0] + ip[8]; -+ b1 = ip[0] - ip[8]; -+ -+ temp1 = (ip[4] * sinpi8sqrt2) >> 16; -+ temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[12] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[5] * sinpi8sqrt2) >> 16; -+ temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[13] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[0] = a1 + d1; -+ op[12] = a1 - d1; -+ op[4] = b1 + c1; -+ op[8] = b1 - c1; -+ -+ a1 = ip[1] + ip[9]; -+ b1 = ip[1] - ip[9]; -+ -+ op[1] = a1 + d2; -+ op[13] = a1 - d2; -+ op[5] = b1 + c2; -+ op[9] = b1 - c2; -+ -+ a1 = ip[2] + ip[10]; -+ b1 = ip[2] - ip[10]; -+ -+ temp1 = (ip[6] * sinpi8sqrt2) >> 16; -+ temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[14] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[7] * sinpi8sqrt2) >> 16; -+ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[15] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[2] = a1 + d1; -+ op[14] = a1 - d1; -+ op[6] = b1 + c1; -+ op[10] = b1 - c1; -+ -+ a1 = ip[3] + ip[11]; -+ b1 = ip[3] - ip[11]; -+ -+ op[3] = a1 + d2; -+ op[15] = a1 - d2; -+ op[7] = b1 + c2; -+ op[11] = b1 - c2; -+ -+ ip = output; -+ -+ /* prepare data for load */ -+ prefetch_load_short(ip + shortpitch); -+ -+ /* second loop is unrolled */ -+ a1 = ip[0] + ip[2]; -+ b1 = ip[0] - ip[2]; -+ -+ temp1 = (ip[1] * sinpi8sqrt2) >> 16; -+ temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[3] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[5] * sinpi8sqrt2) >> 16; -+ temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[7] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[0] = (a1 + d1 + 4) >> 3; -+ op[3] = (a1 - d1 + 4) >> 3; -+ op[1] = (b1 + c1 + 4) >> 3; -+ op[2] = (b1 - c1 + 4) >> 3; -+ -+ a1 = ip[4] + ip[6]; -+ b1 = ip[4] - ip[6]; -+ -+ op[4] = (a1 + d2 + 4) >> 3; -+ op[7] = (a1 - d2 + 4) >> 3; -+ op[5] = (b1 + c2 + 4) >> 3; -+ op[6] = (b1 - c2 + 4) >> 3; -+ -+ a1 = ip[8] + ip[10]; -+ b1 = ip[8] - ip[10]; -+ -+ temp1 = (ip[9] * sinpi8sqrt2) >> 16; -+ temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[9] + ((ip[9] * 
cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[11] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[13] * sinpi8sqrt2) >> 16; -+ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[15] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[8] = (a1 + d1 + 4) >> 3; -+ op[11] = (a1 - d1 + 4) >> 3; -+ op[9] = (b1 + c1 + 4) >> 3; -+ op[10] = (b1 - c1 + 4) >> 3; -+ -+ a1 = ip[12] + ip[14]; -+ b1 = ip[12] - ip[14]; -+ -+ op[12] = (a1 + d2 + 4) >> 3; -+ op[15] = (a1 - d2 + 4) >> 3; -+ op[13] = (b1 + c2 + 4) >> 3; -+ op[14] = (b1 - c2 + 4) >> 3; -+ -+ ip = output; -+ -+ for (r = 0; r < 4; r++) -+ { -+ for (c = 0; c < 4; c++) -+ { -+ short a = ip[c] + pred_ptr[c] ; -+ dst_ptr[c] = cm[a] ; -+ } -+ -+ ip += 4; -+ dst_ptr += dst_stride; -+ pred_ptr += pred_stride; -+ } -+} -+ -+void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) -+{ -+ int a1; -+ int i, absa1; -+ int t2, vector_a1, vector_a; -+ -+ /* a1 = ((input_dc + 4) >> 3); */ -+ __asm__ __volatile__ ( -+ "addi %[a1], %[input_dc], 4 \n\t" -+ "sra %[a1], %[a1], 3 \n\t" -+ : [a1] "=r" (a1) -+ : [input_dc] "r" (input_dc) -+ ); -+ -+ if (a1 < 0) -+ { -+ /* use quad-byte -+ * input and output memory are four byte aligned -+ */ -+ __asm__ __volatile__ ( -+ "abs %[absa1], %[a1] \n\t" -+ "replv.qb %[vector_a1], %[absa1] \n\t" -+ : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1) -+ : [a1] "r" (a1) -+ ); -+ -+ /* use (a1 - predptr[c]) instead a1 + predptr[c] */ -+ for (i = 4; i--;) -+ { -+ __asm__ __volatile__ ( -+ "lw %[t2], 0(%[pred_ptr]) \n\t" -+ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" -+ "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" -+ "sw %[vector_a], 0(%[dst_ptr]) \n\t" -+ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" -+ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), -+ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr) -+ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1) -+ ); -+ } -+ } -+ else -+ { -+ /* use quad-byte -+ * input and output memory are four byte aligned -+ */ -+ __asm__ __volatile__ ( -+ "replv.qb %[vector_a1], %[a1] \n\t" -+ : [vector_a1] "=r" (vector_a1) -+ : [a1] "r" (a1) -+ ); -+ -+ for (i = 4; i--;) -+ { -+ __asm__ __volatile__ ( -+ "lw %[t2], 0(%[pred_ptr]) \n\t" -+ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" -+ "addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t" -+ "sw %[vector_a], 0(%[dst_ptr]) \n\t" -+ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" -+ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), -+ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr) -+ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1) -+ ); -+ } -+ } -+ -+} -+ -+void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff) -+{ -+ short output[16]; -+ int i; -+ int a1, b1, c1, d1; -+ int a2, b2, c2, d2; -+ short *ip = input; -+ short *op = output; -+ -+ prefetch_load_short(ip); -+ -+ for (i = 4; i--;) -+ { -+ a1 = ip[0] + ip[12]; -+ b1 = ip[4] + ip[8]; -+ c1 = ip[4] - ip[8]; -+ d1 = ip[0] - ip[12]; -+ -+ op[0] = a1 + b1; -+ op[4] = c1 + d1; -+ op[8] = a1 - b1; -+ op[12] = d1 - c1; -+ -+ ip++; -+ op++; -+ } -+ -+ ip = output; -+ op = output; -+ -+ prefetch_load_short(ip); -+ -+ for (i = 4; i--;) -+ { -+ a1 = ip[0] + ip[3] + 3; -+ b1 = ip[1] + ip[2]; -+ c1 = ip[1] - ip[2]; -+ d1 = ip[0] - ip[3] + 3; -+ -+ a2 = a1 + b1; -+ b2 = d1 + c1; -+ c2 = a1 
- b1; -+ d2 = d1 - c1; -+ -+ op[0] = a2 >> 3; -+ op[1] = b2 >> 3; -+ op[2] = c2 >> 3; -+ op[3] = d2 >> 3; -+ -+ ip += 4; -+ op += 4; -+ } -+ -+ for (i = 0; i < 16; i++) -+ { -+ mb_dqcoeff[i * 16] = output[i]; -+ } -+} -+ -+void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff) -+{ -+ int a1; -+ -+ a1 = ((input[0] + 3) >> 3); -+ -+ __asm__ __volatile__ ( -+ "sh %[a1], 0(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 32(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 64(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 96(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 128(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 160(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 192(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 224(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 256(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 288(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 320(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 352(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 384(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 416(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 448(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 480(%[mb_dqcoeff]) \n\t" -+ -+ : -+ : [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff) -+ ); -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c -new file mode 100644 -index 0000000..b8e5e4d ---- /dev/null -+++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c -@@ -0,0 +1,2622 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include -+#include "vpx_rtcd.h" -+#include "vp8/common/onyxc_int.h" -+ -+#if HAVE_DSPR2 -+typedef unsigned char uc; -+ -+/* prefetch data for load */ -+inline void prefetch_load_lf(unsigned char *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+ -+/* prefetch data for store */ -+inline void prefetch_store_lf(unsigned char *dst) -+{ -+ __asm__ __volatile__ ( -+ "pref 1, 0(%[dst]) \n\t" -+ : -+ : [dst] "r" (dst) -+ ); -+} -+ -+/* processing 4 pixels at the same time -+ * compute hev and mask in the same function -+ */ -+static __inline void vp8_filter_mask_vec_mips -+( -+ uint32_t limit, -+ uint32_t flimit, -+ uint32_t p1, -+ uint32_t p0, -+ uint32_t p3, -+ uint32_t p2, -+ uint32_t q0, -+ uint32_t q1, -+ uint32_t q2, -+ uint32_t q3, -+ uint32_t thresh, -+ uint32_t *hev, -+ uint32_t *mask -+) -+{ -+ uint32_t c, r, r3, r_k; -+ uint32_t s1, s2, s3; -+ uint32_t ones = 0xFFFFFFFF; -+ uint32_t hev1; -+ -+ __asm__ __volatile__ ( -+ /* mask |= (abs(p3 - p2) > limit) */ -+ "subu_s.qb %[c], %[p3], %[p2] \n\t" -+ "subu_s.qb %[r_k], %[p2], %[p3] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], $0, %[c] \n\t" -+ -+ /* mask |= (abs(p2 - p1) > limit) */ -+ "subu_s.qb %[c], %[p2], %[p1] \n\t" -+ "subu_s.qb %[r_k], %[p1], %[p2] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ /* mask |= (abs(p1 - p0) > limit) -+ * hev |= (abs(p1 - p0) > thresh) -+ */ -+ "subu_s.qb %[c], %[p1], %[p0] \n\t" -+ "subu_s.qb %[r_k], %[p0], %[p1] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" -+ "or %[r3], $0, %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ /* 
mask |= (abs(q1 - q0) > limit) -+ * hev |= (abs(q1 - q0) > thresh) -+ */ -+ "subu_s.qb %[c], %[q1], %[q0] \n\t" -+ "subu_s.qb %[r_k], %[q0], %[q1] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" -+ "or %[r3], %[r3], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ /* mask |= (abs(q2 - q1) > limit) */ -+ "subu_s.qb %[c], %[q2], %[q1] \n\t" -+ "subu_s.qb %[r_k], %[q1], %[q2] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ "sll %[r3], %[r3], 24 \n\t" -+ -+ /* mask |= (abs(q3 - q2) > limit) */ -+ "subu_s.qb %[c], %[q3], %[q2] \n\t" -+ "subu_s.qb %[r_k], %[q2], %[q3] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ : [c] "=&r" (c), [r_k] "=&r" (r_k), -+ [r] "=&r" (r), [r3] "=&r" (r3) -+ : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), -+ [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), -+ [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) -+ ); -+ -+ __asm__ __volatile__ ( -+ /* abs(p0 - q0) */ -+ "subu_s.qb %[c], %[p0], %[q0] \n\t" -+ "subu_s.qb %[r_k], %[q0], %[p0] \n\t" -+ "wrdsp %[r3] \n\t" -+ "or %[s1], %[r_k], %[c] \n\t" -+ -+ /* abs(p1 - q1) */ -+ "subu_s.qb %[c], %[p1], %[q1] \n\t" -+ "addu_s.qb %[s3], %[s1], %[s1] \n\t" -+ "pick.qb %[hev1], %[ones], $0 \n\t" -+ "subu_s.qb %[r_k], %[q1], %[p1] \n\t" -+ "or %[s2], %[r_k], %[c] \n\t" -+ -+ /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ -+ "shrl.qb %[s2], %[s2], 1 \n\t" -+ "addu_s.qb %[s1], %[s2], %[s3] \n\t" -+ "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ "sll %[r], %[r], 24 \n\t" -+ -+ "wrdsp %[r] \n\t" -+ "pick.qb %[s2], $0, %[ones] \n\t" -+ -+ : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), -+ [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) -+ : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), -+ [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) -+ ); -+ -+ *hev = hev1; -+ *mask = s2; -+} -+ -+ -+/* inputs & outputs are quad-byte vectors */ -+static __inline void vp8_filter_mips -+( -+ uint32_t mask, -+ uint32_t hev, -+ uint32_t *ps1, -+ uint32_t *ps0, -+ uint32_t *qs0, -+ uint32_t *qs1 -+) -+{ -+ int32_t vp8_filter_l, vp8_filter_r; -+ int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; -+ int32_t subr_r, subr_l; -+ uint32_t t1, t2, HWM, t3; -+ uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; -+ -+ int32_t vps1, vps0, vqs0, vqs1; -+ int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; -+ uint32_t N128; -+ -+ N128 = 0x80808080; -+ t1 = 0x03000300; -+ t2 = 0x04000400; -+ t3 = 0x01000100; -+ HWM = 0xFF00FF00; -+ -+ vps0 = (*ps0) ^ N128; -+ vps1 = (*ps1) ^ N128; -+ vqs0 = (*qs0) ^ N128; -+ vqs1 = (*qs1) ^ N128; -+ -+ /* use halfword pairs instead quad-bytes because of accuracy */ -+ vps0_l = vps0 & HWM; -+ vps0_r = vps0 << 8; -+ vps0_r = vps0_r & HWM; -+ -+ vps1_l = vps1 & HWM; -+ vps1_r = vps1 << 8; -+ vps1_r = vps1_r & HWM; -+ -+ vqs0_l = vqs0 & HWM; -+ vqs0_r = vqs0 << 8; -+ vqs0_r = vqs0_r & HWM; -+ -+ vqs1_l = vqs1 & HWM; -+ vqs1_r = vqs1 << 8; -+ vqs1_r = vqs1_r & HWM; -+ -+ mask_l = mask & HWM; -+ mask_r = mask << 8; -+ mask_r = mask_r & HWM; -+ -+ hev_l = hev & HWM; -+ hev_r = hev << 8; -+ hev_r = hev_r & HWM; -+ -+ __asm__ __volatile__ ( -+ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ -+ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" -+ "subq_s.ph %[vp8_filter_r], 
%[vps1_r], %[vqs1_r] \n\t" -+ -+ /* qs0 - ps0 */ -+ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" -+ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" -+ -+ /* vp8_filter &= hev; */ -+ "and %[vp8_filter_l], %[vp8_filter_l], %[hev_l] \n\t" -+ "and %[vp8_filter_r], %[vp8_filter_r], %[hev_r] \n\t" -+ -+ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "xor %[invhev_l], %[hev_l], %[HWM] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "xor %[invhev_r], %[hev_r], %[HWM] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ -+ /* vp8_filter &= mask; */ -+ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" -+ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" -+ -+ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=&r" (vp8_filter_r), -+ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r), -+ [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r) -+ -+ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), -+ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), -+ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r), -+ [mask_l] "r" (mask_l), [mask_r] "r" (mask_r), -+ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r), -+ [HWM] "r" (HWM) -+ ); -+ -+ /* save bottom 3 bits so that we round one side +4 and the other +3 */ -+ __asm__ __volatile__ ( -+ /* Filter2 = vp8_signed_char_clamp(vp8_filter + 3) >>= 3; */ -+ "addq_s.ph %[Filter1_l], %[vp8_filter_l], %[t2] \n\t" -+ "addq_s.ph %[Filter1_r], %[vp8_filter_r], %[t2] \n\t" -+ -+ /* Filter1 = vp8_signed_char_clamp(vp8_filter + 4) >>= 3; */ -+ "addq_s.ph %[Filter2_l], %[vp8_filter_l], %[t1] \n\t" -+ "addq_s.ph %[Filter2_r], %[vp8_filter_r], %[t1] \n\t" -+ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" -+ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" -+ -+ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" -+ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" -+ -+ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" -+ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" -+ -+ /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ -+ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" -+ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" -+ -+ /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ -+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" -+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" -+ -+ : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), -+ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r), -+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) -+ -+ : [t1] "r" (t1), [t2] "r" (t2), -+ [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), -+ [HWM] "r" (HWM) -+ ); -+ -+ __asm__ __volatile__ ( -+ /* (vp8_filter += 1) >>= 1 */ -+ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" -+ "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" -+ -+ /* vp8_filter &= ~hev; */ -+ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" -+ "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" -+ -+ /* vps1 = vp8_signed_char_clamp(ps1 + vp8_filter); */ -+ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" -+ "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" -+ -+ /* vqs1 = vp8_signed_char_clamp(qs1 - vp8_filter); */ -+ "subq_s.ph %[vqs1_l], %[vqs1_l], 
%[Filter1_l] \n\t" -+ "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" -+ -+ : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r), -+ [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), -+ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) -+ -+ : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) -+ ); -+ -+ /* Create quad-bytes from halfword pairs */ -+ vqs0_l = vqs0_l & HWM; -+ vqs1_l = vqs1_l & HWM; -+ vps0_l = vps0_l & HWM; -+ vps1_l = vps1_l & HWM; -+ -+ __asm__ __volatile__ ( -+ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" -+ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" -+ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" -+ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" -+ -+ : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r), -+ [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r) -+ : -+ ); -+ -+ vqs0 = vqs0_l | vqs0_r; -+ vqs1 = vqs1_l | vqs1_r; -+ vps0 = vps0_l | vps0_r; -+ vps1 = vps1_l | vps1_r; -+ -+ *ps0 = vps0 ^ N128; -+ *ps1 = vps1 ^ N128; -+ *qs0 = vqs0 ^ N128; -+ *qs1 = vqs1 ^ N128; -+} -+ -+void vp8_loop_filter_horizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask; -+ uint32_t hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* prefetch data for store */ -+ prefetch_store_lf(s); -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. -+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p ; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } 
-+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+} -+ -+void vp8_loop_filter_uvhorizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask; -+ uint32_t hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p ; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+} -+ -+void vp8_loop_filter_vertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ const unsigned int flimit, -+ const unsigned int limit, -+ const unsigned int thresh, -+ int count -+) -+{ -+ int i; -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ hev = 0; -+ mask = 0; -+ i = 0; -+ pm1 = 0; -+ p0 = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ p5 = 0; -+ p6 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ do -+ { -+ -+ /* prefetch data for store */ -+ prefetch_store_lf(s + p); -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ s = s4 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ 
"srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ s = s4 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) 
\n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+ -+ i += 8; -+ } -+ -+ while (i < count); -+} -+ -+void vp8_loop_filter_uvvertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" 
(p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+ -+ s1 = s4 + p; -+ s2 = s1 + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ 
-+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+} -+ -+/* inputs & outputs are quad-byte vectors */ -+static __inline void vp8_mbfilter_mips -+( -+ uint32_t mask, -+ uint32_t hev, -+ uint32_t *ps2, -+ uint32_t *ps1, -+ uint32_t *ps0, -+ uint32_t *qs0, -+ uint32_t *qs1, -+ uint32_t *qs2 -+) -+{ -+ int32_t vps2, vps1, vps0, vqs0, vqs1, vqs2; -+ int32_t vps2_l, vps1_l, vps0_l, vqs0_l, vqs1_l, vqs2_l; -+ int32_t vps2_r, vps1_r, vps0_r, vqs0_r, vqs1_r, vqs2_r; -+ uint32_t HWM, vp8_filter_l, vp8_filter_r, mask_l, mask_r, hev_l, hev_r, subr_r, subr_l; -+ uint32_t Filter2_l, Filter2_r, t1, t2, Filter1_l, Filter1_r, invhev_l, invhev_r; -+ uint32_t N128, R63; -+ uint32_t u1_l, u1_r, u2_l, u2_r, u3_l, u3_r; -+ -+ R63 = 0x003F003F; -+ HWM = 0xFF00FF00; -+ N128 = 0x80808080; -+ t1 = 0x03000300; -+ t2 = 0x04000400; -+ -+ vps0 = (*ps0) ^ N128; -+ vps1 = (*ps1) ^ N128; -+ vps2 = (*ps2) ^ N128; -+ vqs0 = (*qs0) ^ N128; -+ vqs1 = (*qs1) ^ N128; -+ vqs2 = (*qs2) ^ N128; -+ -+ /* use halfword pairs instead quad-bytes because of accuracy */ -+ vps0_l = vps0 & HWM; -+ vps0_r = vps0 << 8; -+ vps0_r = vps0_r & HWM; -+ -+ vqs0_l = vqs0 & HWM; -+ vqs0_r = vqs0 << 8; -+ vqs0_r = vqs0_r & HWM; -+ -+ vps1_l = vps1 & HWM; -+ vps1_r = vps1 << 8; -+ vps1_r = vps1_r & HWM; -+ -+ vqs1_l = vqs1 & HWM; -+ vqs1_r = vqs1 << 8; -+ vqs1_r = vqs1_r & HWM; -+ -+ vqs2_l = vqs2 & HWM; -+ vqs2_r = vqs2 << 8; -+ vqs2_r = vqs2_r & HWM; -+ -+ __asm__ __volatile__ ( -+ /* qs0 - ps0 */ -+ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" -+ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" -+ -+ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ -+ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" -+ "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" -+ -+ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=r" (vp8_filter_r), -+ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r) -+ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), -+ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), -+ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r) -+ ); -+ -+ vps2_l = vps2 & HWM; -+ vps2_r = vps2 << 8; -+ vps2_r = vps2_r & HWM; -+ -+ /* add outer taps if we have high 
edge variance */ -+ __asm__ __volatile__ ( -+ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "and %[mask_l], %[HWM], %[mask] \n\t" -+ "sll %[mask_r], %[mask], 8 \n\t" -+ "and %[mask_r], %[HWM], %[mask_r] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "and %[hev_l], %[HWM], %[hev] \n\t" -+ "sll %[hev_r], %[hev], 8 \n\t" -+ "and %[hev_r], %[HWM], %[hev_r] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ -+ /* vp8_filter &= mask; */ -+ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" -+ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" -+ -+ /* Filter2 = vp8_filter & hev; */ -+ "and %[Filter2_l], %[vp8_filter_l], %[hev_l] \n\t" -+ "and %[Filter2_r], %[vp8_filter_r], %[hev_r] \n\t" -+ -+ : [vp8_filter_l] "+r" (vp8_filter_l), [vp8_filter_r] "+r" (vp8_filter_r), -+ [hev_l] "=&r" (hev_l), [hev_r] "=&r" (hev_r), -+ [mask_l] "=&r" (mask_l), [mask_r] "=&r" (mask_r), -+ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) -+ : [subr_l] "r" (subr_l), [subr_r] "r" (subr_r), -+ [HWM] "r" (HWM), [hev] "r" (hev), [mask] "r" (mask) -+ ); -+ -+ /* save bottom 3 bits so that we round one side +4 and the other +3 */ -+ __asm__ __volatile__ ( -+ /* Filter1 = vp8_signed_char_clamp(Filter2 + 4) >>= 3; */ -+ "addq_s.ph %[Filter1_l], %[Filter2_l], %[t2] \n\t" -+ "xor %[invhev_l], %[hev_l], %[HWM] \n\t" -+ "addq_s.ph %[Filter1_r], %[Filter2_r], %[t2] \n\t" -+ -+ /* Filter2 = vp8_signed_char_clamp(Filter2 + 3) >>= 3; */ -+ "addq_s.ph %[Filter2_l], %[Filter2_l], %[t1] \n\t" -+ "addq_s.ph %[Filter2_r], %[Filter2_r], %[t1] \n\t" -+ -+ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" -+ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" -+ -+ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" -+ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" -+ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" -+ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" -+ "xor %[invhev_r], %[hev_r], %[HWM] \n\t" -+ -+ /* qs0 = vp8_signed_char_clamp(qs0 - Filter1); */ -+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" -+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" -+ -+ /* ps0 = vp8_signed_char_clamp(ps0 + Filter2); */ -+ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" -+ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" -+ -+ : [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r), -+ [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), -+ [Filter2_l] "+r" (Filter2_l), [Filter2_r] "+r" (Filter2_r), -+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) -+ : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM), -+ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r) -+ ); -+ -+ /* only apply wider filter if not high edge variance */ -+ __asm__ __volatile__ ( -+ /* vp8_filter &= ~hev; */ -+ "and %[Filter2_l], %[vp8_filter_l], %[invhev_l] \n\t" -+ "and %[Filter2_r], %[vp8_filter_r], %[invhev_r] \n\t" -+ -+ "shra.ph %[Filter2_l], %[Filter2_l], 8 \n\t" -+ "shra.ph %[Filter2_r], %[Filter2_r], 8 \n\t" -+ -+ : [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) -+ : [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), -+ [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) -+ ); -+ -+ /* roughly 3/7th difference across boundary */ -+ __asm__ 
__volatile__ ( -+ "shll.ph %[u3_l], %[Filter2_l], 3 \n\t" -+ "shll.ph %[u3_r], %[Filter2_r], 3 \n\t" -+ -+ "addq.ph %[u3_l], %[u3_l], %[Filter2_l] \n\t" -+ "addq.ph %[u3_r], %[u3_r], %[Filter2_r] \n\t" -+ -+ "shll.ph %[u2_l], %[u3_l], 1 \n\t" -+ "shll.ph %[u2_r], %[u3_r], 1 \n\t" -+ -+ "addq.ph %[u1_l], %[u3_l], %[u2_l] \n\t" -+ "addq.ph %[u1_r], %[u3_r], %[u2_r] \n\t" -+ -+ "addq.ph %[u2_l], %[u2_l], %[R63] \n\t" -+ "addq.ph %[u2_r], %[u2_r], %[R63] \n\t" -+ -+ "addq.ph %[u3_l], %[u3_l], %[R63] \n\t" -+ "addq.ph %[u3_r], %[u3_r], %[R63] \n\t" -+ -+ /* vp8_signed_char_clamp((63 + Filter2 * 27) >> 7) -+ * vp8_signed_char_clamp((63 + Filter2 * 18) >> 7) -+ */ -+ "addq.ph %[u1_l], %[u1_l], %[R63] \n\t" -+ "addq.ph %[u1_r], %[u1_r], %[R63] \n\t" -+ "shra.ph %[u1_l], %[u1_l], 7 \n\t" -+ "shra.ph %[u1_r], %[u1_r], 7 \n\t" -+ "shra.ph %[u2_l], %[u2_l], 7 \n\t" -+ "shra.ph %[u2_r], %[u2_r], 7 \n\t" -+ "shll.ph %[u1_l], %[u1_l], 8 \n\t" -+ "shll.ph %[u1_r], %[u1_r], 8 \n\t" -+ "shll.ph %[u2_l], %[u2_l], 8 \n\t" -+ "shll.ph %[u2_r], %[u2_r], 8 \n\t" -+ -+ /* vqs0 = vp8_signed_char_clamp(qs0 - u); */ -+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[u1_l] \n\t" -+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[u1_r] \n\t" -+ -+ /* vps0 = vp8_signed_char_clamp(ps0 + u); */ -+ "addq_s.ph %[vps0_l], %[vps0_l], %[u1_l] \n\t" -+ "addq_s.ph %[vps0_r], %[vps0_r], %[u1_r] \n\t" -+ -+ : [u1_l] "=&r" (u1_l), [u1_r] "=&r" (u1_r), [u2_l] "=&r" (u2_l), -+ [u2_r] "=&r" (u2_r), [u3_l] "=&r" (u3_l), [u3_r] "=&r" (u3_r), -+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) -+ : [R63] "r" (R63), -+ [Filter2_l] "r" (Filter2_l), [Filter2_r] "r" (Filter2_r) -+ ); -+ -+ __asm__ __volatile__ ( -+ /* vqs1 = vp8_signed_char_clamp(qs1 - u); */ -+ "subq_s.ph %[vqs1_l], %[vqs1_l], %[u2_l] \n\t" -+ "addq_s.ph %[vps1_l], %[vps1_l], %[u2_l] \n\t" -+ -+ /* vps1 = vp8_signed_char_clamp(ps1 + u); */ -+ "addq_s.ph %[vps1_r], %[vps1_r], %[u2_r] \n\t" -+ "subq_s.ph %[vqs1_r], %[vqs1_r], %[u2_r] \n\t" -+ -+ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), -+ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) -+ : [u2_l] "r" (u2_l), [u2_r] "r" (u2_r) -+ ); -+ -+ /* roughly 1/7th difference across boundary */ -+ __asm__ __volatile__ ( -+ /* u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); */ -+ "shra.ph %[u3_l], %[u3_l], 7 \n\t" -+ "shra.ph %[u3_r], %[u3_r], 7 \n\t" -+ "shll.ph %[u3_l], %[u3_l], 8 \n\t" -+ "shll.ph %[u3_r], %[u3_r], 8 \n\t" -+ -+ /* vqs2 = vp8_signed_char_clamp(qs2 - u); */ -+ "subq_s.ph %[vqs2_l], %[vqs2_l], %[u3_l] \n\t" -+ "subq_s.ph %[vqs2_r], %[vqs2_r], %[u3_r] \n\t" -+ -+ /* vps2 = vp8_signed_char_clamp(ps2 + u); */ -+ "addq_s.ph %[vps2_l], %[vps2_l], %[u3_l] \n\t" -+ "addq_s.ph %[vps2_r], %[vps2_r], %[u3_r] \n\t" -+ -+ : [u3_l] "+r" (u3_l), [u3_r] "+r" (u3_r), [vps2_l] "+r" (vps2_l), -+ [vps2_r] "+r" (vps2_r), [vqs2_l] "+r" (vqs2_l), [vqs2_r] "+r" (vqs2_r) -+ : -+ ); -+ -+ /* Create quad-bytes from halfword pairs */ -+ __asm__ __volatile__ ( -+ "and %[vqs0_l], %[vqs0_l], %[HWM] \n\t" -+ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" -+ -+ "and %[vps0_l], %[vps0_l], %[HWM] \n\t" -+ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" -+ -+ "and %[vqs1_l], %[vqs1_l], %[HWM] \n\t" -+ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" -+ -+ "and %[vps1_l], %[vps1_l], %[HWM] \n\t" -+ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" -+ -+ "and %[vqs2_l], %[vqs2_l], %[HWM] \n\t" -+ "shrl.ph %[vqs2_r], %[vqs2_r], 8 \n\t" -+ -+ "and %[vps2_l], %[vps2_l], %[HWM] \n\t" -+ "shrl.ph %[vps2_r], %[vps2_r], 8 \n\t" -+ -+ "or %[vqs0_r], %[vqs0_l], 
%[vqs0_r] \n\t" -+ "or %[vps0_r], %[vps0_l], %[vps0_r] \n\t" -+ "or %[vqs1_r], %[vqs1_l], %[vqs1_r] \n\t" -+ "or %[vps1_r], %[vps1_l], %[vps1_r] \n\t" -+ "or %[vqs2_r], %[vqs2_l], %[vqs2_r] \n\t" -+ "or %[vps2_r], %[vps2_l], %[vps2_r] \n\t" -+ -+ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), [vqs1_l] "+r" (vqs1_l), -+ [vqs1_r] "+r" (vqs1_r), [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r), [vqs2_l] "+r" (vqs2_l), -+ [vqs2_r] "+r" (vqs2_r), [vps2_r] "+r" (vps2_r), [vps2_l] "+r" (vps2_l) -+ : [HWM] "r" (HWM) -+ ); -+ -+ *ps0 = vps0_r ^ N128; -+ *ps1 = vps1_r ^ N128; -+ *ps2 = vps2_r ^ N128; -+ *qs0 = vqs0_r ^ N128; -+ *qs1 = vqs1_r ^ N128; -+ *qs2 = vqs2_r ^ N128; -+} -+ -+void vp8_mbloop_filter_horizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ int i; -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ i = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. -+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* prefetch data for load */ -+ prefetch_load_lf(s + p); -+ -+ /* apply filter on 4 pixesl at the same time */ -+ do -+ { -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ 
} -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ i += 8; -+ } -+ -+ while (i < count); -+} -+ -+void vp8_mbloop_filter_uvhorizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. -+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ /* if mask == 0 do filtering is not needed */ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ } -+} -+ -+ -+void vp8_mbloop_filter_vertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ -+ int i; -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ mask = 0; -+ hev = 0; -+ i = 0; -+ pm1 = 0; -+ p0 = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ p5 = 0; -+ p6 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ do -+ { -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ s = s4 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s4]) \n\t" -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ "sb %[p0], -3(%[s4]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s3]) \n\t" -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ "sb %[p0], -3(%[s3]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" 
(s3), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s2]) \n\t" -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ "sb %[p0], -3(%[s2]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s1]) \n\t" -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ "sb %[p0], -3(%[s1]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ } -+ } -+ -+ i += 4; -+ } -+ -+ while (i < count); -+} -+ -+void vp8_mbloop_filter_uvvertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ mask = 0; -+ hev = 0; -+ pm1 = 0; -+ p0 = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ p5 = 0; -+ p6 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* prefetch data for load */ -+ prefetch_load_lf(s + 2 * p); -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s4]) \n\t" -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ "sb %[p0], -3(%[s4]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s3]) \n\t" -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ "sb %[p0], -3(%[s3]) \n\t" -+ : -+ : [p5] "r" 
(p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s2]) \n\t" -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ "sb %[p0], -3(%[s2]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s1]) \n\t" -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ "sb %[p0], -3(%[s1]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ } -+ } -+ -+ s1 = s4 + p; -+ s2 = s1 + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and 
filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s4]) \n\t" -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ "sb %[p0], -3(%[s4]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s3]) \n\t" -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ "sb %[p0], -3(%[s3]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s2]) \n\t" -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ "sb %[p0], -3(%[s2]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s1]) \n\t" -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ "sb %[p0], -3(%[s1]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ } -+ } -+} -+ -+/* Horizontal MB filtering */ -+void vp8_loop_filter_mbh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->mblim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], %[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ 
vp8_mbloop_filter_horizontal_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ { -+ vp8_mbloop_filter_uvhorizontal_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ } -+ -+ if (v_ptr) -+ { -+ vp8_mbloop_filter_uvhorizontal_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ } -+} -+ -+ -+/* Vertical MB Filtering */ -+void vp8_loop_filter_mbv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->mblim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], %[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ vp8_mbloop_filter_vertical_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ vp8_mbloop_filter_uvvertical_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ -+ if (v_ptr) -+ vp8_mbloop_filter_uvvertical_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+} -+ -+ -+/* Horizontal B Filtering */ -+void vp8_loop_filter_bh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->blim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], %[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 4 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 8 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 12 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ vp8_loop_filter_uvhorizontal_edge_mips(u_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ -+ if (v_ptr) -+ vp8_loop_filter_uvhorizontal_edge_mips(v_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+} -+ -+ -+/* Vertical B Filtering */ -+void vp8_loop_filter_bv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->blim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], 
%[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ vp8_loop_filter_vertical_edge_mips(y_ptr + 4, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_vertical_edge_mips(y_ptr + 8, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_vertical_edge_mips(y_ptr + 12, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ vp8_loop_filter_uvvertical_edge_mips(u_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ -+ if (v_ptr) -+ vp8_loop_filter_uvvertical_edge_mips(v_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c -new file mode 100644 -index 0000000..a5239a3 ---- /dev/null -+++ b/vp8/common/mips/dspr2/reconinter_dspr2.c -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vpx/vpx_integer.h" -+ -+#if HAVE_DSPR2 -+inline void prefetch_load_int(unsigned char *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+ -+__inline void vp8_copy_mem16x16_dspr2( -+ unsigned char *RESTRICT src, -+ int src_stride, -+ unsigned char *RESTRICT dst, -+ int dst_stride) -+{ -+ int r; -+ unsigned int a0, a1, a2, a3; -+ -+ for (r = 16; r--;) -+ { -+ /* load src data in cache memory */ -+ prefetch_load_int(src + src_stride); -+ -+ /* use unaligned memory load and store */ -+ __asm__ __volatile__ ( -+ "ulw %[a0], 0(%[src]) \n\t" -+ "ulw %[a1], 4(%[src]) \n\t" -+ "ulw %[a2], 8(%[src]) \n\t" -+ "ulw %[a3], 12(%[src]) \n\t" -+ "sw %[a0], 0(%[dst]) \n\t" -+ "sw %[a1], 4(%[dst]) \n\t" -+ "sw %[a2], 8(%[dst]) \n\t" -+ "sw %[a3], 12(%[dst]) \n\t" -+ : [a0] "=&r" (a0), [a1] "=&r" (a1), -+ [a2] "=&r" (a2), [a3] "=&r" (a3) -+ : [src] "r" (src), [dst] "r" (dst) -+ ); -+ -+ src += src_stride; -+ dst += dst_stride; -+ } -+} -+ -+ -+__inline void vp8_copy_mem8x8_dspr2( -+ unsigned char *RESTRICT src, -+ int src_stride, -+ unsigned char *RESTRICT dst, -+ int dst_stride) -+{ -+ int r; -+ unsigned int a0, a1; -+ -+ /* load src data in cache memory */ -+ prefetch_load_int(src + src_stride); -+ -+ for (r = 8; r--;) -+ { -+ /* use unaligned memory load and store */ -+ __asm__ __volatile__ ( -+ "ulw %[a0], 0(%[src]) \n\t" -+ "ulw %[a1], 4(%[src]) \n\t" -+ "sw %[a0], 0(%[dst]) \n\t" -+ "sw %[a1], 4(%[dst]) \n\t" -+ : [a0] "=&r" (a0), [a1] "=&r" (a1) -+ : [src] "r" (src), [dst] "r" (dst) -+ ); -+ -+ src += src_stride; -+ dst += dst_stride; -+ } -+} -+ -+ -+__inline void vp8_copy_mem8x4_dspr2( -+ unsigned char *RESTRICT src, -+ int src_stride, -+ unsigned char *RESTRICT dst, -+ int dst_stride) -+{ -+ int r; -+ unsigned int a0, a1; -+ -+ /* load src data in cache memory */ -+ prefetch_load_int(src + src_stride); -+ -+ for (r = 4; r--;) -+ { -+ /* use unaligned memory load and store */ -+ __asm__ __volatile__ ( -+ "ulw %[a0], 0(%[src]) \n\t" -+ "ulw %[a1], 4(%[src]) \n\t" -+ "sw %[a0], 0(%[dst]) \n\t" -+ "sw %[a1], 4(%[dst]) 
\n\t" -+ : [a0] "=&r" (a0), [a1] "=&r" (a1) -+ : [src] "r" (src), [dst] "r" (dst) -+ ); -+ -+ src += src_stride; -+ dst += dst_stride; -+ } -+} -+ -+#endif -diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h -index 2e282f6..766b4ea 100644 ---- a/vp8/common/onyx.h -+++ b/vp8/common/onyx.h -@@ -39,14 +39,6 @@ extern "C" - - typedef enum - { -- VP8_LAST_FLAG = 1, -- VP8_GOLD_FLAG = 2, -- VP8_ALT_FLAG = 4 -- } VP8_REFFRAME; -- -- -- typedef enum -- { - USAGE_STREAM_FROM_SERVER = 0x0, - USAGE_LOCAL_FILE_PLAYBACK = 0x1, - USAGE_CONSTRAINED_QUALITY = 0x2 -@@ -102,83 +94,101 @@ extern "C" - - typedef struct - { -- int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode -- int Width; // width of data passed to the compressor -- int Height; // height of data passed to the compressor -+ /* 4 versions of bitstream defined: -+ * 0 best quality/slowest decode, 3 lowest quality/fastest decode -+ */ -+ int Version; -+ int Width; -+ int Height; - struct vpx_rational timebase; -- int target_bandwidth; // bandwidth to be used in kilobits per second -+ unsigned int target_bandwidth; /* kilobits per second */ -+ -+ /* parameter used for applying pre processing blur: recommendation 0 */ -+ int noise_sensitivity; - -- int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 -- int Sharpness; // parameter used for sharpening output: recommendation 0: -+ /* parameter used for sharpening output: recommendation 0: */ -+ int Sharpness; - int cpu_used; - unsigned int rc_max_intra_bitrate_pct; - -- // mode -> -- //(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing -- // a television signal or feed from a live camera). ( speed setting controls how fast ) -- //(1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to -- // encode the output. ( speed setting controls how fast ) -- //(2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding -- // speed. The output is compressed at the highest possible quality. This option takes the longest -- // amount of time to encode. ( speed setting ignored ) -- //(3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding -- // pass. ( speed setting controls how fast ) -- //(4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding -- // pass to create the compressed output. ( speed setting controls how fast ) -- //(5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first -- // encoding pass to create the compressed output using the highest possible quality, and taking a -- // longer amount of time to encode.. ( speed setting ignored ) -- int Mode; // -- -- // Key Framing Operations -- int auto_key; // automatically detect cut scenes and set the keyframes -- int key_freq; // maximum distance to key frame. -- -- int allow_lag; // allow lagged compression (if 0 lagin frames is ignored) -- int lag_in_frames; // how many frames lag before we start encoding -- -- //---------------------------------------------------------------- -- // DATARATE CONTROL OPTIONS -- -- int end_usage; // vbr or cbr -- -- // buffer targeting aggressiveness -+ /* mode -> -+ *(0)=Realtime/Live Encoding. This mode is optimized for realtim -+ * encoding (for example, capturing a television signal or feed -+ * from a live camera). 
( speed setting controls how fast ) -+ *(1)=Good Quality Fast Encoding. The encoder balances quality with -+ * the amount of time it takes to encode the output. ( speed -+ * setting controls how fast ) -+ *(2)=One Pass - Best Quality. The encoder places priority on the -+ * quality of the output over encoding speed. The output is -+ * compressed at the highest possible quality. This option takes -+ * the longest amount of time to encode. ( speed setting ignored -+ * ) -+ *(3)=Two Pass - First Pass. The encoder generates a file of -+ * statistics for use in the second encoding pass. ( speed -+ * setting controls how fast ) -+ *(4)=Two Pass - Second Pass. The encoder uses the statistics that -+ * were generated in the first encoding pass to create the -+ * compressed output. ( speed setting controls how fast ) -+ *(5)=Two Pass - Second Pass Best. The encoder uses the statistics -+ * that were generated in the first encoding pass to create the -+ * compressed output using the highest possible quality, and -+ * taking a longer amount of time to encode.. ( speed setting -+ * ignored ) -+ */ -+ int Mode; -+ -+ /* Key Framing Operations */ -+ int auto_key; /* automatically detect cut scenes */ -+ int key_freq; /* maximum distance to key frame. */ -+ -+ /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */ -+ int allow_lag; -+ int lag_in_frames; /* how many frames lag before we start encoding */ -+ -+ /* -+ * DATARATE CONTROL OPTIONS -+ */ -+ -+ int end_usage; /* vbr or cbr */ -+ -+ /* buffer targeting aggressiveness */ - int under_shoot_pct; - int over_shoot_pct; - -- // buffering parameters -- int64_t starting_buffer_level; // in bytes -+ /* buffering parameters */ -+ int64_t starting_buffer_level; - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; - -- int64_t starting_buffer_level_in_ms; // in milli-seconds -+ int64_t starting_buffer_level_in_ms; - int64_t optimal_buffer_level_in_ms; - int64_t maximum_buffer_size_in_ms; - -- // controlling quality -+ /* controlling quality */ - int fixed_q; - int worst_allowed_q; - int best_allowed_q; - int cq_level; - -- // allow internal resizing ( currently disabled in the build !!!!!) -+ /* allow internal resizing */ - int allow_spatial_resampling; - int resample_down_water_mark; - int resample_up_water_mark; - -- // allow internal frame rate alterations -+ /* allow internal frame rate alterations */ - int allow_df; - int drop_frames_water_mark; - -- // two pass datarate control -- int two_pass_vbrbias; // two pass datarate control tweaks -+ /* two pass datarate control */ -+ int two_pass_vbrbias; - int two_pass_vbrmin_section; - int two_pass_vbrmax_section; -- // END DATARATE CONTROL OPTIONS -- //---------------------------------------------------------------- - -+ /* -+ * END DATARATE CONTROL OPTIONS -+ */ - -- // these parameters aren't to be used in final build don't use!!! -+ /* these parameters aren't to be used in final build don't use!!! 
*/ - int play_alternate; - int alt_freq; - int alt_q; -@@ -186,26 +196,28 @@ extern "C" - int gold_q; - - -- int multi_threaded; // how many threads to run the encoder on -- int token_partitions; // how many token partitions to create for multi core decoding -- int encode_breakout; // early breakout encode threshold : for video conf recommend 800 -+ int multi_threaded; /* how many threads to run the encoder on */ -+ int token_partitions; /* how many token partitions to create */ -+ -+ /* early breakout threshold: for video conf recommend 800 */ -+ int encode_breakout; - -- unsigned int error_resilient_mode; // Bitfield defining the error -- // resiliency features to enable. Can provide -- // decodable frames after losses in previous -- // frames and decodable partitions after -- // losses in the same frame. -+ /* Bitfield defining the error resiliency features to enable. -+ * Can provide decodable frames after losses in previous -+ * frames and decodable partitions after losses in the same frame. -+ */ -+ unsigned int error_resilient_mode; - - int arnr_max_frames; -- int arnr_strength ; -- int arnr_type ; -+ int arnr_strength; -+ int arnr_type; - -- struct vpx_fixed_buf two_pass_stats_in; -+ struct vpx_fixed_buf two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; - - vp8e_tuning tuning; - -- // Temporal scaling parameters -+ /* Temporal scaling parameters */ - unsigned int number_of_layers; - unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; - unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; -@@ -236,16 +248,14 @@ extern "C" - void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); - void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); - --// receive a frames worth of data caller can assume that a copy of this frame is made --// and not just a copy of the pointer.. 
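The error_resilient_mode bitfield documented in the configuration struct above is normally filled in from the public encoder configuration rather than set directly. Assuming the standard vpx_encoder.h interface (this sketch is illustrative and not part of the patch), the two capabilities named in that comment correspond to the VPX_ERROR_RESILIENT_DEFAULT and VPX_ERROR_RESILIENT_PARTITIONS flags:

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Sketch: request error-resilient VP8 encoding through the public API.
 * VPX_ERROR_RESILIENT_DEFAULT    - frames remain decodable after losses
 *                                  in previous frames
 * VPX_ERROR_RESILIENT_PARTITIONS - partitions remain decodable after
 *                                  losses in the same frame */
static int init_resilient_encoder(vpx_codec_ctx_t *ctx, int width, int height)
{
    vpx_codec_enc_cfg_t cfg;

    if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &cfg, 0))
        return -1;

    cfg.g_w = width;
    cfg.g_h = height;
    cfg.g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT |
                            VPX_ERROR_RESILIENT_PARTITIONS;

    return vpx_codec_enc_init(ctx, vpx_codec_vp8_cx(), &cfg, 0) ? -1 : 0;
}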
- int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); - int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); - int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); - - int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags); - int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags); -- int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -- int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - int vp8_update_entropy(struct VP8_COMP* comp, int update); - int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); - int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols); -diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h -index c3215c0..5325bac 100644 ---- a/vp8/common/onyxc_int.h -+++ b/vp8/common/onyxc_int.h -@@ -42,7 +42,6 @@ typedef struct frame_contexts - vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; - vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - MV_CONTEXT mvc[2]; -- MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ - } FRAME_CONTEXT; - - typedef enum -@@ -59,12 +58,6 @@ typedef enum - RECON_CLAMP_NOTREQUIRED = 1 - } CLAMP_TYPE; - --typedef enum --{ -- SIXTAP = 0, -- BILINEAR = 1 --} INTERPOLATIONFILTERTYPE; -- - typedef struct VP8Common - - { -@@ -94,6 +87,7 @@ typedef struct VP8Common - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; - int post_proc_buffer_int_used; -+ unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ - #endif - - FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. 
*/ -@@ -114,7 +108,6 @@ typedef struct VP8Common - int full_pixel; - - int base_qindex; -- int last_kf_gf_q; /* Q used on the last GF or KF */ - - int y1dc_delta_q; - int y2dc_delta_q; -@@ -130,11 +123,11 @@ typedef struct VP8Common - - MODE_INFO *mip; /* Base of allocated array */ - MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ -+#if CONFIG_ERROR_CONCEALMENT - MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ - MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ -+#endif - -- -- INTERPOLATIONFILTERTYPE mcomp_filter_type; - LOOPFILTERTYPE filter_type; - - loop_filter_info_n lf_info; -@@ -158,14 +151,6 @@ typedef struct VP8Common - ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ - -- -- /* keyframe block modes are predicted by their above, left neighbors */ -- -- vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]; -- vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */ -- vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1]; -- -- - FRAME_CONTEXT lfc; /* last frame entropy */ - FRAME_CONTEXT fc; /* this frame entropy */ - -diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h -index 35a8b6e..fd7e051 100644 ---- a/vp8/common/onyxd.h -+++ b/vp8/common/onyxd.h -@@ -22,6 +22,7 @@ extern "C" - #include "ppflags.h" - #include "vpx_ports/mem.h" - #include "vpx/vpx_codec.h" -+#include "vpx/vp8.h" - - struct VP8D_COMP; - -@@ -35,12 +36,6 @@ extern "C" - int error_concealment; - int input_fragments; - } VP8D_CONFIG; -- typedef enum -- { -- VP8_LAST_FLAG = 1, -- VP8_GOLD_FLAG = 2, -- VP8_ALT_FLAG = 4 -- } VP8_REFFRAME; - - typedef enum - { -@@ -53,11 +48,13 @@ extern "C" - - int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); - -- int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp); -+ int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, -+ size_t size, const uint8_t *dest, -+ int64_t time_stamp); - int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); - -- vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -- vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - - struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf); - -diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c -index ccf6ad7..80fa530 100644 ---- a/vp8/common/postproc.c -+++ b/vp8/common/postproc.c -@@ -127,27 +127,24 @@ extern void vp8_blit_text(const char *msg, unsigned char *address, const int pit - extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); - /*********************************************************************************************************** - */ --void vp8_post_proc_down_and_across_c -+void vp8_post_proc_down_and_across_mb_row_c - ( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, -- int rows, - int cols, -- int flimit -+ unsigned char *f, -+ int size - ) - { - unsigned char 
*p_src, *p_dst; - int row; - int col; -- int i; -- int v; -- int pitch = src_pixels_per_line; -- unsigned char d[8]; -- (void)dst_pixels_per_line; -+ unsigned char v; -+ unsigned char d[4]; - -- for (row = 0; row < rows; row++) -+ for (row = 0; row < size; row++) - { - /* post_proc_down for one row */ - p_src = src_ptr; -@@ -155,20 +152,23 @@ void vp8_post_proc_down_and_across_c - - for (col = 0; col < cols; col++) - { -+ unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line]; -+ unsigned char p_above1 = p_src[col - src_pixels_per_line]; -+ unsigned char p_below1 = p_src[col + src_pixels_per_line]; -+ unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line]; - -- int kernel = 4; -- int v = p_src[col]; -+ v = p_src[col]; - -- for (i = -2; i <= 2; i++) -+ if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col]) -+ && (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col])) - { -- if (abs(v - p_src[col+i*pitch]) > flimit) -- goto down_skip_convolve; -- -- kernel += kernel5[2+i] * p_src[col+i*pitch]; -+ unsigned char k1, k2, k3; -+ k1 = (p_above2 + p_above1 + 1) >> 1; -+ k2 = (p_below2 + p_below1 + 1) >> 1; -+ k3 = (k1 + k2 + 1) >> 1; -+ v = (k3 + v + 1) >> 1; - } - -- v = (kernel >> 3); -- down_skip_convolve: - p_dst[col] = v; - } - -@@ -176,45 +176,38 @@ void vp8_post_proc_down_and_across_c - p_src = dst_ptr; - p_dst = dst_ptr; - -- for (i = -8; i<0; i++) -- p_src[i]=p_src[0]; -- -- for (i = cols; i flimit) -- goto across_skip_convolve; -- -- kernel += kernel5[2+i] * p_src[col+i]; -+ unsigned char k1, k2, k3; -+ k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1; -+ k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1; -+ k3 = (k1 + k2 + 1) >> 1; -+ v = (k3 + v + 1) >> 1; - } - -- d[col&7] = (kernel >> 3); -- across_skip_convolve: -+ d[col & 3] = v; - - if (col >= 2) -- p_dst[col-2] = d[(col-2)&7]; -+ p_dst[col - 2] = d[(col - 2) & 3]; - } - - /* handle the last two pixels */ -- p_dst[col-2] = d[(col-2)&7]; -- p_dst[col-1] = d[(col-1)&7]; -- -+ p_dst[col - 2] = d[(col - 2) & 3]; -+ p_dst[col - 1] = d[(col - 1) & 3]; - - /* next row */ -- src_ptr += pitch; -- dst_ptr += pitch; -+ src_ptr += src_pixels_per_line; -+ dst_ptr += dst_pixels_per_line; - } - } - -@@ -240,8 +233,9 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co - for (i = -8; i<0; i++) - s[i]=s[0]; - -- // 17 avoids valgrind warning - we buffer values in c in d -- // and only write them when we've read 8 ahead... -+ /* 17 avoids valgrind warning - we buffer values in c in d -+ * and only write them when we've read 8 ahead... 
-+ */ - for (i = cols; iy_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); -- vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); -- vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); -- -- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -- -+ vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, -+ post->y_width, q2mbl(q)); -+ vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, -+ post->y_width, q2mbl(q)); - } - --void vp8_deblock(YV12_BUFFER_CONFIG *source, -+void vp8_deblock(VP8_COMMON *cm, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, -@@ -351,16 +332,64 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source, - { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); -+ -+ const MODE_INFO *mode_info_context = cm->mi; -+ int mbr, mbc; -+ -+ /* The pixel thresholds are adjusted according to if or not the macroblock -+ * is a skipped block. */ -+ unsigned char *ylimits = cm->pp_limits_buffer; -+ unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols; - (void) low_var_thresh; - (void) flag; - -- vp8_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); -- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -+ if (ppl > 0) -+ { -+ for (mbr = 0; mbr < cm->mb_rows; mbr++) -+ { -+ unsigned char *ylptr = ylimits; -+ unsigned char *uvlptr = uvlimits; -+ for (mbc = 0; mbc < cm->mb_cols; mbc++) -+ { -+ unsigned char mb_ppl; -+ -+ if (mode_info_context->mbmi.mb_skip_coeff) -+ mb_ppl = (unsigned char)ppl >> 1; -+ else -+ mb_ppl = (unsigned char)ppl; -+ -+ vpx_memset(ylptr, mb_ppl, 16); -+ vpx_memset(uvlptr, mb_ppl, 8); -+ -+ ylptr += 16; -+ uvlptr += 8; -+ mode_info_context++; -+ } -+ mode_info_context++; -+ -+ vp8_post_proc_down_and_across_mb_row( -+ source->y_buffer + 16 * mbr * source->y_stride, -+ post->y_buffer + 16 * mbr * post->y_stride, source->y_stride, -+ post->y_stride, source->y_width, ylimits, 16); -+ -+ vp8_post_proc_down_and_across_mb_row( -+ source->u_buffer + 8 * mbr * source->uv_stride, -+ post->u_buffer + 8 * mbr * post->uv_stride, source->uv_stride, -+ post->uv_stride, source->uv_width, uvlimits, 8); -+ vp8_post_proc_down_and_across_mb_row( -+ source->v_buffer + 8 * mbr * source->uv_stride, -+ post->v_buffer + 8 * mbr * post->uv_stride, source->uv_stride, -+ post->uv_stride, source->uv_width, uvlimits, 8); -+ } -+ } else -+ { -+ vp8_yv12_copy_frame(source, post); -+ } - } - - #if !(CONFIG_TEMPORAL_DENOISING) --void vp8_de_noise(YV12_BUFFER_CONFIG *source, -+void vp8_de_noise(VP8_COMMON *cm, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, -@@ -368,33 +397,33 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source, - { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); -+ int 
mb_rows = source->y_width >> 4; -+ int mb_cols = source->y_height >> 4; -+ unsigned char *limits = cm->pp_limits_buffer;; -+ int mbr, mbc; - (void) post; - (void) low_var_thresh; - (void) flag; - -- vp8_post_proc_down_and_across( -- source->y_buffer + 2 * source->y_stride + 2, -- source->y_buffer + 2 * source->y_stride + 2, -- source->y_stride, -- source->y_stride, -- source->y_height - 4, -- source->y_width - 4, -- ppl); -- vp8_post_proc_down_and_across( -- source->u_buffer + 2 * source->uv_stride + 2, -- source->u_buffer + 2 * source->uv_stride + 2, -- source->uv_stride, -- source->uv_stride, -- source->uv_height - 4, -- source->uv_width - 4, ppl); -- vp8_post_proc_down_and_across( -- source->v_buffer + 2 * source->uv_stride + 2, -- source->v_buffer + 2 * source->uv_stride + 2, -- source->uv_stride, -- source->uv_stride, -- source->uv_height - 4, -- source->uv_width - 4, ppl); -+ vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols); - -+ /* TODO: The original code don't filter the 2 outer rows and columns. */ -+ for (mbr = 0; mbr < mb_rows; mbr++) -+ { -+ vp8_post_proc_down_and_across_mb_row( -+ source->y_buffer + 16 * mbr * source->y_stride, -+ source->y_buffer + 16 * mbr * source->y_stride, -+ source->y_stride, source->y_stride, source->y_width, limits, 16); -+ -+ vp8_post_proc_down_and_across_mb_row( -+ source->u_buffer + 8 * mbr * source->uv_stride, -+ source->u_buffer + 8 * mbr * source->uv_stride, -+ source->uv_stride, source->uv_stride, source->uv_width, limits, 8); -+ vp8_post_proc_down_and_across_mb_row( -+ source->v_buffer + 8 * mbr * source->uv_stride, -+ source->v_buffer + 8 * mbr * source->uv_stride, -+ source->uv_stride, source->uv_stride, source->uv_width, limits, 8); -+ } - } - #endif - -@@ -441,7 +470,7 @@ static void fillrd(struct postproc_state *state, int q, int a) - - } - -- for (next = next; next < 256; next++) -+ for (; next < 256; next++) - char_dist[next] = 0; - - } -@@ -731,21 +760,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t - - oci->post_proc_buffer_int_used = 1; - -- // insure that postproc is set to all 0's so that post proc -- // doesn't pull random data in from edge -+ /* insure that postproc is set to all 0's so that post proc -+ * doesn't pull random data in from edge -+ */ - vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); - - } - } - --#if ARCH_X86||ARCH_X86_64 -- vpx_reset_mmx_state(); --#endif -+ vp8_clear_system_state(); - - if ((flags & VP8D_MFQE) && - oci->postproc_state.last_frame_valid && - oci->current_video_frame >= 2 && -- oci->base_qindex - oci->postproc_state.last_base_qindex >= 10) -+ oci->postproc_state.last_base_qindex < 60 && -+ oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) - { - vp8_multiframe_quality_enhance(oci); - if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) && -@@ -754,12 +783,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t - vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int); - if (flags & VP8D_DEMACROBLOCK) - { -- vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer, -+ vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, - q + (deblock_level - 5) * 10, 1, 0); -+ vp8_de_mblock(&oci->post_proc_buffer, -+ q + (deblock_level - 5) * 10); - } - else if (flags & VP8D_DEBLOCK) - { -- vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer, -+ vp8_deblock(oci, &oci->post_proc_buffer_int, 
&oci->post_proc_buffer, - q, 1, 0); - } - } -@@ -768,13 +799,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t - } - else if (flags & VP8D_DEMACROBLOCK) - { -- vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer, -- q + (deblock_level - 5) * 10, 1, 0); -+ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, -+ q + (deblock_level - 5) * 10, 1, 0); -+ vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); -+ - oci->postproc_state.last_base_qindex = oci->base_qindex; - } - else if (flags & VP8D_DEBLOCK) - { -- vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer, -+ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, - q, 1, 0); - oci->postproc_state.last_base_qindex = oci->base_qindex; - } -diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h -index 6ac788c..495a2c9 100644 ---- a/vp8/common/postproc.h -+++ b/vp8/common/postproc.h -@@ -30,13 +30,15 @@ int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, - vp8_ppflags_t *flags); - - --void vp8_de_noise(YV12_BUFFER_CONFIG *source, -+void vp8_de_noise(struct VP8Common *oci, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, - int flag); - --void vp8_deblock(YV12_BUFFER_CONFIG *source, -+void vp8_deblock(struct VP8Common *oci, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, -diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c -index 7046a63..87f4cac 100644 ---- a/vp8/common/ppc/systemdependent.c -+++ b/vp8/common/ppc/systemdependent.c -@@ -19,14 +19,14 @@ void (*vp8_short_idct4x4)(short *input, short *output, int pitch); - void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch); - void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch); - --extern void (*vp8_post_proc_down_and_across)( -+extern void (*vp8_post_proc_down_and_across_mb_row)( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, -- int rows, - int cols, -- int flimit -+ unsigned char *f, -+ int size - ); - - extern void (*vp8_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit); -@@ -34,15 +34,15 @@ extern void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int - extern void (*vp8_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit); - extern void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit); - --extern void vp8_post_proc_down_and_across_c -+extern void vp8_post_proc_down_and_across_mb_row_c - ( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, -- int rows, - int cols, -- int flimit -+ unsigned char *f, -+ int size - ); - void vp8_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a); - -@@ -158,7 +158,7 @@ void vp8_machine_specific_config(void) - vp8_lf_mbhsimple = loop_filter_mbhs_ppc; - vp8_lf_bhsimple = loop_filter_bhs_ppc; - -- vp8_post_proc_down_and_across = vp8_post_proc_down_and_across_c; -+ vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; - vp8_plane_add_noise = vp8_plane_add_noise_c; -diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c -index e9833fe..05f9210 100644 ---- a/vp8/common/quant_common.c 
-+++ b/vp8/common/quant_common.c -@@ -109,7 +109,10 @@ int vp8_ac2quant(int QIndex, int Delta) - else if (QIndex < 0) - QIndex = 0; - -- retval = (ac_qlookup[ QIndex ] * 155) / 100; -+ /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. -+ * The smallest precision for that is '(x*6349) >> 12' but 16 is a good -+ * word size. */ -+ retval = (ac_qlookup[ QIndex ] * 101581) >> 16; - - if (retval < 8) - retval = 8; -diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c -index dcc35ec..7bb8d0a 100644 ---- a/vp8/common/reconintra4x4.c -+++ b/vp8/common/reconintra4x4.c -@@ -13,11 +13,11 @@ - #include "vpx_rtcd.h" - #include "blockd.h" - --void vp8_intra4x4_predict_d_c(unsigned char *Above, -- unsigned char *yleft, int left_stride, -- int b_mode, -- unsigned char *dst, int dst_stride, -- unsigned char top_left) -+void vp8_intra4x4_predict_c(unsigned char *Above, -+ unsigned char *yleft, int left_stride, -+ B_PREDICTION_MODE b_mode, -+ unsigned char *dst, int dst_stride, -+ unsigned char top_left) - { - int i, r, c; - -@@ -290,19 +290,8 @@ void vp8_intra4x4_predict_d_c(unsigned char *Above, - } - break; - -+ default: -+ break; - - } - } -- --void vp8_intra4x4_predict_c(unsigned char *src, int src_stride, -- int b_mode, -- unsigned char *dst, int dst_stride) --{ -- unsigned char *Above = src - src_stride; -- -- vp8_intra4x4_predict_d_c(Above, -- src - 1, src_stride, -- b_mode, -- dst, dst_stride, -- Above[-1]); --} -diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c -index 232640d..01dad46 100644 ---- a/vp8/common/rtcd.c -+++ b/vp8/common/rtcd.c -@@ -10,3 +10,96 @@ - #include "vpx_config.h" - #define RTCD_C - #include "vpx_rtcd.h" -+ -+#if CONFIG_MULTITHREAD && defined(_WIN32) -+#include -+#include -+static void once(void (*func)(void)) -+{ -+ static CRITICAL_SECTION *lock; -+ static LONG waiters; -+ static int done; -+ void *lock_ptr = &lock; -+ -+ /* If the initialization is complete, return early. This isn't just an -+ * optimization, it prevents races on the destruction of the global -+ * lock. -+ */ -+ if(done) -+ return; -+ -+ InterlockedIncrement(&waiters); -+ -+ /* Get a lock. We create one and try to make it the one-true-lock, -+ * throwing it away if we lost the race. -+ */ -+ -+ { -+ /* Scope to protect access to new_lock */ -+ CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION)); -+ InitializeCriticalSection(new_lock); -+ if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL) -+ { -+ DeleteCriticalSection(new_lock); -+ free(new_lock); -+ } -+ } -+ -+ /* At this point, we have a lock that can be synchronized on. We don't -+ * care which thread actually performed the allocation. -+ */ -+ -+ EnterCriticalSection(lock); -+ -+ if (!done) -+ { -+ func(); -+ done = 1; -+ } -+ -+ LeaveCriticalSection(lock); -+ -+ /* Last one out should free resources. The destructed objects are -+ * protected by checking if(done) above. -+ */ -+ if(!InterlockedDecrement(&waiters)) -+ { -+ DeleteCriticalSection(lock); -+ free(lock); -+ lock = NULL; -+ } -+} -+ -+ -+#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H -+#include -+static void once(void (*func)(void)) -+{ -+ static pthread_once_t lock = PTHREAD_ONCE_INIT; -+ pthread_once(&lock, func); -+} -+ -+ -+#else -+/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, -+ * so as long as your platform provides atomic loads/stores of pointers -+ * no synchronization is strictly necessary. 
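The vp8_ac2quant change above replaces a divide by 100 with a multiply and shift; the equivalence stated in its comment can be checked exhaustively in a few lines of C (illustrative, not part of the patch):

#include <assert.h>
#include <stdio.h>

/* Exhaustively verify the fixed-point replacement used in vp8_ac2quant:
 * (x * 101581) >> 16 must equal x * 155 / 100 for all x in [0..284]. */
int main(void)
{
    int x;

    for (x = 0; x <= 284; x++)
        assert((x * 101581) >> 16 == x * 155 / 100);

    printf("equivalence holds for x in [0..284]\n");
    return 0;
}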
-+ */ -+ -+static void once(void (*func)(void)) -+{ -+ static int done; -+ -+ if(!done) -+ { -+ func(); -+ done = 1; -+ } -+} -+#endif -+ -+ -+void vpx_rtcd() -+{ -+ once(setup_rtcd_internal); -+} -diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh -index 33bf08b..0f950f8 100644 ---- a/vp8/common/rtcd_defs.sh -+++ b/vp8/common/rtcd_defs.sh -@@ -1,5 +1,7 @@ - common_forward_decls() { - cat < - #include - #include "vpx_config.h" - #include "vpx/vpx_integer.h" - --static --unsigned int sad_mx_n_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad, -- int m, -- int n) -+static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad, int m, int n) - { -- - int r, c; - unsigned int sad = 0; - -@@ -48,298 +42,211 @@ unsigned int sad_mx_n_c( - * implementations of these functions are not required to check it. - */ - --unsigned int vp8_sad16x16_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16); - } - -- --unsigned int vp8_sad8x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8); - } - -- --unsigned int vp8_sad16x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8); - - } - -- --unsigned int vp8_sad8x16_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16); - } - -- --unsigned int vp8_sad4x4_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4); - } - --void vp8_sad16x16x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] 
= vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad16x16x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad16x8x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad16x8x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 
0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad8x8x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad8x8x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned 
short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad8x16x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad8x16x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 
UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad4x4x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad4x4x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad16x16x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 
0x7fffffff); -- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad16x8x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad8x8x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad8x16x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); 
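All of the vp8_sadNxMx3/x8/x4d wrappers in this file reduce to one scalar kernel: the sum of absolute differences over an m x n block, with max_sad now typed unsigned int and passed as UINT_MAX when no early-out bound is wanted. A self-contained sketch of that kernel (illustrative; the helper name is made up, the real implementation is sad_mx_n_c above):

#include <stdlib.h>

/* Illustrative scalar SAD over an m x n block, mirroring what the
 * vp8_sadWxH_c functions compute. The max_sad early-out is omitted,
 * matching the note above that C implementations need not check it. */
static unsigned int sad_block(const unsigned char *src, int src_stride,
                              const unsigned char *ref, int ref_stride,
                              int m, int n)
{
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < n; r++)
    {
        for (c = 0; c < m; c++)
            sad += abs(src[c] - ref[c]);

        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}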
-+ sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad4x4x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - - /* Copy 2 macroblocks to a buffer */ --void vp8_copy32xn_c( -- unsigned char *src_ptr, -- int src_stride, -- unsigned char *dst_ptr, -- int dst_stride, -- int height) -+void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride, -+ unsigned char *dst_ptr, int dst_stride, -+ int height) - { - int r; - -diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c -index 7976e25..60afe51 100644 ---- a/vp8/common/setupintrarecon.c -+++ b/vp8/common/setupintrarecon.c -@@ -30,3 +30,10 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) - ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - - } -+ -+void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) -+{ -+ vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); -+ vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); -+ vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); -+} -diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h -index 5264fd0..e515c3a 100644 ---- a/vp8/common/setupintrarecon.h -+++ b/vp8/common/setupintrarecon.h -@@ -11,3 +11,23 @@ - - #include "vpx_scale/yv12config.h" - extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); -+extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); -+ -+static -+void setup_intra_recon_left(unsigned char *y_buffer, -+ unsigned char *u_buffer, -+ unsigned char *v_buffer, -+ int y_stride, -+ int uv_stride) -+{ -+ int i; -+ -+ for (i = 0; i < 16; i++) -+ y_buffer[y_stride *i] = (unsigned char) 129; -+ -+ for (i = 0; i < 8; i++) -+ u_buffer[uv_stride *i] = (unsigned char) 129; -+ -+ for (i = 0; i < 8; i++) -+ v_buffer[uv_stride *i] = (unsigned char) 129; -+} -diff --git a/vp8/common/variance.h b/vp8/common/variance.h -index b77aa28..01193b8 100644 ---- a/vp8/common/variance.h -+++ b/vp8/common/variance.h -@@ -12,14 +12,14 @@ - #ifndef VARIANCE_H - #define VARIANCE_H - --typedef unsigned int(*vp8_sad_fn_t) -- ( -+#include "vpx_config.h" -+ -+typedef unsigned int(*vp8_sad_fn_t)( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, -- int max_sad -- ); -+ unsigned int max_sad); - - typedef void (*vp8_copy32xn_fn_t)( - const unsigned char *src_ptr, -@@ -48,7 +48,7 @@ typedef void (*vp8_sad_multi_d_fn_t) - ( - const unsigned char *src_ptr, - int source_stride, -- unsigned char *ref_ptr[4], -+ const unsigned char * const ref_ptr[], - int 
ref_stride, - unsigned int *sad_array - ); -diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c -index 996404d..da08aff 100644 ---- a/vp8/common/variance_c.c -+++ b/vp8/common/variance_c.c -@@ -205,14 +205,14 @@ static void var_filter_block2d_bil_first_pass - { - for (j = 0; j < output_width; j++) - { -- // Apply bilinear filter -+ /* Apply bilinear filter */ - output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; - src_ptr++; - } - -- // Next row... -+ /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -@@ -264,15 +264,15 @@ static void var_filter_block2d_bil_second_pass - { - for (j = 0; j < output_width; j++) - { -- // Apply filter -- Temp = ((int)src_ptr[0] * vp8_filter[0]) + -+ /* Apply filter */ -+ Temp = ((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2); - output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); - src_ptr++; - } - -- // Next row... -+ /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -@@ -292,15 +292,15 @@ unsigned int vp8_sub_pixel_variance4x4_c - { - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; -- unsigned short FData3[5*4]; // Temp data bufffer used in filtering -+ unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - -- // First filter 1d Horizontal -+ /* First filter 1d Horizontal */ - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); - -- // Now filter Verticaly -+ /* Now filter Verticaly */ - var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); - - return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); -@@ -318,7 +318,7 @@ unsigned int vp8_sub_pixel_variance8x8_c - unsigned int *sse - ) - { -- unsigned short FData3[9*8]; // Temp data bufffer used in filtering -+ unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -@@ -342,7 +342,7 @@ unsigned int vp8_sub_pixel_variance16x16_c - unsigned int *sse - ) - { -- unsigned short FData3[17*16]; // Temp data bufffer used in filtering -+ unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -@@ -418,7 +418,7 @@ unsigned int vp8_sub_pixel_variance16x8_c - unsigned int *sse - ) - { -- unsigned short FData3[16*9]; // Temp data bufffer used in filtering -+ unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -@@ -442,7 +442,7 @@ unsigned int vp8_sub_pixel_variance8x16_c - unsigned int *sse - ) - { -- unsigned short FData3[9*16]; // Temp data bufffer used in filtering -+ unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -diff --git a/vp8/common/vp8_entropymodedata.h b/vp8/common/vp8_entropymodedata.h -old mode 100755 -new mode 100644 -diff --git a/vp8/common/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm -index de9eba8..4e551f0 100644 ---- a/vp8/common/x86/dequantize_mmx.asm -+++ b/vp8/common/x86/dequantize_mmx.asm -@@ -13,7 +13,7 @@ - - - ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) --global 
sym(vp8_dequantize_b_impl_mmx) -+global sym(vp8_dequantize_b_impl_mmx) PRIVATE - sym(vp8_dequantize_b_impl_mmx): - push rbp - mov rbp, rsp -@@ -55,7 +55,7 @@ sym(vp8_dequantize_b_impl_mmx): - ;short *dq, 1 - ;unsigned char *dest, 2 - ;int stride) 3 --global sym(vp8_dequant_idct_add_mmx) -+global sym(vp8_dequant_idct_add_mmx) PRIVATE - sym(vp8_dequant_idct_add_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm -index 0c9c205..96fa2c6 100644 ---- a/vp8/common/x86/idctllm_mmx.asm -+++ b/vp8/common/x86/idctllm_mmx.asm -@@ -34,7 +34,7 @@ - - ;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, - ;int pitch, unsigned char *dest,int stride) --global sym(vp8_short_idct4x4llm_mmx) -+global sym(vp8_short_idct4x4llm_mmx) PRIVATE - sym(vp8_short_idct4x4llm_mmx): - push rbp - mov rbp, rsp -@@ -224,7 +224,7 @@ sym(vp8_short_idct4x4llm_mmx): - ;int pred_stride, - ;unsigned char *dst_ptr, - ;int stride) --global sym(vp8_dc_only_idct_add_mmx) -+global sym(vp8_dc_only_idct_add_mmx) PRIVATE - sym(vp8_dc_only_idct_add_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/idctllm_mmx_test.cc b/vp8/common/x86/idctllm_mmx_test.cc -deleted file mode 100755 -index 8c11533..0000000 ---- a/vp8/common/x86/idctllm_mmx_test.cc -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -- * -- * Use of this source code is governed by a BSD-style license -- * that can be found in the LICENSE file in the root of the source -- * tree. An additional intellectual property rights grant can be found -- * in the file PATENTS. All contributing project authors may -- * be found in the AUTHORS file in the root of the source tree. -- */ -- -- -- extern "C" { -- void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred_ptr, -- int pred_stride, unsigned char *dst_ptr, -- int dst_stride); --} -- --#include "vp8/common/idctllm_test.h" -- --namespace --{ -- --INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, -- ::testing::Values(vp8_short_idct4x4llm_mmx)); -- --} // namespace -- --int main(int argc, char **argv) { -- ::testing::InitGoogleTest(&argc, argv); -- return RUN_ALL_TESTS(); --} -diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm -index abeb0b6..bf8e2c4 100644 ---- a/vp8/common/x86/idctllm_sse2.asm -+++ b/vp8/common/x86/idctllm_sse2.asm -@@ -19,7 +19,7 @@ - ; int dst_stride - 3 - ; ) - --global sym(vp8_idct_dequant_0_2x_sse2) -+global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE - sym(vp8_idct_dequant_0_2x_sse2): - push rbp - mov rbp, rsp -@@ -101,7 +101,7 @@ sym(vp8_idct_dequant_0_2x_sse2): - ; unsigned char *dst - 2 - ; int dst_stride - 3 - ; ) --global sym(vp8_idct_dequant_full_2x_sse2) -+global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE - sym(vp8_idct_dequant_full_2x_sse2): - push rbp - mov rbp, rsp -@@ -358,7 +358,7 @@ sym(vp8_idct_dequant_full_2x_sse2): - ; int dst_stride - 3 - ; short *dc - 4 - ; ) --global sym(vp8_idct_dequant_dc_0_2x_sse2) -+global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE - sym(vp8_idct_dequant_dc_0_2x_sse2): - push rbp - mov rbp, rsp -@@ -434,7 +434,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2): - ; int dst_stride - 3 - ; short *dc - 4 - ; ) --global sym(vp8_idct_dequant_dc_full_2x_sse2) -+global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE - sym(vp8_idct_dequant_dc_full_2x_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm -index 6582687..4aac094 100644 ---- a/vp8/common/x86/iwalsh_mmx.asm -+++ 
b/vp8/common/x86/iwalsh_mmx.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) --global sym(vp8_short_inv_walsh4x4_mmx) -+global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE - sym(vp8_short_inv_walsh4x4_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm -index 51cb5e2..06e86a8 100644 ---- a/vp8/common/x86/iwalsh_sse2.asm -+++ b/vp8/common/x86/iwalsh_sse2.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) --global sym(vp8_short_inv_walsh4x4_sse2) -+global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE - sym(vp8_short_inv_walsh4x4_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm -index 4918eb5..3d45c61 100644 ---- a/vp8/common/x86/loopfilter_block_sse2.asm -+++ b/vp8/common/x86/loopfilter_block_sse2.asm -@@ -133,7 +133,7 @@ - ; const char *limit, - ; const char *thresh - ;) --global sym(vp8_loop_filter_bh_y_sse2) -+global sym(vp8_loop_filter_bh_y_sse2) PRIVATE - sym(vp8_loop_filter_bh_y_sse2): - - %ifidn __OUTPUT_FORMAT__,x64 -@@ -150,6 +150,7 @@ sym(vp8_loop_filter_bh_y_sse2): - - push rbp - mov rbp, rsp -+ SAVE_XMM 11 - push r12 - push r13 - mov thresh, arg(4) -@@ -258,6 +259,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - %ifidn __OUTPUT_FORMAT__,x64 - pop r13 - pop r12 -+ RESTORE_XMM - pop rbp - %endif - -@@ -273,7 +275,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - ; const char *thresh - ;) - --global sym(vp8_loop_filter_bv_y_sse2) -+global sym(vp8_loop_filter_bv_y_sse2) PRIVATE - sym(vp8_loop_filter_bv_y_sse2): - - %ifidn __OUTPUT_FORMAT__,x64 -diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm -index 697a5de..f388d24 100644 ---- a/vp8/common/x86/loopfilter_mmx.asm -+++ b/vp8/common/x86/loopfilter_mmx.asm -@@ -21,7 +21,7 @@ - ; const char *thresh, - ; int count - ;) --global sym(vp8_loop_filter_horizontal_edge_mmx) -+global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE - sym(vp8_loop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp -@@ -233,7 +233,7 @@ sym(vp8_loop_filter_horizontal_edge_mmx): - ; const char *thresh, - ; int count - ;) --global sym(vp8_loop_filter_vertical_edge_mmx) -+global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE - sym(vp8_loop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp -@@ -603,7 +603,7 @@ sym(vp8_loop_filter_vertical_edge_mmx): - ; const char *thresh, - ; int count - ;) --global sym(vp8_mbloop_filter_horizontal_edge_mmx) -+global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE - sym(vp8_mbloop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp -@@ -920,7 +920,7 @@ sym(vp8_mbloop_filter_horizontal_edge_mmx): - ; const char *thresh, - ; int count - ;) --global sym(vp8_mbloop_filter_vertical_edge_mmx) -+global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE - sym(vp8_mbloop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp -@@ -1384,7 +1384,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx): - ; int src_pixel_step, - ; const char *blimit - ;) --global sym(vp8_loop_filter_simple_horizontal_edge_mmx) -+global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE - sym(vp8_loop_filter_simple_horizontal_edge_mmx): - push rbp - mov rbp, rsp -@@ -1500,7 +1500,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx): - ; int src_pixel_step, - ; const char *blimit - ;) --global 
sym(vp8_loop_filter_simple_vertical_edge_mmx) -+global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE - sym(vp8_loop_filter_simple_vertical_edge_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm -index 9944c33..a66753b 100644 ---- a/vp8/common/x86/loopfilter_sse2.asm -+++ b/vp8/common/x86/loopfilter_sse2.asm -@@ -286,7 +286,7 @@ - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_loop_filter_horizontal_edge_sse2) -+global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE - sym(vp8_loop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp -@@ -334,7 +334,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): - ; const char *thresh, - ; int count - ;) --global sym(vp8_loop_filter_horizontal_edge_uv_sse2) -+global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE - sym(vp8_loop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -561,7 +561,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_mbloop_filter_horizontal_edge_sse2) -+global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE - sym(vp8_mbloop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp -@@ -607,7 +607,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): - ; const char *thresh, - ; unsigned char *v - ;) --global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) -+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE - sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -928,7 +928,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_loop_filter_vertical_edge_sse2) -+global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE - sym(vp8_loop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp -@@ -993,7 +993,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): - ; const char *thresh, - ; unsigned char *v - ;) --global sym(vp8_loop_filter_vertical_edge_uv_sse2) -+global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE - sym(vp8_loop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -1142,7 +1142,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_mbloop_filter_vertical_edge_sse2) -+global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE - sym(vp8_mbloop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp -@@ -1209,7 +1209,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): - ; const char *thresh, - ; unsigned char *v - ;) --global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) -+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE - sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -1269,7 +1269,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - ; int src_pixel_step, - ; const char *blimit, - ;) --global sym(vp8_loop_filter_simple_horizontal_edge_sse2) -+global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE - sym(vp8_loop_filter_simple_horizontal_edge_sse2): - push rbp - mov rbp, rsp -@@ -1374,7 +1374,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): - ; int src_pixel_step, - ; const char *blimit, - ;) --global sym(vp8_loop_filter_simple_vertical_edge_sse2) -+global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE - sym(vp8_loop_filter_simple_vertical_edge_sse2): - push rbp ; save old base pointer value. - mov rbp, rsp ; set new base pointer value. 
-diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm -index 10d21f3..c1d2174 100644 ---- a/vp8/common/x86/mfqe_sse2.asm -+++ b/vp8/common/x86/mfqe_sse2.asm -@@ -19,7 +19,7 @@ - ; int dst_stride, - ; int src_weight - ;) --global sym(vp8_filter_by_weight16x16_sse2) -+global sym(vp8_filter_by_weight16x16_sse2) PRIVATE - sym(vp8_filter_by_weight16x16_sse2): - push rbp - mov rbp, rsp -@@ -97,7 +97,7 @@ sym(vp8_filter_by_weight16x16_sse2): - ; int dst_stride, - ; int src_weight - ;) --global sym(vp8_filter_by_weight8x8_sse2) -+global sym(vp8_filter_by_weight8x8_sse2) PRIVATE - sym(vp8_filter_by_weight8x8_sse2): - push rbp - mov rbp, rsp -@@ -165,7 +165,7 @@ sym(vp8_filter_by_weight8x8_sse2): - ; unsigned int *variance, 4 - ; unsigned int *sad, 5 - ;) --global sym(vp8_variance_and_sad_16x16_sse2) -+global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE - sym(vp8_variance_and_sad_16x16_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm -index d24f740..966c586 100644 ---- a/vp8/common/x86/postproc_mmx.asm -+++ b/vp8/common/x86/postproc_mmx.asm -@@ -14,275 +14,10 @@ - %define VP8_FILTER_WEIGHT 128 - %define VP8_FILTER_SHIFT 7 - --;void vp8_post_proc_down_and_across_mmx --;( --; unsigned char *src_ptr, --; unsigned char *dst_ptr, --; int src_pixels_per_line, --; int dst_pixels_per_line, --; int rows, --; int cols, --; int flimit --;) --global sym(vp8_post_proc_down_and_across_mmx) --sym(vp8_post_proc_down_and_across_mmx): -- push rbp -- mov rbp, rsp -- SHADOW_ARGS_TO_STACK 7 -- GET_GOT rbx -- push rsi -- push rdi -- ; end prolog -- --%if ABI_IS_32BIT=1 && CONFIG_PIC=1 -- ; move the global rd onto the stack, since we don't have enough registers -- ; to do PIC addressing -- movq mm0, [GLOBAL(rd)] -- sub rsp, 8 -- movq [rsp], mm0 --%define RD [rsp] --%else --%define RD [GLOBAL(rd)] --%endif -- -- push rbx -- lea rbx, [GLOBAL(Blur)] -- movd mm2, dword ptr arg(6) ;flimit -- punpcklwd mm2, mm2 -- punpckldq mm2, mm2 -- -- mov rsi, arg(0) ;src_ptr -- mov rdi, arg(1) ;dst_ptr -- -- movsxd rcx, DWORD PTR arg(4) ;rows -- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? 
-- pxor mm0, mm0 ; mm0 = 00000000 -- --.nextrow: -- -- xor rdx, rdx ; clear out rdx for use as loop counter --.nextcol: -- -- pxor mm7, mm7 ; mm7 = 00000000 -- movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps -- movq mm3, [rsi] ; mm4 = r0 p0..p7 -- punpcklbw mm3, mm0 ; mm3 = p0..p3 -- movq mm1, mm3 ; mm1 = p0..p3 -- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers -- -- movq mm6, [rbx + 48] ; mm6 = kernel 3 taps -- movq mm5, [rsi + rax] ; mm4 = r1 p0..p7 -- punpcklbw mm5, mm0 ; mm5 = r1 p0..p3 -- pmullw mm6, mm5 ; mm6 *= p0..p3 * kernel 3 modifiers -- paddusw mm3, mm6 ; mm3 += mm6 -- -- ; thresholding -- movq mm7, mm1 ; mm7 = r0 p0..p3 -- psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3 -- psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3 -- paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) -- pcmpgtw mm7, mm2 -- -- movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers -- movq mm5, [rsi + 2*rax] ; mm4 = r2 p0..p7 -- punpcklbw mm5, mm0 ; mm5 = r2 p0..p3 -- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = r0 p0..p3 -- psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3 -- psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r2 p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- -- neg rax -- movq mm6, [rbx ] ; kernel 0 taps -- movq mm5, [rsi+2*rax] ; mm4 = r-2 p0..p7 -- punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3 -- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = r0 p0..p3 -- psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- movq mm6, [rbx + 16] ; kernel 1 taps -- movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7 -- punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3 -- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
-- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = r0 p0..p3 -- psubusw mm6, mm4 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw mm4, mm1 ; mm5 = r-1 p0..p3 - p0..p3 -- paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- -- paddusw mm3, RD ; mm3 += round value -- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 -- -- pand mm1, mm7 ; mm1 select vals > thresh from source -- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result -- paddusw mm1, mm7 ; combination -- -- packuswb mm1, mm0 ; pack to bytes -- -- movd [rdi], mm1 ; -- neg rax ; pitch is positive -- -- -- add rsi, 4 -- add rdi, 4 -- add rdx, 4 -- -- cmp edx, dword ptr arg(5) ;cols -- jl .nextcol -- ; done with the all cols, start the across filtering in place -- sub rsi, rdx -- sub rdi, rdx -- -- ; dup the first byte into the left border 8 times -- movq mm1, [rdi] -- punpcklbw mm1, mm1 -- punpcklwd mm1, mm1 -- punpckldq mm1, mm1 -- -- mov rdx, -8 -- movq [rdi+rdx], mm1 -- -- ; dup the last byte into the right border -- movsxd rdx, dword arg(5) -- movq mm1, [rdi + rdx + -1] -- punpcklbw mm1, mm1 -- punpcklwd mm1, mm1 -- punpckldq mm1, mm1 -- movq [rdi+rdx], mm1 -- -- -- push rax -- xor rdx, rdx -- mov rax, [rdi-4]; -- --.acrossnextcol: -- pxor mm7, mm7 ; mm7 = 00000000 -- movq mm6, [rbx + 32 ] ; -- movq mm4, [rdi+rdx] ; mm4 = p0..p7 -- movq mm3, mm4 ; mm3 = p0..p7 -- punpcklbw mm3, mm0 ; mm3 = p0..p3 -- movq mm1, mm3 ; mm1 = p0..p3 -- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers -- -- movq mm6, [rbx + 48] -- psrlq mm4, 8 ; mm4 = p1..p7 -- movq mm5, mm4 ; mm5 = p1..p7 -- punpcklbw mm5, mm0 ; mm5 = p1..p4 -- pmullw mm6, mm5 ; mm6 *= p1..p4 * kernel 3 modifiers -- paddusw mm3, mm6 ; mm3 += mm6 -- -- ; thresholding -- movq mm7, mm1 ; mm7 = p0..p3 -- psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4 -- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4) -- pcmpgtw mm7, mm2 -- -- movq mm6, [rbx + 64 ] -- psrlq mm4, 8 ; mm4 = p2..p7 -- movq mm5, mm4 ; mm5 = p2..p7 -- punpcklbw mm5, mm0 ; mm5 = p2..p5 -- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = p0..p3 -- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 -- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- -- movq mm6, [rbx ] -- movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5 -- movq mm5, mm4 ; mm5 = p-2..p5 -- punpcklbw mm5, mm0 ; mm5 = p-2..p1 -- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = p0..p3 -- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 -- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- movq mm6, [rbx + 16] -- psrlq mm4, 8 ; mm4 = p-1..p5 -- punpcklbw mm4, mm0 ; mm4 = p-1..p2 -- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
-- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = p0..p3 -- psubusw mm6, mm4 ; mm6 = p0..p3 - p1..p4 -- psubusw mm4, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- paddusw mm3, RD ; mm3 += round value -- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 -- -- pand mm1, mm7 ; mm1 select vals > thresh from source -- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result -- paddusw mm1, mm7 ; combination -- -- packuswb mm1, mm0 ; pack to bytes -- mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes -- movd eax, mm1 -- -- add rdx, 4 -- cmp edx, dword ptr arg(5) ;cols -- jl .acrossnextcol; -- -- mov DWORD PTR [rdi+rdx-4], eax -- pop rax -- -- ; done with this rwo -- add rsi,rax ; next line -- movsxd rax, dword ptr arg(3) ;dst_pixels_per_line ; destination pitch? -- add rdi,rax ; next destination -- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; destination pitch? -- -- dec rcx ; decrement count -- jnz .nextrow ; next row -- pop rbx -- -- ; begin epilog -- pop rdi -- pop rsi -- RESTORE_GOT -- UNSHADOW_ARGS -- pop rbp -- ret --%undef RD -- -- - ;void vp8_mbpost_proc_down_mmx(unsigned char *dst, - ; int pitch, int rows, int cols,int flimit) - extern sym(vp8_rv) --global sym(vp8_mbpost_proc_down_mmx) -+global sym(vp8_mbpost_proc_down_mmx) PRIVATE - sym(vp8_mbpost_proc_down_mmx): - push rbp - mov rbp, rsp -@@ -510,7 +245,7 @@ sym(vp8_mbpost_proc_down_mmx): - ; unsigned char bothclamp[16], - ; unsigned int Width, unsigned int Height, int Pitch) - extern sym(rand) --global sym(vp8_plane_add_noise_mmx) -+global sym(vp8_plane_add_noise_mmx) PRIVATE - sym(vp8_plane_add_noise_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm -index 966aafd..00f84a3 100644 ---- a/vp8/common/x86/postproc_sse2.asm -+++ b/vp8/common/x86/postproc_sse2.asm -@@ -11,146 +11,159 @@ - - %include "vpx_ports/x86_abi_support.asm" - --;void vp8_post_proc_down_and_across_xmm -+;macro in deblock functions -+%macro FIRST_2_ROWS 0 -+ movdqa xmm4, xmm0 -+ movdqa xmm6, xmm0 -+ movdqa xmm5, xmm1 -+ pavgb xmm5, xmm3 -+ -+ ;calculate absolute value -+ psubusb xmm4, xmm1 -+ psubusb xmm1, xmm0 -+ psubusb xmm6, xmm3 -+ psubusb xmm3, xmm0 -+ paddusb xmm4, xmm1 -+ paddusb xmm6, xmm3 -+ -+ ;get threshold -+ movdqa xmm2, flimit -+ pxor xmm1, xmm1 -+ movdqa xmm7, xmm2 -+ -+ ;get mask -+ psubusb xmm2, xmm4 -+ psubusb xmm7, xmm6 -+ pcmpeqb xmm2, xmm1 -+ pcmpeqb xmm7, xmm1 -+ por xmm7, xmm2 -+%endmacro -+ -+%macro SECOND_2_ROWS 0 -+ movdqa xmm6, xmm0 -+ movdqa xmm4, xmm0 -+ movdqa xmm2, xmm1 -+ pavgb xmm1, xmm3 -+ -+ ;calculate absolute value -+ psubusb xmm6, xmm2 -+ psubusb xmm2, xmm0 -+ psubusb xmm4, xmm3 -+ psubusb xmm3, xmm0 -+ paddusb xmm6, xmm2 -+ paddusb xmm4, xmm3 -+ -+ pavgb xmm5, xmm1 -+ -+ ;get threshold -+ movdqa xmm2, flimit -+ pxor xmm1, xmm1 -+ movdqa xmm3, xmm2 -+ -+ ;get mask -+ psubusb xmm2, xmm6 -+ psubusb xmm3, xmm4 -+ pcmpeqb xmm2, xmm1 -+ pcmpeqb xmm3, xmm1 -+ -+ por xmm7, xmm2 -+ por xmm7, xmm3 -+ -+ pavgb xmm5, xmm0 -+ -+ ;decide if or not to use filtered value -+ pand xmm0, xmm7 -+ pandn xmm7, xmm5 -+ paddusb xmm0, xmm7 -+%endmacro -+ -+%macro UPDATE_FLIMIT 0 -+ movdqa xmm2, XMMWORD PTR [rbx] -+ movdqa [rsp], xmm2 -+ add rbx, 16 -+%endmacro -+ -+;void vp8_post_proc_down_and_across_mb_row_sse2 - ;( - ; unsigned char *src_ptr, - ; unsigned char *dst_ptr, - ; int src_pixels_per_line, - ; int dst_pixels_per_line, --; int rows, - ; int cols, 
--; int flimit -+; int *flimits, -+; int size - ;) --global sym(vp8_post_proc_down_and_across_xmm) --sym(vp8_post_proc_down_and_across_xmm): -+global sym(vp8_post_proc_down_and_across_mb_row_sse2) PRIVATE -+sym(vp8_post_proc_down_and_across_mb_row_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 -- GET_GOT rbx -+ push rbx - push rsi - push rdi - ; end prolog -- --%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - ALIGN_STACK 16, rax -- ; move the global rd onto the stack, since we don't have enough registers -- ; to do PIC addressing -- movdqa xmm0, [GLOBAL(rd42)] - sub rsp, 16 -- movdqa [rsp], xmm0 --%define RD42 [rsp] --%else --%define RD42 [GLOBAL(rd42)] --%endif -- - -- movd xmm2, dword ptr arg(6) ;flimit -- punpcklwd xmm2, xmm2 -- punpckldq xmm2, xmm2 -- punpcklqdq xmm2, xmm2 -+ ; put flimit on stack -+ mov rbx, arg(5) ;flimits ptr -+ UPDATE_FLIMIT - -- mov rsi, arg(0) ;src_ptr -- mov rdi, arg(1) ;dst_ptr -+%define flimit [rsp] - -- movsxd rcx, DWORD PTR arg(4) ;rows -- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? -- pxor xmm0, xmm0 ; mm0 = 00000000 -+ mov rsi, arg(0) ;src_ptr -+ mov rdi, arg(1) ;dst_ptr - -+ movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line -+ movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock - .nextrow: -- -- xor rdx, rdx ; clear out rdx for use as loop counter -+ xor rdx, rdx ;col - .nextcol: -- movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7 -- punpcklbw xmm3, xmm0 ; mm3 = p0..p3 -- movdqa xmm1, xmm3 ; mm1 = p0..p3 -- psllw xmm3, 2 ; -- -- movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7 -- punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3 -- paddusw xmm3, xmm5 ; mm3 += mm6 -- -- ; thresholding -- movdqa xmm7, xmm1 ; mm7 = r0 p0..p3 -- psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3 -- psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3 -- paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) -- pcmpgtw xmm7, xmm2 -- -- movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7 -- punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 -- psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3 -- psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -+ ;load current and next 2 rows -+ movdqu xmm0, XMMWORD PTR [rsi] -+ movdqu xmm1, XMMWORD PTR [rsi + rax] -+ movdqu xmm3, XMMWORD PTR [rsi + 2*rax] - -+ FIRST_2_ROWS - -+ ;load above 2 rows - neg rax -- movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7 -- punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 -- psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7 -- punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3 -- paddusw xmm3, xmm4 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 -- psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3 -- paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- -- paddusw xmm3, RD42 ; mm3 += round value -- psraw xmm3, 3 ; mm3 /= 8 -- -- pand xmm1, xmm7 ; mm1 select vals > thresh from source -- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result -- 
paddusw xmm1, xmm7 ; combination -+ movdqu xmm1, XMMWORD PTR [rsi + 2*rax] -+ movdqu xmm3, XMMWORD PTR [rsi + rax] - -- packuswb xmm1, xmm0 ; pack to bytes -- movq QWORD PTR [rdi], xmm1 ; -+ SECOND_2_ROWS - -- neg rax ; pitch is positive -- add rsi, 8 -- add rdi, 8 -+ movdqu XMMWORD PTR [rdi], xmm0 - -- add rdx, 8 -- cmp edx, dword arg(5) ;cols -+ neg rax ; positive stride -+ add rsi, 16 -+ add rdi, 16 - -- jl .nextcol -+ add rdx, 16 -+ cmp edx, dword arg(4) ;cols -+ jge .downdone -+ UPDATE_FLIMIT -+ jmp .nextcol - -+.downdone: - ; done with the all cols, start the across filtering in place - sub rsi, rdx - sub rdi, rdx - -+ mov rbx, arg(5) ; flimits -+ UPDATE_FLIMIT - - ; dup the first byte into the left border 8 times - movq mm1, [rdi] - punpcklbw mm1, mm1 - punpcklwd mm1, mm1 - punpckldq mm1, mm1 -- - mov rdx, -8 - movq [rdi+rdx], mm1 - - ; dup the last byte into the right border -- movsxd rdx, dword arg(5) -+ movsxd rdx, dword arg(4) - movq mm1, [rdi + rdx + -1] - punpcklbw mm1, mm1 - punpcklwd mm1, mm1 -@@ -158,118 +171,69 @@ sym(vp8_post_proc_down_and_across_xmm): - movq [rdi+rdx], mm1 - - xor rdx, rdx -- movq mm0, QWORD PTR [rdi-8]; -+ movq mm0, QWORD PTR [rdi-16]; -+ movq mm1, QWORD PTR [rdi-8]; - - .acrossnextcol: -- movq xmm7, QWORD PTR [rdi +rdx -2] -- movd xmm4, DWORD PTR [rdi +rdx +6] -- -- pslldq xmm4, 8 -- por xmm4, xmm7 -- -- movdqa xmm3, xmm4 -- psrldq xmm3, 2 -- punpcklbw xmm3, xmm0 ; mm3 = p0..p3 -- movdqa xmm1, xmm3 ; mm1 = p0..p3 -- psllw xmm3, 2 -- -- -- movdqa xmm5, xmm4 -- psrldq xmm5, 3 -- punpcklbw xmm5, xmm0 ; mm5 = p1..p4 -- paddusw xmm3, xmm5 ; mm3 += mm6 -- -- ; thresholding -- movdqa xmm7, xmm1 ; mm7 = p0..p3 -- psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4 -- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm7, xmm2 -- -- movdqa xmm5, xmm4 -- psrldq xmm5, 4 -- punpcklbw xmm5, xmm0 ; mm5 = p2..p5 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = p0..p3 -- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 -- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- -- movdqa xmm5, xmm4 ; mm5 = p-2..p5 -- punpcklbw xmm5, xmm0 ; mm5 = p-2..p1 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = p0..p3 -- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 -- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- psrldq xmm4, 1 ; mm4 = p-1..p5 -- punpcklbw xmm4, xmm0 ; mm4 = p-1..p2 -- paddusw xmm3, xmm4 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = p0..p3 -- psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4 -- psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- paddusw xmm3, RD42 ; mm3 += round value -- psraw xmm3, 3 ; mm3 /= 8 -- -- pand xmm1, xmm7 ; mm1 select vals > thresh from source -- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result -- paddusw xmm1, xmm7 ; combination -- -- packuswb xmm1, xmm0 ; pack to bytes -- movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes -- movdq2q mm0, xmm1 -- -- add rdx, 8 -- cmp edx, dword arg(5) ;cols -- jl .acrossnextcol; -- -- ; last 8 pixels -- movq QWORD PTR [rdi+rdx-8], mm0 -+ movdqu xmm0, XMMWORD PTR [rdi + rdx] -+ movdqu xmm1, XMMWORD PTR [rdi + rdx -2] -+ movdqu 
xmm3, XMMWORD PTR [rdi + rdx -1] -+ -+ FIRST_2_ROWS -+ -+ movdqu xmm1, XMMWORD PTR [rdi + rdx +1] -+ movdqu xmm3, XMMWORD PTR [rdi + rdx +2] -+ -+ SECOND_2_ROWS -+ -+ movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes -+ movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes -+ movdq2q mm0, xmm0 -+ psrldq xmm0, 8 -+ movdq2q mm1, xmm0 -+ -+ add rdx, 16 -+ cmp edx, dword arg(4) ;cols -+ jge .acrossdone -+ UPDATE_FLIMIT -+ jmp .acrossnextcol - -+.acrossdone -+ ; last 16 pixels -+ movq QWORD PTR [rdi+rdx-16], mm0 -+ -+ cmp edx, dword arg(4) -+ jne .throw_last_8 -+ movq QWORD PTR [rdi+rdx-8], mm1 -+.throw_last_8: - ; done with this rwo -- add rsi,rax ; next line -- mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch? -- add rdi,rax ; next destination -- mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch? -+ add rsi,rax ;next src line -+ mov eax, dword arg(3) ;dst_pixels_per_line -+ add rdi,rax ;next destination -+ mov eax, dword arg(2) ;src_pixels_per_line - -- dec rcx ; decrement count -- jnz .nextrow ; next row -+ mov rbx, arg(5) ;flimits -+ UPDATE_FLIMIT - --%if ABI_IS_32BIT=1 && CONFIG_PIC=1 -- add rsp,16 -+ dec rcx ;decrement count -+ jnz .nextrow ;next row -+ -+ add rsp, 16 - pop rsp --%endif - ; begin epilog - pop rdi - pop rsi -- RESTORE_GOT -+ pop rbx - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret --%undef RD42 -- -+%undef flimit - - ;void vp8_mbpost_proc_down_xmm(unsigned char *dst, - ; int pitch, int rows, int cols,int flimit) - extern sym(vp8_rv) --global sym(vp8_mbpost_proc_down_xmm) -+global sym(vp8_mbpost_proc_down_xmm) PRIVATE - sym(vp8_mbpost_proc_down_xmm): - push rbp - mov rbp, rsp -@@ -497,7 +461,7 @@ sym(vp8_mbpost_proc_down_xmm): - - ;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src, - ; int pitch, int rows, int cols,int flimit) --global sym(vp8_mbpost_proc_across_ip_xmm) -+global sym(vp8_mbpost_proc_across_ip_xmm) PRIVATE - sym(vp8_mbpost_proc_across_ip_xmm): - push rbp - mov rbp, rsp -@@ -694,7 +658,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): - ; unsigned char bothclamp[16], - ; unsigned int Width, unsigned int Height, int Pitch) - extern sym(rand) --global sym(vp8_plane_add_noise_wmt) -+global sym(vp8_plane_add_noise_wmt) PRIVATE - sym(vp8_plane_add_noise_wmt): - push rbp - mov rbp, rsp -@@ -753,7 +717,5 @@ sym(vp8_plane_add_noise_wmt): - - SECTION_RODATA - align 16 --rd42: -- times 8 dw 0x04 - four8s: - times 4 dd 8 -diff --git a/vp8/common/x86/postproc_x86.c b/vp8/common/x86/postproc_x86.c -index a25921b..3ec0106 100644 ---- a/vp8/common/x86/postproc_x86.c -+++ b/vp8/common/x86/postproc_x86.c -@@ -18,4 +18,7 @@ extern int rand(void) - { - return __rand(); - } -+#else -+/* ISO C forbids an empty translation unit. 
*/ -+int vp8_unused; - #endif -diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm -index 19c0faf..15e9871 100644 ---- a/vp8/common/x86/recon_mmx.asm -+++ b/vp8/common/x86/recon_mmx.asm -@@ -18,7 +18,7 @@ - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem8x8_mmx) -+global sym(vp8_copy_mem8x8_mmx) PRIVATE - sym(vp8_copy_mem8x8_mmx): - push rbp - mov rbp, rsp -@@ -81,7 +81,7 @@ sym(vp8_copy_mem8x8_mmx): - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem8x4_mmx) -+global sym(vp8_copy_mem8x4_mmx) PRIVATE - sym(vp8_copy_mem8x4_mmx): - push rbp - mov rbp, rsp -@@ -125,7 +125,7 @@ sym(vp8_copy_mem8x4_mmx): - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem16x16_mmx) -+global sym(vp8_copy_mem16x16_mmx) PRIVATE - sym(vp8_copy_mem16x16_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm -index 7b6e3cf..1434bcd 100644 ---- a/vp8/common/x86/recon_sse2.asm -+++ b/vp8/common/x86/recon_sse2.asm -@@ -17,7 +17,7 @@ - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem16x16_sse2) -+global sym(vp8_copy_mem16x16_sse2) PRIVATE - sym(vp8_copy_mem16x16_sse2): - push rbp - mov rbp, rsp -@@ -123,7 +123,7 @@ sym(vp8_copy_mem16x16_sse2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dc_mmx2) -+global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE - sym(vp8_intra_pred_uv_dc_mmx2): - push rbp - mov rbp, rsp -@@ -196,7 +196,7 @@ sym(vp8_intra_pred_uv_dc_mmx2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dctop_mmx2) -+global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE - sym(vp8_intra_pred_uv_dctop_mmx2): - push rbp - mov rbp, rsp -@@ -250,7 +250,7 @@ sym(vp8_intra_pred_uv_dctop_mmx2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dcleft_mmx2) -+global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE - sym(vp8_intra_pred_uv_dcleft_mmx2): - push rbp - mov rbp, rsp -@@ -317,7 +317,7 @@ sym(vp8_intra_pred_uv_dcleft_mmx2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dc128_mmx) -+global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE - sym(vp8_intra_pred_uv_dc128_mmx): - push rbp - mov rbp, rsp -@@ -357,7 +357,7 @@ sym(vp8_intra_pred_uv_dc128_mmx): - ; int left_stride, - ; ) - %macro vp8_intra_pred_uv_tm 1 --global sym(vp8_intra_pred_uv_tm_%1) -+global sym(vp8_intra_pred_uv_tm_%1) PRIVATE - sym(vp8_intra_pred_uv_tm_%1): - push rbp - mov rbp, rsp -@@ -437,7 +437,7 @@ vp8_intra_pred_uv_tm ssse3 - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_ve_mmx) -+global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE - sym(vp8_intra_pred_uv_ve_mmx): - push rbp - mov rbp, rsp -@@ -479,7 +479,7 @@ sym(vp8_intra_pred_uv_ve_mmx): - ; int left_stride - ; ) - %macro vp8_intra_pred_uv_ho 1 --global sym(vp8_intra_pred_uv_ho_%1) -+global sym(vp8_intra_pred_uv_ho_%1) PRIVATE - sym(vp8_intra_pred_uv_ho_%1): - push rbp - mov rbp, rsp -@@ -577,7 +577,7 @@ vp8_intra_pred_uv_ho ssse3 - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dc_sse2) -+global sym(vp8_intra_pred_y_dc_sse2) PRIVATE - sym(vp8_intra_pred_y_dc_sse2): - push rbp - mov rbp, rsp -@@ -683,7 +683,7 @@ sym(vp8_intra_pred_y_dc_sse2): - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dctop_sse2) -+global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE - sym(vp8_intra_pred_y_dctop_sse2): - push rbp - mov rbp, rsp -@@ 
-745,7 +745,7 @@ sym(vp8_intra_pred_y_dctop_sse2): - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dcleft_sse2) -+global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE - sym(vp8_intra_pred_y_dcleft_sse2): - push rbp - mov rbp, rsp -@@ -838,7 +838,7 @@ sym(vp8_intra_pred_y_dcleft_sse2): - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dc128_sse2) -+global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE - sym(vp8_intra_pred_y_dc128_sse2): - push rbp - mov rbp, rsp -@@ -885,11 +885,12 @@ sym(vp8_intra_pred_y_dc128_sse2): - ; int left_stride - ; ) - %macro vp8_intra_pred_y_tm 1 --global sym(vp8_intra_pred_y_tm_%1) -+global sym(vp8_intra_pred_y_tm_%1) PRIVATE - sym(vp8_intra_pred_y_tm_%1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 -+ SAVE_XMM 7 - push rsi - push rdi - GET_GOT rbx -@@ -957,6 +958,7 @@ vp8_intra_pred_y_tm_%1_loop: - RESTORE_GOT - pop rdi - pop rsi -+ RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -@@ -972,7 +974,7 @@ vp8_intra_pred_y_tm ssse3 - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_ve_sse2) -+global sym(vp8_intra_pred_y_ve_sse2) PRIVATE - sym(vp8_intra_pred_y_ve_sse2): - push rbp - mov rbp, rsp -@@ -1020,7 +1022,7 @@ sym(vp8_intra_pred_y_ve_sse2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_y_ho_sse2) -+global sym(vp8_intra_pred_y_ho_sse2) PRIVATE - sym(vp8_intra_pred_y_ho_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/sad_mmx.asm b/vp8/common/x86/sad_mmx.asm -index 407b399..592112f 100644 ---- a/vp8/common/x86/sad_mmx.asm -+++ b/vp8/common/x86/sad_mmx.asm -@@ -11,11 +11,11 @@ - - %include "vpx_ports/x86_abi_support.asm" - --global sym(vp8_sad16x16_mmx) --global sym(vp8_sad8x16_mmx) --global sym(vp8_sad8x8_mmx) --global sym(vp8_sad4x4_mmx) --global sym(vp8_sad16x8_mmx) -+global sym(vp8_sad16x16_mmx) PRIVATE -+global sym(vp8_sad8x16_mmx) PRIVATE -+global sym(vp8_sad8x8_mmx) PRIVATE -+global sym(vp8_sad4x4_mmx) PRIVATE -+global sym(vp8_sad16x8_mmx) PRIVATE - - ;unsigned int vp8_sad16x16_mmx( - ; unsigned char *src_ptr, -diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm -index 0b01d7b..8d86abc 100644 ---- a/vp8/common/x86/sad_sse2.asm -+++ b/vp8/common/x86/sad_sse2.asm -@@ -16,7 +16,7 @@ - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad16x16_wmt) -+global sym(vp8_sad16x16_wmt) PRIVATE - sym(vp8_sad16x16_wmt): - push rbp - mov rbp, rsp -@@ -90,7 +90,7 @@ sym(vp8_sad16x16_wmt): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int max_sad) --global sym(vp8_sad8x16_wmt) -+global sym(vp8_sad8x16_wmt) PRIVATE - sym(vp8_sad8x16_wmt): - push rbp - mov rbp, rsp -@@ -115,7 +115,7 @@ sym(vp8_sad8x16_wmt): - - movq rax, mm7 - cmp eax, arg(4) -- jg .x8x16sad_wmt_early_exit -+ ja .x8x16sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] -@@ -153,7 +153,7 @@ sym(vp8_sad8x16_wmt): - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad8x8_wmt) -+global sym(vp8_sad8x8_wmt) PRIVATE - sym(vp8_sad8x8_wmt): - push rbp - mov rbp, rsp -@@ -176,7 +176,7 @@ sym(vp8_sad8x8_wmt): - - movq rax, mm7 - cmp eax, arg(4) -- jg .x8x8sad_wmt_early_exit -+ ja .x8x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] -@@ -206,7 +206,7 @@ sym(vp8_sad8x8_wmt): - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad4x4_wmt) -+global sym(vp8_sad4x4_wmt) PRIVATE - sym(vp8_sad4x4_wmt): - push rbp - mov 
rbp, rsp -@@ -261,7 +261,7 @@ sym(vp8_sad4x4_wmt): - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad16x8_wmt) -+global sym(vp8_sad16x8_wmt) PRIVATE - sym(vp8_sad16x8_wmt): - push rbp - mov rbp, rsp -@@ -285,7 +285,7 @@ sym(vp8_sad16x8_wmt): - - movq rax, mm7 - cmp eax, arg(4) -- jg .x16x8sad_wmt_early_exit -+ ja .x16x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] -@@ -335,7 +335,7 @@ sym(vp8_sad16x8_wmt): - ; unsigned char *dst_ptr, - ; int dst_stride, - ; int height); --global sym(vp8_copy32xn_sse2) -+global sym(vp8_copy32xn_sse2) PRIVATE - sym(vp8_copy32xn_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm -index c2af3c8..f90a589 100644 ---- a/vp8/common/x86/sad_sse3.asm -+++ b/vp8/common/x86/sad_sse3.asm -@@ -380,7 +380,7 @@ - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x16x3_sse3) -+global sym(vp8_sad16x16x3_sse3) PRIVATE - sym(vp8_sad16x16x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -422,7 +422,7 @@ sym(vp8_sad16x16x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x8x3_sse3) -+global sym(vp8_sad16x8x3_sse3) PRIVATE - sym(vp8_sad16x8x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -460,7 +460,7 @@ sym(vp8_sad16x8x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x16x3_sse3) -+global sym(vp8_sad8x16x3_sse3) PRIVATE - sym(vp8_sad8x16x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -489,7 +489,7 @@ sym(vp8_sad8x16x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x8x3_sse3) -+global sym(vp8_sad8x8x3_sse3) PRIVATE - sym(vp8_sad8x8x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -514,7 +514,7 @@ sym(vp8_sad8x8x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad4x4x3_sse3) -+global sym(vp8_sad4x4x3_sse3) PRIVATE - sym(vp8_sad4x4x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -589,7 +589,7 @@ sym(vp8_sad4x4x3_sse3): - ; int ref_stride, - ; int max_sad) - ;%define lddqu movdqu --global sym(vp8_sad16x16_sse3) -+global sym(vp8_sad16x16_sse3) PRIVATE - sym(vp8_sad16x16_sse3): - - STACK_FRAME_CREATE_X3 -@@ -642,7 +642,7 @@ sym(vp8_sad16x16_sse3): - ; unsigned char *dst_ptr, - ; int dst_stride, - ; int height); --global sym(vp8_copy32xn_sse3) -+global sym(vp8_copy32xn_sse3) PRIVATE - sym(vp8_copy32xn_sse3): - - STACK_FRAME_CREATE_X3 -@@ -703,7 +703,7 @@ sym(vp8_copy32xn_sse3): - ; unsigned char *ref_ptr_base, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x16x4d_sse3) -+global sym(vp8_sad16x16x4d_sse3) PRIVATE - sym(vp8_sad16x16x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -754,7 +754,7 @@ sym(vp8_sad16x16x4d_sse3): - ; unsigned char *ref_ptr_base, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x8x4d_sse3) -+global sym(vp8_sad16x8x4d_sse3) PRIVATE - sym(vp8_sad16x8x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -801,7 +801,7 @@ sym(vp8_sad16x8x4d_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x16x4d_sse3) -+global sym(vp8_sad8x16x4d_sse3) PRIVATE - sym(vp8_sad8x16x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -834,7 +834,7 @@ sym(vp8_sad8x16x4d_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x8x4d_sse3) -+global sym(vp8_sad8x8x4d_sse3) PRIVATE - sym(vp8_sad8x8x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -863,7 +863,7 @@ sym(vp8_sad8x8x4d_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; 
int *results) --global sym(vp8_sad4x4x4d_sse3) -+global sym(vp8_sad4x4x4d_sse3) PRIVATE - sym(vp8_sad4x4x4d_sse3): - - STACK_FRAME_CREATE_X4 -diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm -index 03ecec4..f7fccd7 100644 ---- a/vp8/common/x86/sad_sse4.asm -+++ b/vp8/common/x86/sad_sse4.asm -@@ -161,7 +161,7 @@ - ; const unsigned char *ref_ptr, - ; int ref_stride, - ; unsigned short *sad_array); --global sym(vp8_sad16x16x8_sse4) -+global sym(vp8_sad16x16x8_sse4) PRIVATE - sym(vp8_sad16x16x8_sse4): - push rbp - mov rbp, rsp -@@ -203,7 +203,7 @@ sym(vp8_sad16x16x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad16x8x8_sse4) -+global sym(vp8_sad16x8x8_sse4) PRIVATE - sym(vp8_sad16x8x8_sse4): - push rbp - mov rbp, rsp -@@ -241,7 +241,7 @@ sym(vp8_sad16x8x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad8x8x8_sse4) -+global sym(vp8_sad8x8x8_sse4) PRIVATE - sym(vp8_sad8x8x8_sse4): - push rbp - mov rbp, rsp -@@ -279,7 +279,7 @@ sym(vp8_sad8x8x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad8x16x8_sse4) -+global sym(vp8_sad8x16x8_sse4) PRIVATE - sym(vp8_sad8x16x8_sse4): - push rbp - mov rbp, rsp -@@ -320,7 +320,7 @@ sym(vp8_sad8x16x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad4x4x8_sse4) -+global sym(vp8_sad4x4x8_sse4) PRIVATE - sym(vp8_sad4x4x8_sse4): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/sad_ssse3.asm b/vp8/common/x86/sad_ssse3.asm -index 95b6c89..278fc06 100644 ---- a/vp8/common/x86/sad_ssse3.asm -+++ b/vp8/common/x86/sad_ssse3.asm -@@ -152,7 +152,7 @@ - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x16x3_ssse3) -+global sym(vp8_sad16x16x3_ssse3) PRIVATE - sym(vp8_sad16x16x3_ssse3): - push rbp - mov rbp, rsp -@@ -265,7 +265,7 @@ sym(vp8_sad16x16x3_ssse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x8x3_ssse3) -+global sym(vp8_sad16x8x3_ssse3) PRIVATE - sym(vp8_sad16x8x3_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm -index 5528fd0..47dd452 100644 ---- a/vp8/common/x86/subpixel_mmx.asm -+++ b/vp8/common/x86/subpixel_mmx.asm -@@ -28,7 +28,7 @@ extern sym(vp8_bilinear_filters_x86_8) - ; unsigned int output_width, - ; short * vp8_filter - ;) --global sym(vp8_filter_block1d_h6_mmx) -+global sym(vp8_filter_block1d_h6_mmx) PRIVATE - sym(vp8_filter_block1d_h6_mmx): - push rbp - mov rbp, rsp -@@ -125,7 +125,7 @@ sym(vp8_filter_block1d_h6_mmx): - ; unsigned int output_width, - ; short * vp8_filter - ;) --global sym(vp8_filter_block1dc_v6_mmx) -+global sym(vp8_filter_block1dc_v6_mmx) PRIVATE - sym(vp8_filter_block1dc_v6_mmx): - push rbp - mov rbp, rsp -@@ -213,7 +213,7 @@ sym(vp8_filter_block1dc_v6_mmx): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x8_mmx) -+global sym(vp8_bilinear_predict8x8_mmx) PRIVATE - sym(vp8_bilinear_predict8x8_mmx): - push rbp - mov rbp, rsp -@@ -370,7 +370,7 @@ sym(vp8_bilinear_predict8x8_mmx): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x4_mmx) -+global sym(vp8_bilinear_predict8x4_mmx) PRIVATE - sym(vp8_bilinear_predict8x4_mmx): - push rbp - mov rbp, rsp -@@ -525,7 +525,7 @@ sym(vp8_bilinear_predict8x4_mmx): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict4x4_mmx) -+global sym(vp8_bilinear_predict4x4_mmx) PRIVATE - 
sym(vp8_bilinear_predict4x4_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm -index cb550af..69f8d10 100644 ---- a/vp8/common/x86/subpixel_sse2.asm -+++ b/vp8/common/x86/subpixel_sse2.asm -@@ -33,7 +33,7 @@ extern sym(vp8_bilinear_filters_x86_8) - ; unsigned int output_width, - ; short *vp8_filter - ;) --global sym(vp8_filter_block1d8_h6_sse2) -+global sym(vp8_filter_block1d8_h6_sse2) PRIVATE - sym(vp8_filter_block1d8_h6_sse2): - push rbp - mov rbp, rsp -@@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2): - ; even number. This function handles 8 pixels in horizontal direction, calculating ONE - ; rows each iteration to take advantage of the 128 bits operations. - ;*************************************************************************************/ --global sym(vp8_filter_block1d16_h6_sse2) -+global sym(vp8_filter_block1d16_h6_sse2) PRIVATE - sym(vp8_filter_block1d16_h6_sse2): - push rbp - mov rbp, rsp -@@ -329,7 +329,7 @@ sym(vp8_filter_block1d16_h6_sse2): - ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The - ; input pixel array has output_height rows. - ;*************************************************************************************/ --global sym(vp8_filter_block1d8_v6_sse2) -+global sym(vp8_filter_block1d8_v6_sse2) PRIVATE - sym(vp8_filter_block1d8_v6_sse2): - push rbp - mov rbp, rsp -@@ -424,7 +424,7 @@ sym(vp8_filter_block1d8_v6_sse2): - ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The - ; input pixel array has output_height rows. - ;*************************************************************************************/ --global sym(vp8_filter_block1d16_v6_sse2) -+global sym(vp8_filter_block1d16_v6_sse2) PRIVATE - sym(vp8_filter_block1d16_v6_sse2): - push rbp - mov rbp, rsp -@@ -534,7 +534,7 @@ sym(vp8_filter_block1d16_v6_sse2): - ; const short *vp8_filter - ;) - ; First-pass filter only when yoffset==0 --global sym(vp8_filter_block1d8_h6_only_sse2) -+global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE - sym(vp8_filter_block1d8_h6_only_sse2): - push rbp - mov rbp, rsp -@@ -647,7 +647,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): - ; const short *vp8_filter - ;) - ; First-pass filter only when yoffset==0 --global sym(vp8_filter_block1d16_h6_only_sse2) -+global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE - sym(vp8_filter_block1d16_h6_only_sse2): - push rbp - mov rbp, rsp -@@ -812,7 +812,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): - ; const short *vp8_filter - ;) - ; Second-pass filter only when xoffset==0 --global sym(vp8_filter_block1d8_v6_only_sse2) -+global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE - sym(vp8_filter_block1d8_v6_only_sse2): - push rbp - mov rbp, rsp -@@ -904,7 +904,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): - ; unsigned int output_height, - ; unsigned int output_width - ;) --global sym(vp8_unpack_block1d16_h6_sse2) -+global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE - sym(vp8_unpack_block1d16_h6_sse2): - push rbp - mov rbp, rsp -@@ -963,7 +963,7 @@ sym(vp8_unpack_block1d16_h6_sse2): - ; int dst_pitch - ;) - extern sym(vp8_bilinear_filters_x86_8) --global sym(vp8_bilinear_predict16x16_sse2) -+global sym(vp8_bilinear_predict16x16_sse2) PRIVATE - sym(vp8_bilinear_predict16x16_sse2): - push rbp - mov rbp, rsp -@@ -1231,7 +1231,7 @@ sym(vp8_bilinear_predict16x16_sse2): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x8_sse2) -+global sym(vp8_bilinear_predict8x8_sse2) PRIVATE - 
sym(vp8_bilinear_predict8x8_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm -index 6bca82b..c06f245 100644 ---- a/vp8/common/x86/subpixel_ssse3.asm -+++ b/vp8/common/x86/subpixel_ssse3.asm -@@ -34,7 +34,7 @@ - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d8_h6_ssse3) -+global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE - sym(vp8_filter_block1d8_h6_ssse3): - push rbp - mov rbp, rsp -@@ -177,7 +177,7 @@ vp8_filter_block1d8_h4_ssse3: - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d16_h6_ssse3) -+global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE - sym(vp8_filter_block1d16_h6_ssse3): - push rbp - mov rbp, rsp -@@ -284,7 +284,7 @@ sym(vp8_filter_block1d16_h6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d4_h6_ssse3) -+global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE - sym(vp8_filter_block1d4_h6_ssse3): - push rbp - mov rbp, rsp -@@ -352,6 +352,7 @@ sym(vp8_filter_block1d4_h6_ssse3): - pop rdi - pop rsi - RESTORE_GOT -+ RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -@@ -413,7 +414,7 @@ sym(vp8_filter_block1d4_h6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d16_v6_ssse3) -+global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE - sym(vp8_filter_block1d16_v6_ssse3): - push rbp - mov rbp, rsp -@@ -601,7 +602,7 @@ sym(vp8_filter_block1d16_v6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d8_v6_ssse3) -+global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE - sym(vp8_filter_block1d8_v6_ssse3): - push rbp - mov rbp, rsp -@@ -741,7 +742,7 @@ sym(vp8_filter_block1d8_v6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d4_v6_ssse3) -+global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE - sym(vp8_filter_block1d4_v6_ssse3): - push rbp - mov rbp, rsp -@@ -880,7 +881,7 @@ sym(vp8_filter_block1d4_v6_ssse3): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict16x16_ssse3) -+global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE - sym(vp8_bilinear_predict16x16_ssse3): - push rbp - mov rbp, rsp -@@ -1143,7 +1144,7 @@ sym(vp8_bilinear_predict16x16_ssse3): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x8_ssse3) -+global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE - sym(vp8_bilinear_predict8x8_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm -index 2be8bbe..d9120d0 100644 ---- a/vp8/common/x86/variance_impl_mmx.asm -+++ b/vp8/common/x86/variance_impl_mmx.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;unsigned int vp8_get_mb_ss_mmx( short *src_ptr ) --global sym(vp8_get_mb_ss_mmx) -+global sym(vp8_get_mb_ss_mmx) PRIVATE - sym(vp8_get_mb_ss_mmx): - push rbp - mov rbp, rsp -@@ -72,7 +72,7 @@ sym(vp8_get_mb_ss_mmx): - ; unsigned int *SSE, - ; int *Sum - ;) --global sym(vp8_get8x8var_mmx) -+global sym(vp8_get8x8var_mmx) PRIVATE - sym(vp8_get8x8var_mmx): - push rbp - mov rbp, rsp -@@ -320,7 +320,7 @@ sym(vp8_get8x8var_mmx): - ; unsigned int *SSE, - ; int *Sum - ;) --global sym(vp8_get4x4var_mmx) -+global sym(vp8_get4x4var_mmx) PRIVATE - sym(vp8_get4x4var_mmx): - push rbp - mov rbp, rsp -@@ -433,7 +433,7 @@ sym(vp8_get4x4var_mmx): - ; unsigned char *ref_ptr, - ; 
int recon_stride - ;) --global sym(vp8_get4x4sse_cs_mmx) -+global sym(vp8_get4x4sse_cs_mmx) PRIVATE - sym(vp8_get4x4sse_cs_mmx): - push rbp - mov rbp, rsp -@@ -522,7 +522,7 @@ sym(vp8_get4x4sse_cs_mmx): - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_filter_block2d_bil4x4_var_mmx) -+global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE - sym(vp8_filter_block2d_bil4x4_var_mmx): - push rbp - mov rbp, rsp -@@ -667,7 +667,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx): - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_filter_block2d_bil_var_mmx) -+global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE - sym(vp8_filter_block2d_bil_var_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_impl_sse2.asm b/vp8/common/x86/variance_impl_sse2.asm -index 7629220..761433c 100644 ---- a/vp8/common/x86/variance_impl_sse2.asm -+++ b/vp8/common/x86/variance_impl_sse2.asm -@@ -17,7 +17,7 @@ - ;( - ; short *src_ptr - ;) --global sym(vp8_get_mb_ss_sse2) -+global sym(vp8_get_mb_ss_sse2) PRIVATE - sym(vp8_get_mb_ss_sse2): - push rbp - mov rbp, rsp -@@ -80,7 +80,7 @@ sym(vp8_get_mb_ss_sse2): - ; unsigned int * SSE, - ; int * Sum - ;) --global sym(vp8_get16x16var_sse2) -+global sym(vp8_get16x16var_sse2) PRIVATE - sym(vp8_get16x16var_sse2): - push rbp - mov rbp, rsp -@@ -224,7 +224,7 @@ sym(vp8_get16x16var_sse2): - ; unsigned int * SSE, - ; int * Sum - ;) --global sym(vp8_get8x8var_sse2) -+global sym(vp8_get8x8var_sse2) PRIVATE - sym(vp8_get8x8var_sse2): - push rbp - mov rbp, rsp -@@ -413,7 +413,7 @@ sym(vp8_get8x8var_sse2): - ; unsigned int *sumsquared;; - ; - ;) --global sym(vp8_filter_block2d_bil_var_sse2) -+global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE - sym(vp8_filter_block2d_bil_var_sse2): - push rbp - mov rbp, rsp -@@ -690,7 +690,7 @@ filter_block2d_bil_variance: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_vert_variance8x_h_sse2) -+global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE - sym(vp8_half_horiz_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp -@@ -812,7 +812,7 @@ vp8_half_horiz_vert_variance8x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_vert_variance16x_h_sse2) -+global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE - sym(vp8_half_horiz_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp -@@ -928,7 +928,7 @@ vp8_half_horiz_vert_variance16x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_vert_variance8x_h_sse2) -+global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE - sym(vp8_half_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp -@@ -1035,7 +1035,7 @@ vp8_half_vert_variance8x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_vert_variance16x_h_sse2) -+global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE - sym(vp8_half_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp -@@ -1143,7 +1143,7 @@ vp8_half_vert_variance16x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_variance8x_h_sse2) -+global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE - sym(vp8_half_horiz_variance8x_h_sse2): - push rbp - mov rbp, rsp -@@ -1248,7 +1248,7 @@ vp8_half_horiz_variance8x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_variance16x_h_sse2) -+global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE - sym(vp8_half_horiz_variance16x_h_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_impl_ssse3.asm b/vp8/common/x86/variance_impl_ssse3.asm -index 97e8b0e..686b4a9 
100644 ---- a/vp8/common/x86/variance_impl_ssse3.asm -+++ b/vp8/common/x86/variance_impl_ssse3.asm -@@ -29,7 +29,7 @@ - ;) - ;Note: The filter coefficient at offset=0 is 128. Since the second register - ;for Pmaddubsw is signed bytes, we must calculate zero offset seperately. --global sym(vp8_filter_block2d_bil_var_ssse3) -+global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE - sym(vp8_filter_block2d_bil_var_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c -index 2769a30..afd6429 100644 ---- a/vp8/common/x86/variance_sse2.c -+++ b/vp8/common/x86/variance_sse2.c -@@ -332,8 +332,9 @@ unsigned int vp8_sub_pixel_variance16x16_wmt - unsigned int xxsum0, xxsum1; - - -- // note we could avoid these if statements if the calling function -- // just called the appropriate functions inside. -+ /* note we could avoid these if statements if the calling function -+ * just called the appropriate functions inside. -+ */ - if (xoffset == 4 && yoffset == 0) - { - vp8_half_horiz_variance16x_h_sse2( -diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c -index 1be0d92..ba2055c 100644 ---- a/vp8/common/x86/variance_ssse3.c -+++ b/vp8/common/x86/variance_ssse3.c -@@ -79,8 +79,9 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3 - int xsum0; - unsigned int xxsum0; - -- // note we could avoid these if statements if the calling function -- // just called the appropriate functions inside. -+ /* note we could avoid these if statements if the calling function -+ * just called the appropriate functions inside. -+ */ - if (xoffset == 4 && yoffset == 0) - { - vp8_half_horiz_variance16x_h_sse2( -diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c -index 23a7fdc..3437a23 100644 ---- a/vp8/common/x86/vp8_asm_stubs.c -+++ b/vp8/common/x86/vp8_asm_stubs.c -@@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); -- vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset); -+ vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, FData2, -+ 16, 21, xoffset); -+ vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, -+ 16, yoffset); - } - else - { - /* First-pass only */ -- vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); -+ vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 16, xoffset); - } - } - else - { -- /* Second-pass only */ -- vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); -+ if (yoffset) -+ { -+ /* Second-pass only */ -+ vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 16, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. 
*/ -+ vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); -+ } - } - } - -@@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); -- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, FData2, -+ 8, 13, xoffset); -+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, -+ 8, yoffset); - } - else - { -- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 8, xoffset); - } - } - else - { -- /* Second-pass only */ -- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); -+ if (yoffset) -+ { -+ /* Second-pass only */ -+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 8, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. */ -+ vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); -+ } - } - } - -@@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); -- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, FData2, -+ 8, 9, xoffset); -+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, -+ 4, yoffset); - } - else - { - /* First-pass only */ -- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { -- /* Second-pass only */ -- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); -+ if (yoffset) -+ { -+ /* Second-pass only */ -+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. 
*/ -+ vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); -+ } - } - } - -@@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); -- vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); -+ vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ FData2, 4, 9, xoffset); -+ vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, -+ 4, yoffset); - } - else - { -- vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); -+ vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { -- vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); -+ if (yoffset) -+ { -+ vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. */ -+ int r; -+ -+ for (r = 0; r < 4; r++) -+ { -+ #if !(CONFIG_FAST_UNALIGNED) -+ dst_ptr[0] = src_ptr[0]; -+ dst_ptr[1] = src_ptr[1]; -+ dst_ptr[2] = src_ptr[2]; -+ dst_ptr[3] = src_ptr[3]; -+ #else -+ *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ; -+ #endif -+ dst_ptr += dst_pitch; -+ src_ptr += src_pixels_per_line; -+ } -+ } - } -- - } - - #endif -diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h -index 880c185..1a08c05 100644 ---- a/vp8/decoder/dboolhuff.h -+++ b/vp8/decoder/dboolhuff.h -@@ -55,7 +55,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br); - int loop_end, x; \ - size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \ - \ -- x = shift + CHAR_BIT - bits_left; \ -+ x = (int)(shift + CHAR_BIT - bits_left); \ - loop_end = 0; \ - if(x >= 0) \ - { \ -diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c -index 51e2420..8027a07 100644 ---- a/vp8/decoder/decodemv.c -+++ b/vp8/decoder/decodemv.c -@@ -48,11 +48,11 @@ static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) - - static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - const int mis = pbi->common.mode_info_stride; - - mi->mbmi.ref_frame = INTRA_FRAME; -- mi->mbmi.mode = read_kf_ymode(bc, pbi->common.kf_ymode_prob); -+ mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); - - if (mi->mbmi.mode == B_PRED) - { -@@ -65,12 +65,12 @@ static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) - const B_PREDICTION_MODE L = left_block_mode(mi, i); - - mi->bmi[i].as_mode = -- read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]); -+ read_bmode(bc, vp8_kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - -- mi->mbmi.uv_mode = read_uv_mode(bc, pbi->common.kf_uv_mode_prob); -+ mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); - } - - static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) -@@ -150,7 +150,7 @@ static const unsigned char mbsplit_fill_offset[4][16] = { - - static void mb_mode_mv_init(VP8D_COMP *pbi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - - #if CONFIG_ERROR_CONCEALMENT -@@ -159,6 +159,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) - * outside the frame. 
*/ - pbi->mvs_corrupt_from_mb = UINT_MAX; - #endif -+ /* Read the mb_no_coeff_skip flag */ -+ pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); -+ - pbi->prob_skip_false = 0; - if (pbi->common.mb_no_coeff_skip) - pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); -@@ -293,26 +296,24 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, - blockmv.as_mv.row += best_mv.as_mv.row; - blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1; - blockmv.as_mv.col += best_mv.as_mv.col; -- -- mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, -- mb_to_left_edge, -- mb_to_right_edge, -- mb_to_top_edge, -- mb_to_bottom_edge); - } - } - else - { - blockmv.as_int = abovemv.as_int; -- mbmi->need_to_clamp_mvs |= above_mb->mbmi.need_to_clamp_mvs; - } - } - else - { - blockmv.as_int = leftmv.as_int; -- mbmi->need_to_clamp_mvs |= left_mb->mbmi.need_to_clamp_mvs; - } - -+ mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, -+ mb_to_left_edge, -+ mb_to_right_edge, -+ mb_to_top_edge, -+ mb_to_bottom_edge); -+ - { - /* Fill (uniform) modes, mvs of jth subset. - Must do it here because ensuing subsets can -@@ -337,7 +338,7 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, - - static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); - if (mbmi->ref_frame) /* inter MB */ - { -@@ -595,14 +596,14 @@ static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, - * By default on a key frame reset all MBs to segment 0 - */ - if (pbi->mb.update_mb_segmentation_map) -- read_mb_features(&pbi->bc, &mi->mbmi, &pbi->mb); -+ read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); - else if(pbi->common.frame_type == KEY_FRAME) - mi->mbmi.segment_id = 0; - - /* Read the macroblock coeff skip flag if this feature is in use, - * else default to 0 */ - if (pbi->common.mb_no_coeff_skip) -- mi->mbmi.mb_skip_coeff = vp8_read(&pbi->bc, pbi->prob_skip_false); -+ mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); - else - mi->mbmi.mb_skip_coeff = 0; - -@@ -644,7 +645,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) - #if CONFIG_ERROR_CONCEALMENT - /* look for corruption. set mvs_corrupt_from_mb to the current - * mb_num if the frame is corrupt from this macroblock. 
*/ -- if (vp8dx_bool_error(&pbi->bc) && mb_num < pbi->mvs_corrupt_from_mb) -+ if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < -+ (int)pbi->mvs_corrupt_from_mb) - { - pbi->mvs_corrupt_from_mb = mb_num; - /* no need to continue since the partition is corrupt from -diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c -index 62a068b..a4a00f6 100644 ---- a/vp8/decoder/decodframe.c -+++ b/vp8/decoder/decodframe.c -@@ -177,7 +177,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; -- unsigned char *base_dst = xd->dst.y_buffer; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) -@@ -188,38 +187,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; -- int b_mode = xd->mode_info_context->bmi[i].as_mode; -- unsigned char *yabove; -- unsigned char *yleft; -- int left_stride; -- unsigned char top_left; -- -- yabove = base_dst + b->offset - dst_stride; -- yleft = base_dst + b->offset - 1; -- left_stride = dst_stride; -- top_left = yabove[-1]; -- -- // vp8_intra4x4_predict (base_dst + b->offset, dst_stride, b_mode, -- // base_dst + b->offset, dst_stride ); -- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, -- b_mode, -- base_dst + b->offset, dst_stride, -- top_left); -+ unsigned char *dst = xd->dst.y_buffer + b->offset; -+ B_PREDICTION_MODE b_mode = -+ xd->mode_info_context->bmi[i].as_mode; -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ int left_stride = dst_stride; -+ unsigned char top_left = Above[-1]; -+ -+ vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, -+ dst, dst_stride, top_left); - - if (xd->eobs[i]) - { - if (xd->eobs[i] > 1) - { -- vp8_dequant_idct_add -- (b->qcoeff, DQC, -- base_dst + b->offset, dst_stride); -+ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - vp8_dc_only_idct_add - (b->qcoeff[0] * DQC[0], -- base_dst + b->offset, dst_stride, -- base_dst + b->offset, dst_stride); -+ dst, dst_stride, -+ dst, dst_stride); - ((int *)b->qcoeff)[0] = 0; - } - } -@@ -317,48 +307,253 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update) - FILE *vpxlog = 0; - #endif - -+static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) -+{ -+ int i; -+ unsigned char *src_ptr1; -+ unsigned char *dest_ptr1; -+ -+ unsigned int Border; -+ int plane_stride; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ src_ptr1 = ybf->y_buffer - Border; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ -+ for (i = 0; i < (int)Border; i++) -+ { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ dest_ptr1 += plane_stride; -+ } -+ -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ Border /= 2; -+ src_ptr1 = ybf->u_buffer - Border; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ dest_ptr1 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ src_ptr1 = ybf->v_buffer - Border; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ dest_ptr1 += plane_stride; -+ } -+} -+ -+static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) -+{ -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char 
*dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = ybf->y_height; -+ -+ src_ptr1 = ybf->y_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)Border; i++) -+ { -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr2 += plane_stride; -+ } -+ -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ plane_height = ybf->uv_height; -+ Border /= 2; -+ -+ src_ptr1 = ybf->u_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ src_ptr1 = ybf->v_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr2 += plane_stride; -+ } -+} -+ -+static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, -+ unsigned char *y_src, -+ unsigned char *u_src, -+ unsigned char *v_src) -+{ -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char *dest_ptr1, *dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ int plane_width; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = 16; -+ plane_width = ybf->y_width; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = y_src; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) -+ { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ plane_height = 8; -+ plane_width = ybf->uv_width; -+ Border /= 2; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = u_src; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) -+ { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = v_src; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) -+ { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+} -+ - static void decode_mb_rows(VP8D_COMP *pbi) - { - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - -+ MODE_INFO *lf_mic = xd->mode_info_context; -+ - int ibc = 0; - int num_part = 1 << 
pc->multi_token_partition; - - int recon_yoffset, recon_uvoffset; - int mb_row, mb_col; - int mb_idx = 0; -- int dst_fb_idx = pc->new_fb_idx; -- int recon_y_stride = pc->yv12_fb[dst_fb_idx].y_stride; -- int recon_uv_stride = pc->yv12_fb[dst_fb_idx].uv_stride; -+ -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; -+ -+ int recon_y_stride = yv12_fb_new->y_stride; -+ int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; -+ unsigned char *lf_dst[3]; -+ unsigned char *eb_dst[3]; - int i; -- int ref_fb_index[MAX_REF_FRAMES]; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - -- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; -- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; -- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; -- - for(i = 1; i < MAX_REF_FRAMES; i++) - { -- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; -- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; -- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; -+ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; -+ -+ ref_buffer[i][0] = this_fb->y_buffer; -+ ref_buffer[i][1] = this_fb->u_buffer; -+ ref_buffer[i][2] = this_fb->v_buffer; - -- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; -+ ref_fb_corrupted[i] = this_fb->corrupted; - } - -- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; -- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; -- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; -+ /* Set up the buffer pointers */ -+ eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; -+ eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; -+ eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = 0; - -+ /* Initialize the loop filter for this frame. */ -+ if(pc->filter_level) -+ vp8_loop_filter_frame_init(pc, xd, pc->filter_level); -+ -+ vp8_setup_intra_recon_top_line(yv12_fb_new); -+ - /* Decode the individual macro block */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) - { -@@ -395,10 +590,14 @@ static void decode_mb_rows(VP8D_COMP *pbi) - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - -- //TODO: move to outside row loop -+ /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - -+ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], -+ xd->recon_left[2], xd->dst.y_stride, -+ xd->dst.uv_stride); -+ - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - /* Distance of Mb to the various image edges. 
-@@ -460,26 +659,103 @@ static void decode_mb_rows(VP8D_COMP *pbi) - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - -- - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; -- - } - - /* adjust to the next row of mbs */ -- vp8_extend_mb_row( -- &pc->yv12_fb[dst_fb_idx], -- xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 -- ); -+ vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, -+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - -+ if(pc->filter_level) -+ { -+ if(mb_row > 0) -+ { -+ if (pc->filter_type == NORMAL_LOOPFILTER) -+ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, -+ recon_y_stride, recon_uv_stride, -+ lf_dst[0], lf_dst[1], lf_dst[2]); -+ else -+ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, -+ recon_y_stride, recon_uv_stride, -+ lf_dst[0], lf_dst[1], lf_dst[2]); -+ -+ if(mb_row > 1) -+ { -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ -+ eb_dst[0] += recon_y_stride * 16; -+ eb_dst[1] += recon_uv_stride * 8; -+ eb_dst[2] += recon_uv_stride * 8; -+ -+ if(mb_row == 2) -+ yv12_extend_frame_top_c(yv12_fb_new); -+ -+ } -+ -+ lf_dst[0] += recon_y_stride * 16; -+ lf_dst[1] += recon_uv_stride * 8; -+ lf_dst[2] += recon_uv_stride * 8; -+ lf_mic += pc->mb_cols; -+ lf_mic++; /* Skip border mb */ -+ } -+ } -+ else -+ { -+ if(mb_row > 0) -+ { -+ /**/ -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ -+ eb_dst[0] += recon_y_stride * 16; -+ eb_dst[1] += recon_uv_stride * 8; -+ eb_dst[2] += recon_uv_stride * 8; -+ -+ if(mb_row == 1) -+ yv12_extend_frame_top_c(yv12_fb_new); -+ } -+ } -+ } -+ -+ if(pc->filter_level) -+ { -+ if (pc->filter_type == NORMAL_LOOPFILTER) -+ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, -+ recon_uv_stride, lf_dst[0], lf_dst[1], -+ lf_dst[2]); -+ else -+ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, -+ recon_uv_stride, lf_dst[0], lf_dst[1], -+ lf_dst[2]); -+ -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ eb_dst[0] += recon_y_stride * 16; -+ eb_dst[1] += recon_uv_stride * 8; -+ eb_dst[2] += recon_uv_stride * 8; - } -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ -+ yv12_extend_frame_bottom_c(yv12_fb_new); -+ - } - - static unsigned int read_partition_size(const unsigned char *cx_size) -@@ -519,13 +795,13 @@ static unsigned int read_available_partition_size( - if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) - partition_size = read_partition_size(partition_size_ptr); - else if (pbi->ec_active) -- partition_size = bytes_left; -+ partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated partition size data"); - } - else -- partition_size = bytes_left; -+ partition_size = (unsigned int)bytes_left; - - /* Validate the calculated partition length. 
If the buffer - * described by the partition can't be fully read, then restrict -@@ -534,7 +810,7 @@ static unsigned int read_available_partition_size( - if (!read_is_valid(fragment_start, partition_size, fragment_end)) - { - if (pbi->ec_active) -- partition_size = bytes_left; -+ partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " -@@ -547,24 +823,18 @@ static unsigned int read_available_partition_size( - static void setup_token_decoder(VP8D_COMP *pbi, - const unsigned char* token_part_sizes) - { -- vp8_reader *bool_decoder = &pbi->bc2; -+ vp8_reader *bool_decoder = &pbi->mbc[0]; - unsigned int partition_idx; -- int fragment_idx; -- int num_token_partitions; -+ unsigned int fragment_idx; -+ unsigned int num_token_partitions; - const unsigned char *first_fragment_end = pbi->fragments[0] + - pbi->fragment_sizes[0]; - - TOKEN_PARTITION multi_token_partition = -- (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2); -- if (!vp8dx_bool_error(&pbi->bc)) -+ (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); -+ if (!vp8dx_bool_error(&pbi->mbc[8])) - pbi->common.multi_token_partition = multi_token_partition; - num_token_partitions = 1 << pbi->common.multi_token_partition; -- if (num_token_partitions > 1) -- { -- CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_token_partitions * -- sizeof(vp8_reader))); -- bool_decoder = pbi->mbc; -- } - - /* Check for partitions within the fragments and unpack the fragments - * so that each fragment pointer points to its corresponding partition. */ -@@ -580,10 +850,10 @@ static void setup_token_decoder(VP8D_COMP *pbi, - /* Size of first partition + token partition sizes element */ - ptrdiff_t ext_first_part_size = token_part_sizes - - pbi->fragments[0] + 3 * (num_token_partitions - 1); -- fragment_size -= ext_first_part_size; -+ fragment_size -= (unsigned int)ext_first_part_size; - if (fragment_size > 0) - { -- pbi->fragment_sizes[0] = ext_first_part_size; -+ pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size; - /* The fragment contains an additional partition. Move to - * next. */ - fragment_idx++; -@@ -602,8 +872,8 @@ static void setup_token_decoder(VP8D_COMP *pbi, - fragment_end, - fragment_idx - 1, - num_token_partitions); -- pbi->fragment_sizes[fragment_idx] = partition_size; -- fragment_size -= partition_size; -+ pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size; -+ fragment_size -= (unsigned int)partition_size; - assert(fragment_idx <= num_token_partitions); - if (fragment_size > 0) - { -@@ -637,16 +907,6 @@ static void setup_token_decoder(VP8D_COMP *pbi, - #endif - } - --static void stop_token_decoder(VP8D_COMP *pbi) --{ -- VP8_COMMON *pc = &pbi->common; -- -- if (pc->multi_token_partition != ONE_PARTITION) -- { -- vpx_free(pbi->mbc); -- pbi->mbc = NULL; -- } --} - - static void init_frame(VP8D_COMP *pbi) - { -@@ -661,7 +921,6 @@ static void init_frame(VP8D_COMP *pbi) - vp8_init_mbmode_probs(pc); - - vp8_default_coef_probs(pc); -- vp8_kf_default_bmode_probs(pc->kf_bmode_prob); - - /* reset the segment feature data to 0 with delta coding (Default state). 
*/ - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); -@@ -685,13 +944,8 @@ static void init_frame(VP8D_COMP *pbi) - } - else - { -- if (!pc->use_bilinear_mc_filter) -- pc->mcomp_filter_type = SIXTAP; -- else -- pc->mcomp_filter_type = BILINEAR; -- - /* To enable choice of different interploation filters */ -- if (pc->mcomp_filter_type == SIXTAP) -+ if (!pc->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; -@@ -725,7 +979,7 @@ static void init_frame(VP8D_COMP *pbi) - - int vp8_decode_frame(VP8D_COMP *pbi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - const unsigned char *data = pbi->fragments[0]; -@@ -737,9 +991,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) - int corrupt_tokens = 0; - int prev_independent_partitions = pbi->independent_partitions; - -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; -+ - /* start with no corruption of current frame */ - xd->corrupted = 0; -- pc->yv12_fb[pc->new_fb_idx].corrupted = 0; -+ yv12_fb_new->corrupted = 0; - - if (data_end - data < 3) - { -@@ -774,11 +1030,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) - - vp8_setup_version(pc); - -+ - if (pc->frame_type == KEY_FRAME) - { -- const int Width = pc->Width; -- const int Height = pc->Height; -- - /* vet via sync code */ - /* When error concealment is enabled we should only check the sync - * code if we have enough bits available -@@ -803,56 +1057,21 @@ int vp8_decode_frame(VP8D_COMP *pbi) - } - data += 7; - -- if (Width != pc->Width || Height != pc->Height) -- { -- int prev_mb_rows = pc->mb_rows; -- -- if (pc->Width <= 0) -- { -- pc->Width = Width; -- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -- "Invalid frame width"); -- } -- -- if (pc->Height <= 0) -- { -- pc->Height = Height; -- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -- "Invalid frame height"); -- } -- -- if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) -- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -- "Failed to allocate frame buffers"); -- --#if CONFIG_ERROR_CONCEALMENT -- pbi->overlaps = NULL; -- if (pbi->ec_enabled) -- { -- if (vp8_alloc_overlap_lists(pbi)) -- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -- "Failed to allocate overlap lists " -- "for error concealment"); -- } --#endif -- --#if CONFIG_MULTITHREAD -- if (pbi->b_multithreaded_rd) -- vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); --#endif -- } -+ } -+ else -+ { -+ vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); -+ vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - } - } -- -- if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || -- pc->Width == 0 || pc->Height == 0) -+ if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) - { - return -1; - } - - init_frame(pbi); - -- if (vp8dx_start_decode(bc, data, data_end - data)) -+ if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data))) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - if (pc->frame_type == KEY_FRAME) { -@@ -961,7 +1180,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) - - setup_token_decoder(pbi, data + first_partition_length_in_bytes); - -- xd->current_bc = &pbi->bc2; -+ xd->current_bc = &pbi->mbc[0]; - - /* Read the default quantizers. 
*/ - { -@@ -1094,26 +1313,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) - } - } - -- vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); -- vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); -- -- /* set up frame new frame for intra coded blocks */ --#if CONFIG_MULTITHREAD -- if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) --#endif -- vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); -- -- vp8_setup_block_dptrs(xd); -- -- vp8_build_block_doffsets(xd); -- - /* clear out the coeff buffer */ - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - -- /* Read the mb_no_coeff_skip flag */ -- pc->mb_no_coeff_skip = (int)vp8_read_bit(bc); -- -- - vp8_decode_mode_mvs(pbi); - - #if CONFIG_ERROR_CONCEALMENT -@@ -1132,9 +1334,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) - #if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) - { -- int i; -+ unsigned int i; - vp8mt_decode_mb_rows(pbi, xd); -- vp8_yv12_extend_frame_borders(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ -+ vp8_yv12_extend_frame_borders(yv12_fb_new); - for (i = 0; i < pbi->decoding_thread_count; ++i) - corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted; - } -@@ -1145,18 +1347,16 @@ int vp8_decode_frame(VP8D_COMP *pbi) - corrupt_tokens |= xd->corrupted; - } - -- stop_token_decoder(pbi); -- - /* Collect information about decoder corruption. */ - /* 1. Check first boolean decoder for errors. */ -- pc->yv12_fb[pc->new_fb_idx].corrupted = vp8dx_bool_error(bc); -+ yv12_fb_new->corrupted = vp8dx_bool_error(bc); - /* 2. Check the macroblock information */ -- pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens; -+ yv12_fb_new->corrupted |= corrupt_tokens; - - if (!pbi->decoded_key_frame) - { - if (pc->frame_type == KEY_FRAME && -- !pc->yv12_fb[pc->new_fb_idx].corrupted) -+ !yv12_fb_new->corrupted) - pbi->decoded_key_frame = 1; - else - vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, -@@ -1165,13 +1365,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) - - /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - -- /* If this was a kf or Gf note the Q used */ -- if ((pc->frame_type == KEY_FRAME) || -- pc->refresh_golden_frame || pc->refresh_alt_ref_frame) -- { -- pc->last_kf_gf_q = pc->base_qindex; -- } -- - if (pc->refresh_entropy_probs == 0) - { - vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); -diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c -index 0c39848..452ff6c 100644 ---- a/vp8/decoder/detokenize.c -+++ b/vp8/decoder/detokenize.c -@@ -53,7 +53,8 @@ static const uint8_t kZigzag[16] = { - #define NUM_PROBAS 11 - #define NUM_CTX 3 - --typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting -+/* for const-casting */ -+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; - - static int GetSigned(BOOL_DECODER *br, int value_to_sign) - { -diff --git a/vp8/decoder/error_concealment.c b/vp8/decoder/error_concealment.c -index 7750728..8b2e32b 100644 ---- a/vp8/decoder/error_concealment.c -+++ b/vp8/decoder/error_concealment.c -@@ -51,12 +51,13 @@ int vp8_alloc_overlap_lists(VP8D_COMP *pbi) - vpx_free(pbi->overlaps); - pbi->overlaps = NULL; - } -+ - pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols, - sizeof(MB_OVERLAP)); -+ - if (pbi->overlaps == NULL) - return -1; -- vpx_memset(pbi->overlaps, 0, -- sizeof(MB_OVERLAP) * pbi->common.mb_rows * pbi->common.mb_cols); -+ - return 0; - } - -diff 
--git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c -index c59ce25..8d6871b 100644 ---- a/vp8/decoder/onyxd_if.c -+++ b/vp8/decoder/onyxd_if.c -@@ -80,6 +80,7 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) - - #if CONFIG_ERROR_CONCEALMENT - pbi->ec_enabled = oxcf->error_concealment; -+ pbi->overlaps = NULL; - #else - pbi->ec_enabled = 0; - #endif -@@ -99,6 +100,8 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) - */ - pbi->independent_partitions = 0; - -+ vp8_setup_block_dptrs(&pbi->mb); -+ - return pbi; - } - -@@ -117,21 +120,20 @@ void vp8dx_remove_decompressor(VP8D_COMP *pbi) - vp8_de_alloc_overlap_lists(pbi); - #endif - vp8_remove_common(&pbi->common); -- vpx_free(pbi->mbc); - vpx_free(pbi); - } - - --vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &pbi->common; - int ref_fb_idx; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, -@@ -153,17 +155,17 @@ vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, - } - - --vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &pbi->common; - int *ref_fb_ptr = NULL; - int free_fb; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_ptr = &cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_ptr = &cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_ptr = &cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, -@@ -279,28 +281,22 @@ static int swap_frame_buffers (VP8_COMMON *cm) - return err; - } - --int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsigned char *source, int64_t time_stamp) -+int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, -+ const uint8_t *source, -+ int64_t time_stamp) - { - #if HAVE_NEON - int64_t dx_store_reg[8]; - #endif - VP8_COMMON *cm = &pbi->common; -- int retcode = 0; -- -- /*if(pbi->ready_for_new_data == 0) -- return -1;*/ -- -- if (pbi == 0) -- { -- return -1; -- } -+ int retcode = -1; - - pbi->common.error.error_code = VPX_CODEC_OK; - - if (pbi->num_fragments == 0) - { - /* New frame, reset fragment pointers and sizes */ -- vpx_memset(pbi->fragments, 0, sizeof(pbi->fragments)); -+ vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments)); - vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes)); - } - if (pbi->input_fragments && !(source == NULL && size == 0)) -@@ -381,20 +377,14 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - - cm->new_fb_idx = get_free_fb (cm); - -+ /* setup reference frames for vp8_decode_frame */ -+ pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; -+ pbi->dec_fb_ref[LAST_FRAME] = 
&cm->yv12_fb[cm->lst_fb_idx]; -+ pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; -+ pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; -+ - if (setjmp(pbi->common.error.jmp)) - { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.setjmp = 0; -- -- pbi->num_fragments = 0; -- - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. -@@ -403,7 +393,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; -- return -1; -+ -+ goto decode_exit; - } - - pbi->common.error.setjmp = 1; -@@ -412,68 +403,19 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - - if (retcode < 0) - { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.error_code = VPX_CODEC_ERROR; -- pbi->common.error.setjmp = 0; -- pbi->num_fragments = 0; - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; -- return retcode; -+ -+ pbi->common.error.error_code = VPX_CODEC_ERROR; -+ goto decode_exit; - } - --#if CONFIG_MULTITHREAD -- if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) -- { -- if (swap_frame_buffers (cm)) -- { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.error_code = VPX_CODEC_ERROR; -- pbi->common.error.setjmp = 0; -- pbi->num_fragments = 0; -- return -1; -- } -- } else --#endif -+ if (swap_frame_buffers (cm)) - { -- if (swap_frame_buffers (cm)) -- { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.error_code = VPX_CODEC_ERROR; -- pbi->common.error.setjmp = 0; -- pbi->num_fragments = 0; -- return -1; -- } -- -- if(cm->filter_level) -- { -- /* Apply the loop filter if appropriate. 
*/ -- vp8_loop_filter_frame(cm, &pbi->mb); -- } -- vp8_yv12_extend_frame_borders(cm->frame_to_show); -+ pbi->common.error.error_code = VPX_CODEC_ERROR; -+ goto decode_exit; - } - -- - vp8_clear_system_state(); - - #if CONFIG_ERROR_CONCEALMENT -@@ -498,49 +440,13 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - } - #endif - -- /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/ -- - if (cm->show_frame) - cm->current_video_frame++; - - pbi->ready_for_new_data = 0; - pbi->last_time_stamp = time_stamp; -- pbi->num_fragments = 0; -- --#if 0 -- { -- int i; -- int64_t earliest_time = pbi->dr[0].time_stamp; -- int64_t latest_time = pbi->dr[0].time_stamp; -- int64_t time_diff = 0; -- int bytes = 0; -- -- pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;; -- pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp; -- -- for (i = 0; i < 16; i++) -- { -- -- bytes += pbi->dr[i].size; -- -- if (pbi->dr[i].time_stamp < earliest_time) -- earliest_time = pbi->dr[i].time_stamp; -- -- if (pbi->dr[i].time_stamp > latest_time) -- latest_time = pbi->dr[i].time_stamp; -- } -- -- time_diff = latest_time - earliest_time; -- -- if (time_diff > 0) -- { -- pbi->common.bitrate = 80000.00 * bytes / time_diff ; -- pbi->common.framerate = 160000000.00 / time_diff ; -- } -- -- } --#endif - -+decode_exit: - #if HAVE_NEON - #if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -@@ -549,7 +455,9 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - vp8_pop_neon(dx_store_reg); - } - #endif -+ - pbi->common.error.setjmp = 0; -+ pbi->num_fragments = 0; - return retcode; - } - int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) -diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h -index 97cf0dc..0063beb 100644 ---- a/vp8/decoder/onyxd_int.h -+++ b/vp8/decoder/onyxd_int.h -@@ -31,23 +31,18 @@ typedef struct - typedef struct - { - MACROBLOCKD mbd; -- int mb_row; - } MB_ROW_DEC; - --typedef struct --{ -- int64_t time_stamp; -- int size; --} DATARATE; -- -- - typedef struct VP8D_COMP - { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - -+ YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; -+ - DECLARE_ALIGNED(16, VP8_COMMON, common); - -- vp8_reader bc, bc2; -+ /* the last partition will be used for the modes/mvs */ -+ vp8_reader mbc[MAX_PARTITIONS]; - - VP8D_CONFIG oxcf; - -@@ -62,7 +57,7 @@ typedef struct VP8D_COMP - volatile int b_multithreaded_rd; - int max_threads; - int current_mb_col_main; -- int decoding_thread_count; -+ unsigned int decoding_thread_count; - int allocated_decoding_thread_count; - - int mt_baseline_filter_level[MAX_MB_SEGMENTS]; -@@ -85,12 +80,9 @@ typedef struct VP8D_COMP - /* end of threading data */ - #endif - -- vp8_reader *mbc; - int64_t last_time_stamp; - int ready_for_new_data; - -- DATARATE dr[16]; -- - vp8_prob prob_intra; - vp8_prob prob_last; - vp8_prob prob_gf; -diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c -index 47a0349..88c06be 100644 ---- a/vp8/decoder/threading.c -+++ b/vp8/decoder/threading.c -@@ -24,10 +24,18 @@ - #include "detokenize.h" - #include "vp8/common/reconintra4x4.h" - #include "vp8/common/reconinter.h" -+#include "vp8/common/setupintrarecon.h" - #if CONFIG_ERROR_CONCEALMENT - #include "error_concealment.h" - #endif - -+#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) -+#define 
CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ -+ CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ -+ memset((p), 0, (n) * sizeof(*(p))); \ -+} while (0) -+ -+ - extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); - - static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) -@@ -47,11 +55,9 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D - mbd->mode_info_stride = pc->mode_info_stride; - - mbd->frame_type = pc->frame_type; -- mbd->pre = pc->yv12_fb[pc->lst_fb_idx]; -- mbd->dst = pc->yv12_fb[pc->new_fb_idx]; -+ mbd->pre = xd->pre; -+ mbd->dst = xd->dst; - -- vp8_setup_block_dptrs(mbd); -- vp8_build_block_doffsets(mbd); - mbd->segmentation_enabled = xd->segmentation_enabled; - mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); -@@ -65,7 +71,7 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D - mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; - mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; - -- mbd->current_bc = &pbi->bc2; -+ mbd->current_bc = &pbi->mbc[0]; - - vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); - vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); -@@ -73,16 +79,18 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D - vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); - - mbd->fullpixel_mask = 0xffffffff; -- if(pc->full_pixel) -+ -+ if (pc->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; - - } - -- for (i=0; i< pc->mb_rows; i++) -- pbi->mt_current_mb_col[i]=-1; -+ for (i = 0; i < pc->mb_rows; i++) -+ pbi->mt_current_mb_col[i] = -1; - } - --static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) -+static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, -+ unsigned int mb_idx) - { - MB_PREDICTION_MODE mode; - int i; -@@ -166,7 +174,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; -- unsigned char *base_dst = xd->dst.y_buffer; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) -@@ -177,17 +184,19 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; -- int b_mode = xd->mode_info_context->bmi[i].as_mode; -- unsigned char *yabove; -+ unsigned char *dst = xd->dst.y_buffer + b->offset; -+ B_PREDICTION_MODE b_mode = -+ xd->mode_info_context->bmi[i].as_mode; -+ unsigned char *Above; - unsigned char *yleft; - int left_stride; - unsigned char top_left; - - /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ - if (i < 4 && pbi->common.filter_level) -- yabove = xd->recon_above[0] + b->offset; //i*4; -+ Above = xd->recon_above[0] + b->offset; - else -- yabove = (base_dst - dst_stride) + b->offset; -+ Above = dst - dst_stride; - - if (i%4==0 && pbi->common.filter_level) - { -@@ -196,34 +205,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - } - else - { -- yleft = (base_dst - 1) + b->offset; -+ yleft = dst - 1; - left_stride = dst_stride; - } - - if ((i==4 || i==8 || i==12) && pbi->common.filter_level) - top_left = *(xd->recon_left[0] + i - 1); - else -- top_left = yabove[-1]; -+ top_left = 
Above[-1]; - -- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, -- b_mode, -- base_dst + b->offset, dst_stride, -- top_left); -+ vp8_intra4x4_predict(Above, yleft, left_stride, -+ b_mode, dst, dst_stride, top_left); - - if (xd->eobs[i] ) - { - if (xd->eobs[i] > 1) - { -- vp8_dequant_idct_add -- (b->qcoeff, DQC, -- base_dst + b->offset, dst_stride); -+ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { -- vp8_dc_only_idct_add -- (b->qcoeff[0] * DQC[0], -- base_dst + b->offset, dst_stride, -- base_dst + b->offset, dst_stride); -+ vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], -+ dst, dst_stride, dst, dst_stride); - ((int *)b->qcoeff)[0] = 0; - } - } -@@ -297,60 +300,44 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - } - } - --typedef void (*init_current_bc_fn_t)(VP8D_COMP *pbi, MACROBLOCKD *xd, -- int start_mb_row, int mb_row, int num_part); -- --static void init_current_bc(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, -- int mb_row, int num_part) -+static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) - { -- (void) start_mb_row; -- -- xd->current_bc = &pbi->mbc[mb_row%num_part]; --} -- --static void init_current_bc_threads(VP8D_COMP *pbi, MACROBLOCKD *xd, -- int start_mb_row, int mb_row, int num_part) --{ -- (void) xd; -- pbi->mb_row_di[start_mb_row - 1].mb_row = mb_row; -- pbi->mb_row_di[start_mb_row - 1].mbd.current_bc = &pbi->mbc[mb_row%num_part]; --} -- -- --static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, -- init_current_bc_fn_t init_current_bc_fn) --{ -- volatile int *last_row_current_mb_col = NULL; -+ volatile const int *last_row_current_mb_col; -+ volatile int *current_mb_col; - int mb_row; - VP8_COMMON *pc = &pbi->common; -- int nsync = pbi->sync_range; -+ const int nsync = pbi->sync_range; -+ const int first_row_no_sync_above = pc->mb_cols + nsync; - int num_part = 1 << pbi->common.multi_token_partition; -+ int last_mb_row = start_mb_row; -+ -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; -+ YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; -+ -+ int recon_y_stride = yv12_fb_new->y_stride; -+ int recon_uv_stride = yv12_fb_new->uv_stride; - -- int dst_fb_idx = pc->new_fb_idx; - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - int i; -- int ref_fb_index[MAX_REF_FRAMES]; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - -- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; -- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; -- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; -- - for(i = 1; i < MAX_REF_FRAMES; i++) - { -- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; -- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; -- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; -+ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - -- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; -+ ref_buffer[i][0] = this_fb->y_buffer; -+ ref_buffer[i][1] = this_fb->u_buffer; -+ ref_buffer[i][2] = this_fb->v_buffer; -+ -+ ref_fb_corrupted[i] = this_fb->corrupted; - } - -- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; -- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; -- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; -+ dst_buffer[0] = yv12_fb_new->y_buffer; -+ dst_buffer[1] = yv12_fb_new->u_buffer; -+ dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = (start_mb_row != 0); - -@@ -359,18 +346,20 @@ static void decode_mb_rows(VP8D_COMP *pbi, 
MACROBLOCKD *xd, int start_mb_row, - int i; - int recon_yoffset, recon_uvoffset; - int mb_col; -- int ref_fb_idx = pc->lst_fb_idx; -- int dst_fb_idx = pc->new_fb_idx; -- int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; -- int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; -- - int filter_level; - loop_filter_info_n *lfi_n = &pc->lf_info; - -- init_current_bc_fn(pbi, xd, start_mb_row, mb_row, num_part); -+ /* save last row processed by this thread */ -+ last_mb_row = mb_row; -+ /* select bool coder for current partition */ -+ xd->current_bc = &pbi->mbc[mb_row%num_part]; - - if (mb_row > 0) - last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; -+ else -+ last_row_current_mb_col = &first_row_no_sync_above; -+ -+ current_mb_col = &pbi->mt_current_mb_col[mb_row]; - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; -@@ -394,7 +383,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; - xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; - -- //TODO: move to outside row loop -+ /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = 1; - xd->recon_left_stride[1] = 1; - } -@@ -412,16 +401,22 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - -- //TODO: move to outside row loop -+ /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; -+ -+ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], -+ xd->recon_left[2], xd->dst.y_stride, -+ xd->dst.uv_stride); - } - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { -- if ( mb_row > 0 && (mb_col & (nsync-1)) == 0) -+ *current_mb_col = mb_col - 1; -+ -+ if ((mb_col & (nsync - 1)) == 0) - { -- while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) -+ while (mb_col > (*last_row_current_mb_col - nsync)) - { - x86_pause_hint(); - thread_sleep(0); -@@ -477,7 +472,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - -- decode_macroblock(pbi, xd, 0); -+ mt_decode_macroblock(pbi, xd, 0); - - xd->left_available = 1; - -@@ -591,9 +586,6 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; -- -- /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/ -- pbi->mt_current_mb_col[mb_row] = mb_col; - } - - /* adjust to the next row of mbs */ -@@ -601,8 +593,8 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - { - if(mb_row != pc->mb_rows-1) - { -- int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; -- int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); -+ int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; -+ int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); - - for (i = 0; i < 4; i++) - { -@@ -611,8 +603,13 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; - } - } -- } else -- vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -+ } -+ else -+ vp8_extend_mb_row(yv12_fb_new, 
xd->dst.y_buffer + 16, -+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -+ -+ /* last MB of row is ready just after extension is done */ -+ *current_mb_col = mb_col + nsync; - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; -@@ -620,6 +617,11 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - /* since we have multithread */ - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - } -+ -+ /* signal end of frame decoding if this thread processed the last mb_row */ -+ if (last_mb_row == (pc->mb_rows - 1)) -+ sem_post(&pbi->h_event_end_decoding); -+ - } - - -@@ -635,7 +637,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) - if (pbi->b_multithreaded_rd == 0) - break; - -- /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/ - if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) - { - if (pbi->b_multithreaded_rd == 0) -@@ -643,21 +644,11 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) - else - { - MACROBLOCKD *xd = &mbrd->mbd; -- - xd->left_context = &mb_row_left_context; - -- decode_mb_rows(pbi, xd, ithread+1, init_current_bc_threads); -+ mt_decode_mb_rows(pbi, xd, ithread+1); - } - } -- -- /* add this to each frame */ -- if ((mbrd->mb_row == pbi->common.mb_rows-1) || -- ((mbrd->mb_row == pbi->common.mb_rows-2) && -- (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1)) -- { -- /*SetEvent(pbi->h_event_end_decoding);*/ -- sem_post(&pbi->h_event_end_decoding); -- } - } - - return 0 ; -@@ -667,7 +658,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) - void vp8_decoder_create_threads(VP8D_COMP *pbi) - { - int core_count = 0; -- int ithread; -+ unsigned int ithread; - - pbi->b_multithreaded_rd = 0; - pbi->allocated_decoding_thread_count = 0; -@@ -684,16 +675,17 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) - pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count - 1; - -- CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); -- CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); -- CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count)); -- vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count); -- CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count)); -+ CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); -+ CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); -+ CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); -+ CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); - - for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) - { - sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); - -+ vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); -+ - pbi->de_thread_data[ithread].ithread = ithread; - pbi->de_thread_data[ithread].ptr1 = (void *)pbi; - pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; -@@ -810,32 +802,32 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) - uv_width = width >>1; - - /* Allocate an int for each mb row. */ -- CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows)); -+ CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); - - /* Allocate memory for above_row buffers. 
*/ -- CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)))); - -- CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - -- CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - - /* Allocate memory for left_col buffers. */ -- CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); - -- CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - -- CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - } - } -@@ -881,42 +873,46 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) - void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) - { - VP8_COMMON *pc = &pbi->common; -- int i; -+ unsigned int i; -+ int j; - - int filter_level = pc->filter_level; -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - if (filter_level) - { - /* Set above_row buffer to 127 for decoding first MB row */ -- vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5); -- vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); -- vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); -+ vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); -+ vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); -+ vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - -- for (i=1; imb_rows; i++) -+ for (j=1; jmb_rows; j++) - { -- vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); -- vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); -- vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); -+ vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); -+ vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); -+ vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - } - - /* 
Set left_col to 129 initially */ -- for (i=0; imb_rows; i++) -+ for (j=0; jmb_rows; j++) - { -- vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16); -- vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8); -- vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8); -+ vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); -+ vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); -+ vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); - } - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); - } -+ else -+ vp8_setup_intra_recon_top_line(yv12_fb_new); - - setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); - - for (i = 0; i < pbi->decoding_thread_count; i++) - sem_post(&pbi->h_event_start_decoding[i]); - -- decode_mb_rows(pbi, xd, 0, init_current_bc); -+ mt_decode_mb_rows(pbi, xd, 0); - - sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ - } -diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c -index 3824294..e666b6c 100644 ---- a/vp8/encoder/bitstream.c -+++ b/vp8/encoder/bitstream.c -@@ -118,7 +118,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi) - - update_mode( - w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree, -- Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count -+ Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count - ); - } - { -@@ -127,7 +127,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi) - - update_mode( - w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree, -- Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->uv_mode_count -+ Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count - ); - } - } -@@ -172,7 +172,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) - while (p < stop) - { - const int t = p->Token; -- const vp8_token *a = vp8_coef_encodings + t; -+ vp8_token *a = vp8_coef_encodings + t; - const vp8_extra_bit_struct *b = vp8_extra_bits + t; - int i = 0; - const unsigned char *pp = p->context_tree; -@@ -397,7 +397,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, - { - const TOKENEXTRA *p = cpi->tplist[mb_row].start; - const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; -- int tokens = stop - p; -+ int tokens = (int)(stop - p); - - vp8_pack_tokens_c(w, p, tokens); - } -@@ -416,7 +416,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w) - { - const TOKENEXTRA *p = cpi->tplist[mb_row].start; - const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; -- int tokens = stop - p; -+ int tokens = (int)(stop - p); - - vp8_pack_tokens_c(w, p, tokens); - } -@@ -461,7 +461,7 @@ static void write_mv - - static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACROBLOCKD *x) - { -- // Encode the MB segment id. -+ /* Encode the MB segment id. */ - if (x->segmentation_enabled && x->update_mb_segmentation_map) - { - switch (mi->segment_id) -@@ -483,7 +483,7 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO - vp8_write(w, 1, x->mb_segment_tree_probs[2]); - break; - -- // TRAP.. This should not happen -+ /* TRAP.. 
This should not happen */ - default: - vp8_write(w, 0, x->mb_segment_tree_probs[0]); - vp8_write(w, 0, x->mb_segment_tree_probs[1]); -@@ -493,11 +493,11 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO - } - void vp8_convert_rfct_to_prob(VP8_COMP *const cpi) - { -- const int *const rfct = cpi->count_mb_ref_frame_usage; -+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; - const int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - -- // Calculate the probabilities used to code the ref frame based on useage -+ /* Calculate the probabilities used to code the ref frame based on usage */ - if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) - cpi->prob_intra_coded = 1; - -@@ -539,7 +539,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) - { - int total_mbs = pc->mb_rows * pc->mb_cols; - -- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; -+ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; - - if (prob_skip_false <= 1) - prob_skip_false = 1; -@@ -571,8 +571,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) - - MACROBLOCKD *xd = &cpi->mb.e_mbd; - -- // Distance of Mb to the various image edges. -- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units -+ /* Distance of Mb to the various image edges. -+ * These specified to 8th pel as they are always compared to MV -+ * values that are in 1/8th pel units -+ */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; -@@ -728,7 +730,7 @@ static void write_kfmodes(VP8_COMP *cpi) - { - int total_mbs = c->mb_rows * c->mb_cols; - -- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; -+ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; - - if (prob_skip_false <= 1) - prob_skip_false = 1; -@@ -754,7 +756,7 @@ static void write_kfmodes(VP8_COMP *cpi) - if (c->mb_no_coeff_skip) - vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); - -- kfwrite_ymode(bc, ym, c->kf_ymode_prob); -+ kfwrite_ymode(bc, ym, vp8_kf_ymode_prob); - - if (ym == B_PRED) - { -@@ -771,15 +773,15 @@ static void write_kfmodes(VP8_COMP *cpi) - ++intra_mode_stats [A] [L] [bm]; - #endif - -- write_bmode(bc, bm, c->kf_bmode_prob [A] [L]); -+ write_bmode(bc, bm, vp8_kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - -- write_uv_mode(bc, (m++)->mbmi.uv_mode, c->kf_uv_mode_prob); -+ write_uv_mode(bc, (m++)->mbmi.uv_mode, vp8_kf_uv_mode_prob); - } - -- m++; // skip L prediction border -+ m++; /* skip L prediction border */ - } - } - -@@ -849,6 +851,7 @@ static int prob_update_savings(const unsigned int *ct, - - static int independent_coef_context_savings(VP8_COMP *cpi) - { -+ MACROBLOCK *const x = & cpi->mb; - int savings = 0; - int i = 0; - do -@@ -865,7 +868,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) - */ - - probs = (const unsigned int (*)[MAX_ENTROPY_TOKENS]) -- cpi->coef_counts[i][j]; -+ x->coef_counts[i][j]; - - /* Reset to default probabilities at key frames */ - if (cpi->common.frame_type == KEY_FRAME) -@@ -878,9 +881,6 @@ static int independent_coef_context_savings(VP8_COMP *cpi) - /* at every context */ - - /* calc probs and branch cts for this frame only */ -- //vp8_prob new_p [ENTROPY_NODES]; -- //unsigned int branch_ct [ENTROPY_NODES] [2]; -- - int t = 0; /* token/prob 
index */ - - vp8_tree_probs_from_distribution( -@@ -927,6 +927,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) - - static int default_coef_context_savings(VP8_COMP *cpi) - { -+ MACROBLOCK *const x = & cpi->mb; - int savings = 0; - int i = 0; - do -@@ -940,16 +941,13 @@ static int default_coef_context_savings(VP8_COMP *cpi) - /* at every context */ - - /* calc probs and branch cts for this frame only */ -- //vp8_prob new_p [ENTROPY_NODES]; -- //unsigned int branch_ct [ENTROPY_NODES] [2]; -- - int t = 0; /* token/prob index */ - - vp8_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, - cpi->frame_coef_probs [i][j][k], - cpi->frame_branch_ct [i][j][k], -- cpi->coef_counts [i][j][k], -+ x->coef_counts [i][j][k], - 256, 1 - ); - -@@ -998,13 +996,13 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) - { - int savings = 0; - -- const int *const rfct = cpi->count_mb_ref_frame_usage; -+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; - const int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - int new_intra, new_last, new_garf, oldtotal, newtotal; - int ref_frame_cost[MAX_REF_FRAMES]; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - if (cpi->common.frame_type != KEY_FRAME) - { -@@ -1026,7 +1024,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) - rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME]; - - -- // old costs -+ /* old costs */ - vp8_calc_ref_frame_costs(ref_frame_cost,cpi->prob_intra_coded, - cpi->prob_last_coded,cpi->prob_gf_coded); - -@@ -1078,7 +1076,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) - #endif - int savings = 0; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - do - { -@@ -1110,21 +1108,15 @@ void vp8_update_coef_probs(VP8_COMP *cpi) - } - do - { -- //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. -+ /* note: use result from vp8_estimate_entropy_savings, so no -+ * need to call vp8_tree_probs_from_distribution here. -+ */ -+ - /* at every context */ - - /* calc probs and branch cts for this frame only */ -- //vp8_prob new_p [ENTROPY_NODES]; -- //unsigned int branch_ct [ENTROPY_NODES] [2]; -- - int t = 0; /* token/prob index */ - -- //vp8_tree_probs_from_distribution( -- // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, -- // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], -- // 256, 1 -- // ); -- - do - { - const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t]; -@@ -1295,19 +1287,16 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256; - #endif - -- //vp8_kf_default_bmode_probs() is called in vp8_setup_key_frame() once for each -- //K frame before encode frame. pc->kf_bmode_prob doesn't get changed anywhere -- //else. No need to call it again here. 
--yw -- //vp8_kf_default_bmode_probs( pc->kf_bmode_prob); -- -- // every keyframe send startcode, width, height, scale factor, clamp and color type -+ /* every keyframe send startcode, width, height, scale factor, clamp -+ * and color type -+ */ - if (oh.type == KEY_FRAME) - { - int v; - - validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error); - -- // Start / synch code -+ /* Start / synch code */ - cx_data[0] = 0x9D; - cx_data[1] = 0x01; - cx_data[2] = 0x2a; -@@ -1326,7 +1315,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - vp8_start_encode(bc, cx_data, cx_data_end); - -- // signal clr type -+ /* signal clr type */ - vp8_write_bit(bc, pc->clr_type); - vp8_write_bit(bc, pc->clamp_type); - -@@ -1335,13 +1324,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - vp8_start_encode(bc, cx_data, cx_data_end); - - -- // Signal whether or not Segmentation is enabled -+ /* Signal whether or not Segmentation is enabled */ - vp8_write_bit(bc, xd->segmentation_enabled); - -- // Indicate which features are enabled -+ /* Indicate which features are enabled */ - if (xd->segmentation_enabled) - { -- // Signal whether or not the segmentation map is being updated. -+ /* Signal whether or not the segmentation map is being updated. */ - vp8_write_bit(bc, xd->update_mb_segmentation_map); - vp8_write_bit(bc, xd->update_mb_segmentation_data); - -@@ -1351,15 +1340,15 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - vp8_write_bit(bc, xd->mb_segement_abs_delta); - -- // For each segmentation feature (Quant and loop filter level) -+ /* For each segmentation feature (Quant and loop filter level) */ - for (i = 0; i < MB_LVL_MAX; i++) - { -- // For each of the segments -+ /* For each of the segments */ - for (j = 0; j < MAX_MB_SEGMENTS; j++) - { - Data = xd->segment_feature_data[i][j]; - -- // Frame level data -+ /* Frame level data */ - if (Data) - { - vp8_write_bit(bc, 1); -@@ -1384,7 +1373,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - if (xd->update_mb_segmentation_map) - { -- // Write the probs used to decode the segment id for each macro block. -+ /* Write the probs used to decode the segment id for each mb */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) - { - int Data = xd->mb_segment_tree_probs[i]; -@@ -1400,17 +1389,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - } - } - -- // Code to determine whether or not to update the scan order. - vp8_write_bit(bc, pc->filter_type); - vp8_write_literal(bc, pc->filter_level, 6); - vp8_write_literal(bc, pc->sharpness_level, 3); - -- // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). -+ /* Write out loop filter deltas applied at the MB level based on mode -+ * or ref frame (if they are enabled). 
-+ */ - vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled); - - if (xd->mode_ref_lf_delta_enabled) - { -- // Do the deltas need to be updated -+ /* Do the deltas need to be updated */ - int send_update = xd->mode_ref_lf_delta_update - || cpi->oxcf.error_resilient_mode; - -@@ -1419,12 +1409,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - { - int Data; - -- // Send update -+ /* Send update */ - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - { - Data = xd->ref_lf_deltas[i]; - -- // Frame level data -+ /* Frame level data */ - if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i] - || cpi->oxcf.error_resilient_mode) - { -@@ -1434,20 +1424,20 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - if (Data > 0) - { - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 0); // sign -+ vp8_write_bit(bc, 0); /* sign */ - } - else - { - Data = -Data; - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 1); // sign -+ vp8_write_bit(bc, 1); /* sign */ - } - } - else - vp8_write_bit(bc, 0); - } - -- // Send update -+ /* Send update */ - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - { - Data = xd->mode_lf_deltas[i]; -@@ -1461,13 +1451,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - if (Data > 0) - { - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 0); // sign -+ vp8_write_bit(bc, 0); /* sign */ - } - else - { - Data = -Data; - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 1); // sign -+ vp8_write_bit(bc, 1); /* sign */ - } - } - else -@@ -1476,34 +1466,42 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - } - } - -- //signal here is multi token partition is enabled -+ /* signal here is multi token partition is enabled */ - vp8_write_literal(bc, pc->multi_token_partition, 2); - -- // Frame Qbaseline quantizer index -+ /* Frame Qbaseline quantizer index */ - vp8_write_literal(bc, pc->base_qindex, 7); - -- // Transmit Dc, Second order and Uv quantizer delta information -+ /* Transmit Dc, Second order and Uv quantizer delta information */ - put_delta_q(bc, pc->y1dc_delta_q); - put_delta_q(bc, pc->y2dc_delta_q); - put_delta_q(bc, pc->y2ac_delta_q); - put_delta_q(bc, pc->uvdc_delta_q); - put_delta_q(bc, pc->uvac_delta_q); - -- // When there is a key frame all reference buffers are updated using the new key frame -+ /* When there is a key frame all reference buffers are updated using -+ * the new key frame -+ */ - if (pc->frame_type != KEY_FRAME) - { -- // Should the GF or ARF be updated using the transmitted frame or buffer -+ /* Should the GF or ARF be updated using the transmitted frame -+ * or buffer -+ */ - vp8_write_bit(bc, pc->refresh_golden_frame); - vp8_write_bit(bc, pc->refresh_alt_ref_frame); - -- // If not being updated from current frame should either GF or ARF be updated from another buffer -+ /* If not being updated from current frame should either GF or ARF -+ * be updated from another buffer -+ */ - if (!pc->refresh_golden_frame) - vp8_write_literal(bc, pc->copy_buffer_to_gf, 2); - - if (!pc->refresh_alt_ref_frame) - vp8_write_literal(bc, pc->copy_buffer_to_arf, 2); - -- // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) -+ /* Indicate reference frame sign bias for Golden and ARF frames -+ * (always 0 for last frame buffer) -+ */ - vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); - vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); - } -@@ 
-1532,14 +1530,14 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - #endif - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING - pack_coef_probs(cpi); - #else - if (pc->refresh_entropy_probs == 0) - { -- // save a copy for later refresh -+ /* save a copy for later refresh */ - vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); - } - -@@ -1550,7 +1548,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - active_section = 2; - #endif - -- // Write out the mb_no_coeff_skip flag -+ /* Write out the mb_no_coeff_skip flag */ - vp8_write_bit(bc, pc->mb_no_coeff_skip); - - if (pc->frame_type == KEY_FRAME) -diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h -index 6165d04..a30f888 100644 ---- a/vp8/encoder/block.h -+++ b/vp8/encoder/block.h -@@ -18,7 +18,10 @@ - #include "vp8/common/entropy.h" - #include "vpx_ports/mem.h" - --// motion search site -+#define MAX_MODES 20 -+#define MAX_ERROR_BINS 1024 -+ -+/* motion search site */ - typedef struct - { - MV mv; -@@ -27,11 +30,11 @@ typedef struct - - typedef struct block - { -- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries -+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ - short *src_diff; - short *coeff; - -- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries -+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ - short *quant; - short *quant_fast; - unsigned char *quant_shift; -@@ -39,7 +42,7 @@ typedef struct block - short *zrun_zbin_boost; - short *round; - -- // Zbin Over Quant value -+ /* Zbin Over Quant value */ - short zbin_extra; - - unsigned char **base_src; -@@ -59,12 +62,12 @@ typedef struct - - typedef struct macroblock - { -- DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y -- DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y -+ DECLARE_ALIGNED(16, short, src_diff[400]); /* 25 blocks Y,U,V,Y2 */ -+ DECLARE_ALIGNED(16, short, coeff[400]); /* 25 blocks Y,U,V,Y2 */ - DECLARE_ALIGNED(16, unsigned char, thismb[256]); - - unsigned char *thismb_ptr; -- // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries -+ /* 16 Y, 4 U, 4 V, 1 DC 2nd order block */ - BLOCK block[25]; - - YV12_BUFFER_CONFIG src; -@@ -90,16 +93,18 @@ typedef struct macroblock - signed int act_zbin_adj; - signed int last_act_zbin_adj; - -- int mvcosts[2][MVvals+1]; - int *mvcost[2]; -- int mvsadcosts[2][MVfpvals+1]; - int *mvsadcost[2]; -- int mbmode_cost[2][MB_MODE_COUNT]; -- int intra_uv_mode_cost[2][MB_MODE_COUNT]; -- unsigned int bmode_costs[10][10][10]; -- unsigned int inter_bmode_costs[B_MODE_COUNT]; -- -- // These define limits to motion vector components to prevent them from extending outside the UMV borders -+ int (*mbmode_cost)[MB_MODE_COUNT]; -+ int (*intra_uv_mode_cost)[MB_MODE_COUNT]; -+ int (*bmode_costs)[10][10]; -+ int *inter_bmode_costs; -+ int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] -+ [MAX_ENTROPY_TOKENS]; -+ -+ /* These define limits to motion vector components to prevent -+ * them from extending outside the UMV borders. 
-+ */ - int mv_col_min; - int mv_col_max; - int mv_row_min; -@@ -107,18 +112,45 @@ typedef struct macroblock - - int skip; - -- int encode_breakout; -+ unsigned int encode_breakout; - -- //char * gf_active_ptr; - signed char *gf_active_ptr; - - unsigned char *active_ptr; - MV_CONTEXT *mvc; - -- unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - int optimize; - int q_index; - -+#if CONFIG_TEMPORAL_DENOISING -+ MB_PREDICTION_MODE best_sse_inter_mode; -+ int_mv best_sse_mv; -+ MV_REFERENCE_FRAME best_reference_frame; -+ MV_REFERENCE_FRAME best_zeromv_reference_frame; -+ unsigned char need_to_clamp_best_mvs; -+#endif -+ -+ int skip_true_count; -+ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -+ unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ -+ int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ -+ int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ -+ int64_t prediction_error; -+ int64_t intra_error; -+ int count_mb_ref_frame_usage[MAX_REF_FRAMES]; -+ -+ int rd_thresh_mult[MAX_MODES]; -+ int rd_threshes[MAX_MODES]; -+ unsigned int mbs_tested_so_far; -+ unsigned int mode_test_hit_counts[MAX_MODES]; -+ int zbin_mode_boost_enabled; -+ int zbin_mode_boost; -+ int last_zbin_mode_boost; -+ -+ int last_zbin_over_quant; -+ int zbin_over_quant; -+ int error_bins[MAX_ERROR_BINS]; -+ - void (*short_fdct4x4)(short *input, short *output, int pitch); - void (*short_fdct8x4)(short *input, short *output, int pitch); - void (*short_walsh4x4)(short *input, short *output, int pitch); -diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h -index fb6cbaf..8309063 100644 ---- a/vp8/encoder/boolhuff.h -+++ b/vp8/encoder/boolhuff.h -@@ -32,7 +32,7 @@ typedef struct - unsigned char *buffer_end; - struct vpx_internal_error_info *error; - -- // Variables used to track bit costs without outputing to the bitstream -+ /* Variables used to track bit costs without outputing to the bitstream */ - unsigned int measure_cost; - unsigned long bit_counter; - } BOOL_CODER; -diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c -index 09ed9dd..f3faa22 100644 ---- a/vp8/encoder/denoising.c -+++ b/vp8/encoder/denoising.c -@@ -15,198 +15,293 @@ - #include "vpx_mem/vpx_mem.h" - #include "vpx_rtcd.h" - --static const unsigned int NOISE_MOTION_THRESHOLD = 20*20; --static const unsigned int NOISE_DIFF2_THRESHOLD = 75; --// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100. --static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20; --static const unsigned int SSE_THRESHOLD = 16*16*40; -+static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; -+/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming -+ * var(noise) ~= 100. -+ */ -+static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; -+static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; - --static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8) --{ -- return (uint8_t)( -- (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8 -- (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8 -- >> 8); --} -+/* -+ * The filter function was modified to reduce the computational complexity. -+ * Step 1: -+ * Instead of applying tap coefficients for each pixel, we calculated the -+ * pixel adjustments vs. pixel diff value ahead of time. 
-+ * adjustment = filtered_value - current_raw -+ * = (filter_coefficient * diff + 128) >> 8 -+ * where -+ * filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); -+ * filter_coefficient += filter_coefficient / -+ * (3 + motion_magnitude_adjustment); -+ * filter_coefficient is clamped to 0 ~ 255. -+ * -+ * Step 2: -+ * The adjustment vs. diff curve becomes flat very quick when diff increases. -+ * This allowed us to use only several levels to approximate the curve without -+ * changing the filtering algorithm too much. -+ * The adjustments were further corrected by checking the motion magnitude. -+ * The levels used are: -+ * diff adjustment w/o motion correction adjustment w/ motion correction -+ * [-255, -16] -6 -7 -+ * [-15, -8] -4 -5 -+ * [-7, -4] -3 -4 -+ * [-3, 3] diff diff -+ * [4, 7] 3 4 -+ * [8, 15] 4 5 -+ * [16, 255] 6 7 -+ */ - --static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src, -- YV12_BUFFER_CONFIG* dst, -- MACROBLOCK* x, -- unsigned int best_sse, -- unsigned int zero_mv_sse, -- int recon_yoffset, -- int recon_uvoffset) -+int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, -+ YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, -+ unsigned int motion_magnitude, int y_offset, -+ int uv_offset) - { -- MACROBLOCKD filter_xd = x->e_mbd; -- int mv_col; -- int mv_row; -- int sse_diff = zero_mv_sse - best_sse; -- // Compensate the running average. -- filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset; -- filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset; -- filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset; -- // Write the compensated running average to the destination buffer. -- filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset; -- filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset; -- filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset; -- // Use the best MV for the compensation. -- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode; -- filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv; -- filter_xd.mode_info_context->mbmi.need_to_clamp_mvs = -- filter_xd.need_to_clamp_best_mvs; -- mv_col = filter_xd.best_sse_mv.as_mv.col; -- mv_row = filter_xd.best_sse_mv.as_mv.row; -- if (filter_xd.mode_info_context->mbmi.mode <= B_PRED || -- (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD && -- sse_diff < SSE_DIFF_THRESHOLD)) -- { -- // Handle intra blocks as referring to last frame with zero motion and -- // let the absolute pixel difference affect the filter factor. -- // Also consider small amount of motion as being random walk due to noise, -- // if it doesn't mean that we get a much bigger error. -- // Note that any changes to the mode info only affects the denoising. 
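The comment block above spells out the precomputed adjustment curve that replaces the old per-pixel blend() multiply. A rough standalone sketch of that lookup follows; the function name and the low_motion flag are illustrative stand-ins rather than libvpx identifiers, and the +1 boost corresponds to the code path taken when the block's motion magnitude is at or below the low-motion threshold.

#include <stdlib.h>

/* Illustrative sketch, not library code: map the difference between the
 * motion-compensated running average and the raw pixel to the adjustment
 * levels described in the table above ({3, 4, 6}, each raised by 1 for
 * near-static blocks). */
static int denoise_adjustment_sketch(int diff, int low_motion)
{
    int absdiff = abs(diff);
    int adjustment;

    if (absdiff <= 3)
        return diff;          /* small diffs: take the filtered value as-is */
    else if (absdiff <= 7)
        adjustment = 3;
    else if (absdiff <= 15)
        adjustment = 4;
    else
        adjustment = 6;

    if (low_motion)
        adjustment += 1;      /* filter static blocks more aggressively */

    return diff > 0 ? adjustment : -adjustment;
}

The filtered pixel is then the raw pixel plus this adjustment, clamped to the 0..255 range, which is what the new vp8_denoiser_filter_c does level by level.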
-- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- filter_xd.mode_info_context->mbmi.mode = ZEROMV; -- filter_xd.mode_info_context->mbmi.mv.as_int = 0; -- x->e_mbd.best_sse_inter_mode = ZEROMV; -- x->e_mbd.best_sse_mv.as_int = 0; -- best_sse = zero_mv_sse; -- } -- if (!x->skip) -- { -- vp8_build_inter_predictors_mb(&filter_xd); -- } -- else -- { -- vp8_build_inter16x16_predictors_mb(&filter_xd, -- filter_xd.dst.y_buffer, -- filter_xd.dst.u_buffer, -- filter_xd.dst.v_buffer, -- filter_xd.dst.y_stride, -- filter_xd.dst.uv_stride); -- } -- return best_sse; --} -+ unsigned char *sig = signal->thismb; -+ int sig_stride = 16; -+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; -+ int mc_avg_y_stride = mc_running_avg->y_stride; -+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset; -+ int avg_y_stride = running_avg->y_stride; -+ int r, c, i; -+ int sum_diff = 0; -+ int adj_val[3] = {3, 4, 6}; - --static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg, -- YV12_BUFFER_CONFIG* running_avg, -- MACROBLOCK* signal, -- unsigned int motion_magnitude2, -- int y_offset, -- int uv_offset) --{ -- unsigned char* sig = signal->thismb; -- int sig_stride = 16; -- unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset; -- int mc_avg_y_stride = mc_running_avg->y_stride; -- unsigned char* running_avg_y = running_avg->y_buffer + y_offset; -- int avg_y_stride = running_avg->y_stride; -- int r, c; -- for (r = 0; r < 16; r++) -- { -- for (c = 0; c < 16; c++) -+ /* If motion_magnitude is small, making the denoiser more aggressive by -+ * increasing the adjustment for each level. */ -+ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) -+ { -+ for (i = 0; i < 3; i++) -+ adj_val[i] += 1; -+ } -+ -+ for (r = 0; r < 16; ++r) - { -- int diff; -- int absdiff = 0; -- unsigned int filter_coefficient; -- absdiff = sig[c] - mc_running_avg_y[c]; -- absdiff = absdiff > 0 ? absdiff : -absdiff; -- assert(absdiff >= 0 && absdiff < 256); -- filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); -- // Allow some additional filtering of static blocks, or blocks with very -- // small motion vectors. -- filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3)); -- filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient; -- -- running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient); -- diff = sig[c] - running_avg_y[c]; -- -- if (diff * diff < NOISE_DIFF2_THRESHOLD) -- { -- // Replace with mean to suppress the noise. -- sig[c] = running_avg_y[c]; -- } -- else -- { -- // Replace the filter state with the signal since the change in this -- // pixel isn't classified as noise. -- running_avg_y[c] = sig[c]; -- } -+ for (c = 0; c < 16; ++c) -+ { -+ int diff = 0; -+ int adjustment = 0; -+ int absdiff = 0; -+ -+ diff = mc_running_avg_y[c] - sig[c]; -+ absdiff = abs(diff); -+ -+ /* When |diff| < 4, use pixel value from last denoised raw. 
*/ -+ if (absdiff <= 3) -+ { -+ running_avg_y[c] = mc_running_avg_y[c]; -+ sum_diff += diff; -+ } -+ else -+ { -+ if (absdiff >= 4 && absdiff <= 7) -+ adjustment = adj_val[0]; -+ else if (absdiff >= 8 && absdiff <= 15) -+ adjustment = adj_val[1]; -+ else -+ adjustment = adj_val[2]; -+ -+ if (diff > 0) -+ { -+ if ((sig[c] + adjustment) > 255) -+ running_avg_y[c] = 255; -+ else -+ running_avg_y[c] = sig[c] + adjustment; -+ -+ sum_diff += adjustment; -+ } -+ else -+ { -+ if ((sig[c] - adjustment) < 0) -+ running_avg_y[c] = 0; -+ else -+ running_avg_y[c] = sig[c] - adjustment; -+ -+ sum_diff -= adjustment; -+ } -+ } -+ } -+ -+ /* Update pointers for next iteration. */ -+ sig += sig_stride; -+ mc_running_avg_y += mc_avg_y_stride; -+ running_avg_y += avg_y_stride; - } -- sig += sig_stride; -- mc_running_avg_y += mc_avg_y_stride; -- running_avg_y += avg_y_stride; -- } -+ -+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD) -+ return COPY_BLOCK; -+ -+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, -+ signal->thismb, sig_stride); -+ return FILTER_BLOCK; - } - - int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height) - { -- assert(denoiser); -- denoiser->yv12_running_avg.flags = 0; -- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width, -- height, VP8BORDERINPIXELS) < 0) -- { -- vp8_denoiser_free(denoiser); -- return 1; -- } -- denoiser->yv12_mc_running_avg.flags = 0; -- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, -- height, VP8BORDERINPIXELS) < 0) -- { -- vp8_denoiser_free(denoiser); -- return 1; -- } -- vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0, -- denoiser->yv12_running_avg.frame_size); -- vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, -- denoiser->yv12_mc_running_avg.frame_size); -- return 0; -+ int i; -+ assert(denoiser); -+ -+ for (i = 0; i < MAX_REF_FRAMES; i++) -+ { -+ denoiser->yv12_running_avg[i].flags = 0; -+ -+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width, -+ height, VP8BORDERINPIXELS) -+ < 0) -+ { -+ vp8_denoiser_free(denoiser); -+ return 1; -+ } -+ vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, -+ denoiser->yv12_running_avg[i].frame_size); -+ -+ } -+ denoiser->yv12_mc_running_avg.flags = 0; -+ -+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, -+ height, VP8BORDERINPIXELS) < 0) -+ { -+ vp8_denoiser_free(denoiser); -+ return 1; -+ } -+ -+ vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, -+ denoiser->yv12_mc_running_avg.frame_size); -+ return 0; - } - - void vp8_denoiser_free(VP8_DENOISER *denoiser) - { -- assert(denoiser); -- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg); -- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); -+ int i; -+ assert(denoiser); -+ -+ for (i = 0; i < MAX_REF_FRAMES ; i++) -+ { -+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]); -+ } -+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); - } - -+ - void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, - MACROBLOCK *x, - unsigned int best_sse, - unsigned int zero_mv_sse, - int recon_yoffset, -- int recon_uvoffset) { -- int mv_row; -- int mv_col; -- unsigned int motion_magnitude2; -- // Motion compensate the running average. 
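The rewritten filter also accumulates a signed sum of the adjustments applied across the 16x16 block and rejects the whole block when that sum drifts too far, copying the raw pixels back instead. A minimal sketch of that accept/reject step, assuming the SUM_DIFF_THRESHOLD value of 16*16*2 that the new denoising.h defines further down; the SKETCH_ names are placeholders for the real COPY_BLOCK/FILTER_BLOCK enum.

#include <stdlib.h>

/* Sketch only: mirrors the block-level outcome of the new filter, using
 * the threshold value added in denoising.h (16 * 16 * 2). */
enum denoise_decision_sketch { SKETCH_COPY_BLOCK, SKETCH_FILTER_BLOCK };

static enum denoise_decision_sketch
denoise_block_decision_sketch(int sum_diff)
{
    const int sum_diff_threshold = 16 * 16 * 2;  /* SUM_DIFF_THRESHOLD */

    /* A large net drift means the "denoised" block no longer resembles
     * the source block, so keep the raw pixels instead. */
    if (abs(sum_diff) > sum_diff_threshold)
        return SKETCH_COPY_BLOCK;
    return SKETCH_FILTER_BLOCK;
}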
-- best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg, -- &denoiser->yv12_mc_running_avg, -- x, -- best_sse, -- zero_mv_sse, -- recon_yoffset, -- recon_uvoffset); -- -- mv_row = x->e_mbd.best_sse_mv.as_mv.row; -- mv_col = x->e_mbd.best_sse_mv.as_mv.col; -- motion_magnitude2 = mv_row*mv_row + mv_col*mv_col; -- if (best_sse > SSE_THRESHOLD || -- motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD) -- { -- // No filtering of this block since it differs too much from the predictor, -- // or the motion vector magnitude is considered too big. -- vp8_copy_mem16x16(x->thismb, 16, -- denoiser->yv12_running_avg.y_buffer + recon_yoffset, -- denoiser->yv12_running_avg.y_stride); -- return; -- } -- // Filter. -- denoiser_filter(&denoiser->yv12_mc_running_avg, -- &denoiser->yv12_running_avg, -- x, -- motion_magnitude2, -- recon_yoffset, -- recon_uvoffset); -+ int recon_uvoffset) -+{ -+ int mv_row; -+ int mv_col; -+ unsigned int motion_magnitude2; -+ -+ MV_REFERENCE_FRAME frame = x->best_reference_frame; -+ MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; -+ -+ enum vp8_denoiser_decision decision = FILTER_BLOCK; -+ -+ if (zero_frame) -+ { -+ YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame]; -+ YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg; -+ YV12_BUFFER_CONFIG saved_pre,saved_dst; -+ MB_MODE_INFO saved_mbmi; -+ MACROBLOCKD *filter_xd = &x->e_mbd; -+ MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; -+ int mv_col; -+ int mv_row; -+ int sse_diff = zero_mv_sse - best_sse; -+ -+ saved_mbmi = *mbmi; -+ -+ /* Use the best MV for the compensation. */ -+ mbmi->ref_frame = x->best_reference_frame; -+ mbmi->mode = x->best_sse_inter_mode; -+ mbmi->mv = x->best_sse_mv; -+ mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs; -+ mv_col = x->best_sse_mv.as_mv.col; -+ mv_row = x->best_sse_mv.as_mv.row; -+ -+ if (frame == INTRA_FRAME || -+ ((unsigned int)(mv_row *mv_row + mv_col *mv_col) -+ <= NOISE_MOTION_THRESHOLD && -+ sse_diff < (int)SSE_DIFF_THRESHOLD)) -+ { -+ /* -+ * Handle intra blocks as referring to last frame with zero motion -+ * and let the absolute pixel difference affect the filter factor. -+ * Also consider small amount of motion as being random walk due -+ * to noise, if it doesn't mean that we get a much bigger error. -+ * Note that any changes to the mode info only affects the -+ * denoising. -+ */ -+ mbmi->ref_frame = -+ x->best_zeromv_reference_frame; -+ -+ src = &denoiser->yv12_running_avg[zero_frame]; -+ -+ mbmi->mode = ZEROMV; -+ mbmi->mv.as_int = 0; -+ x->best_sse_inter_mode = ZEROMV; -+ x->best_sse_mv.as_int = 0; -+ best_sse = zero_mv_sse; -+ } -+ -+ saved_pre = filter_xd->pre; -+ saved_dst = filter_xd->dst; -+ -+ /* Compensate the running average. */ -+ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; -+ filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; -+ filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; -+ /* Write the compensated running average to the destination buffer. 
*/ -+ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; -+ filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; -+ filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; -+ -+ if (!x->skip) -+ { -+ vp8_build_inter_predictors_mb(filter_xd); -+ } -+ else -+ { -+ vp8_build_inter16x16_predictors_mb(filter_xd, -+ filter_xd->dst.y_buffer, -+ filter_xd->dst.u_buffer, -+ filter_xd->dst.v_buffer, -+ filter_xd->dst.y_stride, -+ filter_xd->dst.uv_stride); -+ } -+ filter_xd->pre = saved_pre; -+ filter_xd->dst = saved_dst; -+ *mbmi = saved_mbmi; -+ -+ } -+ -+ mv_row = x->best_sse_mv.as_mv.row; -+ mv_col = x->best_sse_mv.as_mv.col; -+ motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; -+ if (best_sse > SSE_THRESHOLD || motion_magnitude2 -+ > 8 * NOISE_MOTION_THRESHOLD) -+ { -+ decision = COPY_BLOCK; -+ } -+ -+ if (decision == FILTER_BLOCK) -+ { -+ /* Filter. */ -+ decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg, -+ &denoiser->yv12_running_avg[INTRA_FRAME], -+ x, -+ motion_magnitude2, -+ recon_yoffset, recon_uvoffset); -+ } -+ if (decision == COPY_BLOCK) -+ { -+ /* No filtering of this block; it differs too much from the predictor, -+ * or the motion vector magnitude is considered too big. -+ */ -+ vp8_copy_mem16x16( -+ x->thismb, 16, -+ denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, -+ denoiser->yv12_running_avg[INTRA_FRAME].y_stride); -+ } - } -diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h -index 343531b..b025f5c 100644 ---- a/vp8/encoder/denoising.h -+++ b/vp8/encoder/denoising.h -@@ -13,10 +13,19 @@ - - #include "block.h" - -+#define SUM_DIFF_THRESHOLD (16 * 16 * 2) -+#define MOTION_MAGNITUDE_THRESHOLD (8*3) -+ -+enum vp8_denoiser_decision -+{ -+ COPY_BLOCK, -+ FILTER_BLOCK -+}; -+ - typedef struct vp8_denoiser - { -- YV12_BUFFER_CONFIG yv12_running_avg; -- YV12_BUFFER_CONFIG yv12_mc_running_avg; -+ YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES]; -+ YV12_BUFFER_CONFIG yv12_mc_running_avg; - } VP8_DENOISER; - - int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height); -@@ -30,4 +39,4 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, - int recon_yoffset, - int recon_uvoffset); - --#endif // VP8_ENCODER_DENOISING_H_ -+#endif /* VP8_ENCODER_DENOISING_H_ */ -diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c -index 8233873..d1b647b 100644 ---- a/vp8/encoder/encodeframe.c -+++ b/vp8/encoder/encodeframe.c -@@ -33,7 +33,7 @@ - #endif - #include "encodeframe.h" - --extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; -+extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; - extern void vp8_calc_ref_frame_costs(int *ref_frame_cost, - int prob_intra, - int prob_last, -@@ -45,7 +45,6 @@ extern void vp8_auto_select_speed(VP8_COMP *cpi); - extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - MACROBLOCK *x, - MB_ROW_COMP *mbr_ei, -- int mb_row, - int count); - static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); - -@@ -77,7 +76,7 @@ static const unsigned char VP8_VAR_OFFS[16]= - }; - - --// Original activity measure from Tim T's code. -+/* Original activity measure from Tim T's code. */ - static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) - { - unsigned int act; -@@ -100,7 +99,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) - return act; - } - --// Stub for alternative experimental activity measures. -+/* Stub for alternative experimental activity measures. 
*/ - static unsigned int alt_activity_measure( VP8_COMP *cpi, - MACROBLOCK *x, int use_dc_pred ) - { -@@ -108,8 +107,9 @@ static unsigned int alt_activity_measure( VP8_COMP *cpi, - } - - --// Measure the activity of the current macroblock --// What we measure here is TBD so abstracted to this function -+/* Measure the activity of the current macroblock -+ * What we measure here is TBD so abstracted to this function -+ */ - #define ALT_ACT_MEASURE 1 - static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col) -@@ -120,12 +120,12 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, - { - int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - -- // Or use and alternative. -+ /* Or use and alternative. */ - mb_activity = alt_activity_measure( cpi, x, use_dc_pred ); - } - else - { -- // Original activity measure from Tim T's code. -+ /* Original activity measure from Tim T's code. */ - mb_activity = tt_activity_measure( cpi, x ); - } - -@@ -135,36 +135,36 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, - return mb_activity; - } - --// Calculate an "average" mb activity value for the frame -+/* Calculate an "average" mb activity value for the frame */ - #define ACT_MEDIAN 0 - static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - { - #if ACT_MEDIAN -- // Find median: Simple n^2 algorithm for experimentation -+ /* Find median: Simple n^2 algorithm for experimentation */ - { - unsigned int median; - unsigned int i,j; - unsigned int * sortlist; - unsigned int tmp; - -- // Create a list to sort to -+ /* Create a list to sort to */ - CHECK_MEM_ERROR(sortlist, - vpx_calloc(sizeof(unsigned int), - cpi->common.MBs)); - -- // Copy map to sort list -+ /* Copy map to sort list */ - vpx_memcpy( sortlist, cpi->mb_activity_map, - sizeof(unsigned int) * cpi->common.MBs ); - - -- // Ripple each value down to its correct position -+ /* Ripple each value down to its correct position */ - for ( i = 1; i < cpi->common.MBs; i ++ ) - { - for ( j = i; j > 0; j -- ) - { - if ( sortlist[j] < sortlist[j-1] ) - { -- // Swap values -+ /* Swap values */ - tmp = sortlist[j-1]; - sortlist[j-1] = sortlist[j]; - sortlist[j] = tmp; -@@ -174,7 +174,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - } - } - -- // Even number MBs so estimate median as mean of two either side. -+ /* Even number MBs so estimate median as mean of two either side. 
*/ - median = ( 1 + sortlist[cpi->common.MBs >> 1] + - sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1; - -@@ -183,14 +183,14 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - vpx_free(sortlist); - } - #else -- // Simple mean for now -+ /* Simple mean for now */ - cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs); - #endif - - if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) - cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; - -- // Experimental code: return fixed value normalized for several clips -+ /* Experimental code: return fixed value normalized for several clips */ - if ( ALT_ACT_MEASURE ) - cpi->activity_avg = 100000; - } -@@ -199,7 +199,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - #define OUTPUT_NORM_ACT_STATS 0 - - #if USE_ACT_INDEX --// Calculate and activity index for each mb -+/* Calculate and activity index for each mb */ - static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - { - VP8_COMMON *const cm = & cpi->common; -@@ -214,19 +214,19 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - fprintf(f, "\n%12d\n", cpi->activity_avg ); - #endif - -- // Reset pointers to start of activity map -+ /* Reset pointers to start of activity map */ - x->mb_activity_ptr = cpi->mb_activity_map; - -- // Calculate normalized mb activity number. -+ /* Calculate normalized mb activity number. */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { -- // Read activity from the map -+ /* Read activity from the map */ - act = *(x->mb_activity_ptr); - -- // Calculate a normalized activity number -+ /* Calculate a normalized activity number */ - a = act + 4*cpi->activity_avg; - b = 4*act + cpi->activity_avg; - -@@ -238,7 +238,7 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - #if OUTPUT_NORM_ACT_STATS - fprintf(f, " %6d", *(x->mb_activity_ptr)); - #endif -- // Increment activity map pointers -+ /* Increment activity map pointers */ - x->mb_activity_ptr++; - } - -@@ -255,8 +255,9 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - } - #endif - --// Loop through all MBs. Note activity of each, average activity and --// calculate a normalized activity for each -+/* Loop through all MBs. 
Note activity of each, average activity and -+ * calculate a normalized activity for each -+ */ - static void build_activity_map( VP8_COMP *cpi ) - { - MACROBLOCK *const x = & cpi->mb; -@@ -273,15 +274,15 @@ static void build_activity_map( VP8_COMP *cpi ) - unsigned int mb_activity; - int64_t activity_sum = 0; - -- // for each macroblock row in image -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - #if ALT_ACT_MEASURE -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - #endif -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - #if ALT_ACT_MEASURE -@@ -289,48 +290,48 @@ static void build_activity_map( VP8_COMP *cpi ) - xd->left_available = (mb_col != 0); - recon_yoffset += 16; - #endif -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - -- // measure activity -+ /* measure activity */ - mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col ); - -- // Keep frame sum -+ /* Keep frame sum */ - activity_sum += mb_activity; - -- // Store MB level activity details. -+ /* Store MB level activity details. */ - *x->mb_activity_ptr = mb_activity; - -- // Increment activity map pointer -+ /* Increment activity map pointer */ - x->mb_activity_ptr++; - -- // adjust to the next column of source macroblocks -+ /* adjust to the next column of source macroblocks */ - x->src.y_buffer += 16; - } - - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - - #if ALT_ACT_MEASURE -- //extend the recon for intra prediction -+ /* extend the recon for intra prediction */ - vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - #endif - - } - -- // Calculate an "average" MB activity -+ /* Calculate an "average" MB activity */ - calc_av_activity(cpi, activity_sum); - - #if USE_ACT_INDEX -- // Calculate an activity index number of each mb -+ /* Calculate an activity index number of each mb */ - calc_activity_index( cpi, x ); - #endif - - } - --// Macroblock activity masking -+/* Macroblock activity masking */ - void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) - { - #if USE_ACT_INDEX -@@ -342,7 +343,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) - int64_t b; - int64_t act = *(x->mb_activity_ptr); - -- // Apply the masking to the RD multiplier. -+ /* Apply the masking to the RD multiplier. 
*/ - a = act + (2*cpi->activity_avg); - b = (2*act) + cpi->activity_avg; - -@@ -351,7 +352,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) - x->errorperbit += (x->errorperbit==0); - #endif - -- // Activity based Zbin adjustment -+ /* Activity based Zbin adjustment */ - adjust_act_zbin(cpi, x); - } - -@@ -398,7 +399,7 @@ void encode_mb_row(VP8_COMP *cpi, - w = &cpi->bc[1]; - #endif - -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->above_context = cm->above_context; - - xd->up_available = (mb_row != 0); -@@ -406,37 +407,41 @@ void encode_mb_row(VP8_COMP *cpi, - recon_uvoffset = (mb_row * recon_uv_stride * 8); - - cpi->tplist[mb_row].start = *tp; -- //printf("Main mb_row = %d\n", mb_row); -+ /* printf("Main mb_row = %d\n", mb_row); */ - -- // Distance of Mb to the top & bottom edges, specified in 1/8th pel -- // units as they are always compared to values that are in 1/8th pel units -+ /* Distance of Mb to the top & bottom edges, specified in 1/8th pel -+ * units as they are always compared to values that are in 1/8th pel -+ */ - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - -- // Set up limit values for vertical motion vector components -- // to prevent them extending beyond the UMV borders -+ /* Set up limit values for vertical motion vector components -+ * to prevent them extending beyond the UMV borders -+ */ - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) - + (VP8BORDERINPIXELS - 16); - -- // Set the mb activity pointer to the start of the row. -+ /* Set the mb activity pointer to the start of the row. */ - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - - #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) - *tp = cpi->tok; - #endif -- // Distance of Mb to the left & right edges, specified in -- // 1/8th pel units as they are always compared to values -- // that are in 1/8th pel units -+ /* Distance of Mb to the left & right edges, specified in -+ * 1/8th pel units as they are always compared to values -+ * that are in 1/8th pel units -+ */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - -- // Set up limit values for horizontal motion vector components -- // to prevent them extending beyond the UMV borders -+ /* Set up limit values for horizontal motion vector components -+ * to prevent them extending beyond the UMV borders -+ */ - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) - + (VP8BORDERINPIXELS - 16); -@@ -449,13 +454,13 @@ void encode_mb_row(VP8_COMP *cpi, - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - #if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded != 0) - { -- *current_mb_col = mb_col - 1; // set previous MB done -+ *current_mb_col = mb_col - 1; /* set previous MB done */ - - if ((mb_col & (nsync - 1)) == 0) - { -@@ -471,11 +476,13 @@ void encode_mb_row(VP8_COMP *cpi, - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp8_activity_masking(cpi, x); - -- // Is segmentation enabled -- // MB level adjustment to quantizer -+ /* Is segmentation enabled */ -+ /* MB level adjustment to quantizer */ - 
if (xd->segmentation_enabled) - { -- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) -+ /* Code to set segment id in xd->mbmi.segment_id for current MB -+ * (with range checking) -+ */ - if (cpi->segmentation_map[map_index+mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; - else -@@ -484,7 +491,8 @@ void encode_mb_row(VP8_COMP *cpi, - vp8cx_mb_init_quantizer(cpi, x, 1); - } - else -- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default -+ /* Set to Segment 0 by default */ -+ xd->mode_info_context->mbmi.segment_id = 0; - - x->active_ptr = cpi->active_map + map_index + mb_col; - -@@ -514,21 +522,25 @@ void encode_mb_row(VP8_COMP *cpi, - - #endif - -- // Count of last ref frame 0,0 usage -- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) -- cpi->inter_zz_count ++; -- -- // Special case code for cyclic refresh -- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode -- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map -+ /* Special case code for cyclic refresh -+ * If cyclic update enabled then copy xd->mbmi.segment_id; (which -+ * may have been updated based on mode during -+ * vp8cx_encode_inter_macroblock()) back into the global -+ * segmentation map -+ */ - if ((cpi->current_layer == 0) && -- (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) -+ (cpi->cyclic_refresh_mode_enabled && -+ xd->segmentation_enabled)) - { - cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id; - -- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): -- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) -- // else mark it as dirty (1). -+ /* If the block has been refreshed mark it as clean (the -+ * magnitude of the -ve influences how long it will be before -+ * we consider another refresh): -+ * Else if it was coded (last frame 0,0) and has not already -+ * been refreshed then mark it as a candidate for cleanup -+ * next time (marked 0) else mark it as dirty (1). -+ */ - if (xd->mode_info_context->mbmi.segment_id) - cpi->cyclic_refresh_map[map_index+mb_col] = -1; - else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) -@@ -551,13 +563,13 @@ void encode_mb_row(VP8_COMP *cpi, - pack_tokens(w, tp_start, tok_count); - } - #endif -- // Increment pointer into gf usage flags structure. -+ /* Increment pointer into gf usage flags structure. */ - x->gf_active_ptr++; - -- // Increment the activity mask pointers. -+ /* Increment the activity mask pointers. 
*/ - x->mb_activity_ptr++; - -- // adjust to the next column of macroblocks -+ /* adjust to the next column of macroblocks */ - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; -@@ -565,16 +577,16 @@ void encode_mb_row(VP8_COMP *cpi, - recon_yoffset += 16; - recon_uvoffset += 8; - -- // Keep track of segment usage -+ /* Keep track of segment usage */ - segment_counts[xd->mode_info_context->mbmi.segment_id] ++; - -- // skip to next mb -+ /* skip to next mb */ - xd->mode_info_context++; - x->partition_info++; - xd->above_context++; - } - -- //extend the recon for intra prediction -+ /* extend the recon for intra prediction */ - vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, -@@ -585,7 +597,7 @@ void encode_mb_row(VP8_COMP *cpi, - *current_mb_col = rightmost_col; - #endif - -- // this is to account for the border -+ /* this is to account for the border */ - xd->mode_info_context++; - x->partition_info++; - } -@@ -596,10 +608,10 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) - VP8_COMMON *const cm = & cpi->common; - MACROBLOCKD *const xd = & x->e_mbd; - -- // GF active flags data structure -+ /* GF active flags data structure */ - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - -- // Activity map pointer -+ /* Activity map pointer */ - x->mb_activity_ptr = cpi->mb_activity_map; - - x->act_zbin_adj = 0; -@@ -611,48 +623,42 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) - - xd->frame_type = cm->frame_type; - -- // reset intra mode contexts -+ /* reset intra mode contexts */ - if (cm->frame_type == KEY_FRAME) - vp8_init_mbmode_probs(cm); - -- // Copy data over into macro block data structures. -+ /* Copy data over into macro block data structures. */ - x->src = * cpi->Source; - xd->pre = cm->yv12_fb[cm->lst_fb_idx]; - xd->dst = cm->yv12_fb[cm->new_fb_idx]; - -- // set up frame for intra coded blocks -+ /* set up frame for intra coded blocks */ - vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); - - vp8_build_block_offsets(x); - -- vp8_setup_block_dptrs(&x->e_mbd); -- -- vp8_setup_block_ptrs(x); -- - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_context->mbmi.uv_mode = DC_PRED; - - xd->left_context = &cm->left_context; - -- vp8_zero(cpi->count_mb_ref_frame_usage) -- vp8_zero(cpi->ymode_count) -- vp8_zero(cpi->uv_mode_count) -- - x->mvc = cm->fc.mvc; - - vpx_memset(cm->above_context, 0, - sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); - -- // Special case treatment when GF and ARF are not sensible options for reference -- if (cpi->ref_frame_flags == VP8_LAST_FLAG) -+ /* Special case treatment when GF and ARF are not sensible options -+ * for reference -+ */ -+ if (cpi->ref_frame_flags == VP8_LAST_FRAME) - vp8_calc_ref_frame_costs(x->ref_frame_cost, - cpi->prob_intra_coded,255,128); - else if ((cpi->oxcf.number_of_layers > 1) && -- (cpi->ref_frame_flags == VP8_GOLD_FLAG)) -+ (cpi->ref_frame_flags == VP8_GOLD_FRAME)) - vp8_calc_ref_frame_costs(x->ref_frame_cost, - cpi->prob_intra_coded,1,255); - else if ((cpi->oxcf.number_of_layers > 1) && -- (cpi->ref_frame_flags == VP8_ALT_FLAG)) -+ (cpi->ref_frame_flags == VP8_ALTR_FRAME)) - vp8_calc_ref_frame_costs(x->ref_frame_cost, - cpi->prob_intra_coded,1,1); - else -@@ -664,6 +670,43 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) - xd->fullpixel_mask = 0xffffffff; - if(cm->full_pixel) - xd->fullpixel_mask = 0xfffffff8; -+ -+ vp8_zero(x->coef_counts); -+ vp8_zero(x->ymode_count); -+ vp8_zero(x->uv_mode_count) -+ 
x->prediction_error = 0; -+ x->intra_error = 0; -+ vp8_zero(x->count_mb_ref_frame_usage); -+} -+ -+static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) -+{ -+ int i = 0; -+ do -+ { -+ int j = 0; -+ do -+ { -+ int k = 0; -+ do -+ { -+ /* at every context */ -+ -+ /* calc probs and branch cts for this frame only */ -+ int t = 0; /* token/prob index */ -+ -+ do -+ { -+ x->coef_counts [i][j][k][t] += -+ x_thread->coef_counts [i][j][k][t]; -+ } -+ while (++t < ENTROPY_NODES); -+ } -+ while (++k < PREV_COEF_CONTEXTS); -+ } -+ while (++j < COEF_BANDS); -+ } -+ while (++i < BLOCK_TYPES); - } - - void vp8_encode_frame(VP8_COMP *cpi) -@@ -676,7 +719,7 @@ void vp8_encode_frame(VP8_COMP *cpi) - int segment_counts[MAX_MB_SEGMENTS]; - int totalrate; - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING -- BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition -+ BOOL_CODER * bc = &cpi->bc[1]; /* bc[0] is for control partition */ - const int num_part = (1 << cm->multi_token_partition); - #endif - -@@ -691,8 +734,8 @@ void vp8_encode_frame(VP8_COMP *cpi) - vp8_auto_select_speed(cpi); - } - -- // Functions setup for all frame types so we can use MC in AltRef -- if (cm->mcomp_filter_type == SIXTAP) -+ /* Functions setup for all frame types so we can use MC in AltRef */ -+ if(!cm->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; -@@ -707,43 +750,36 @@ void vp8_encode_frame(VP8_COMP *cpi) - xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; - } - -- // Reset frame count of inter 0,0 motion vector usage. -- cpi->inter_zz_count = 0; -- -- cpi->prediction_error = 0; -- cpi->intra_error = 0; -- cpi->skip_true_count = 0; -+ cpi->mb.skip_true_count = 0; - cpi->tok_count = 0; - - #if 0 -- // Experimental code -+ /* Experimental code */ - cpi->frame_distortion = 0; - cpi->last_mb_distortion = 0; - #endif - - xd->mode_info_context = cm->mi; - -- vp8_zero(cpi->MVcount); -- -- vp8_zero(cpi->coef_counts); -+ vp8_zero(cpi->mb.MVcount); - - vp8cx_frame_init_quantizer(cpi); - -- vp8_initialize_rd_consts(cpi, -+ vp8_initialize_rd_consts(cpi, x, - vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - - vp8cx_initialize_me_consts(cpi, cm->base_qindex); - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { -- // Initialize encode frame context. -+ /* Initialize encode frame context. */ - init_encode_frame_mb_context(cpi); - -- // Build a frame level activity map -+ /* Build a frame level activity map */ - build_activity_map(cpi); - } - -- // re-init encode frame context. -+ /* re-init encode frame context. 
*/ - init_encode_frame_mb_context(cpi); - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING -@@ -768,7 +804,8 @@ void vp8_encode_frame(VP8_COMP *cpi) - { - int i; - -- vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); -+ vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, -+ cpi->encoding_thread_count); - - for (i = 0; i < cm->mb_rows; i++) - cpi->mt_current_mb_col[i] = -1; -@@ -790,7 +827,7 @@ void vp8_encode_frame(VP8_COMP *cpi) - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; -@@ -809,7 +846,8 @@ void vp8_encode_frame(VP8_COMP *cpi) - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) - { -- cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start; -+ cpi->tok_count += (unsigned int) -+ (cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start); - } - - if (xd->segmentation_enabled) -@@ -829,14 +867,50 @@ void vp8_encode_frame(VP8_COMP *cpi) - - for (i = 0; i < cpi->encoding_thread_count; i++) - { -+ int mode_count; -+ int c_idx; - totalrate += cpi->mb_row_ei[i].totalrate; -+ -+ cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count; -+ -+ for(mode_count = 0; mode_count < VP8_YMODES; mode_count++) -+ cpi->mb.ymode_count[mode_count] += -+ cpi->mb_row_ei[i].mb.ymode_count[mode_count]; -+ -+ for(mode_count = 0; mode_count < VP8_UV_MODES; mode_count++) -+ cpi->mb.uv_mode_count[mode_count] += -+ cpi->mb_row_ei[i].mb.uv_mode_count[mode_count]; -+ -+ for(c_idx = 0; c_idx < MVvals; c_idx++) -+ { -+ cpi->mb.MVcount[0][c_idx] += -+ cpi->mb_row_ei[i].mb.MVcount[0][c_idx]; -+ cpi->mb.MVcount[1][c_idx] += -+ cpi->mb_row_ei[i].mb.MVcount[1][c_idx]; -+ } -+ -+ cpi->mb.prediction_error += -+ cpi->mb_row_ei[i].mb.prediction_error; -+ cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error; -+ -+ for(c_idx = 0; c_idx < MAX_REF_FRAMES; c_idx++) -+ cpi->mb.count_mb_ref_frame_usage[c_idx] += -+ cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx]; -+ -+ for(c_idx = 0; c_idx < MAX_ERROR_BINS; c_idx++) -+ cpi->mb.error_bins[c_idx] += -+ cpi->mb_row_ei[i].mb.error_bins[c_idx]; -+ -+ /* add up counts for each thread */ -+ sum_coef_counts(x, &cpi->mb_row_ei[i].mb); - } - - } - else - #endif - { -- // for each macroblock row in image -+ -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - vp8_zero(cm->left_context) -@@ -847,13 +921,13 @@ void vp8_encode_frame(VP8_COMP *cpi) - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - } - -- cpi->tok_count = tp - cpi->tok; -+ cpi->tok_count = (unsigned int)(tp - cpi->tok); - } - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING -@@ -873,12 +947,13 @@ void vp8_encode_frame(VP8_COMP *cpi) - - - // Work out the segment probabilities if segmentation is enabled -- if (xd->segmentation_enabled) -+ // and needs to be updated -+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) - { - int 
tot_count; - int i; - -- // Set to defaults -+ /* Set to defaults */ - vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); - - tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; -@@ -899,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi) - if (tot_count > 0) - xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; - -- // Zero probabilities not allowed -+ /* Zero probabilities not allowed */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++) - { - if (xd->mb_segment_tree_probs[i] == 0) -@@ -908,10 +983,10 @@ void vp8_encode_frame(VP8_COMP *cpi) - } - } - -- // 256 rate units to the bit -- cpi->projected_frame_size = totalrate >> 8; // projected_frame_size in units of BYTES -+ /* projected_frame_size in units of BYTES */ -+ cpi->projected_frame_size = totalrate >> 8; - -- // Make a note of the percentage MBs coded Intra. -+ /* Make a note of the percentage MBs coded Intra. */ - if (cm->frame_type == KEY_FRAME) - { - cpi->this_frame_percent_intra = 100; -@@ -920,50 +995,23 @@ void vp8_encode_frame(VP8_COMP *cpi) - { - int tot_modes; - -- tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME] -- + cpi->count_mb_ref_frame_usage[LAST_FRAME] -- + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] -- + cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; -+ tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] -+ + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] -+ + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] -+ + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; - - if (tot_modes) -- cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; -- -- } -- --#if 0 -- { -- int cnt = 0; -- int flag[2] = {0, 0}; -- -- for (cnt = 0; cnt < MVPcount; cnt++) -- { -- if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt]) -- { -- flag[0] = 1; -- vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount); -- break; -- } -- } -- -- for (cnt = 0; cnt < MVPcount; cnt++) -- { -- if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt]) -- { -- flag[1] = 1; -- vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount); -- break; -- } -- } -+ cpi->this_frame_percent_intra = -+ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; - -- if (flag[0] || flag[1]) -- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); - } --#endif - - #if ! CONFIG_REALTIME_ONLY -- // Adjust the projected reference frame usage probability numbers to reflect -- // what we have just seen. This may be useful when we make multiple iterations -- // of the recode loop rather than continuing to use values from the previous frame. -+ /* Adjust the projected reference frame usage probability numbers to -+ * reflect what we have just seen. This may be useful when we make -+ * multiple iterations of the recode loop rather than continuing to use -+ * values from the previous frame. 
-+ */ - if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || - (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) - { -@@ -1017,16 +1065,13 @@ void vp8_build_block_offsets(MACROBLOCK *x) - - vp8_build_block_doffsets(&x->e_mbd); - -- // y blocks -+ /* y blocks */ - x->thismb_ptr = &x->thismb[0]; - for (br = 0; br < 4; br++) - { - for (bc = 0; bc < 4; bc++) - { - BLOCK *this_block = &x->block[block]; -- //this_block->base_src = &x->src.y_buffer; -- //this_block->src_stride = x->src.y_stride; -- //this_block->src = 4 * br * this_block->src_stride + 4 * bc; - this_block->base_src = &x->thismb_ptr; - this_block->src_stride = 16; - this_block->src = 4 * br * 16 + 4 * bc; -@@ -1034,7 +1079,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) - } - } - -- // u blocks -+ /* u blocks */ - for (br = 0; br < 2; br++) - { - for (bc = 0; bc < 2; bc++) -@@ -1047,7 +1092,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) - } - } - -- // v blocks -+ /* v blocks */ - for (br = 0; br < 2; br++) - { - for (bc = 0; bc < 2; bc++) -@@ -1087,13 +1132,14 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) - - #endif - -- ++cpi->ymode_count[m]; -- ++cpi->uv_mode_count[uvm]; -+ ++x->ymode_count[m]; -+ ++x->uv_mode_count[uvm]; - - } - --// Experimental stub function to create a per MB zbin adjustment based on --// some previously calculated measure of MB activity. -+/* Experimental stub function to create a per MB zbin adjustment based on -+ * some previously calculated measure of MB activity. -+ */ - static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) - { - #if USE_ACT_INDEX -@@ -1103,7 +1149,7 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) - int64_t b; - int64_t act = *(x->mb_activity_ptr); - -- // Apply the masking to the RD multiplier. -+ /* Apply the masking to the RD multiplier. */ - a = act + 4*cpi->activity_avg; - b = 4*act + cpi->activity_avg; - -@@ -1114,15 +1160,16 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) - #endif - } - --int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) -+int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, -+ TOKENEXTRA **t) - { - MACROBLOCKD *xd = &x->e_mbd; - int rate; - - if (cpi->sf.RD && cpi->compressor_speed != 2) -- vp8_rd_pick_intra_mode(cpi, x, &rate); -+ vp8_rd_pick_intra_mode(x, &rate); - else -- vp8_pick_intra_mode(cpi, x, &rate); -+ vp8_pick_intra_mode(x, &rate); - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { -@@ -1139,7 +1186,7 @@ int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) - - sum_intra_stats(cpi, x); - -- vp8_tokenize_mb(cpi, &x->e_mbd, t); -+ vp8_tokenize_mb(cpi, x, t); - - if (xd->mode_info_context->mbmi.mode != B_PRED) - vp8_inverse_transform_mby(xd); -@@ -1176,25 +1223,27 @@ int vp8cx_encode_inter_macroblock - x->encode_breakout = cpi->oxcf.encode_breakout; - - #if CONFIG_TEMPORAL_DENOISING -- // Reset the best sse mode/mv for each macroblock. -- x->e_mbd.best_sse_inter_mode = 0; -- x->e_mbd.best_sse_mv.as_int = 0; -- x->e_mbd.need_to_clamp_best_mvs = 0; -+ /* Reset the best sse mode/mv for each macroblock. */ -+ x->best_reference_frame = INTRA_FRAME; -+ x->best_zeromv_reference_frame = INTRA_FRAME; -+ x->best_sse_inter_mode = 0; -+ x->best_sse_mv.as_int = 0; -+ x->need_to_clamp_best_mvs = 0; - #endif - - if (cpi->sf.RD) - { -- int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; -+ int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; - - /* Are we using the fast quantizer for the mode selection? 
*/ - if(cpi->sf.use_fastquant_for_pick) - { -- cpi->mb.quantize_b = vp8_fast_quantize_b; -- cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair; -+ x->quantize_b = vp8_fast_quantize_b; -+ x->quantize_b_pair = vp8_fast_quantize_b_pair; - - /* the fast quantizer does not use zbin_extra, so - * do not recalculate */ -- cpi->zbin_mode_boost_enabled = 0; -+ x->zbin_mode_boost_enabled = 0; - } - vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, - &distortion, &intra_error); -@@ -1202,12 +1251,12 @@ int vp8cx_encode_inter_macroblock - /* switch back to the regular quantizer for the encode */ - if (cpi->sf.improved_quant) - { -- cpi->mb.quantize_b = vp8_regular_quantize_b; -- cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair; -+ x->quantize_b = vp8_regular_quantize_b; -+ x->quantize_b_pair = vp8_regular_quantize_b_pair; - } - - /* restore cpi->zbin_mode_boost_enabled */ -- cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; -+ x->zbin_mode_boost_enabled = zbin_mode_boost_enabled; - - } - else -@@ -1216,28 +1265,28 @@ int vp8cx_encode_inter_macroblock - &distortion, &intra_error, mb_row, mb_col); - } - -- cpi->prediction_error += distortion; -- cpi->intra_error += intra_error; -+ x->prediction_error += distortion; -+ x->intra_error += intra_error; - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { -- // Adjust the zbin based on this MB rate. -+ /* Adjust the zbin based on this MB rate. */ - adjust_act_zbin( cpi, x ); - } - - #if 0 -- // Experimental RD code -+ /* Experimental RD code */ - cpi->frame_distortion += distortion; - cpi->last_mb_distortion = distortion; - #endif - -- // MB level adjutment to quantizer setup -+ /* MB level adjutment to quantizer setup */ - if (xd->segmentation_enabled) - { -- // If cyclic update enabled -+ /* If cyclic update enabled */ - if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) - { -- // Clear segment_id back to 0 if not coded (last frame 0,0) -+ /* Clear segment_id back to 0 if not coded (last frame 0,0) */ - if ((xd->mode_info_context->mbmi.segment_id == 1) && - ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV))) - { -@@ -1250,24 +1299,25 @@ int vp8cx_encode_inter_macroblock - } - - { -- // Experimental code. Special case for gf and arf zeromv modes. -- // Increase zbin size to supress noise -- cpi->zbin_mode_boost = 0; -- if (cpi->zbin_mode_boost_enabled) -+ /* Experimental code. Special case for gf and arf zeromv modes. 
-+ * Increase zbin size to supress noise -+ */ -+ x->zbin_mode_boost = 0; -+ if (x->zbin_mode_boost_enabled) - { - if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME ) - { - if (xd->mode_info_context->mbmi.mode == ZEROMV) - { - if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) -- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else -- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } - else if (xd->mode_info_context->mbmi.mode == SPLITMV) -- cpi->zbin_mode_boost = 0; -+ x->zbin_mode_boost = 0; - else -- cpi->zbin_mode_boost = MV_ZBIN_BOOST; -+ x->zbin_mode_boost = MV_ZBIN_BOOST; - } - } - -@@ -1277,7 +1327,7 @@ int vp8cx_encode_inter_macroblock - vp8_update_zbin_extra(cpi, x); - } - -- cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; -+ x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; - - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { -@@ -1322,7 +1372,7 @@ int vp8cx_encode_inter_macroblock - - if (!x->skip) - { -- vp8_tokenize_mb(cpi, xd, t); -+ vp8_tokenize_mb(cpi, x, t); - - if (xd->mode_info_context->mbmi.mode != B_PRED) - vp8_inverse_transform_mby(xd); -@@ -1339,12 +1389,12 @@ int vp8cx_encode_inter_macroblock - - if (cpi->common.mb_no_coeff_skip) - { -- cpi->skip_true_count ++; -+ x->skip_true_count ++; - vp8_fix_contexts(xd); - } - else - { -- vp8_stuff_mb(cpi, xd, t); -+ vp8_stuff_mb(cpi, x, t); - } - } - -diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c -index 1f445b7..340dd63 100644 ---- a/vp8/encoder/encodeintra.c -+++ b/vp8/encoder/encodeintra.c -@@ -54,10 +54,13 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - int dst_stride = x->e_mbd.dst.y_stride; -- unsigned char *base_dst = x->e_mbd.dst.y_buffer; -+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ unsigned char top_left = Above[-1]; - -- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, -- b->bmi.as_mode, b->predictor, 16); -+ vp8_intra4x4_predict(Above, yleft, dst_stride, b->bmi.as_mode, -+ b->predictor, 16, top_left); - - vp8_subtract_b(be, b, 16); - -@@ -67,14 +70,11 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) - - if (*b->eob > 1) - { -- vp8_short_idct4x4llm(b->dqcoeff, -- b->predictor, 16, base_dst + b->offset, dst_stride); -+ vp8_short_idct4x4llm(b->dqcoeff, b->predictor, 16, dst, dst_stride); - } - else - { -- vp8_dc_only_idct_add -- (b->dqcoeff[0], b->predictor, 16, base_dst + b->offset, -- dst_stride); -+ vp8_dc_only_idct_add(b->dqcoeff[0], b->predictor, 16, dst, dst_stride); - } - } - -diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c -index f89e4f7..7d494f2 100644 ---- a/vp8/encoder/encodemb.c -+++ b/vp8/encoder/encodemb.c -@@ -137,10 +137,10 @@ void vp8_transform_intra_mby(MACROBLOCK *x) - &x->block[i].coeff[0], 32); - } - -- // build dc block from 16 y dc values -+ /* build dc block from 16 y dc values */ - build_dcblock(x); - -- // do 2nd order transform on the dc block -+ /* do 2nd order transform on the dc block */ - x->short_walsh4x4(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); - -@@ -157,7 +157,7 @@ static void transform_mb(MACROBLOCK *x) - &x->block[i].coeff[0], 32); - } - -- // build dc block from 16 y dc values -+ /* build dc block from 16 y dc values */ - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - 
build_dcblock(x); - -@@ -167,7 +167,7 @@ static void transform_mb(MACROBLOCK *x) - &x->block[i].coeff[0], 16); - } - -- // do 2nd order transform on the dc block -+ /* do 2nd order transform on the dc block */ - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - x->short_walsh4x4(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); -@@ -185,7 +185,7 @@ static void transform_mby(MACROBLOCK *x) - &x->block[i].coeff[0], 32); - } - -- // build dc block from 16 y dc values -+ /* build dc block from 16 y dc values */ - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - { - build_dcblock(x); -@@ -208,7 +208,7 @@ struct vp8_token_state{ - short qc; - }; - --// TODO: experiments to find optimal multiple numbers -+/* TODO: experiments to find optimal multiple numbers */ - #define Y1_RD_MULT 4 - #define UV_RD_MULT 2 - #define Y2_RD_MULT 16 -diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c -index 0145f6d..0c43d06 100644 ---- a/vp8/encoder/encodemv.c -+++ b/vp8/encoder/encodemv.c -@@ -29,15 +29,15 @@ static void encode_mvcomponent( - const vp8_prob *p = mvc->prob; - const int x = v < 0 ? -v : v; - -- if (x < mvnum_short) // Small -+ if (x < mvnum_short) /* Small */ - { - vp8_write(w, 0, p [mvpis_short]); - vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3); - - if (!x) -- return; // no sign bit -+ return; /* no sign bit */ - } -- else // Large -+ else /* Large */ - { - int i = 0; - -@@ -100,7 +100,7 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc - static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) - { - const vp8_prob *p = mvc->prob; -- const int x = v; //v<0? -v:v; -+ const int x = v; - unsigned int cost; - - if (x < mvnum_short) -@@ -132,12 +132,12 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) - cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1); - } - -- return cost; // + vp8_cost_bit( p [MVPsign], v < 0); -+ return cost; /* + vp8_cost_bit( p [MVPsign], v < 0); */ - } - - void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) - { -- int i = 1; //-mv_max; -+ int i = 1; - unsigned int cost0 = 0; - unsigned int cost1 = 0; - -@@ -151,7 +151,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m - - do - { -- //mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]); - cost0 = cost_mvcomponent(i, &mvc[0]); - - mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]); -@@ -168,7 +167,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m - - do - { -- //mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); - cost1 = cost_mvcomponent(i, &mvc[1]); - - mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]); -@@ -179,10 +177,10 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m - } - - --// Motion vector probability table update depends on benefit. --// Small correction allows for the fact that an update to an MV probability --// may have benefit in subsequent frames as well as the current one. -- -+/* Motion vector probability table update depends on benefit. -+ * Small correction allows for the fact that an update to an MV probability -+ * may have benefit in subsequent frames as well as the current one. 
-+ */ - #define MV_PROB_UPDATE_CORRECTION -1 - - -@@ -254,22 +252,22 @@ static void write_component_probs( - vp8_zero(short_bct) - - -- //j=0 -+ /* j=0 */ - { - const int c = events [mv_max]; - -- is_short_ct [0] += c; // Short vector -- short_ct [0] += c; // Magnitude distribution -+ is_short_ct [0] += c; /* Short vector */ -+ short_ct [0] += c; /* Magnitude distribution */ - } - -- //j: 1 ~ mv_max (1023) -+ /* j: 1 ~ mv_max (1023) */ - { - int j = 1; - - do - { -- const int c1 = events [mv_max + j]; //positive -- const int c2 = events [mv_max - j]; //negative -+ const int c1 = events [mv_max + j]; /* positive */ -+ const int c2 = events [mv_max - j]; /* negative */ - const int c = c1 + c2; - int a = j; - -@@ -278,13 +276,13 @@ static void write_component_probs( - - if (a < mvnum_short) - { -- is_short_ct [0] += c; // Short vector -- short_ct [a] += c; // Magnitude distribution -+ is_short_ct [0] += c; /* Short vector */ -+ short_ct [a] += c; /* Magnitude distribution */ - } - else - { - int k = mvlong_width - 1; -- is_short_ct [1] += c; // Long vector -+ is_short_ct [1] += c; /* Long vector */ - - /* bit 3 not always encoded. */ - do -@@ -296,43 +294,6 @@ static void write_component_probs( - while (++j <= mv_max); - } - -- /* -- { -- int j = -mv_max; -- do -- { -- -- const int c = events [mv_max + j]; -- int a = j; -- -- if( j < 0) -- { -- sign_ct [1] += c; -- a = -j; -- } -- else if( j) -- sign_ct [0] += c; -- -- if( a < mvnum_short) -- { -- is_short_ct [0] += c; // Short vector -- short_ct [a] += c; // Magnitude distribution -- } -- else -- { -- int k = mvlong_width - 1; -- is_short_ct [1] += c; // Long vector -- -- // bit 3 not always encoded. -- -- do -- bit_ct [k] [(a >> k) & 1] += c; -- while( --k >= 0); -- } -- } while( ++j <= mv_max); -- } -- */ -- - calc_prob(Pnew + mvpis_short, is_short_ct); - - calc_prob(Pnew + MVPsign, sign_ct); -@@ -402,10 +363,12 @@ void vp8_write_mvprobs(VP8_COMP *cpi) - active_section = 4; - #endif - write_component_probs( -- w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->MVcount[0], 0, &flags[0] -+ w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], -+ cpi->mb.MVcount[0], 0, &flags[0] - ); - write_component_probs( -- w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->MVcount[1], 1, &flags[1] -+ w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], -+ cpi->mb.MVcount[1], 1, &flags[1] - ); - - if (flags[0] || flags[1]) -diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c -index 2a2cb2f..d4b17ce 100644 ---- a/vp8/encoder/ethreading.c -+++ b/vp8/encoder/ethreading.c -@@ -17,12 +17,6 @@ - - #if CONFIG_MULTITHREAD - --extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, -- TOKENEXTRA **t, -- int recon_yoffset, int recon_uvoffset, -- int mb_row, int mb_col); --extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, -- TOKENEXTRA **t); - extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); - - extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); -@@ -39,7 +33,7 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) - - if (sem_wait(&cpi->h_event_start_lpf) == 0) - { -- if (cpi->b_multi_threaded == 0) // we're shutting down -+ if (cpi->b_multi_threaded == 0) /* we're shutting down */ - break; - - vp8_loopfilter_frame(cpi, cm); -@@ -59,17 +53,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT_PLANES 
mb_row_left_context; - -- const int nsync = cpi->mt_sync_range; -- //printf("Started thread %d\n", ithread); -- - while (1) - { - if (cpi->b_multi_threaded == 0) - break; - -- //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0) - if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) - { -+ const int nsync = cpi->mt_sync_range; - VP8_COMMON *cm = &cpi->common; - int mb_row; - MACROBLOCK *x = &mbri->mb; -@@ -83,7 +74,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - int *segment_counts = mbri->segment_counts; - int *totalrate = &mbri->totalrate; - -- if (cpi->b_multi_threaded == 0) // we're shutting down -+ if (cpi->b_multi_threaded == 0) /* we're shutting down */ - break; - - for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) -@@ -108,7 +99,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - - last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; - -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->above_context = cm->above_context; - xd->left_context = &mb_row_left_context; - -@@ -118,10 +109,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); - -- // Set the mb activity pointer to the start of the row. -+ /* Set the mb activity pointer to the start of the row. */ - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - *current_mb_col = mb_col - 1; -@@ -139,14 +130,18 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - tp = tp_start; - #endif - -- // Distance of Mb to the various image edges. -- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units -+ /* Distance of Mb to the various image edges. 
-+ * These specified to 8th pel as they are always compared -+ * to values that are in 1/8th pel units -+ */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - -- // Set up limit values for motion vectors used to prevent them extending outside the UMV borders -+ /* Set up limit values for motion vectors used to prevent -+ * them extending outside the UMV borders -+ */ - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); -@@ -160,17 +155,19 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp8_activity_masking(cpi, x); - -- // Is segmentation enabled -- // MB level adjustment to quantizer -+ /* Is segmentation enabled */ -+ /* MB level adjustment to quantizer */ - if (xd->segmentation_enabled) - { -- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) -+ /* Code to set segment id in xd->mbmi.segment_id for -+ * current MB (with range checking) -+ */ - if (cpi->segmentation_map[map_index + mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; - else -@@ -179,7 +176,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - vp8cx_mb_init_quantizer(cpi, x, 1); - } - else -- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default -+ /* Set to Segment 0 by default */ -+ xd->mode_info_context->mbmi.segment_id = 0; - - x->active_ptr = cpi->active_map + map_index + mb_col; - -@@ -209,21 +207,28 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - - #endif - -- // Count of last ref frame 0,0 usage -- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) -- cpi->inter_zz_count++; -- -- // Special case code for cyclic refresh -- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode -- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map -- if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) -+ /* Special case code for cyclic refresh -+ * If cyclic update enabled then copy -+ * xd->mbmi.segment_id; (which may have been updated -+ * based on mode during -+ * vp8cx_encode_inter_macroblock()) back into the -+ * global segmentation map -+ */ -+ if ((cpi->current_layer == 0) && -+ (cpi->cyclic_refresh_mode_enabled && -+ xd->segmentation_enabled)) - { - const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; - -- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): -- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) -- // else mark it as dirty (1). 
-+ /* If the block has been refreshed mark it as clean -+ * (the magnitude of the -ve influences how long it -+ * will be before we consider another refresh): -+ * Else if it was coded (last frame 0,0) and has -+ * not already been refreshed then mark it as a -+ * candidate for cleanup next time (marked 0) else -+ * mark it as dirty (1). -+ */ - if (mbmi->segment_id) - cpi->cyclic_refresh_map[map_index + mb_col] = -1; - else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) -@@ -246,13 +251,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - #else - cpi->tplist[mb_row].stop = tp; - #endif -- // Increment pointer into gf usage flags structure. -+ /* Increment pointer into gf usage flags structure. */ - x->gf_active_ptr++; - -- // Increment the activity mask pointers. -+ /* Increment the activity mask pointers. */ - x->mb_activity_ptr++; - -- // adjust to the next column of macroblocks -+ /* adjust to the next column of macroblocks */ - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; -@@ -260,10 +265,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - recon_yoffset += 16; - recon_uvoffset += 8; - -- // Keep track of segment usage -+ /* Keep track of segment usage */ - segment_counts[xd->mode_info_context->mbmi.segment_id]++; - -- // skip to next mb -+ /* skip to next mb */ - xd->mode_info_context++; - x->partition_info++; - xd->above_context++; -@@ -276,7 +281,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - - *current_mb_col = mb_col + nsync; - -- // this is to account for the border -+ /* this is to account for the border */ - xd->mode_info_context++; - x->partition_info++; - -@@ -296,7 +301,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - } - } - -- //printf("exit thread %d\n", ithread); -+ /* printf("exit thread %d\n", ithread); */ - return 0; - } - -@@ -336,21 +341,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - z->src.v_buffer = x->src.v_buffer; - */ - -+ z->mvcost[0] = x->mvcost[0]; -+ z->mvcost[1] = x->mvcost[1]; -+ z->mvsadcost[0] = x->mvsadcost[0]; -+ z->mvsadcost[1] = x->mvsadcost[1]; - -- vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); -- z->mvcost[0] = &z->mvcosts[0][mv_max+1]; -- z->mvcost[1] = &z->mvcosts[1][mv_max+1]; -- z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; -- z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; -- -- -- vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs)); -- vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs)); -- //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); -- //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost)); -- vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost)); -- vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost)); -- vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs)); -+ z->token_costs = x->token_costs; -+ z->inter_bmode_costs = x->inter_bmode_costs; -+ z->mbmode_cost = x->mbmode_cost; -+ z->intra_uv_mode_cost = x->intra_uv_mode_cost; -+ z->bmode_costs = x->bmode_costs; - - for (i = 0; i < 25; i++) - { -@@ -358,17 +358,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - z->block[i].quant_fast = x->block[i].quant_fast; - z->block[i].quant_shift = x->block[i].quant_shift; - z->block[i].zbin = x->block[i].zbin; -- z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; -+ z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; - z->block[i].round = x->block[i].round; -- z->q_index = x->q_index; -- 
z->act_zbin_adj = x->act_zbin_adj; -- z->last_act_zbin_adj = x->last_act_zbin_adj; -- /* -- z->block[i].src = x->block[i].src; -- */ -- z->block[i].src_stride = x->block[i].src_stride; -+ z->block[i].src_stride = x->block[i].src_stride; - } - -+ z->q_index = x->q_index; -+ z->act_zbin_adj = x->act_zbin_adj; -+ z->last_act_zbin_adj = x->last_act_zbin_adj; -+ - { - MACROBLOCKD *xd = &x->e_mbd; - MACROBLOCKD *zd = &z->e_mbd; -@@ -400,9 +398,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - zd->subpixel_predict16x16 = xd->subpixel_predict16x16; - zd->segmentation_enabled = xd->segmentation_enabled; - zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; -- vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); -+ vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, -+ sizeof(xd->segment_feature_data)); - -- vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); -+ vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, -+ sizeof(xd->dequant_y1_dc)); - vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); - vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); - vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); -@@ -418,13 +418,23 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - zd->block[i].dequant = zd->dequant_uv; - zd->block[24].dequant = zd->dequant_y2; - #endif -+ -+ -+ vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); -+ vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, -+ sizeof(x->rd_thresh_mult)); -+ -+ z->zbin_over_quant = x->zbin_over_quant; -+ z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; -+ z->zbin_mode_boost = x->zbin_mode_boost; -+ -+ vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); - } - } - - void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - MACROBLOCK *x, - MB_ROW_COMP *mbr_ei, -- int mb_row, - int count - ) - { -@@ -432,7 +442,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - VP8_COMMON *const cm = & cpi->common; - MACROBLOCKD *const xd = & x->e_mbd; - int i; -- (void) mb_row; - - for (i = 0; i < count; i++) - { -@@ -465,10 +474,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - - vp8_build_block_offsets(mb); - -- vp8_setup_block_dptrs(mbd); -- -- vp8_setup_block_ptrs(mb); -- - mbd->left_context = &cm->left_context; - mb->mvc = cm->fc.mvc; - -@@ -477,10 +482,19 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - mbd->fullpixel_mask = 0xffffffff; - if(cm->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; -+ -+ vp8_zero(mb->coef_counts); -+ vp8_zero(x->ymode_count); -+ mb->skip_true_count = 0; -+ vp8_zero(mb->MVcount); -+ mb->prediction_error = 0; -+ mb->intra_error = 0; -+ vp8_zero(mb->count_mb_ref_frame_usage); -+ mb->mbs_tested_so_far = 0; - } - } - --void vp8cx_create_encoder_threads(VP8_COMP *cpi) -+int vp8cx_create_encoder_threads(VP8_COMP *cpi) - { - const VP8_COMMON * cm = &cpi->common; - -@@ -492,6 +506,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - { - int ithread; - int th_count = cpi->oxcf.multi_threaded - 1; -+ int rc = 0; - - /* don't allocate more threads than cores available */ - if (cpi->oxcf.multi_threaded > cm->processor_core_count) -@@ -505,16 +520,17 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - } - - if(th_count == 0) -- return; -- -- CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count)); -- CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count)); -- CHECK_MEM_ERROR(cpi->mb_row_ei, 
vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); -+ return 0; -+ -+ CHECK_MEM_ERROR(cpi->h_encoding_thread, -+ vpx_malloc(sizeof(pthread_t) * th_count)); -+ CHECK_MEM_ERROR(cpi->h_event_start_encoding, -+ vpx_malloc(sizeof(sem_t) * th_count)); -+ CHECK_MEM_ERROR(cpi->mb_row_ei, -+ vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); - vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); - CHECK_MEM_ERROR(cpi->en_thread_data, - vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); -- CHECK_MEM_ERROR(cpi->mt_current_mb_col, -- vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); - - sem_init(&cpi->h_event_end_encoding, 0, 0); - -@@ -528,16 +544,45 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - - for (ithread = 0; ithread < th_count; ithread++) - { -- ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread]; -+ ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; -+ -+ /* Setup block ptrs and offsets */ -+ vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); -+ vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); - - sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); -+ - ethd->ithread = ithread; - ethd->ptr1 = (void *)cpi; - ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; - -- pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); -+ rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, -+ thread_encoding_proc, ethd); -+ if(rc) -+ break; - } - -+ if(rc) -+ { -+ /* shutdown other threads */ -+ cpi->b_multi_threaded = 0; -+ for(--ithread; ithread >= 0; ithread--) -+ { -+ pthread_join(cpi->h_encoding_thread[ithread], 0); -+ sem_destroy(&cpi->h_event_start_encoding[ithread]); -+ } -+ sem_destroy(&cpi->h_event_end_encoding); -+ -+ /* free thread related resources */ -+ vpx_free(cpi->h_event_start_encoding); -+ vpx_free(cpi->h_encoding_thread); -+ vpx_free(cpi->mb_row_ei); -+ vpx_free(cpi->en_thread_data); -+ -+ return -1; -+ } -+ -+ - { - LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; - -@@ -545,24 +590,47 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - sem_init(&cpi->h_event_end_lpf, 0, 0); - - lpfthd->ptr1 = (void *)cpi; -- pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); -+ rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, -+ lpfthd); -+ -+ if(rc) -+ { -+ /* shutdown other threads */ -+ cpi->b_multi_threaded = 0; -+ for(--ithread; ithread >= 0; ithread--) -+ { -+ sem_post(&cpi->h_event_start_encoding[ithread]); -+ pthread_join(cpi->h_encoding_thread[ithread], 0); -+ sem_destroy(&cpi->h_event_start_encoding[ithread]); -+ } -+ sem_destroy(&cpi->h_event_end_encoding); -+ sem_destroy(&cpi->h_event_end_lpf); -+ sem_destroy(&cpi->h_event_start_lpf); -+ -+ /* free thread related resources */ -+ vpx_free(cpi->h_event_start_encoding); -+ vpx_free(cpi->h_encoding_thread); -+ vpx_free(cpi->mb_row_ei); -+ vpx_free(cpi->en_thread_data); -+ -+ return -2; -+ } - } - } -- -+ return 0; - } - - void vp8cx_remove_encoder_threads(VP8_COMP *cpi) - { - if (cpi->b_multi_threaded) - { -- //shutdown other threads -+ /* shutdown other threads */ - cpi->b_multi_threaded = 0; - { - int i; - - for (i = 0; i < cpi->encoding_thread_count; i++) - { -- //SetEvent(cpi->h_event_mbrencoding[i]); - sem_post(&cpi->h_event_start_encoding[i]); - pthread_join(cpi->h_encoding_thread[i], 0); - -@@ -577,12 +645,11 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) - sem_destroy(&cpi->h_event_end_lpf); - sem_destroy(&cpi->h_event_start_lpf); - -- //free thread related resources -+ /* free thread related resources */ - 
vpx_free(cpi->h_event_start_encoding); - vpx_free(cpi->h_encoding_thread); - vpx_free(cpi->mb_row_ei); - vpx_free(cpi->en_thread_data); -- vpx_free(cpi->mt_current_mb_col); - } - } - #endif -diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c -index 8de1a6a..30bf8a6 100644 ---- a/vp8/encoder/firstpass.c -+++ b/vp8/encoder/firstpass.c -@@ -30,14 +30,12 @@ - #include "encodemv.h" - #include "encodeframe.h" - --//#define OUTPUT_FPF 1 -+/* #define OUTPUT_FPF 1 */ - - extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); - extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); - extern void vp8_alloc_compressor_data(VP8_COMP *cpi); - --//#define GFQ_ADJUSTMENT (40 + ((15*Q)/10)) --//#define GFQ_ADJUSTMENT (80 + ((15*Q)/10)) - #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] - extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; - -@@ -77,7 +75,9 @@ static const int cq_level[QINDEX_RANGE] = - - static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); - --// Resets the first pass file to the given position using a relative seek from the current position -+/* Resets the first pass file to the given position using a relative seek -+ * from the current position -+ */ - static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) - { - cpi->twopass.stats_in = Position; -@@ -92,14 +92,14 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) - return 1; - } - --// Read frame stats at an offset from the current position -+/* Read frame stats at an offset from the current position */ - static int read_frame_stats( VP8_COMP *cpi, - FIRSTPASS_STATS *frame_stats, - int offset ) - { - FIRSTPASS_STATS * fps_ptr = cpi->twopass.stats_in; - -- // Check legality of offset -+ /* Check legality of offset */ - if ( offset >= 0 ) - { - if ( &fps_ptr[offset] >= cpi->twopass.stats_in_end ) -@@ -136,7 +136,7 @@ static void output_stats(const VP8_COMP *cpi, - pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); - vpx_codec_pkt_list_add(pktlist, &pkt); - --// TEMP debug code -+/* TEMP debug code */ - #if OUTPUT_FPF - - { -@@ -257,7 +257,9 @@ static void avg_stats(FIRSTPASS_STATS *section) - section->duration /= section->count; - } - --// Calculate a modified Error used in distributing bits between easier and harder frames -+/* Calculate a modified Error used in distributing bits between easier -+ * and harder frames -+ */ - static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - { - double av_err = ( cpi->twopass.total_stats.ssim_weighted_pred_err / -@@ -315,7 +317,9 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) - unsigned char *src = source->y_buffer; - double sum_weights = 0.0; - -- // Loop throught the Y plane raw examining levels and creating a weight for the image -+ /* Loop throught the Y plane raw examining levels and creating a weight -+ * for the image -+ */ - i = source->y_height; - do - { -@@ -335,41 +339,52 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) - } - - --// This function returns the current per frame maximum bitrate target -+/* This function returns the current per frame maximum bitrate target */ - static int frame_max_bits(VP8_COMP *cpi) - { -- // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left -+ /* Max allocation for a single frame based on the max section guidelines -+ * passed in and how many bits are left -+ */ - int max_bits; - -- // For CBR we need to also consider buffer fullness. 
-- // If we are running below the optimal level then we need to gradually tighten up on max_bits. -+ /* For CBR we need to also consider buffer fullness. -+ * If we are running below the optimal level then we need to gradually -+ * tighten up on max_bits. -+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); - -- // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user -+ /* For CBR base this on the target average bits per frame plus the -+ * maximum sedction rate passed in by the user -+ */ - max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - -- // If our buffer is below the optimum level -+ /* If our buffer is below the optimum level */ - if (buffer_fullness_ratio < 1.0) - { -- // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. -+ /* The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. */ - int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; - - max_bits = (int)(max_bits * buffer_fullness_ratio); - -+ /* Lowest value we will set ... which should allow the buffer to -+ * refill. -+ */ - if (max_bits < min_max_bits) -- max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil. -+ max_bits = min_max_bits; - } - } -- // VBR -+ /* VBR */ - else - { -- // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user -+ /* For VBR base this on the bits and frames left plus the -+ * two_pass_vbrmax_section rate passed in by the user -+ */ - max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - } - -- // Trap case where we are out of bits -+ /* Trap case where we are out of bits */ - if (max_bits < 0) - max_bits = 0; - -@@ -403,13 +418,13 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, - unsigned char *ref_ptr; - int ref_stride = x->e_mbd.pre.y_stride; - -- // Set up pointers for this macro block raw buffer -+ /* Set up pointers for this macro block raw buffer */ - raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset - + d->offset); - vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, - (unsigned int *)(raw_motion_err)); - -- // Set up pointers for this macro block recon buffer -+ /* Set up pointers for this macro block recon buffer */ - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); - vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, -@@ -430,19 +445,19 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, - int_mv ref_mv_full; - - int tmp_err; -- int step_param = 3; //3; // Dont search over full range for first pass -- int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3; -+ int step_param = 3; /* Dont search over full range for first pass */ -+ int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - int n; - vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; - int new_mv_mode_penalty = 256; - -- // override the default variance function to use MSE -+ /* override the default variance function to use MSE */ - v_fn_ptr.vf = vp8_mse16x16; - -- // Set up pointers for this macro block recon buffer -+ /* Set up 
pointers for this macro block recon buffer */ - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - -- // Initial step/diamond search centred on best mv -+ /* Initial step/diamond search centred on best mv */ - tmp_mv.as_int = 0; - ref_mv_full.as_mv.col = ref_mv->as_mv.col>>3; - ref_mv_full.as_mv.row = ref_mv->as_mv.row>>3; -@@ -459,7 +474,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, - best_mv->col = tmp_mv.as_mv.col; - } - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - n = num00; - num00 = 0; - -@@ -520,7 +535,7 @@ void vp8_first_pass(VP8_COMP *cpi) - - zero_ref_mv.as_int = 0; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - x->src = * cpi->Source; - xd->pre = *lst_yv12; -@@ -530,44 +545,55 @@ void vp8_first_pass(VP8_COMP *cpi) - - xd->mode_info_context = cm->mi; - -- vp8_build_block_offsets(x); -- -- vp8_setup_block_dptrs(&x->e_mbd); -+ if(!cm->use_bilinear_mc_filter) -+ { -+ xd->subpixel_predict = vp8_sixtap_predict4x4; -+ xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; -+ xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; -+ xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; -+ } -+ else -+ { -+ xd->subpixel_predict = vp8_bilinear_predict4x4; -+ xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; -+ xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; -+ xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; -+ } - -- vp8_setup_block_ptrs(x); -+ vp8_build_block_offsets(x); - -- // set up frame new frame for intra coded blocks -+ /* set up frame new frame for intra coded blocks */ - vp8_setup_intra_recon(new_yv12); - vp8cx_frame_init_quantizer(cpi); - -- // Initialise the MV cost table to the defaults -- //if( cm->current_video_frame == 0) -- //if ( 0 ) -+ /* Initialise the MV cost table to the defaults */ - { - int flag[2] = {1, 1}; -- vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); -+ vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); - } - -- // for each macroblock row in image -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - int_mv best_ref_mv; - - best_ref_mv.as_int = 0; - -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); - -- // Set up limit values for motion vectors to prevent them extending outside the UMV borders -+ /* Set up limit values for motion vectors to prevent them extending -+ * outside the UMV borders -+ */ - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - - -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int this_error; -@@ -579,26 +605,33 @@ void vp8_first_pass(VP8_COMP *cpi) - xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - -- // do intra 16x16 prediction -+ /* do intra 16x16 prediction */ - this_error = vp8_encode_intra(cpi, x, use_dc_pred); - -- // 
"intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame) -- // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv. -- // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames. -- // This penalty adds a cost matching that of a 0,0 mv to the intra case. -+ /* "intrapenalty" below deals with situations where the intra -+ * and inter error scores are very low (eg a plain black frame) -+ * We do not have special cases in first pass for 0,0 and -+ * nearest etc so all inter modes carry an overhead cost -+ * estimate fot the mv. When the error score is very low this -+ * causes us to pick all or lots of INTRA modes and throw lots -+ * of key frames. This penalty adds a cost matching that of a -+ * 0,0 mv to the intra case. -+ */ - this_error += intrapenalty; - -- // Cumulative intra error total -+ /* Cumulative intra error total */ - intra_error += (int64_t)this_error; - -- // Set up limit values for motion vectors to prevent them extending outside the UMV borders -+ /* Set up limit values for motion vectors to prevent them -+ * extending outside the UMV borders -+ */ - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - -- // Other than for the first frame do a motion search -+ /* Other than for the first frame do a motion search */ - if (cm->current_video_frame > 0) - { - BLOCKD *d = &x->e_mbd.block[0]; -@@ -607,7 +640,7 @@ void vp8_first_pass(VP8_COMP *cpi) - int motion_error = INT_MAX; - int raw_motion_error = INT_MAX; - -- // Simple 0,0 motion with no mv overhead -+ /* Simple 0,0 motion with no mv overhead */ - zz_motion_search( cpi, x, cpi->last_frame_unscaled_source, - &raw_motion_error, lst_yv12, &motion_error, - recon_yoffset ); -@@ -617,13 +650,16 @@ void vp8_first_pass(VP8_COMP *cpi) - if (raw_motion_error < cpi->oxcf.encode_breakout) - goto skip_motion_search; - -- // Test last reference frame using the previous best mv as the -- // starting point (best reference) for the search -+ /* Test last reference frame using the previous best mv as the -+ * starting point (best reference) for the search -+ */ - first_pass_motion_search(cpi, x, &best_ref_mv, - &d->bmi.mv.as_mv, lst_yv12, - &motion_error, recon_yoffset); - -- // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well -+ /* If the current best reference mv is not centred on 0,0 -+ * then do a 0,0 based search as well -+ */ - if (best_ref_mv.as_int) - { - tmp_err = INT_MAX; -@@ -638,7 +674,9 @@ void vp8_first_pass(VP8_COMP *cpi) - } - } - -- // Experimental search in a second reference frame ((0,0) based only) -+ /* Experimental search in a second reference frame ((0,0) -+ * based only) -+ */ - if (cm->current_video_frame > 1) - { - first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset); -@@ -646,19 +684,9 @@ void vp8_first_pass(VP8_COMP *cpi) - if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) - { - second_ref_count++; -- //motion_error = gf_motion_error; -- //d->bmi.mv.as_mv.row = tmp_mv.row; -- //d->bmi.mv.as_mv.col = tmp_mv.col; - } -- /*else -- { -- xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset; -- xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset; -- xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset; -- }*/ -- 
- -- // Reset to last frame as reference buffer -+ /* Reset to last frame as reference buffer */ - xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; - xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; - xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; -@@ -670,10 +698,11 @@ skip_motion_search: - - if (motion_error <= this_error) - { -- // Keep a count of cases where the inter and intra were -- // very close and very low. This helps with scene cut -- // detection for example in cropped clips with black bars -- // at the sides or top and bottom. -+ /* Keep a count of cases where the inter and intra were -+ * very close and very low. This helps with scene cut -+ * detection for example in cropped clips with black bars -+ * at the sides or top and bottom. -+ */ - if( (((this_error-intrapenalty) * 9) <= - (motion_error*10)) && - (this_error < (2*intrapenalty)) ) -@@ -696,17 +725,17 @@ skip_motion_search: - - best_ref_mv.as_int = d->bmi.mv.as_int; - -- // Was the vector non-zero -+ /* Was the vector non-zero */ - if (d->bmi.mv.as_int) - { - mvcount++; - -- // Was it different from the last non zero vector -+ /* Was it different from the last non zero vector */ - if ( d->bmi.mv.as_int != lastmv_as_int ) - new_mv_count++; - lastmv_as_int = d->bmi.mv.as_int; - -- // Does the Row vector point inwards or outwards -+ /* Does the Row vector point inwards or outwards */ - if (mb_row < cm->mb_rows / 2) - { - if (d->bmi.mv.as_mv.row > 0) -@@ -722,7 +751,7 @@ skip_motion_search: - sum_in_vectors--; - } - -- // Does the Row vector point inwards or outwards -+ /* Does the Row vector point inwards or outwards */ - if (mb_col < cm->mb_cols / 2) - { - if (d->bmi.mv.as_mv.col > 0) -@@ -743,7 +772,7 @@ skip_motion_search: - - coded_error += (int64_t)this_error; - -- // adjust to the next column of macroblocks -+ /* adjust to the next column of macroblocks */ - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; -@@ -752,25 +781,25 @@ skip_motion_search: - recon_uvoffset += 8; - } - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - -- //extend the recon for intra prediction -+ /* extend the recon for intra prediction */ - vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - } - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - { - double weight = 0.0; - - FIRSTPASS_STATS fps; - - fps.frame = cm->current_video_frame ; -- fps.intra_error = intra_error >> 8; -- fps.coded_error = coded_error >> 8; -+ fps.intra_error = (double)(intra_error >> 8); -+ fps.coded_error = (double)(coded_error >> 8); - weight = simple_weight(cpi->Source); - - -@@ -809,12 +838,13 @@ skip_motion_search: - fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs; - } - -- // TODO: handle the case when duration is set to 0, or something less -- // than the full time between subsequent cpi->source_time_stamp s . -- fps.duration = cpi->source->ts_end -- - cpi->source->ts_start; -+ /* TODO: handle the case when duration is set to 0, or something less -+ * than the full time between subsequent cpi->source_time_stamps -+ */ -+ fps.duration = (double)(cpi->source->ts_end -+ - cpi->source->ts_start); - -- // don't want to do output stats with a stack variable! 
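/* Standalone sketch, not part of the patch above: a self-contained
 * illustration of the first-pass heuristic described in these hunks.  An
 * "intrapenalty" is added to the intra error so that flat frames do not make
 * intra coding look artificially cheap, and macroblocks whose inter and intra
 * errors are both small and close together are counted as neutral, which
 * feeds the scene-cut logic.  All constants below are invented for the
 * example and are not the real libvpx values. */
#include <stdio.h>

static int is_neutral_mb(int this_error, int motion_error, int intrapenalty)
{
    /* Same shape as the check in the hunk: intra error (net of the penalty)
     * at most ~10% above the best inter error, and small in absolute terms. */
    return (((this_error - intrapenalty) * 9) <= (motion_error * 10)) &&
           (this_error < (2 * intrapenalty));
}

int main(void)
{
    const int intrapenalty = 256;             /* illustrative value only */
    int intra_error  = 150;                   /* raw intra error for the MB */
    int this_error   = intra_error + intrapenalty;
    int motion_error = 180;                   /* best inter error */

    printf("neutral mb: %d\n",
           is_neutral_mb(this_error, motion_error, intrapenalty));
    return 0;
}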
-+ /* don't want to do output stats with a stack variable! */ - memcpy(&cpi->twopass.this_frame_stats, - &fps, - sizeof(FIRSTPASS_STATS)); -@@ -822,7 +852,9 @@ skip_motion_search: - accumulate_stats(&cpi->twopass.total_stats, &fps); - } - -- // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met -+ /* Copy the previous Last Frame into the GF buffer if specific -+ * conditions for doing so are met -+ */ - if ((cm->current_video_frame > 0) && - (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0)) -@@ -830,18 +862,22 @@ skip_motion_search: - vp8_yv12_copy_frame(lst_yv12, gld_yv12); - } - -- // swap frame pointers so last frame refers to the frame we just compressed -+ /* swap frame pointers so last frame refers to the frame we just -+ * compressed -+ */ - vp8_swap_yv12_buffer(lst_yv12, new_yv12); - vp8_yv12_extend_frame_borders(lst_yv12); - -- // Special case for the first frame. Copy into the GF buffer as a second reference. -+ /* Special case for the first frame. Copy into the GF buffer as a -+ * second reference. -+ */ - if (cm->current_video_frame == 0) - { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); - } - - -- // use this to see what the first pass reconstruction looks like -+ /* use this to see what the first pass reconstruction looks like */ - if (0) - { - char filename[512]; -@@ -853,7 +889,8 @@ skip_motion_search: - else - recon_file = fopen(filename, "ab"); - -- if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file)); -+ (void) fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, -+ recon_file); - fclose(recon_file); - } - -@@ -862,11 +899,10 @@ skip_motion_search: - } - extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; - --// Estimate a cost per mb attributable to overheads such as the coding of --// modes and motion vectors. --// Currently simplistic in its assumptions for testing. --// -- -+/* Estimate a cost per mb attributable to overheads such as the coding of -+ * modes and motion vectors. -+ * Currently simplistic in its assumptions for testing. -+ */ - - static double bitcost( double prob ) - { -@@ -890,12 +926,14 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi, - motion_cost = bitcost(av_pct_motion); - intra_cost = bitcost(av_intra); - -- // Estimate of extra bits per mv overhead for mbs -- // << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb -+ /* Estimate of extra bits per mv overhead for mbs -+ * << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb -+ */ - mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; - -- // Crude estimate of overhead cost from modes -- // << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb -+ /* Crude estimate of overhead cost from modes -+ * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb -+ */ - mode_cost = - (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) + - (av_pct_motion * motion_cost) + -@@ -914,17 +952,17 @@ static double calc_correction_factor( double err_per_mb, - double error_term = err_per_mb / err_devisor; - double correction_factor; - -- // Adjustment based on Q to power term. -+ /* Adjustment based on Q to power term. */ - power_term = pt_low + (Q * 0.01); - power_term = (power_term > pt_high) ? 
pt_high : power_term; - -- // Adjustments to error term -- // TBD -+ /* Adjustments to error term */ -+ /* TBD */ - -- // Calculate correction factor -+ /* Calculate correction factor */ - correction_factor = pow(error_term, power_term); - -- // Clip range -+ /* Clip range */ - correction_factor = - (correction_factor < 0.05) - ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor; -@@ -948,15 +986,16 @@ static int estimate_max_q(VP8_COMP *cpi, - int overhead_bits_per_mb; - - if (section_target_bandwitdh <= 0) -- return cpi->twopass.maxq_max_limit; // Highest value allowed -+ return cpi->twopass.maxq_max_limit; /* Highest value allowed */ - - target_norm_bits_per_mb = - (section_target_bandwitdh < (1 << 20)) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); - -- // Calculate a corrective factor based on a rolling ratio of bits spent -- // vs target bits -+ /* Calculate a corrective factor based on a rolling ratio of bits spent -+ * vs target bits -+ */ - if ((cpi->rolling_target_bits > 0) && - (cpi->active_worst_quality < cpi->worst_quality)) - { -@@ -977,8 +1016,9 @@ static int estimate_max_q(VP8_COMP *cpi, - ? 10.0 : cpi->twopass.est_max_qcorrection_factor; - } - -- // Corrections for higher compression speed settings -- // (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -987,18 +1027,20 @@ static int estimate_max_q(VP8_COMP *cpi, - speed_correction = 1.25; - } - -- // Estimate of overhead bits per mb -- // Correction to overhead bits for min allowed Q. -+ /* Estimate of overhead bits per mb */ -+ /* Correction to overhead bits for min allowed Q. */ - overhead_bits_per_mb = overhead_bits / num_mbs; -- overhead_bits_per_mb *= pow( 0.98, (double)cpi->twopass.maxq_min_limit ); -+ overhead_bits_per_mb = (int)(overhead_bits_per_mb * -+ pow( 0.98, (double)cpi->twopass.maxq_min_limit )); - -- // Try and pick a max Q that will be high enough to encode the -- // content at the given rate. -+ /* Try and pick a max Q that will be high enough to encode the -+ * content at the given rate. -+ */ - for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) - { - int bits_per_mb_at_this_q; - -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); - -@@ -1010,27 +1052,29 @@ static int estimate_max_q(VP8_COMP *cpi, - * cpi->twopass.section_max_qfactor - * (double)bits_per_mb_at_this_q); - -- // Mode and motion overhead -- // As Q rises in real encode loop rd code will force overhead down -- // We make a crude adjustment for this here as *.98 per Q step. -+ /* Mode and motion overhead */ -+ /* As Q rises in real encode loop rd code will force overhead down -+ * We make a crude adjustment for this here as *.98 per Q step. -+ */ - overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); - - if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) - break; - } - -- // Restriction on active max q for constrained quality mode. -+ /* Restriction on active max q for constrained quality mode. 
*/ - if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality) ) - { - Q = cpi->cq_target_quality; - } - -- // Adjust maxq_min_limit and maxq_max_limit limits based on -- // averaga q observed in clip for non kf/gf.arf frames -- // Give average a chance to settle though. -+ /* Adjust maxq_min_limit and maxq_max_limit limits based on -+ * average q observed in clip for non kf/gf.arf frames -+ * Give average a chance to settle though. -+ */ - if ( (cpi->ni_frames > -- ((unsigned int)cpi->twopass.total_stats.count >> 8)) && -+ ((int)cpi->twopass.total_stats.count >> 8)) && - (cpi->ni_frames > 150) ) - { - cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) -@@ -1042,8 +1086,9 @@ static int estimate_max_q(VP8_COMP *cpi, - return Q; - } - --// For cq mode estimate a cq level that matches the observed --// complexity and data rate. -+/* For cq mode estimate a cq level that matches the observed -+ * complexity and data rate. -+ */ - static int estimate_cq( VP8_COMP *cpi, - FIRSTPASS_STATS * fpstats, - int section_target_bandwitdh, -@@ -1072,11 +1117,12 @@ static int estimate_cq( VP8_COMP *cpi, - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); - -- // Estimate of overhead bits per mb -+ /* Estimate of overhead bits per mb */ - overhead_bits_per_mb = overhead_bits / num_mbs; - -- // Corrections for higher compression speed settings -- // (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -1085,19 +1131,19 @@ static int estimate_cq( VP8_COMP *cpi, - speed_correction = 1.25; - } - -- // II ratio correction factor for clip as a whole -+ /* II ratio correction factor for clip as a whole */ - clip_iiratio = cpi->twopass.total_stats.intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); - clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); - if (clip_iifactor < 0.80) - clip_iifactor = 0.80; - -- // Try and pick a Q that can encode the content at the given rate. -+ /* Try and pick a Q that can encode the content at the given rate. */ - for (Q = 0; Q < MAXQ; Q++) - { - int bits_per_mb_at_this_q; - -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 100.0, 0.40, 0.90, Q); - -@@ -1110,16 +1156,17 @@ static int estimate_cq( VP8_COMP *cpi, - clip_iifactor * - (double)bits_per_mb_at_this_q); - -- // Mode and motion overhead -- // As Q rises in real encode loop rd code will force overhead down -- // We make a crude adjustment for this here as *.98 per Q step. -+ /* Mode and motion overhead */ -+ /* As Q rises in real encode loop rd code will force overhead down -+ * We make a crude adjustment for this here as *.98 per Q step. -+ */ - overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); - - if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) - break; - } - -- // Clip value to range "best allowed to (worst allowed - 1)" -+ /* Clip value to range "best allowed to (worst allowed - 1)" */ - Q = cq_level[Q]; - if ( Q >= cpi->worst_quality ) - Q = cpi->worst_quality - 1; -@@ -1141,7 +1188,9 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band - - target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? 
(512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); - -- // Corrections for higher compression speed settings (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -1150,12 +1199,12 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band - speed_correction = 1.25; - } - -- // Try and pick a Q that can encode the content at the given rate. -+ /* Try and pick a Q that can encode the content at the given rate. */ - for (Q = 0; Q < MAXQ; Q++) - { - int bits_per_mb_at_this_q; - -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); - -@@ -1172,7 +1221,7 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band - return Q; - } - --// Estimate a worst case Q for a KF group -+/* Estimate a worst case Q for a KF group */ - static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio) - { - int Q; -@@ -1192,12 +1241,14 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - - double combined_correction_factor; - -- // Trap special case where the target is <= 0 -+ /* Trap special case where the target is <= 0 */ - if (target_norm_bits_per_mb <= 0) - return MAXQ * 2; - -- // Calculate a corrective factor based on a rolling ratio of bits spent vs target bits -- // This is clamped to the range 0.1 to 10.0 -+ /* Calculate a corrective factor based on a rolling ratio of bits spent -+ * vs target bits -+ * This is clamped to the range 0.1 to 10.0 -+ */ - if (cpi->long_rolling_target_bits <= 0) - current_spend_ratio = 10.0; - else -@@ -1206,14 +1257,19 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - current_spend_ratio = (current_spend_ratio > 10.0) ? 10.0 : (current_spend_ratio < 0.1) ? 0.1 : current_spend_ratio; - } - -- // Calculate a correction factor based on the quality of prediction in the sequence as indicated by intra_inter error score ratio (IIRatio) -- // The idea here is to favour subsampling in the hardest sections vs the easyest. -+ /* Calculate a correction factor based on the quality of prediction in -+ * the sequence as indicated by intra_inter error score ratio (IIRatio) -+ * The idea here is to favour subsampling in the hardest sections vs -+ * the easyest. -+ */ - iiratio_correction_factor = 1.0 - ((group_iiratio - 6.0) * 0.1); - - if (iiratio_correction_factor < 0.5) - iiratio_correction_factor = 0.5; - -- // Corrections for higher compression speed settings (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -1222,13 +1278,15 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - speed_correction = 1.25; - } - -- // Combine the various factors calculated above -+ /* Combine the various factors calculated above */ - combined_correction_factor = speed_correction * iiratio_correction_factor * current_spend_ratio; - -- // Try and pick a Q that should be high enough to encode the content at the given rate. 
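/* Standalone sketch, not part of the patch above: the correction-factor
 * shape used by the estimate_*q() helpers in these hunks.  Error per MB is
 * normalised by a divisor, raised to a Q-dependent power, and clipped to
 * 0.05 .. 5.0, mirroring calc_correction_factor().  The call in main() uses
 * the (150.0, 0.40, 0.90) arguments that the max-Q estimate passes; the
 * error value itself is invented. */
#include <math.h>
#include <stdio.h>

static double correction_factor(double err_per_mb, double err_divisor,
                                double pt_low, double pt_high, int Q)
{
    double error_term = err_per_mb / err_divisor;
    double power_term = pt_low + (Q * 0.01);
    double factor;

    if (power_term > pt_high)
        power_term = pt_high;

    factor = pow(error_term, power_term);

    /* Clip to the same range as the original. */
    if (factor < 0.05)
        factor = 0.05;
    else if (factor > 5.0)
        factor = 5.0;

    return factor;
}

int main(void)
{
    printf("correction factor: %f\n",
           correction_factor(300.0, 150.0, 0.40, 0.90, 32));
    return 0;
}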
-+ /* Try and pick a Q that should be high enough to encode the content at -+ * the given rate. -+ */ - for (Q = 0; Q < MAXQ; Q++) - { -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 150.0, pow_lowq, pow_highq, Q); - -@@ -1241,7 +1299,9 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - break; - } - -- // If we could not hit the target even at Max Q then estimate what Q would have bee required -+ /* If we could not hit the target even at Max Q then estimate what Q -+ * would have been required -+ */ - while ((bits_per_mb_at_this_q > target_norm_bits_per_mb) && (Q < (MAXQ * 2))) - { - -@@ -1280,30 +1340,34 @@ void vp8_init_second_pass(VP8_COMP *cpi) - cpi->twopass.total_stats = *cpi->twopass.stats_in_end; - cpi->twopass.total_left_stats = cpi->twopass.total_stats; - -- // each frame can have a different duration, as the frame rate in the source -- // isn't guaranteed to be constant. The frame rate prior to the first frame -- // encoded in the second pass is a guess. However the sum duration is not. -- // Its calculated based on the actual durations of all frames from the first -- // pass. -+ /* each frame can have a different duration, as the frame rate in the -+ * source isn't guaranteed to be constant. The frame rate prior to -+ * the first frame encoded in the second pass is a guess. However the -+ * sum duration is not. Its calculated based on the actual durations of -+ * all frames from the first pass. -+ */ - vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration); - - cpi->output_frame_rate = cpi->frame_rate; - cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ; - cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); - -- // Calculate a minimum intra value to be used in determining the IIratio -- // scores used in the second pass. We have this minimum to make sure -- // that clips that are static but "low complexity" in the intra domain -- // are still boosted appropriately for KF/GF/ARF -+ /* Calculate a minimum intra value to be used in determining the IIratio -+ * scores used in the second pass. 
We have this minimum to make sure -+ * that clips that are static but "low complexity" in the intra domain -+ * are still boosted appropriately for KF/GF/ARF -+ */ - cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; - -- // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence -+ /* Scan the first pass file and calculate an average Intra / Inter error -+ * score ratio for the sequence -+ */ - { - double sum_iiratio = 0.0; - double IIRatio; - -- start_pos = cpi->twopass.stats_in; // Note starting "file" position -+ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ - - while (input_stats(cpi, &this_frame) != EOF) - { -@@ -1314,14 +1378,15 @@ void vp8_init_second_pass(VP8_COMP *cpi) - - cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count); - -- // Reset file position -+ /* Reset file position */ - reset_fpf_position(cpi, start_pos); - } - -- // Scan the first pass file and calculate a modified total error based upon the bias/power function -- // used to allocate bits -+ /* Scan the first pass file and calculate a modified total error based -+ * upon the bias/power function used to allocate bits -+ */ - { -- start_pos = cpi->twopass.stats_in; // Note starting "file" position -+ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ - - cpi->twopass.modified_error_total = 0.0; - cpi->twopass.modified_error_used = 0.0; -@@ -1332,7 +1397,7 @@ void vp8_init_second_pass(VP8_COMP *cpi) - } - cpi->twopass.modified_error_left = cpi->twopass.modified_error_total; - -- reset_fpf_position(cpi, start_pos); // Reset file position -+ reset_fpf_position(cpi, start_pos); /* Reset file position */ - - } - } -@@ -1341,23 +1406,24 @@ void vp8_end_second_pass(VP8_COMP *cpi) - { - } - --// This function gives and estimate of how badly we believe --// the prediction quality is decaying from frame to frame. -+/* This function gives and estimate of how badly we believe the prediction -+ * quality is decaying from frame to frame. -+ */ - static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) - { - double prediction_decay_rate; - double motion_decay; - double motion_pct = next_frame->pcnt_motion; - -- // Initial basis is the % mbs inter coded -+ /* Initial basis is the % mbs inter coded */ - prediction_decay_rate = next_frame->pcnt_inter; - -- // High % motion -> somewhat higher decay rate -+ /* High % motion -> somewhat higher decay rate */ - motion_decay = (1.0 - (motion_pct / 20.0)); - if (motion_decay < prediction_decay_rate) - prediction_decay_rate = motion_decay; - -- // Adjustment to decay rate based on speed of motion -+ /* Adjustment to decay rate based on speed of motion */ - { - double this_mv_rabs; - double this_mv_cabs; -@@ -1377,9 +1443,10 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra - return prediction_decay_rate; - } - --// Function to test for a condition where a complex transition is followed --// by a static section. For example in slide shows where there is a fade --// between slides. This is to help with more optimal kf and gf positioning. -+/* Function to test for a condition where a complex transition is followed -+ * by a static section. For example in slide shows where there is a fade -+ * between slides. This is to help with more optimal kf and gf positioning. 
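/* Standalone sketch, not part of the patch above: the decay-rate idea from
 * the get_prediction_decay_rate() hunk.  The rate starts from the fraction
 * of inter-coded macroblocks and is pulled down further when a large share
 * of blocks carry motion; the speed-of-motion adjustment in the real
 * function is omitted here.  Input percentages are invented. */
#include <stdio.h>

static double prediction_decay_rate(double pcnt_inter, double pcnt_motion)
{
    double decay = pcnt_inter;                 /* % of MBs inter coded */
    double motion_decay = 1.0 - (pcnt_motion / 20.0);

    /* High % motion -> somewhat higher decay rate. */
    if (motion_decay < decay)
        decay = motion_decay;

    return decay;
}

int main(void)
{
    printf("decay rate: %f\n", prediction_decay_rate(0.97, 0.40));
    return 0;
}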
-+ */ - static int detect_transition_to_still( - VP8_COMP *cpi, - int frame_interval, -@@ -1389,9 +1456,10 @@ static int detect_transition_to_still( - { - int trans_to_still = 0; - -- // Break clause to detect very still sections after motion -- // For example a static image after a fade or other transition -- // instead of a clean scene cut. -+ /* Break clause to detect very still sections after motion -+ * For example a static image after a fade or other transition -+ * instead of a clean scene cut. -+ */ - if ( (frame_interval > MIN_GF_INTERVAL) && - (loop_decay_rate >= 0.999) && - (decay_accumulator < 0.9) ) -@@ -1401,8 +1469,7 @@ static int detect_transition_to_still( - FIRSTPASS_STATS tmp_next_frame; - double decay_rate; - -- // Look ahead a few frames to see if static condition -- // persists... -+ /* Look ahead a few frames to see if static condition persists... */ - for ( j = 0; j < still_interval; j++ ) - { - if (EOF == input_stats(cpi, &tmp_next_frame)) -@@ -1412,10 +1479,10 @@ static int detect_transition_to_still( - if ( decay_rate < 0.999 ) - break; - } -- // Reset file position -+ /* Reset file position */ - reset_fpf_position(cpi, position); - -- // Only if it does do we signal a transition to still -+ /* Only if it does do we signal a transition to still */ - if ( j == still_interval ) - trans_to_still = 1; - } -@@ -1423,24 +1490,26 @@ static int detect_transition_to_still( - return trans_to_still; - } - --// This function detects a flash through the high relative pcnt_second_ref --// score in the frame following a flash frame. The offset passed in should --// reflect this -+/* This function detects a flash through the high relative pcnt_second_ref -+ * score in the frame following a flash frame. The offset passed in should -+ * reflect this -+ */ - static int detect_flash( VP8_COMP *cpi, int offset ) - { - FIRSTPASS_STATS next_frame; - - int flash_detected = 0; - -- // Read the frame data. -- // The return is 0 (no flash detected) if not a valid frame -+ /* Read the frame data. */ -+ /* The return is 0 (no flash detected) if not a valid frame */ - if ( read_frame_stats(cpi, &next_frame, offset) != EOF ) - { -- // What we are looking for here is a situation where there is a -- // brief break in prediction (such as a flash) but subsequent frames -- // are reasonably well predicted by an earlier (pre flash) frame. -- // The recovery after a flash is indicated by a high pcnt_second_ref -- // comapred to pcnt_inter. -+ /* What we are looking for here is a situation where there is a -+ * brief break in prediction (such as a flash) but subsequent frames -+ * are reasonably well predicted by an earlier (pre flash) frame. -+ * The recovery after a flash is indicated by a high pcnt_second_ref -+ * comapred to pcnt_inter. -+ */ - if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) && - (next_frame.pcnt_second_ref >= 0.5 ) ) - { -@@ -1461,7 +1530,7 @@ static int detect_flash( VP8_COMP *cpi, int offset ) - return flash_detected; - } - --// Update the motion related elements to the GF arf boost calculation -+/* Update the motion related elements to the GF arf boost calculation */ - static void accumulate_frame_motion_stats( - VP8_COMP *cpi, - FIRSTPASS_STATS * this_frame, -@@ -1470,22 +1539,22 @@ static void accumulate_frame_motion_stats( - double * abs_mv_in_out_accumulator, - double * mv_ratio_accumulator ) - { -- //double this_frame_mv_in_out; - double this_frame_mvr_ratio; - double this_frame_mvc_ratio; - double motion_pct; - -- // Accumulate motion stats. 
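/* Standalone sketch, not part of the patch above: the flash test described
 * in the detect_flash() hunk.  A flash is inferred when the frame after the
 * suspected flash is predicted much better from the older (second) reference
 * than from the immediately preceding frame.  The struct is a stand-in for
 * the two FIRSTPASS_STATS fields the check uses. */
#include <stdio.h>

struct frame_stats {
    double pcnt_inter;       /* % of MBs inter coded from the last frame */
    double pcnt_second_ref;  /* % of MBs better predicted from the older ref */
};

static int looks_like_flash(const struct frame_stats *next_frame)
{
    return (next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
           (next_frame->pcnt_second_ref >= 0.5);
}

int main(void)
{
    struct frame_stats after_flash = { 0.30, 0.85 };  /* invented sample */
    printf("flash detected: %d\n", looks_like_flash(&after_flash));
    return 0;
}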
-+ /* Accumulate motion stats. */ - motion_pct = this_frame->pcnt_motion; - -- // Accumulate Motion In/Out of frame stats -+ /* Accumulate Motion In/Out of frame stats */ - *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; - *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; - *abs_mv_in_out_accumulator += - fabs(this_frame->mv_in_out_count * motion_pct); - -- // Accumulate a measure of how uniform (or conversely how random) -- // the motion field is. (A ratio of absmv / mv) -+ /* Accumulate a measure of how uniform (or conversely how random) -+ * the motion field is. (A ratio of absmv / mv) -+ */ - if (motion_pct > 0.05) - { - this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / -@@ -1507,7 +1576,7 @@ static void accumulate_frame_motion_stats( - } - } - --// Calculate a baseline boost number for the current frame. -+/* Calculate a baseline boost number for the current frame. */ - static double calc_frame_boost( - VP8_COMP *cpi, - FIRSTPASS_STATS * this_frame, -@@ -1515,7 +1584,7 @@ static double calc_frame_boost( - { - double frame_boost; - -- // Underlying boost factor is based on inter intra error ratio -+ /* Underlying boost factor is based on inter intra error ratio */ - if (this_frame->intra_error > cpi->twopass.gf_intra_err_min) - frame_boost = (IIFACTOR * this_frame->intra_error / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); -@@ -1523,17 +1592,18 @@ static double calc_frame_boost( - frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); - -- // Increase boost for frames where new data coming into frame -- // (eg zoom out). Slightly reduce boost if there is a net balance -- // of motion out of the frame (zoom in). -- // The range for this_frame_mv_in_out is -1.0 to +1.0 -+ /* Increase boost for frames where new data coming into frame -+ * (eg zoom out). Slightly reduce boost if there is a net balance -+ * of motion out of the frame (zoom in). -+ * The range for this_frame_mv_in_out is -1.0 to +1.0 -+ */ - if (this_frame_mv_in_out > 0.0) - frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); -- // In extreme case boost is halved -+ /* In extreme case boost is halved */ - else - frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - -- // Clip to maximum -+ /* Clip to maximum */ - if (frame_boost > GF_RMAX) - frame_boost = GF_RMAX; - -@@ -1561,26 +1631,27 @@ static int calc_arf_boost( - double r; - int flash_detected = 0; - -- // Search forward from the proposed arf/next gf position -+ /* Search forward from the proposed arf/next gf position */ - for ( i = 0; i < f_frames; i++ ) - { - if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF ) - break; - -- // Update the motion related elements to the boost calculation -+ /* Update the motion related elements to the boost calculation */ - accumulate_frame_motion_stats( cpi, &this_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - -- // Calculate the baseline boost number for this frame -+ /* Calculate the baseline boost number for this frame */ - r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out ); - -- // We want to discount the the flash frame itself and the recovery -- // frame that follows as both will have poor scores. -+ /* We want to discount the the flash frame itself and the recovery -+ * frame that follows as both will have poor scores. 
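/* Standalone sketch, not part of the patch above: the per-frame boost
 * calculation outlined in the calc_frame_boost() hunk.  The boost grows with
 * the intra/inter error ratio, is nudged up when motion brings new content
 * into the frame (zoom out) and down when motion leaves it (zoom in), then
 * clipped.  IIFACTOR_GUESS and BOOST_MAX_GUESS are placeholders, not the
 * real libvpx constants, and the small epsilon stands in for
 * DOUBLE_DIVIDE_CHECK(). */
#include <stdio.h>

#define IIFACTOR_GUESS   12.5
#define BOOST_MAX_GUESS  128.0

static double frame_boost(double intra_error, double coded_error,
                          double intra_err_min, double mv_in_out)
{
    double ii = (intra_error > intra_err_min) ? intra_error : intra_err_min;
    double boost = IIFACTOR_GUESS * ii / (coded_error + 0.000001);

    if (mv_in_out > 0.0)
        boost += boost * (mv_in_out * 2.0);   /* new content entering frame */
    else
        boost += boost * (mv_in_out / 2.0);   /* content leaving the frame */

    if (boost > BOOST_MAX_GUESS)
        boost = BOOST_MAX_GUESS;

    return boost;
}

int main(void)
{
    printf("boost: %f\n", frame_boost(4000.0, 900.0, 1500.0, 0.1));
    return 0;
}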
-+ */ - flash_detected = detect_flash(cpi, (i+offset)) || - detect_flash(cpi, (i+offset+1)); - -- // Cumulative effect of prediction quality decay -+ /* Cumulative effect of prediction quality decay */ - if ( !flash_detected ) - { - decay_accumulator = -@@ -1591,7 +1662,7 @@ static int calc_arf_boost( - } - boost_score += (decay_accumulator * r); - -- // Break out conditions. -+ /* Break out conditions. */ - if ( (!flash_detected) && - ((mv_ratio_accumulator > 100.0) || - (abs_mv_in_out_accumulator > 3.0) || -@@ -1603,7 +1674,7 @@ static int calc_arf_boost( - - *f_boost = (int)(boost_score * 100.0) >> 4; - -- // Reset for backward looking loop -+ /* Reset for backward looking loop */ - boost_score = 0.0; - mv_ratio_accumulator = 0.0; - decay_accumulator = 1.0; -@@ -1611,26 +1682,27 @@ static int calc_arf_boost( - mv_in_out_accumulator = 0.0; - abs_mv_in_out_accumulator = 0.0; - -- // Search forward from the proposed arf/next gf position -+ /* Search forward from the proposed arf/next gf position */ - for ( i = -1; i >= -b_frames; i-- ) - { - if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF ) - break; - -- // Update the motion related elements to the boost calculation -+ /* Update the motion related elements to the boost calculation */ - accumulate_frame_motion_stats( cpi, &this_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - -- // Calculate the baseline boost number for this frame -+ /* Calculate the baseline boost number for this frame */ - r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out ); - -- // We want to discount the the flash frame itself and the recovery -- // frame that follows as both will have poor scores. -+ /* We want to discount the the flash frame itself and the recovery -+ * frame that follows as both will have poor scores. -+ */ - flash_detected = detect_flash(cpi, (i+offset)) || - detect_flash(cpi, (i+offset+1)); - -- // Cumulative effect of prediction quality decay -+ /* Cumulative effect of prediction quality decay */ - if ( !flash_detected ) - { - decay_accumulator = -@@ -1642,7 +1714,7 @@ static int calc_arf_boost( - - boost_score += (decay_accumulator * r); - -- // Break out conditions. -+ /* Break out conditions. */ - if ( (!flash_detected) && - ((mv_ratio_accumulator > 100.0) || - (abs_mv_in_out_accumulator > 3.0) || -@@ -1657,7 +1729,7 @@ static int calc_arf_boost( - } - #endif - --// Analyse and define a gf/arf group . -+/* Analyse and define a gf/arf group . 
*/ - static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - { - FIRSTPASS_STATS next_frame; -@@ -1673,14 +1745,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - double mv_ratio_accumulator = 0.0; - double decay_accumulator = 1.0; - -- double loop_decay_rate = 1.00; // Starting decay rate -+ double loop_decay_rate = 1.00; /* Starting decay rate */ - - double this_frame_mv_in_out = 0.0; - double mv_in_out_accumulator = 0.0; - double abs_mv_in_out_accumulator = 0.0; - double mod_err_per_mb_accumulator = 0.0; - -- int max_bits = frame_max_bits(cpi); // Max for a single frame -+ int max_bits = frame_max_bits(cpi); /* Max for a single frame */ - - unsigned int allow_alt_ref = - cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; -@@ -1693,37 +1765,40 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.gf_group_bits = 0; - cpi->twopass.gf_decay_rate = 0; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - start_pos = cpi->twopass.stats_in; - -- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean -+ vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */ - -- // Load stats for the current frame. -+ /* Load stats for the current frame. */ - mod_frame_err = calculate_modified_err(cpi, this_frame); - -- // Note the error of the frame at the start of the group (this will be -- // the GF frame error if we code a normal gf -+ /* Note the error of the frame at the start of the group (this will be -+ * the GF frame error if we code a normal gf -+ */ - gf_first_frame_err = mod_frame_err; - -- // Special treatment if the current frame is a key frame (which is also -- // a gf). If it is then its error score (and hence bit allocation) need -- // to be subtracted out from the calculation for the GF group -+ /* Special treatment if the current frame is a key frame (which is also -+ * a gf). If it is then its error score (and hence bit allocation) need -+ * to be subtracted out from the calculation for the GF group -+ */ - if (cpi->common.frame_type == KEY_FRAME) - gf_group_err -= gf_first_frame_err; - -- // Scan forward to try and work out how many frames the next gf group -- // should contain and what level of boost is appropriate for the GF -- // or ARF that will be coded with the group -+ /* Scan forward to try and work out how many frames the next gf group -+ * should contain and what level of boost is appropriate for the GF -+ * or ARF that will be coded with the group -+ */ - i = 0; - - while (((i < cpi->twopass.static_scene_max_gf_interval) || - ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) && - (i < cpi->twopass.frames_to_key)) - { -- i++; // Increment the loop counter -+ i++; - -- // Accumulate error score of frames in this gf group -+ /* Accumulate error score of frames in this gf group */ - mod_frame_err = calculate_modified_err(cpi, this_frame); - - gf_group_err += mod_frame_err; -@@ -1734,19 +1809,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (EOF == input_stats(cpi, &next_frame)) - break; - -- // Test for the case where there is a brief flash but the prediction -- // quality back to an earlier frame is then restored. -+ /* Test for the case where there is a brief flash but the prediction -+ * quality back to an earlier frame is then restored. 
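/* Standalone sketch, not part of the patch above: the accumulation pattern
 * used when sizing a GF/ARF group in these hunks.  Each frame contributes
 * its boost scaled by the product of the prediction decay rates seen so far,
 * and frames flagged as flashes do not advance the decay.  The per-frame
 * arrays are invented sample data. */
#include <stdio.h>

int main(void)
{
    const double r[]     = { 20.0, 18.0, 15.0, 40.0, 14.0 };  /* frame boost */
    const double decay[] = { 0.99, 0.98, 0.97, 0.50, 0.96 };  /* decay rate */
    const int    flash[] = { 0,    0,    0,    1,    0    };  /* flash flag */
    const int n = 5;

    double decay_accumulator = 1.0;
    double boost_score = 0.0;
    int i;

    for (i = 0; i < n; i++)
    {
        if (!flash[i])
            decay_accumulator *= decay[i];   /* cumulative prediction decay */
        boost_score += decay_accumulator * r[i];
    }

    printf("boost_score = %f\n", boost_score);
    return 0;
}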
-+ */ - flash_detected = detect_flash(cpi, 0); - -- // Update the motion related elements to the boost calculation -+ /* Update the motion related elements to the boost calculation */ - accumulate_frame_motion_stats( cpi, &next_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - -- // Calculate a baseline boost number for this frame -+ /* Calculate a baseline boost number for this frame */ - r = calc_frame_boost( cpi, &next_frame, this_frame_mv_in_out ); - -- // Cumulative effect of prediction quality decay -+ /* Cumulative effect of prediction quality decay */ - if ( !flash_detected ) - { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); -@@ -1756,8 +1832,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - boost_score += (decay_accumulator * r); - -- // Break clause to detect very still sections after motion -- // For example a staic image after a fade or other transition. -+ /* Break clause to detect very still sections after motion -+ * For example a staic image after a fade or other transition. -+ */ - if ( detect_transition_to_still( cpi, i, 5, - loop_decay_rate, - decay_accumulator ) ) -@@ -1767,14 +1844,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - break; - } - -- // Break out conditions. -+ /* Break out conditions. */ - if ( -- // Break at cpi->max_gf_interval unless almost totally static -+ /* Break at cpi->max_gf_interval unless almost totally static */ - (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) || - ( -- // Dont break out with a very short interval -+ /* Dont break out with a very short interval */ - (i > MIN_GF_INTERVAL) && -- // Dont break out very close to a key frame -+ /* Dont break out very close to a key frame */ - ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && - ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && - (!flash_detected) && -@@ -1796,15 +1873,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.gf_decay_rate = - (i > 0) ? 
(int)(100.0 * (1.0 - decay_accumulator)) / i : 0; - -- // When using CBR apply additional buffer related upper limits -+ /* When using CBR apply additional buffer related upper limits */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - double max_boost; - -- // For cbr apply buffer related limits -+ /* For cbr apply buffer related limits */ - if (cpi->drop_frames_allowed) - { -- int df_buffer_level = cpi->oxcf.drop_frames_water_mark * -+ int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark * - (cpi->oxcf.optimal_buffer_level / 100); - - if (cpi->buffer_level > df_buffer_level) -@@ -1825,7 +1902,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - boost_score = max_boost; - } - -- // Dont allow conventional gf too near the next kf -+ /* Dont allow conventional gf too near the next kf */ - if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) - { - while (i < cpi->twopass.frames_to_key) -@@ -1846,14 +1923,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->gfu_boost = (int)(boost_score * 100.0) >> 4; - - #if NEW_BOOST -- // Alterrnative boost calculation for alt ref -+ /* Alterrnative boost calculation for alt ref */ - alt_boost = calc_arf_boost( cpi, 0, (i-1), (i-1), &f_boost, &b_boost ); - #endif - -- // Should we use the alternate refernce frame -+ /* Should we use the alternate refernce frame */ - if (allow_alt_ref && - (i >= MIN_GF_INTERVAL) && -- // dont use ARF very near next kf -+ /* dont use ARF very near next kf */ - (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) && - #if NEW_BOOST - ((next_frame.pcnt_inter > 0.75) || -@@ -1883,7 +1960,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->gfu_boost = alt_boost; - #endif - -- // Estimate the bits to be allocated to the group as a whole -+ /* Estimate the bits to be allocated to the group as a whole */ - if ((cpi->twopass.kf_group_bits > 0) && - (cpi->twopass.kf_group_error_left > 0)) - { -@@ -1893,7 +1970,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - group_bits = 0; - -- // Boost for arf frame -+ /* Boost for arf frame */ - #if NEW_BOOST - Boost = (alt_boost * GFQ_ADJUSTMENT) / 100; - #else -@@ -1901,7 +1978,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - #endif - Boost += (i * 50); - -- // Set max and minimum boost and hence minimum allocation -+ /* Set max and minimum boost and hence minimum allocation */ - if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) - Boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (Boost < 125) -@@ -1909,24 +1986,27 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - allocation_chunks = (i * 100) + Boost; - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to prevent overflow */ - while (Boost > 1000) - { - Boost /= 2; - allocation_chunks /= 2; - } - -- // Calculate the number of bits to be spent on the arf based on the -- // boost number -+ /* Calculate the number of bits to be spent on the arf based on the -+ * boost number -+ */ - arf_frame_bits = (int)((double)Boost * (group_bits / - (double)allocation_chunks)); - -- // Estimate if there are enough bits available to make worthwhile use -- // of an arf. -+ /* Estimate if there are enough bits available to make worthwhile use -+ * of an arf. 
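/* Standalone sketch, not part of the patch above: the share-based allocation
 * from these hunks.  The group's bits are split into "allocation chunks"
 * (100 per frame plus the boost), both values are halved together while the
 * boost is large to avoid overflow, and the ARF then receives Boost chunks'
 * worth of bits.  All numbers are invented. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    int interval = 12;                 /* frames in the gf/arf group */
    int Boost = 2400;                  /* boost after the usual clamping */
    int64_t group_bits = 900000;       /* bits available to the group */
    int allocation_chunks = (interval * 100) + Boost;
    int arf_bits;

    /* Normalise boost and chunks down together to prevent overflow. */
    while (Boost > 1000)
    {
        Boost /= 2;
        allocation_chunks /= 2;
    }

    arf_bits = (int)((double)Boost *
                     ((double)group_bits / (double)allocation_chunks));

    printf("arf gets %d of %lld bits\n", arf_bits, (long long)group_bits);
    return 0;
}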
-+ */ - tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits); - -- // Only use an arf if it is likely we will be able to code -- // it at a lower Q than the surrounding frames. -+ /* Only use an arf if it is likely we will be able to code -+ * it at a lower Q than the surrounding frames. -+ */ - if (tmp_q < cpi->worst_quality) - { - int half_gf_int; -@@ -1936,42 +2016,46 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - cpi->source_alt_ref_pending = 1; - -- // For alt ref frames the error score for the end frame of the -- // group (the alt ref frame) should not contribute to the group -- // total and hence the number of bit allocated to the group. -- // Rather it forms part of the next group (it is the GF at the -- // start of the next group) -- // gf_group_err -= mod_frame_err; -- -- // For alt ref frames alt ref frame is technically part of the -- // GF frame for the next group but we always base the error -- // calculation and bit allocation on the current group of frames. -- -- // Set the interval till the next gf or arf. -- // For ARFs this is the number of frames to be coded before the -- // future frame that is coded as an ARF. -- // The future frame itself is part of the next group -+ /* -+ * For alt ref frames the error score for the end frame of the -+ * group (the alt ref frame) should not contribute to the group -+ * total and hence the number of bit allocated to the group. -+ * Rather it forms part of the next group (it is the GF at the -+ * start of the next group) -+ * gf_group_err -= mod_frame_err; -+ * -+ * For alt ref frames alt ref frame is technically part of the -+ * GF frame for the next group but we always base the error -+ * calculation and bit allocation on the current group of frames. -+ * -+ * Set the interval till the next gf or arf. -+ * For ARFs this is the number of frames to be coded before the -+ * future frame that is coded as an ARF. -+ * The future frame itself is part of the next group -+ */ - cpi->baseline_gf_interval = i; - -- // Define the arnr filter width for this group of frames: -- // We only filter frames that lie within a distance of half -- // the GF interval from the ARF frame. We also have to trap -- // cases where the filter extends beyond the end of clip. -- // Note: this_frame->frame has been updated in the loop -- // so it now points at the ARF frame. -+ /* -+ * Define the arnr filter width for this group of frames: -+ * We only filter frames that lie within a distance of half -+ * the GF interval from the ARF frame. We also have to trap -+ * cases where the filter extends beyond the end of clip. -+ * Note: this_frame->frame has been updated in the loop -+ * so it now points at the ARF frame. 
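/* Standalone sketch, not part of the patch above: the temporal-filter width
 * selection discussed here, for the centred (default) case.  The forward
 * half is capped by the frames actually left after the ARF and by half the
 * GF interval, and an even filter length puts the extra frame on the
 * backward side (len=6 ==> bbbAff, len=7 ==> bbbAfff).  The hunks do not
 * show how frames_fwd is initialised, so starting it at arnr_max_frames - 1
 * is an assumption made for this sketch; the input values are invented. */
#include <stdio.h>

int main(void)
{
    int arnr_max_frames = 6;
    int baseline_gf_interval = 10;
    int frames_after_arf = 30;         /* frames left in the clip after ARF */

    int half_gf_int = baseline_gf_interval >> 1;
    int frames_fwd = arnr_max_frames - 1;   /* assumed starting value */
    int frames_bwd;

    /* Centred filter: split the length either side of the ARF. */
    frames_fwd >>= 1;
    if (frames_fwd > frames_after_arf)
        frames_fwd = frames_after_arf;
    if (frames_fwd > half_gf_int)
        frames_fwd = half_gf_int;

    frames_bwd = frames_fwd;
    /* Even filter lengths get the extra frame on the backward side. */
    if (frames_bwd < half_gf_int)
        frames_bwd += (arnr_max_frames + 1) & 0x1;

    printf("filter: %d back, ARF, %d forward\n", frames_bwd, frames_fwd);
    return 0;
}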
-+ */ - half_gf_int = cpi->baseline_gf_interval >> 1; -- frames_after_arf = cpi->twopass.total_stats.count - -- this_frame->frame - 1; -+ frames_after_arf = (int)(cpi->twopass.total_stats.count - -+ this_frame->frame - 1); - - switch (cpi->oxcf.arnr_type) - { -- case 1: // Backward filter -+ case 1: /* Backward filter */ - frames_fwd = 0; - if (frames_bwd > half_gf_int) - frames_bwd = half_gf_int; - break; - -- case 2: // Forward filter -+ case 2: /* Forward filter */ - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - if (frames_fwd > frames_after_arf) -@@ -1979,7 +2063,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - frames_bwd = 0; - break; - -- case 3: // Centered filter -+ case 3: /* Centered filter */ - default: - frames_fwd >>= 1; - if (frames_fwd > frames_after_arf) -@@ -1989,8 +2073,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - frames_bwd = frames_fwd; - -- // For even length filter there is one more frame backward -- // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. -+ /* For even length filter there is one more frame backward -+ * than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. -+ */ - if (frames_bwd < half_gf_int) - frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1; - break; -@@ -2010,12 +2095,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->baseline_gf_interval = i; - } - -- // Now decide how many bits should be allocated to the GF group as a -- // proportion of those remaining in the kf group. -- // The final key frame group in the clip is treated as a special case -- // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. -- // This is also important for short clips where there may only be one -- // key frame. -+ /* -+ * Now decide how many bits should be allocated to the GF group as a -+ * proportion of those remaining in the kf group. -+ * The final key frame group in the clip is treated as a special case -+ * where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. -+ * This is also important for short clips where there may only be one -+ * key frame. -+ */ - if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - - cpi->common.current_video_frame)) - { -@@ -2023,7 +2110,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; - } - -- // Calculate the bits to be allocated to the group as a whole -+ /* Calculate the bits to be allocated to the group as a whole */ - if ((cpi->twopass.kf_group_bits > 0) && - (cpi->twopass.kf_group_error_left > 0)) - { -@@ -2034,31 +2121,32 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - cpi->twopass.gf_group_bits = 0; - -- cpi->twopass.gf_group_bits = -+ cpi->twopass.gf_group_bits = (int)( - (cpi->twopass.gf_group_bits < 0) - ? 0 - : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) -- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; -+ ? 
cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits); - -- // Clip cpi->twopass.gf_group_bits based on user supplied data rate -- // variability limit (cpi->oxcf.two_pass_vbrmax_section) -+ /* Clip cpi->twopass.gf_group_bits based on user supplied data rate -+ * variability limit (cpi->oxcf.two_pass_vbrmax_section) -+ */ - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; - -- // Reset the file position -+ /* Reset the file position */ - reset_fpf_position(cpi, start_pos); - -- // Update the record of error used so far (only done once per gf group) -+ /* Update the record of error used so far (only done once per gf group) */ - cpi->twopass.modified_error_used += gf_group_err; - -- // Assign bits to the arf or gf. -+ /* Assign bits to the arf or gf. */ - for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { - int Boost; - int allocation_chunks; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int gf_bits; - -- // For ARF frames -+ /* For ARF frames */ - if (cpi->source_alt_ref_pending && i == 0) - { - #if NEW_BOOST -@@ -2068,7 +2156,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - #endif - Boost += (cpi->baseline_gf_interval * 50); - -- // Set max and minimum boost and hence minimum allocation -+ /* Set max and minimum boost and hence minimum allocation */ - if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) - Boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (Boost < 125) -@@ -2077,13 +2165,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - allocation_chunks = - ((cpi->baseline_gf_interval + 1) * 100) + Boost; - } -- // Else for standard golden frames -+ /* Else for standard golden frames */ - else - { -- // boost based on inter / intra ratio of subsequent frames -+ /* boost based on inter / intra ratio of subsequent frames */ - Boost = (cpi->gfu_boost * GFQ_ADJUSTMENT) / 100; - -- // Set max and minimum boost and hence minimum allocation -+ /* Set max and minimum boost and hence minimum allocation */ - if (Boost > (cpi->baseline_gf_interval * 150)) - Boost = (cpi->baseline_gf_interval * 150); - else if (Boost < 125) -@@ -2093,22 +2181,24 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - (cpi->baseline_gf_interval * 100) + (Boost - 100); - } - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to prevent overflow */ - while (Boost > 1000) - { - Boost /= 2; - allocation_chunks /= 2; - } - -- // Calculate the number of bits to be spent on the gf or arf based on -- // the boost number -+ /* Calculate the number of bits to be spent on the gf or arf based on -+ * the boost number -+ */ - gf_bits = (int)((double)Boost * - (cpi->twopass.gf_group_bits / - (double)allocation_chunks)); - -- // If the frame that is to be boosted is simpler than the average for -- // the gf/arf group then use an alternative calculation -- // based on the error score of the frame itself -+ /* If the frame that is to be boosted is simpler than the average for -+ * the gf/arf group then use an alternative calculation -+ * based on the error score of the frame itself -+ */ - if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) - { - double alt_gf_grp_bits; -@@ -2127,9 +2217,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - gf_bits = 
alt_gf_bits; - } - } -- // Else if it is harder than other frames in the group make sure it at -- // least receives an allocation in keeping with its relative error -- // score, otherwise it may be worse off than an "un-boosted" frame -+ /* Else if it is harder than other frames in the group make sure it at -+ * least receives an allocation in keeping with its relative error -+ * score, otherwise it may be worse off than an "un-boosted" frame -+ */ - else - { - int alt_gf_bits = -@@ -2143,18 +2234,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - } - -- // Apply an additional limit for CBR -+ /* Apply an additional limit for CBR */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1)) -- cpi->twopass.gf_bits = cpi->buffer_level >> 1; -+ if (cpi->twopass.gf_bits > (int)(cpi->buffer_level >> 1)) -+ cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1); - } - -- // Dont allow a negative value for gf_bits -+ /* Dont allow a negative value for gf_bits */ - if (gf_bits < 0) - gf_bits = 0; - -- gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame -+ /* Add in minimum for a frame */ -+ gf_bits += cpi->min_frame_bandwidth; - - if (i == 0) - { -@@ -2162,33 +2254,39 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) - { -- cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame -+ /* Per frame bit target for this frame */ -+ cpi->per_frame_bandwidth = gf_bits; - } - } - - { -- // Adjust KF group bits and error remainin -- cpi->twopass.kf_group_error_left -= gf_group_err; -+ /* Adjust KF group bits and error remainin */ -+ cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err; - cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; - - if (cpi->twopass.kf_group_bits < 0) - cpi->twopass.kf_group_bits = 0; - -- // Note the error score left in the remaining frames of the group. -- // For normal GFs we want to remove the error score for the first frame of the group (except in Key frame case where this has already happened) -+ /* Note the error score left in the remaining frames of the group. -+ * For normal GFs we want to remove the error score for the first -+ * frame of the group (except in Key frame case where this has -+ * already happened) -+ */ - if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME) -- cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err; -+ cpi->twopass.gf_group_error_left = (int)(gf_group_err - -+ gf_first_frame_err); - else -- cpi->twopass.gf_group_error_left = gf_group_err; -+ cpi->twopass.gf_group_error_left = (int) gf_group_err; - - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - -- // This condition could fail if there are two kfs very close together -- // despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the -- // calculation of cpi->twopass.alt_extra_bits. -+ /* This condition could fail if there are two kfs very close together -+ * despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the -+ * calculation of cpi->twopass.alt_extra_bits. 
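/* Standalone sketch, not part of the patch above: the final clamping applied
 * to a golden/ARF frame's bit target in these hunks.  Under CBR the target
 * is additionally capped at half of the current buffer level, it is never
 * allowed to go negative, and the per-frame minimum is then added back in.
 * The values passed in main() are invented. */
#include <stdio.h>
#include <stdint.h>

static int clamp_gf_bits(int gf_bits, int is_cbr, int64_t buffer_level,
                         int min_frame_bandwidth)
{
    if (is_cbr && gf_bits > (int)(buffer_level >> 1))
        gf_bits = (int)(buffer_level >> 1);   /* CBR buffer-related cap */

    if (gf_bits < 0)
        gf_bits = 0;

    /* Add in the minimum allocation every frame receives. */
    return gf_bits + min_frame_bandwidth;
}

int main(void)
{
    printf("gf target: %d bits\n", clamp_gf_bits(120000, 1, 160000, 2000));
    return 0;
}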
-+ */ - if ( cpi->baseline_gf_interval >= 3 ) - { - #if NEW_BOOST -@@ -2217,7 +2315,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.alt_extra_bits = 0; - } - -- // Adjustments based on a measure of complexity of the section -+ /* Adjustments based on a measure of complexity of the section */ - if (cpi->common.frame_type != KEY_FRAME) - { - FIRSTPASS_STATS sectionstats; -@@ -2234,47 +2332,45 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - avg_stats(§ionstats); - -- cpi->twopass.section_intra_rating = -- sectionstats.intra_error / -- DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -+ cpi->twopass.section_intra_rating = (unsigned int) -+ (sectionstats.intra_error / -+ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); - - Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -- //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) -- //{ - cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); - - if (cpi->twopass.section_max_qfactor < 0.80) - cpi->twopass.section_max_qfactor = 0.80; - -- //} -- //else -- // cpi->twopass.section_max_qfactor = 1.0; -- - reset_fpf_position(cpi, start_pos); - } - } - --// Allocate bits to a normal frame that is neither a gf an arf or a key frame. -+/* Allocate bits to a normal frame that is neither a gf an arf or a key frame. */ - static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - { -- int target_frame_size; // gf_group_error_left -+ int target_frame_size; - - double modified_err; -- double err_fraction; // What portion of the remaining GF group error is used by this frame -+ double err_fraction; - -- int max_bits = frame_max_bits(cpi); // Max for a single frame -+ int max_bits = frame_max_bits(cpi); /* Max for a single frame */ - -- // Calculate modified prediction error used in bit allocation -+ /* Calculate modified prediction error used in bit allocation */ - modified_err = calculate_modified_err(cpi, this_frame); - -+ /* What portion of the remaining GF group error is used by this frame */ - if (cpi->twopass.gf_group_error_left > 0) -- err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame -+ err_fraction = modified_err / cpi->twopass.gf_group_error_left; - else - err_fraction = 0.0; - -- target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it? -+ /* How many of those bits available for allocation should we give it? */ -+ target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); - -- // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. -+ /* Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) -+ * at the top end. 
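/* Standalone sketch, not part of the patch above: the per-frame allocation
 * rule for ordinary frames described in the assign_std_frame_bits() hunks.
 * Each frame takes a share of the remaining GF-group bits in proportion to
 * its share of the remaining modified error, clipped to the per-frame
 * maximum, and the group's remaining error and bits are reduced accordingly.
 * All numbers are invented. */
#include <stdio.h>

int main(void)
{
    double modified_err = 1200.0;        /* this frame's modified error */
    double gf_group_error_left = 9600.0; /* error left in the group */
    int gf_group_bits = 480000;          /* bits left for the group */
    int max_bits = 90000;                /* per-frame maximum */
    int min_frame_bandwidth = 2000;

    double err_fraction =
        (gf_group_error_left > 0.0) ? modified_err / gf_group_error_left
                                    : 0.0;
    int target = (int)((double)gf_group_bits * err_fraction);

    /* Clip to 0 .. max_bits, and never more than the group has left. */
    if (target < 0) target = 0;
    if (target > max_bits) target = max_bits;
    if (target > gf_group_bits) target = gf_group_bits;

    gf_group_error_left -= modified_err;   /* adjust error remaining */
    gf_group_bits -= target;               /* adjust bits remaining */

    printf("frame target: %d bits\n", target + min_frame_bandwidth);
    printf("group has %d bits left\n", gf_group_bits);
    return 0;
}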
-+ */ - if (target_frame_size < 0) - target_frame_size = 0; - else -@@ -2286,22 +2382,25 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - target_frame_size = cpi->twopass.gf_group_bits; - } - -- cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining -- cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining -+ /* Adjust error and bits remaining */ -+ cpi->twopass.gf_group_error_left -= (int)modified_err; -+ cpi->twopass.gf_group_bits -= target_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - -- target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame. -+ /* Add in the minimum number of bits that is set aside for every frame. */ -+ target_frame_size += cpi->min_frame_bandwidth; - -- // Every other frame gets a few extra bits -+ /* Every other frame gets a few extra bits */ - if ( (cpi->common.frames_since_golden & 0x01) && - (cpi->frames_till_gf_update_due > 0) ) - { - target_frame_size += cpi->twopass.alt_extra_bits; - } - -- cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame -+ /* Per frame bit target for this frame */ -+ cpi->per_frame_bandwidth = target_frame_size; - } - - void vp8_second_pass(VP8_COMP *cpi) -@@ -2330,20 +2429,25 @@ void vp8_second_pass(VP8_COMP *cpi) - this_frame_intra_error = this_frame.intra_error; - this_frame_coded_error = this_frame.coded_error; - -- // keyframe and section processing ! -+ /* keyframe and section processing ! */ - if (cpi->twopass.frames_to_key == 0) - { -- // Define next KF group and assign bits to it -+ /* Define next KF group and assign bits to it */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - find_next_key_frame(cpi, &this_frame_copy); - -- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop -- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups. -- // This is temporary code till we decide what should really happen in this case. -+ /* Special case: Error error_resilient_mode mode does not make much -+ * sense for two pass but with its current meaning but this code is -+ * designed to stop outlandish behaviour if someone does set it when -+ * using two pass. It effectively disables GF groups. This is -+ * temporary code till we decide what should really happen in this -+ * case. 
-+ */ - if (cpi->oxcf.error_resilient_mode) - { -- cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits; -- cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left; -+ cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits; -+ cpi->twopass.gf_group_error_left = -+ (int)cpi->twopass.kf_group_error_left; - cpi->baseline_gf_interval = cpi->twopass.frames_to_key; - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - cpi->source_alt_ref_pending = 0; -@@ -2351,19 +2455,25 @@ void vp8_second_pass(VP8_COMP *cpi) - - } - -- // Is this a GF / ARF (Note that a KF is always also a GF) -+ /* Is this a GF / ARF (Note that a KF is always also a GF) */ - if (cpi->frames_till_gf_update_due == 0) - { -- // Define next gf group and assign bits to it -+ /* Define next gf group and assign bits to it */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - define_gf_group(cpi, &this_frame_copy); - -- // If we are going to code an altref frame at the end of the group and the current frame is not a key frame.... -- // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits -- // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well -+ /* If we are going to code an altref frame at the end of the group -+ * and the current frame is not a key frame.... If the previous -+ * group used an arf this frame has already benefited from that arf -+ * boost and it should not be given extra bits If the previous -+ * group was NOT coded using arf we may want to apply some boost to -+ * this GF as well -+ */ - if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) - { -- // Assign a standard frames worth of bits from those allocated to the GF group -+ /* Assign a standard frames worth of bits from those allocated -+ * to the GF group -+ */ - int bak = cpi->per_frame_bandwidth; - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); -@@ -2371,59 +2481,64 @@ void vp8_second_pass(VP8_COMP *cpi) - } - } - -- // Otherwise this is an ordinary frame -+ /* Otherwise this is an ordinary frame */ - else - { -- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop -- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups. -- // This is temporary code till we decide what should really happen in this case. -+ /* Special case: Error error_resilient_mode mode does not make much -+ * sense for two pass but with its current meaning but this code is -+ * designed to stop outlandish behaviour if someone does set it -+ * when using two pass. It effectively disables GF groups. This is -+ * temporary code till we decide what should really happen in this -+ * case. 
-+ */ - if (cpi->oxcf.error_resilient_mode) - { - cpi->frames_till_gf_update_due = cpi->twopass.frames_to_key; - - if (cpi->common.frame_type != KEY_FRAME) - { -- // Assign bits from those allocated to the GF group -+ /* Assign bits from those allocated to the GF group */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); - } - } - else - { -- // Assign bits from those allocated to the GF group -+ /* Assign bits from those allocated to the GF group */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); - } - } - -- // Keep a globally available copy of this and the next frame's iiratio. -- cpi->twopass.this_iiratio = this_frame_intra_error / -- DOUBLE_DIVIDE_CHECK(this_frame_coded_error); -+ /* Keep a globally available copy of this and the next frame's iiratio. */ -+ cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error / -+ DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); - { - FIRSTPASS_STATS next_frame; - if ( lookup_next_frame_stats(cpi, &next_frame) != EOF ) - { -- cpi->twopass.next_iiratio = next_frame.intra_error / -- DOUBLE_DIVIDE_CHECK(next_frame.coded_error); -+ cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error / -+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - } - } - -- // Set nominal per second bandwidth for this frame -- cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate; -+ /* Set nominal per second bandwidth for this frame */ -+ cpi->target_bandwidth = (int) -+ (cpi->per_frame_bandwidth * cpi->output_frame_rate); - if (cpi->target_bandwidth < 0) - cpi->target_bandwidth = 0; - - -- // Account for mv, mode and other overheads. -- overhead_bits = estimate_modemvcost( -+ /* Account for mv, mode and other overheads. */ -+ overhead_bits = (int)estimate_modemvcost( - cpi, &cpi->twopass.total_left_stats ); - -- // Special case code for first frame. -+ /* Special case code for first frame. */ - if (cpi->common.current_video_frame == 0) - { - cpi->twopass.est_max_qcorrection_factor = 1.0; - -- // Set a cq_level in constrained quality mode. -+ /* Set a cq_level in constrained quality mode. */ - if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) - { - int est_cq; -@@ -2439,7 +2554,7 @@ void vp8_second_pass(VP8_COMP *cpi) - cpi->cq_target_quality = est_cq; - } - -- // guess at maxq needed in 2nd pass -+ /* guess at maxq needed in 2nd pass */ - cpi->twopass.maxq_max_limit = cpi->worst_quality; - cpi->twopass.maxq_min_limit = cpi->best_quality; - -@@ -2449,11 +2564,12 @@ void vp8_second_pass(VP8_COMP *cpi) - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits ); - -- // Limit the maxq value returned subsequently. -- // This increases the risk of overspend or underspend if the initial -- // estimate for the clip is bad, but helps prevent excessive -- // variation in Q, especially near the end of a clip -- // where for example a small overspend may cause Q to crash -+ /* Limit the maxq value returned subsequently. -+ * This increases the risk of overspend or underspend if the initial -+ * estimate for the clip is bad, but helps prevent excessive -+ * variation in Q, especially near the end of a clip -+ * where for example a small overspend may cause Q to crash -+ */ - cpi->twopass.maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality) - ? 
(tmp_q + 32) : cpi->worst_quality; - cpi->twopass.maxq_min_limit = ((tmp_q - 32) > cpi->best_quality) -@@ -2463,10 +2579,11 @@ void vp8_second_pass(VP8_COMP *cpi) - cpi->ni_av_qi = tmp_q; - } - -- // The last few frames of a clip almost always have to few or too many -- // bits and for the sake of over exact rate control we dont want to make -- // radical adjustments to the allowed quantizer range just to use up a -- // few surplus bits or get beneath the target rate. -+ /* The last few frames of a clip almost always have to few or too many -+ * bits and for the sake of over exact rate control we dont want to make -+ * radical adjustments to the allowed quantizer range just to use up a -+ * few surplus bits or get beneath the target rate. -+ */ - else if ( (cpi->common.current_video_frame < - (((unsigned int)cpi->twopass.total_stats.count * 255)>>8)) && - ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < -@@ -2481,7 +2598,7 @@ void vp8_second_pass(VP8_COMP *cpi) - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits ); - -- // Move active_worst_quality but in a damped way -+ /* Move active_worst_quality but in a damped way */ - if (tmp_q > cpi->active_worst_quality) - cpi->active_worst_quality ++; - else if (tmp_q < cpi->active_worst_quality) -@@ -2493,7 +2610,7 @@ void vp8_second_pass(VP8_COMP *cpi) - - cpi->twopass.frames_to_key --; - -- // Update the total stats remaining sturcture -+ /* Update the total stats remaining sturcture */ - subtract_stats(&cpi->twopass.total_left_stats, &this_frame ); - } - -@@ -2502,8 +2619,9 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - { - int is_viable_kf = 0; - -- // Does the frame satisfy the primary criteria of a key frame -- // If so, then examine how well it predicts subsequent frames -+ /* Does the frame satisfy the primary criteria of a key frame -+ * If so, then examine how well it predicts subsequent frames -+ */ - if ((this_frame->pcnt_second_ref < 0.10) && - (next_frame->pcnt_second_ref < 0.10) && - ((this_frame->pcnt_inter < 0.05) || -@@ -2530,10 +2648,10 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - - vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); - -- // Note the starting file position so we can reset to it -+ /* Note the starting file position so we can reset to it */ - start_pos = cpi->twopass.stats_in; - -- // Examine how well the key frame predicts subsequent frames -+ /* Examine how well the key frame predicts subsequent frames */ - for (i = 0 ; i < 16; i++) - { - next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)) ; -@@ -2541,18 +2659,16 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - if (next_iiratio > RMAX) - next_iiratio = RMAX; - -- // Cumulative effect of decay in prediction quality -+ /* Cumulative effect of decay in prediction quality */ - if (local_next_frame.pcnt_inter > 0.85) - decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; - else - decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0); - -- //decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; -- -- // Keep a running total -+ /* Keep a running total */ - boost_score += (decay_accumulator * next_iiratio); - -- // Test various breakout clauses -+ /* Test various breakout clauses */ - if ((local_next_frame.pcnt_inter < 0.05) || - (next_iiratio < 1.5) || - (((local_next_frame.pcnt_inter - -@@ 
-2567,17 +2683,19 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - - old_boost_score = boost_score; - -- // Get the next frame details -+ /* Get the next frame details */ - if (EOF == input_stats(cpi, &local_next_frame)) - break; - } - -- // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on -+ /* If there is tolerable prediction for at least the next 3 frames -+ * then break out else discard this pottential key frame and move on -+ */ - if (boost_score > 5.0 && (i > 3)) - is_viable_kf = 1; - else - { -- // Reset the file position -+ /* Reset the file position */ - reset_fpf_position(cpi, start_pos); - - is_viable_kf = 0; -@@ -2605,65 +2723,71 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - double kf_group_coded_err = 0.0; - double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; - -- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean -+ vpx_memset(&next_frame, 0, sizeof(next_frame)); - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - start_position = cpi->twopass.stats_in; - - cpi->common.frame_type = KEY_FRAME; - -- // is this a forced key frame by interval -+ /* is this a forced key frame by interval */ - cpi->this_key_frame_forced = cpi->next_key_frame_forced; - -- // Clear the alt ref active flag as this can never be active on a key frame -+ /* Clear the alt ref active flag as this can never be active on a key -+ * frame -+ */ - cpi->source_alt_ref_active = 0; - -- // Kf is always a gf so clear frames till next gf counter -+ /* Kf is always a gf so clear frames till next gf counter */ - cpi->frames_till_gf_update_due = 0; - - cpi->twopass.frames_to_key = 1; - -- // Take a copy of the initial frame details -+ /* Take a copy of the initial frame details */ - vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); - -- cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group -- cpi->twopass.kf_group_error_left = 0; // Group modified error score. -+ cpi->twopass.kf_group_bits = 0; -+ cpi->twopass.kf_group_error_left = 0; - - kf_mod_err = calculate_modified_err(cpi, this_frame); - -- // find the next keyframe -+ /* find the next keyframe */ - i = 0; - while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) - { -- // Accumulate kf group error -+ /* Accumulate kf group error */ - kf_group_err += calculate_modified_err(cpi, this_frame); - -- // These figures keep intra and coded error counts for all frames including key frames in the group. -- // The effect of the key frame itself can be subtracted out using the first_frame data collected above -+ /* These figures keep intra and coded error counts for all frames -+ * including key frames in the group. The effect of the key frame -+ * itself can be subtracted out using the first_frame data -+ * collected above -+ */ - kf_group_intra_err += this_frame->intra_error; - kf_group_coded_err += this_frame->coded_error; - -- // load a the next frame's stats -+ /* load a the next frame's stats */ - vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); - input_stats(cpi, this_frame); - -- // Provided that we are not at the end of the file... -+ /* Provided that we are not at the end of the file... 
*/ - if (cpi->oxcf.auto_key - && lookup_next_frame_stats(cpi, &next_frame) != EOF) - { -- // Normal scene cut check -+ /* Normal scene cut check */ - if ( ( i >= MIN_GF_INTERVAL ) && - test_candidate_kf(cpi, &last_frame, this_frame, &next_frame) ) - { - break; - } - -- // How fast is prediction quality decaying -+ /* How fast is prediction quality decaying */ - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - -- // We want to know something about the recent past... rather than -- // as used elsewhere where we are concened with decay in prediction -- // quality since the last GF or KF. -+ /* We want to know something about the recent past... rather than -+ * as used elsewhere where we are concened with decay in prediction -+ * quality since the last GF or KF. -+ */ - recent_loop_decay[i%8] = loop_decay_rate; - decay_accumulator = 1.0; - for (j = 0; j < 8; j++) -@@ -2671,8 +2795,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - decay_accumulator = decay_accumulator * recent_loop_decay[j]; - } - -- // Special check for transition or high motion followed by a -- // to a static scene. -+ /* Special check for transition or high motion followed by a -+ * static scene. -+ */ - if ( detect_transition_to_still( cpi, i, - (cpi->key_frame_frequency-i), - loop_decay_rate, -@@ -2682,11 +2807,12 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - - -- // Step on to the next frame -+ /* Step on to the next frame */ - cpi->twopass.frames_to_key ++; - -- // If we don't have a real key frame within the next two -- // forcekeyframeevery intervals then break out of the loop. -+ /* If we don't have a real key frame within the next two -+ * forcekeyframeevery intervals then break out of the loop. -+ */ - if (cpi->twopass.frames_to_key >= 2 *(int)cpi->key_frame_frequency) - break; - } else -@@ -2695,10 +2821,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - i++; - } - -- // If there is a max kf interval set by the user we must obey it. -- // We already breakout of the loop above at 2x max. -- // This code centers the extra kf if the actual natural -- // interval is between 1x and 2x -+ /* If there is a max kf interval set by the user we must obey it. -+ * We already breakout of the loop above at 2x max. 
-+ * This code centers the extra kf if the actual natural -+ * interval is between 1x and 2x -+ */ - if (cpi->oxcf.auto_key - && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency ) - { -@@ -2707,29 +2834,29 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - cpi->twopass.frames_to_key /= 2; - -- // Copy first frame details -+ /* Copy first frame details */ - vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); - -- // Reset to the start of the group -+ /* Reset to the start of the group */ - reset_fpf_position(cpi, start_position); - - kf_group_err = 0; - kf_group_intra_err = 0; - kf_group_coded_err = 0; - -- // Rescan to get the correct error data for the forced kf group -+ /* Rescan to get the correct error data for the forced kf group */ - for( i = 0; i < cpi->twopass.frames_to_key; i++ ) - { -- // Accumulate kf group errors -+ /* Accumulate kf group errors */ - kf_group_err += calculate_modified_err(cpi, &tmp_frame); - kf_group_intra_err += tmp_frame.intra_error; - kf_group_coded_err += tmp_frame.coded_error; - -- // Load a the next frame's stats -+ /* Load a the next frame's stats */ - input_stats(cpi, &tmp_frame); - } - -- // Reset to the start of the group -+ /* Reset to the start of the group */ - reset_fpf_position(cpi, current_pos); - - cpi->next_key_frame_forced = 1; -@@ -2737,58 +2864,63 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - cpi->next_key_frame_forced = 0; - -- // Special case for the last frame of the file -+ /* Special case for the last frame of the file */ - if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) - { -- // Accumulate kf group error -+ /* Accumulate kf group error */ - kf_group_err += calculate_modified_err(cpi, this_frame); - -- // These figures keep intra and coded error counts for all frames including key frames in the group. -- // The effect of the key frame itself can be subtracted out using the first_frame data collected above -+ /* These figures keep intra and coded error counts for all frames -+ * including key frames in the group. The effect of the key frame -+ * itself can be subtracted out using the first_frame data -+ * collected above -+ */ - kf_group_intra_err += this_frame->intra_error; - kf_group_coded_err += this_frame->coded_error; - } - -- // Calculate the number of bits that should be assigned to the kf group. -+ /* Calculate the number of bits that should be assigned to the kf group. */ - if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0)) - { -- // Max for a single normal frame (not key frame) -+ /* Max for a single normal frame (not key frame) */ - int max_bits = frame_max_bits(cpi); - -- // Maximum bits for the kf group -+ /* Maximum bits for the kf group */ - int64_t max_grp_bits; - -- // Default allocation based on bits left and relative -- // complexity of the section -+ /* Default allocation based on bits left and relative -+ * complexity of the section -+ */ - cpi->twopass.kf_group_bits = (int64_t)( cpi->twopass.bits_left * - ( kf_group_err / - cpi->twopass.modified_error_left )); - -- // Clip based on maximum per frame rate defined by the user. -+ /* Clip based on maximum per frame rate defined by the user. */ - max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key; - if (cpi->twopass.kf_group_bits > max_grp_bits) - cpi->twopass.kf_group_bits = max_grp_bits; - -- // Additional special case for CBR if buffer is getting full. -+ /* Additional special case for CBR if buffer is getting full. 
*/ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; -- int buffer_lvl = cpi->buffer_level; -+ int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; -+ int64_t buffer_lvl = cpi->buffer_level; - -- // If the buffer is near or above the optimal and this kf group is -- // not being allocated much then increase the allocation a bit. -+ /* If the buffer is near or above the optimal and this kf group is -+ * not being allocated much then increase the allocation a bit. -+ */ - if (buffer_lvl >= opt_buffer_lvl) - { -- int high_water_mark = (opt_buffer_lvl + -+ int64_t high_water_mark = (opt_buffer_lvl + - cpi->oxcf.maximum_buffer_size) >> 1; - - int64_t av_group_bits; - -- // Av bits per frame * number of frames -+ /* Av bits per frame * number of frames */ - av_group_bits = (int64_t)cpi->av_per_frame_bandwidth * - (int64_t)cpi->twopass.frames_to_key; - -- // We are at or above the maximum. -+ /* We are at or above the maximum. */ - if (cpi->buffer_level >= high_water_mark) - { - int64_t min_group_bits; -@@ -2800,7 +2932,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (cpi->twopass.kf_group_bits < min_group_bits) - cpi->twopass.kf_group_bits = min_group_bits; - } -- // We are above optimal but below the maximum -+ /* We are above optimal but below the maximum */ - else if (cpi->twopass.kf_group_bits < av_group_bits) - { - int64_t bits_below_av = av_group_bits - -@@ -2817,13 +2949,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - cpi->twopass.kf_group_bits = 0; - -- // Reset the first pass file position -+ /* Reset the first pass file position */ - reset_fpf_position(cpi, start_position); - -- // determine how big to make this keyframe based on how well the subsequent frames use inter blocks -+ /* determine how big to make this keyframe based on how well the -+ * subsequent frames use inter blocks -+ */ - decay_accumulator = 1.0; - boost_score = 0.0; -- loop_decay_rate = 1.00; // Starting decay rate -+ loop_decay_rate = 1.00; /* Starting decay rate */ - - for (i = 0 ; i < cpi->twopass.frames_to_key ; i++) - { -@@ -2842,7 +2976,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (r > RMAX) - r = RMAX; - -- // How fast is prediction quality decaying -+ /* How fast is prediction quality decaying */ - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - - decay_accumulator = decay_accumulator * loop_decay_rate; -@@ -2875,31 +3009,26 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - avg_stats(§ionstats); - -- cpi->twopass.section_intra_rating = -- sectionstats.intra_error -- / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -+ cpi->twopass.section_intra_rating = (unsigned int) -+ (sectionstats.intra_error -+ / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); - - Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -- // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) -- //{ - cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); - - if (cpi->twopass.section_max_qfactor < 0.80) - cpi->twopass.section_max_qfactor = 0.80; -- -- //} -- //else -- // cpi->twopass.section_max_qfactor = 1.0; - } - -- // When using CBR apply additional buffer fullness related upper limits -+ /* When using CBR apply additional buffer fullness related upper limits */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - double max_boost; - - if 
(cpi->drop_frames_allowed) - { -- int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100); -+ int df_buffer_level = (int)(cpi->oxcf.drop_frames_water_mark -+ * (cpi->oxcf.optimal_buffer_level / 100)); - - if (cpi->buffer_level > df_buffer_level) - max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); -@@ -2919,18 +3048,18 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - boost_score = max_boost; - } - -- // Reset the first pass file position -+ /* Reset the first pass file position */ - reset_fpf_position(cpi, start_position); - -- // Work out how many bits to allocate for the key frame itself -+ /* Work out how many bits to allocate for the key frame itself */ - if (1) - { -- int kf_boost = boost_score; -+ int kf_boost = (int)boost_score; - int allocation_chunks; - int Counter = cpi->twopass.frames_to_key; - int alt_kf_bits; - YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; -- // Min boost based on kf interval -+ /* Min boost based on kf interval */ - #if 0 - - while ((kf_boost < 48) && (Counter > 0)) -@@ -2948,32 +3077,33 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (kf_boost > 48) kf_boost = 48; - } - -- // bigger frame sizes need larger kf boosts, smaller frames smaller boosts... -+ /* bigger frame sizes need larger kf boosts, smaller frames smaller -+ * boosts... -+ */ - if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240)) - kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240); - else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240)) - kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height); - -- kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100 -- -- // Adjustment to boost based on recent average q -- //kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100; -- -- if (kf_boost < 250) // Min KF boost -+ /* Min KF boost */ -+ kf_boost = (int)((double)kf_boost * 100.0) >> 4; /* Scale 16 to 100 */ -+ if (kf_boost < 250) - kf_boost = 250; - -- // We do three calculations for kf size. -- // The first is based on the error score for the whole kf group. -- // The second (optionaly) on the key frames own error if this is -- // smaller than the average for the group. -- // The final one insures that the frame receives at least the -- // allocation it would have received based on its own error score vs -- // the error score remaining -- // Special case if the sequence appears almost totaly static -- // as measured by the decay accumulator. In this case we want to -- // spend almost all of the bits on the key frame. -- // cpi->twopass.frames_to_key-1 because key frame itself is taken -- // care of by kf_boost. -+ /* -+ * We do three calculations for kf size. -+ * The first is based on the error score for the whole kf group. -+ * The second (optionaly) on the key frames own error if this is -+ * smaller than the average for the group. -+ * The final one insures that the frame receives at least the -+ * allocation it would have received based on its own error score vs -+ * the error score remaining -+ * Special case if the sequence appears almost totaly static -+ * as measured by the decay accumulator. In this case we want to -+ * spend almost all of the bits on the key frame. -+ * cpi->twopass.frames_to_key-1 because key frame itself is taken -+ * care of by kf_boost. 
-+ */ - if ( decay_accumulator >= 0.99 ) - { - allocation_chunks = -@@ -2985,7 +3115,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost; - } - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to prevent overflow */ - while (kf_boost > 1000) - { - kf_boost /= 2; -@@ -2994,20 +3124,21 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits; - -- // Calculate the number of bits to be spent on the key frame -+ /* Calculate the number of bits to be spent on the key frame */ - cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks)); - -- // Apply an additional limit for CBR -+ /* Apply an additional limit for CBR */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- if (cpi->twopass.kf_bits > ((3 * cpi->buffer_level) >> 2)) -- cpi->twopass.kf_bits = (3 * cpi->buffer_level) >> 2; -+ if (cpi->twopass.kf_bits > (int)((3 * cpi->buffer_level) >> 2)) -+ cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2); - } - -- // If the key frame is actually easier than the average for the -- // kf group (which does sometimes happen... eg a blank intro frame) -- // Then use an alternate calculation based on the kf error score -- // which should give a smaller key frame. -+ /* If the key frame is actually easier than the average for the -+ * kf group (which does sometimes happen... eg a blank intro frame) -+ * Then use an alternate calculation based on the kf error score -+ * which should give a smaller key frame. -+ */ - if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key) - { - double alt_kf_grp_bits = -@@ -3023,9 +3154,10 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.kf_bits = alt_kf_bits; - } - } -- // Else if it is much harder than other frames in the group make sure -- // it at least receives an allocation in keeping with its relative -- // error score -+ /* Else if it is much harder than other frames in the group make sure -+ * it at least receives an allocation in keeping with its relative -+ * error score -+ */ - else - { - alt_kf_bits = -@@ -3040,17 +3172,23 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - - cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; -- cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance -+ /* Add in the minimum frame allowance */ -+ cpi->twopass.kf_bits += cpi->min_frame_bandwidth; -+ -+ /* Peer frame bit target for this frame */ -+ cpi->per_frame_bandwidth = cpi->twopass.kf_bits; - -- cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame -- cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate; // Convert to a per second bitrate -+ /* Convert to a per second bitrate */ -+ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits * -+ cpi->output_frame_rate); - } - -- // Note the total error score of the kf group minus the key frame itself -+ /* Note the total error score of the kf group minus the key frame itself */ - cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err); - -- // Adjust the count of total modified error left. 
-- // The count of bits left is adjusted elsewhere based on real coded frame sizes -+ /* Adjust the count of total modified error left. The count of bits left -+ * is adjusted elsewhere based on real coded frame sizes -+ */ - cpi->twopass.modified_error_left -= kf_group_err; - - if (cpi->oxcf.allow_spatial_resampling) -@@ -3063,7 +3201,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - int new_width = cpi->oxcf.Width; - int new_height = cpi->oxcf.Height; - -- int projected_buffer_level = cpi->buffer_level; -+ int projected_buffer_level = (int)cpi->buffer_level; - int tmp_q; - - double projected_bits_perframe; -@@ -3076,40 +3214,47 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height)) - last_kf_resampled = 1; - -- // Set back to unscaled by defaults -+ /* Set back to unscaled by defaults */ - cpi->common.horiz_scale = NORMAL; - cpi->common.vert_scale = NORMAL; - -- // Calculate Average bits per frame. -- //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats.count - cpi->common.current_video_frame); -+ /* Calculate Average bits per frame. */ - av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate); -- //if ( av_bits_per_frame < 0.0 ) -- // av_bits_per_frame = 0.0 - -- // CBR... Use the clip average as the target for deciding resample -+ /* CBR... Use the clip average as the target for deciding resample */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - bits_per_frame = av_bits_per_frame; - } - -- // In VBR we want to avoid downsampling in easy section unless we are under extreme pressure -- // So use the larger of target bitrate for this sectoion or average bitrate for sequence -+ /* In VBR we want to avoid downsampling in easy section unless we -+ * are under extreme pressure So use the larger of target bitrate -+ * for this section or average bitrate for sequence -+ */ - else - { -- bits_per_frame = cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key; // This accounts for how hard the section is... -+ /* This accounts for how hard the section is... 
*/ -+ bits_per_frame = (double) -+ (cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key); - -- if (bits_per_frame < av_bits_per_frame) // Dont turn to resampling in easy sections just because they have been assigned a small number of bits -+ /* Dont turn to resampling in easy sections just because they -+ * have been assigned a small number of bits -+ */ -+ if (bits_per_frame < av_bits_per_frame) - bits_per_frame = av_bits_per_frame; - } - -- // bits_per_frame should comply with our minimum -+ /* bits_per_frame should comply with our minimum */ - if (bits_per_frame < (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100)) - bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - -- // Work out if spatial resampling is necessary -- kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio); -+ /* Work out if spatial resampling is necessary */ -+ kf_q = estimate_kf_group_q(cpi, err_per_frame, -+ (int)bits_per_frame, group_iiratio); - -- // If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section -+ /* If we project a required Q higher than the maximum allowed Q then -+ * make a guess at the actual size of frames in this section -+ */ - projected_bits_perframe = bits_per_frame; - tmp_q = kf_q; - -@@ -3119,8 +3264,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - tmp_q--; - } - -- // Guess at buffer level at the end of the section -- projected_buffer_level = cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key); -+ /* Guess at buffer level at the end of the section */ -+ projected_buffer_level = (int) -+ (cpi->buffer_level - (int) -+ ((projected_bits_perframe - av_bits_per_frame) * -+ cpi->twopass.frames_to_key)); - - if (0) - { -@@ -3129,15 +3277,17 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - fclose(f); - } - -- // The trigger for spatial resampling depends on the various parameters such as whether we are streaming (CBR) or VBR. -+ /* The trigger for spatial resampling depends on the various -+ * parameters such as whether we are streaming (CBR) or VBR. 
-+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- // Trigger resample if we are projected to fall below down sample level or -- // resampled last time and are projected to remain below the up sample level -+ /* Trigger resample if we are projected to fall below down -+ * sample level or resampled last time and are projected to -+ * remain below the up sample level -+ */ - if ((projected_buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) || - (last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)))) -- //( ((cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))) && -- // ((projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))) )) - resample_trigger = 1; - else - resample_trigger = 0; -@@ -3147,9 +3297,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate)); - int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level; - -- if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced -- ((kf_q > cpi->worst_quality) && // Projected Q higher than allowed and ... -- (over_spend > clip_bits / 20))) // ... Overspend > 5% of total bits -+ /* If triggered last time the threshold for triggering again is -+ * reduced: -+ * -+ * Projected Q higher than allowed and Overspend > 5% of total -+ * bits -+ */ -+ if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || -+ ((kf_q > cpi->worst_quality) && -+ (over_spend > clip_bits / 20))) - resample_trigger = 1; - else - resample_trigger = 0; -@@ -3171,13 +3327,19 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; - new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; - -- // Reducing the area to 1/4 does not reduce the complexity (err_per_frame) to 1/4... -- // effective_sizeratio attempts to provide a crude correction for this -+ /* Reducing the area to 1/4 does not reduce the complexity -+ * (err_per_frame) to 1/4... effective_sizeratio attempts -+ * to provide a crude correction for this -+ */ - effective_size_ratio = (double)(new_width * new_height) / (double)(cpi->oxcf.Width * cpi->oxcf.Height); - effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0; - -- // Now try again and see what Q we get with the smaller image size -- kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio); -+ /* Now try again and see what Q we get with the smaller -+ * image size -+ */ -+ kf_q = estimate_kf_group_q(cpi, -+ err_per_frame * effective_size_ratio, -+ (int)bits_per_frame, group_iiratio); - - if (0) - { -diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c -index 4c92281..ce2ce08 100644 ---- a/vp8/encoder/lookahead.c -+++ b/vp8/encoder/lookahead.c -@@ -118,10 +118,11 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, - ctx->sz++; - buf = pop(ctx, &ctx->write_idx); - -- // Only do this partial copy if the following conditions are all met: -- // 1. Lookahead queue has has size of 1. -- // 2. Active map is provided. -- // 3. This is not a key frame, golden nor altref frame. -+ /* Only do this partial copy if the following conditions are all met: -+ * 1. 
Lookahead queue has has size of 1. -+ * 2. Active map is provided. -+ * 3. This is not a key frame, golden nor altref frame. -+ */ - if (ctx->max_sz == 1 && active_map && !flags) - { - for (row = 0; row < mb_rows; ++row) -@@ -130,18 +131,18 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, - - while (1) - { -- // Find the first active macroblock in this row. -+ /* Find the first active macroblock in this row. */ - for (; col < mb_cols; ++col) - { - if (active_map[col]) - break; - } - -- // No more active macroblock in this row. -+ /* No more active macroblock in this row. */ - if (col == mb_cols) - break; - -- // Find the end of active region in this row. -+ /* Find the end of active region in this row. */ - active_end = col; - - for (; active_end < mb_cols; ++active_end) -@@ -150,13 +151,13 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, - break; - } - -- // Only copy this active region. -+ /* Only copy this active region. */ - vp8_copy_and_extend_frame_with_rect(src, &buf->img, - row << 4, - col << 4, 16, - (active_end - col) << 4); - -- // Start again from the end of this active region. -+ /* Start again from the end of this active region. */ - col = active_end; - } - -diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c -index 67e4f7e..b08c7a5 100644 ---- a/vp8/encoder/mcomp.c -+++ b/vp8/encoder/mcomp.c -@@ -25,26 +25,35 @@ static int mv_mode_cts [4] [2]; - - int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) - { -- // MV costing is based on the distribution of vectors in the previous frame and as such will tend to -- // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the -- // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks. -- // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors. -+ /* MV costing is based on the distribution of vectors in the previous -+ * frame and as such will tend to over state the cost of vectors. In -+ * addition coding a new vector can have a knock on effect on the cost -+ * of subsequent vectors and the quality of prediction from NEAR and -+ * NEAREST for subsequent blocks. The "Weight" parameter allows, to a -+ * limited extent, for some account to be taken of these factors. -+ */ - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7; - } - - static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) - { -- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + -- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) -- * error_per_bit + 128) >> 8; -+ /* Ignore mv costing if mvcost is NULL */ -+ if (mvcost) -+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + -+ mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) -+ * error_per_bit + 128) >> 8; -+ return 0; - } - - static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit) - { - /* Calculate sad error cost on full pixel basis. 
*/ -- return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + -- mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) -- * error_per_bit + 128) >> 8; -+ /* Ignore mv costing if mvsadcost is NULL */ -+ if (mvsadcost) -+ return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + -+ mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) -+ * error_per_bit + 128) >> 8; -+ return 0; - } - - void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) -@@ -53,7 +62,7 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) - int search_site_count = 0; - - -- // Generate offsets for 4 search sites per step. -+ /* Generate offsets for 4 search sites per step. */ - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; -@@ -63,31 +72,31 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) - while (Len > 0) - { - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - -- // Contract. -+ /* Contract. */ - Len /= 2; - } - -@@ -100,7 +109,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) - int Len; - int search_site_count = 0; - -- // Generate offsets for 8 search sites per step. -+ /* Generate offsets for 8 search sites per step. */ - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; -@@ -110,56 +119,56 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) - while (Len > 0) - { - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride - Len; - search_site_count++; - -- // Compute offsets for search sites. 
-+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride + Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride - Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride + Len; - search_site_count++; - - -- // Contract. -+ /* Contract. */ - Len /= 2; - } - -@@ -176,13 +185,20 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) - * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we - * could reduce the area. - */ --#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) --#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector --#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc --#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. -+ -+/* estimated cost of a motion vector (r,c) */ -+#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0) -+/* pointer to predictor base of a motionvector */ -+#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) -+/* convert motion vector component to offset for svf calc */ -+#define SP(x) (((x)&3)<<1) -+/* returns subpixel variance error function. 
*/ -+#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) - #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; --#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost --#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best -+/* returns distortion + motion vector cost */ -+#define ERR(r,c) (MVC(r,c)+DIST(r,c)) -+/* checks if (r,c) has better score than previous best */ -+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;) - - int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *bestmv, int_mv *ref_mv, -@@ -196,7 +212,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; - int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2; - int tr = br, tc = bc; -- unsigned int besterr = INT_MAX; -+ unsigned int besterr; - unsigned int left, right, up, down, diag; - unsigned int sse; - unsigned int whichdir; -@@ -221,7 +237,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - unsigned char *y; - int buf_r1, buf_r2, buf_c1, buf_c2; - -- // Clamping to avoid out-of-range data access -+ /* Clamping to avoid out-of-range data access */ - buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; - buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; - buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; -@@ -238,19 +254,21 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - - offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; - -- // central mv -+ /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - -- // calculate central point error -+ /* calculate central point error */ - besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - -- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) -+ /* TODO: Each subsequent iteration checks at least one point in common -+ * with the last iteration could be 2 ( if diag selected) -+ */ - while (--halfiters) - { -- // 1/2 pel -+ /* 1/2 pel */ - CHECK_BETTER(left, tr, tc - 2); - CHECK_BETTER(right, tr, tc + 2); - CHECK_BETTER(up, tr - 2, tc); -@@ -274,7 +292,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - break; - } - -- // no reason to check the same one again. -+ /* no reason to check the same one again. 
*/ - if (tr == br && tc == bc) - break; - -@@ -282,8 +300,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - tc = bc; - } - -- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) -- // 1/4 pel -+ /* TODO: Each subsequent iteration checks at least one point in common -+ * with the last iteration could be 2 ( if diag selected) -+ */ -+ -+ /* 1/4 pel */ - while (--quarteriters) - { - CHECK_BETTER(left, tr, tc - 1); -@@ -309,7 +330,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - break; - } - -- // no reason to check the same one again. -+ /* no reason to check the same one again. */ - if (tr == br && tc == bc) - break; - -@@ -367,17 +388,17 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - y_stride = pre_stride; - #endif - -- // central mv -+ /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - -- // calculate central point error -+ /* calculate central point error */ - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - -- // go left then right and check error -+ /* go left then right and check error */ - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); -@@ -403,7 +424,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // go up then down and check error -+ /* go up then down and check error */ - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); -@@ -430,10 +451,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - } - - -- // now check 1 more diagonal -+ /* now check 1 more diagonal */ - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); -- //for(whichdir =0;whichdir<4;whichdir++) -- //{ - this_mv = startmv; - - switch (whichdir) -@@ -471,10 +490,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - --// } -- - -- // time to check quarter pels. -+ /* time to check quarter pels. */ - if (bestmv->as_mv.row < startmv.as_mv.row) - y -= y_stride; - -@@ -485,7 +502,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - - - -- // go left then right and check error -+ /* go left then right and check error */ - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) -@@ -521,7 +538,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // go up then down and check error -+ /* go up then down and check error */ - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) -@@ -558,11 +575,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - } - - -- // now check 1 more diagonal -+ /* now check 1 more diagonal */ - whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - --// for(whichdir=0;whichdir<4;whichdir++) --// { - this_mv = startmv; - - switch (whichdir) -@@ -684,17 +699,17 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - y_stride = pre_stride; - #endif - -- // central mv -+ /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - -- // calculate central point error -+ /* calculate central point error */ - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - -- // go left then right and check error -+ /* go left then right and check error */ - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); -@@ -720,7 +735,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // go up then down and check error -+ /* go up then down and check error */ - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); -@@ -746,7 +761,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // now check 1 more diagonal - -+ /* now check 1 more diagonal - */ - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - -@@ -855,7 +870,7 @@ int vp8_hex_search - int in_what_stride = pre_stride; - int br, bc; - int_mv this_mv; -- unsigned int bestsad = 0x7fffffff; -+ unsigned int bestsad; - unsigned int thissad; - unsigned char *base_offset; - unsigned char *this_offset; -@@ -869,18 +884,17 @@ int vp8_hex_search - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // adjust ref_mv to make sure it is within MV range -+ /* adjust ref_mv to make sure it is within MV range */ - vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - br = ref_mv->as_mv.row; - bc = ref_mv->as_mv.col; - -- // Work out the start point for the search -+ /* Work out the start point for the search */ - base_offset = (unsigned char *)(base_pre + d->offset); - this_offset = base_offset + (br * (pre_stride)) + bc; - this_mv.as_mv.row = br; - this_mv.as_mv.col = bc; -- bestsad = vfp->sdf( what, what_stride, this_offset, -- in_what_stride, 0x7fffffff) -+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) - + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); - - #if CONFIG_MULTI_RES_ENCODING -@@ -895,8 +909,7 @@ int vp8_hex_search - dia_range = 8; - #endif - -- // hex search -- //j=0 -+ /* hex search */ - CHECK_BOUNDS(2) - - if(all_in) -@@ -906,7 +919,7 @@ int vp8_hex_search - this_mv.as_mv.row = br + hex[i].row; - this_mv.as_mv.col = bc + hex[i].col; - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; -- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - }else -@@ -917,7 +930,7 @@ int vp8_hex_search - this_mv.as_mv.col = bc + hex[i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; -- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } -@@ -943,7 +956,7 
@@ int vp8_hex_search - this_mv.as_mv.row = br + next_chkpts[k][i].row; - this_mv.as_mv.col = bc + next_chkpts[k][i].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - }else -@@ -954,7 +967,7 @@ int vp8_hex_search - this_mv.as_mv.col = bc + next_chkpts[k][i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } -@@ -971,7 +984,7 @@ int vp8_hex_search - } - } - -- // check 4 1-away neighbors -+ /* check 4 1-away neighbors */ - cal_neighbors: - for (j = 0; j < dia_range; j++) - { -@@ -985,7 +998,7 @@ cal_neighbors: - this_mv.as_mv.row = br + neighbors[i].row; - this_mv.as_mv.col = bc + neighbors[i].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - }else -@@ -996,7 +1009,7 @@ cal_neighbors: - this_mv.as_mv.col = bc + neighbors[i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } -@@ -1047,7 +1060,8 @@ int vp8_diamond_search_sad_c - int tot_steps; - int_mv this_mv; - -- int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int best_site = 0; - int last_site = 0; - -@@ -1058,10 +1072,12 @@ int vp8_diamond_search_sad_c - search_site *ss; - - unsigned char *check_here; -- int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -@@ -1072,17 +1088,18 @@ int vp8_diamond_search_sad_c - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Work out the start point for the search -+ /* Work out the start point for the search */ - in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); - best_address = in_what; - -- // Check the starting position -- bestsad = fn_ptr->sdf(what, what_stride, in_what, -- in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ /* Check the starting position */ -+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // search_param determines the length of the initial step and hence the number of iterations -- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. -+ /* search_param determines the length of the initial step and hence -+ * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : -+ * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
-+ */ - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - -@@ -1092,7 +1109,7 @@ int vp8_diamond_search_sad_c - { - for (j = 0 ; j < x->searches_per_step ; j++) - { -- // Trap illegal vectors -+ /* Trap illegal vectors */ - this_row_offset = best_mv->as_mv.row + ss[i].mv.row; - this_col_offset = best_mv->as_mv.col + ss[i].mv.col; - -@@ -1101,14 +1118,14 @@ int vp8_diamond_search_sad_c - - { - check_here = ss[i].offset + best_address; -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1135,11 +1152,8 @@ int vp8_diamond_search_sad_c - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad == INT_MAX) -- return INT_MAX; -- -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_diamond_search_sadx4 -@@ -1170,7 +1184,8 @@ int vp8_diamond_search_sadx4 - int tot_steps; - int_mv this_mv; - -- unsigned int bestsad = UINT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int best_site = 0; - int last_site = 0; - -@@ -1181,10 +1196,12 @@ int vp8_diamond_search_sadx4 - search_site *ss; - - unsigned char *check_here; -- unsigned int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -@@ -1195,17 +1212,18 @@ int vp8_diamond_search_sadx4 - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Work out the start point for the search -+ /* Work out the start point for the search */ - in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); - best_address = in_what; - -- // Check the starting position -- bestsad = fn_ptr->sdf(what, what_stride, -- in_what, in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ /* Check the starting position */ -+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // search_param determines the length of the initial step and hence the number of iterations -- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. -+ /* search_param determines the length of the initial step and hence the -+ * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = -+ * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. -+ */ - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - -@@ -1215,8 +1233,10 @@ int vp8_diamond_search_sadx4 - { - int all_in = 1, t; - -- // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of -- // checking 4 bounds for each points. 
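
The search hunks in this region consistently replace the signed 0x7fffffff starting value with UINT_MAX, track bestsad/thissad as unsigned int, and pass the running best into the SAD routine so it can stop early once a candidate is already worse. Below is a minimal standalone C sketch of that pattern; the sad_fn_t typedef, pick_best_candidate helper and its parameters are illustrative names only, not the libvpx interface.

#include <limits.h>

/* Hypothetical SAD callback: compares a source block against a reference
 * block and may return early once the partial sum exceeds best_sad. */
typedef unsigned int (*sad_fn_t)(const unsigned char *src, int src_stride,
                                 const unsigned char *ref, int ref_stride,
                                 unsigned int best_sad);

/* Scan a list of candidate column offsets and keep the smallest SAD.
 * best_sad starts at UINT_MAX, the natural "worse than anything" sentinel
 * for an unsigned accumulator, instead of the signed 0x7fffffff. */
static unsigned int pick_best_candidate(sad_fn_t sdf,
                                        const unsigned char *src, int src_stride,
                                        const unsigned char *ref, int ref_stride,
                                        const int *col_offsets, int n,
                                        int *best_idx)
{
    unsigned int best_sad = UINT_MAX;
    int i;

    *best_idx = -1;
    for (i = 0; i < n; i++)
    {
        /* Passing best_sad lets the SAD routine bail out early. */
        unsigned int this_sad = sdf(src, src_stride,
                                    ref + col_offsets[i], ref_stride, best_sad);
        if (this_sad < best_sad)
        {
            best_sad = this_sad;
            *best_idx = i;
        }
    }
    return best_sad;
}
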
-+ /* To know if all neighbor points are within the bounds, 4 bounds -+ * checking are enough instead of checking 4 bounds for each -+ * points. -+ */ - all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min); - all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max); - all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min); -@@ -1228,7 +1248,7 @@ int vp8_diamond_search_sadx4 - - for (j = 0 ; j < x->searches_per_step ; j += 4) - { -- unsigned char *block_offset[4]; -+ const unsigned char *block_offset[4]; - - for (t = 0; t < 4; t++) - block_offset[t] = ss[i+t].offset + best_address; -@@ -1257,7 +1277,7 @@ int vp8_diamond_search_sadx4 - { - for (j = 0 ; j < x->searches_per_step ; j++) - { -- // Trap illegal vectors -+ /* Trap illegal vectors */ - this_row_offset = best_mv->as_mv.row + ss[i].mv.row; - this_col_offset = best_mv->as_mv.col + ss[i].mv.col; - -@@ -1265,14 +1285,14 @@ int vp8_diamond_search_sadx4 - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) - { - check_here = ss[i].offset + best_address; -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1299,11 +1319,8 @@ int vp8_diamond_search_sadx4 - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad == INT_MAX) -- return INT_MAX; -- -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1321,11 +1338,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.mv; - int_mv this_mv; -- int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int r, c; - - unsigned char *check_here; -- int thissad; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; -@@ -1335,24 +1352,29 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int col_min = ref_col - distance; - int col_max = ref_col + distance; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // Work out the mid point for the search -+ /* Work out the mid point for the search */ - in_what = base_pre + d->offset; - bestaddress = in_what + (ref_row * pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Baseline value at the centre -+ /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, -- in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // Apply further limits to prevent us looking using 
vectors that stretch beyiond the UMV border -+ /* Apply further limits to prevent us looking using vectors that -+ * stretch beyiond the UMV border -+ */ - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - -@@ -1372,11 +1394,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - - for (c = col_min; c < col_max; c++) - { -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1393,11 +1415,8 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1415,11 +1434,11 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.mv; - int_mv this_mv; -- unsigned int bestsad = UINT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int r, c; - - unsigned char *check_here; -- unsigned int thissad; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; -@@ -1431,24 +1450,29 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - - unsigned int sad_array[3]; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // Work out the mid point for the search -+ /* Work out the mid point for the search */ - in_what = base_pre + d->offset; - bestaddress = in_what + (ref_row * pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Baseline value at the centre -- bestsad = fn_ptr->sdf(what, what_stride, -- bestaddress, in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ /* Baseline value at the centre */ -+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border -+ /* Apply further limits to prevent us looking using vectors that stretch -+ * beyond the UMV border -+ */ - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - -@@ -1471,7 +1495,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - { - int i; - -- fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); -+ fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); - - for (i = 0; i < 3; i++) - { -@@ -1480,8 +1504,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - if (thissad < bestsad) - { - 
this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1499,13 +1523,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - - while (c < col_max) - { -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1525,11 +1549,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1547,11 +1568,11 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.mv; - int_mv this_mv; -- unsigned int bestsad = UINT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int r, c; - - unsigned char *check_here; -- unsigned int thissad; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; -@@ -1564,24 +1585,29 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); - unsigned int sad_array[3]; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // Work out the mid point for the search -+ /* Work out the mid point for the search */ - in_what = base_pre + d->offset; - bestaddress = in_what + (ref_row * pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Baseline value at the centre -+ /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, -- bestaddress, in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ bestaddress, in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border -+ /* Apply further limits to prevent us looking using vectors that stretch -+ * beyond the UMV border -+ */ - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - -@@ -1604,17 +1630,17 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - { - int i; - -- fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8); -+ fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); - - for (i = 0; i < 8; i++) - { -- thissad = (unsigned int)sad_array8[i]; -+ thissad = sad_array8[i]; - - if (thissad < bestsad) - { - 
this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1687,11 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1711,17 +1734,21 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv - unsigned char *best_address = (unsigned char *)(base_pre + d->offset + - (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); - unsigned char *check_here; -- unsigned int thissad; - int_mv this_mv; -- unsigned int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; - -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); -+ bestsad = fn_ptr->sdf(what, what_stride, best_address, -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); - - for (i=0; ias_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, -@@ -1790,17 +1814,21 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - unsigned char *best_address = (unsigned char *)(base_pre + d->offset + - (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); - unsigned char *check_here; -- unsigned int thissad; - int_mv this_mv; -- unsigned int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; - -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); -+ bestsad = fn_ptr->sdf(what, what_stride, best_address, -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); - - for (i=0; ias_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, 
mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - #ifdef ENTROPY_STATS -@@ -1900,16 +1925,16 @@ void print_mode_context(void) - - for (j = 0; j < 6; j++) - { -- fprintf(f, " { // %d \n", j); -+ fprintf(f, " { /* %d */\n", j); - fprintf(f, " "); - - for (i = 0; i < 4; i++) - { - int overal_prob; - int this_prob; -- int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1]; -+ int count; - -- // Overall probs -+ /* Overall probs */ - count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; - - if (count) -@@ -1920,7 +1945,7 @@ void print_mode_context(void) - if (overal_prob == 0) - overal_prob = 1; - -- // context probs -+ /* context probs */ - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - - if (count) -@@ -1932,8 +1957,6 @@ void print_mode_context(void) - this_prob = 1; - - fprintf(f, "%5d, ", this_prob); -- //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob); -- //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob); - } - - fprintf(f, " },\n"); -diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h -index cdb0cb6..890113f 100644 ---- a/vp8/encoder/mcomp.h -+++ b/vp8/encoder/mcomp.h -@@ -21,9 +21,16 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); - #endif - - --#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step --#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units --#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units -+/* The maximum number of steps in a step search given the largest allowed -+ * initial step -+ */ -+#define MAX_MVSEARCH_STEPS 8 -+ -+/* Max full pel mv specified in 1 pel units */ -+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) -+ -+/* Maximum size of the first step in full pel units */ -+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) - - extern void print_mode_context(void); - extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); -diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c -index c636c48..c61563c 100644 ---- a/vp8/encoder/modecosts.c -+++ b/vp8/encoder/modecosts.c -@@ -18,6 +18,8 @@ - void vp8_init_mode_costs(VP8_COMP *c) - { - VP8_COMMON *x = &c->common; -+ struct rd_costs_struct *rd_costs = &c->rd_costs; -+ - { - const vp8_tree_p T = vp8_bmode_tree; - -@@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c) - - do - { -- vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], x->kf_bmode_prob[i][j], T); -+ vp8_cost_tokens(rd_costs->bmode_costs[i][j], -+ vp8_kf_bmode_prob[i][j], T); - } - while (++j < VP8_BINTRAMODES); - } - while (++i < VP8_BINTRAMODES); - -- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T); -+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T); - } -- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree); -+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob, -+ vp8_sub_mv_ref_tree); - -- vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); -- vp8_cost_tokens(c->mb.mbmode_cost[0], x->kf_ymode_prob, vp8_kf_ymode_tree); -+ vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); -+ vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob, -+ vp8_kf_ymode_tree); - -- 
vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree); -- vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob, vp8_uv_mode_tree); -+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob, -+ vp8_uv_mode_tree); -+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, -+ vp8_uv_mode_tree); - } -diff --git a/vp8/encoder/mr_dissim.c b/vp8/encoder/mr_dissim.c -index 7a62a06..71218cc 100644 ---- a/vp8/encoder/mr_dissim.c -+++ b/vp8/encoder/mr_dissim.c -@@ -53,6 +53,7 @@ if(x->mbmi.ref_frame !=INTRA_FRAME) \ - void vp8_cal_dissimilarity(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; -+ int i; - - /* Note: The first row & first column in mip are outside the frame, which - * were initialized to all 0.(ref_frame, mode, mv...) -@@ -65,14 +66,25 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) - /* Store info for show/no-show frames for supporting alt_ref. - * If parent frame is alt_ref, child has one too. - */ -+ LOWER_RES_FRAME_INFO* store_info -+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ -+ store_info->frame_type = cm->frame_type; -+ -+ if(cm->frame_type != KEY_FRAME) -+ { -+ store_info->is_frame_dropped = 0; -+ for (i = 1; i < MAX_REF_FRAMES; i++) -+ store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i]; -+ } -+ - if(cm->frame_type != KEY_FRAME) - { - int mb_row; - int mb_col; - /* Point to beginning of allocated MODE_INFO arrays. */ - MODE_INFO *tmp = cm->mip + cm->mode_info_stride; -- LOWER_RES_INFO* store_mode_info -- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ LOWER_RES_MB_INFO* store_mode_info = store_info->mb_info; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) - { -@@ -199,3 +211,26 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) - } - } - } -+ -+/* This function is called only when this frame is dropped at current -+ resolution level. */ -+void vp8_store_drop_frame_info(VP8_COMP *cpi) -+{ -+ /* If the frame is dropped in lower-resolution encoding, this information -+ is passed to higher resolution level so that the encoder knows there -+ is no mode & motion info available. -+ */ -+ if (cpi->oxcf.mr_total_resolutions >1 -+ && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1)) -+ { -+ /* Store info for show/no-show frames for supporting alt_ref. -+ * If parent frame is alt_ref, child has one too. -+ */ -+ LOWER_RES_FRAME_INFO* store_info -+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ -+ /* Set frame_type to be INTER_FRAME since we won't drop key frame. 
*/ -+ store_info->frame_type = INTER_FRAME; -+ store_info->is_frame_dropped = 1; -+ } -+} -diff --git a/vp8/encoder/mr_dissim.h b/vp8/encoder/mr_dissim.h -index 3d2c203..f8cb135 100644 ---- a/vp8/encoder/mr_dissim.h -+++ b/vp8/encoder/mr_dissim.h -@@ -15,5 +15,6 @@ - - extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi); - extern void vp8_cal_dissimilarity(VP8_COMP *cpi); -+extern void vp8_store_drop_frame_info(VP8_COMP *cpi); - - #endif -diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c -index cee62fa..4680f39 100644 ---- a/vp8/encoder/onyx_if.c -+++ b/vp8/encoder/onyx_if.c -@@ -11,6 +11,7 @@ - - #include "vpx_config.h" - #include "vp8/common/onyxc_int.h" -+#include "vp8/common/blockd.h" - #include "onyx_int.h" - #include "vp8/common/systemdependent.h" - #include "quantize.h" -@@ -55,12 +56,8 @@ extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *po - extern void print_parms(VP8_CONFIG *ocf, char *filenam); - extern unsigned int vp8_get_processor_freq(); - extern void print_tree_update_probs(); --extern void vp8cx_create_encoder_threads(VP8_COMP *cpi); -+extern int vp8cx_create_encoder_threads(VP8_COMP *cpi); - extern void vp8cx_remove_encoder_threads(VP8_COMP *cpi); --#if HAVE_NEON --extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); --extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); --#endif - - int vp8_estimate_entropy_savings(VP8_COMP *cpi); - -@@ -143,7 +140,7 @@ extern const int qzbin_factors[129]; - extern void vp8cx_init_quantizer(VP8_COMP *cpi); - extern const int vp8cx_base_skip_false_prob[128]; - --// Tables relating active max Q to active min Q -+/* Tables relating active max Q to active min Q */ - static const unsigned char kf_low_motion_minq[QINDEX_RANGE] = - { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -@@ -219,9 +216,8 @@ static void save_layer_context(VP8_COMP *cpi) - { - LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer]; - -- // Save layer dependent coding state -+ /* Save layer dependent coding state */ - lc->target_bandwidth = cpi->target_bandwidth; -- //lc->target_bandwidth = cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; -@@ -242,7 +238,7 @@ static void save_layer_context(VP8_COMP *cpi) - lc->rate_correction_factor = cpi->rate_correction_factor; - lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor; - lc->gf_rate_correction_factor = cpi->gf_rate_correction_factor; -- lc->zbin_over_quant = cpi->zbin_over_quant; -+ lc->zbin_over_quant = cpi->mb.zbin_over_quant; - lc->inter_frame_target = cpi->inter_frame_target; - lc->total_byte_count = cpi->total_byte_count; - lc->filter_level = cpi->common.filter_level; -@@ -250,15 +246,15 @@ static void save_layer_context(VP8_COMP *cpi) - lc->last_frame_percent_intra = cpi->last_frame_percent_intra; - - memcpy (lc->count_mb_ref_frame_usage, -- cpi->count_mb_ref_frame_usage, -- sizeof(cpi->count_mb_ref_frame_usage)); -+ cpi->mb.count_mb_ref_frame_usage, -+ sizeof(cpi->mb.count_mb_ref_frame_usage)); - } - - static void restore_layer_context(VP8_COMP *cpi, const int layer) - { - LAYER_CONTEXT *lc = &cpi->layer_context[layer]; - -- // Restore layer dependent coding state -+ /* Restore layer dependent coding state */ - cpi->current_layer = layer; - cpi->target_bandwidth = lc->target_bandwidth; - 
cpi->oxcf.target_bandwidth = lc->target_bandwidth; -@@ -271,9 +267,7 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) - cpi->buffer_level = lc->buffer_level; - cpi->bits_off_target = lc->bits_off_target; - cpi->total_actual_bits = lc->total_actual_bits; -- //cpi->worst_quality = lc->worst_quality; - cpi->active_worst_quality = lc->active_worst_quality; -- //cpi->best_quality = lc->best_quality; - cpi->active_best_quality = lc->active_best_quality; - cpi->ni_av_qi = lc->ni_av_qi; - cpi->ni_tot_qi = lc->ni_tot_qi; -@@ -282,26 +276,31 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) - cpi->rate_correction_factor = lc->rate_correction_factor; - cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor; - cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor; -- cpi->zbin_over_quant = lc->zbin_over_quant; -+ cpi->mb.zbin_over_quant = lc->zbin_over_quant; - cpi->inter_frame_target = lc->inter_frame_target; - cpi->total_byte_count = lc->total_byte_count; - cpi->common.filter_level = lc->filter_level; - - cpi->last_frame_percent_intra = lc->last_frame_percent_intra; - -- memcpy (cpi->count_mb_ref_frame_usage, -+ memcpy (cpi->mb.count_mb_ref_frame_usage, - lc->count_mb_ref_frame_usage, -- sizeof(cpi->count_mb_ref_frame_usage)); -+ sizeof(cpi->mb.count_mb_ref_frame_usage)); - } - - static void setup_features(VP8_COMP *cpi) - { -- // Set up default state for MB feature flags -- cpi->mb.e_mbd.segmentation_enabled = 0; -- cpi->mb.e_mbd.update_mb_segmentation_map = 0; -- cpi->mb.e_mbd.update_mb_segmentation_data = 0; -- vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); -- vpx_memset(cpi->mb.e_mbd.segment_feature_data, 0, sizeof(cpi->mb.e_mbd.segment_feature_data)); -+ // If segmentation enabled set the update flags -+ if ( cpi->mb.e_mbd.segmentation_enabled ) -+ { -+ cpi->mb.e_mbd.update_mb_segmentation_map = 1; -+ cpi->mb.e_mbd.update_mb_segmentation_data = 1; -+ } -+ else -+ { -+ cpi->mb.e_mbd.update_mb_segmentation_map = 0; -+ cpi->mb.e_mbd.update_mb_segmentation_data = 0; -+ } - - cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; - cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; -@@ -323,7 +322,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) - vpx_free(cpi->tplist); - cpi->tplist = NULL; - -- // Delete last frame MV storage buffers -+ /* Delete last frame MV storage buffers */ - vpx_free(cpi->lfmv); - cpi->lfmv = 0; - -@@ -333,7 +332,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) - vpx_free(cpi->lf_ref_frame); - cpi->lf_ref_frame = 0; - -- // Delete sementation map -+ /* Delete sementation map */ - vpx_free(cpi->segmentation_map); - cpi->segmentation_map = 0; - -@@ -349,53 +348,61 @@ static void dealloc_compressor_data(VP8_COMP *cpi) - vpx_free(cpi->tok); - cpi->tok = 0; - -- // Structure used to monitor GF usage -+ /* Structure used to monitor GF usage */ - vpx_free(cpi->gf_active_flags); - cpi->gf_active_flags = 0; - -- // Activity mask based per mb zbin adjustments -+ /* Activity mask based per mb zbin adjustments */ - vpx_free(cpi->mb_activity_map); - cpi->mb_activity_map = 0; -- vpx_free(cpi->mb_norm_activity_map); -- cpi->mb_norm_activity_map = 0; - - vpx_free(cpi->mb.pip); - cpi->mb.pip = 0; -+ -+#if CONFIG_MULTITHREAD -+ vpx_free(cpi->mt_current_mb_col); -+ cpi->mt_current_mb_col = NULL; -+#endif - } - - static void enable_segmentation(VP8_COMP *cpi) - { -- // Set the appropriate feature bit -+ /* Set the appropriate feature bit */ - cpi->mb.e_mbd.segmentation_enabled 
= 1; - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; - } - static void disable_segmentation(VP8_COMP *cpi) - { -- // Clear the appropriate feature bit -+ /* Clear the appropriate feature bit */ - cpi->mb.e_mbd.segmentation_enabled = 0; - } - --// Valid values for a segment are 0 to 3 --// Segmentation map is arrange as [Rows][Columns] -+/* Valid values for a segment are 0 to 3 -+ * Segmentation map is arrange as [Rows][Columns] -+ */ - static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) - { -- // Copy in the new segmentation map -+ /* Copy in the new segmentation map */ - vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); - -- // Signal that the map should be updated. -+ /* Signal that the map should be updated. */ - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; - } - --// The values given for each segment can be either deltas (from the default value chosen for the frame) or absolute values. --// --// Valid range for abs values is (0-127 for MB_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF) --// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF) --// --// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given). --// --// -+/* The values given for each segment can be either deltas (from the default -+ * value chosen for the frame) or absolute values. -+ * -+ * Valid range for abs values is: -+ * (0-127 for MB_LVL_ALT_Q), (0-63 for SEGMENT_ALT_LF) -+ * Valid range for delta values are: -+ * (+/-127 for MB_LVL_ALT_Q), (+/-63 for SEGMENT_ALT_LF) -+ * -+ * abs_delta = SEGMENT_DELTADATA (deltas) -+ * abs_delta = SEGMENT_ABSDATA (use the absolute values given). -+ * -+ */ - static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta) - { - cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; -@@ -411,26 +418,6 @@ static void segmentation_test_function(VP8_COMP *cpi) - // Create a temporary map for segmentation data. 
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - -- // MB loop to set local segmentation map -- /*for ( i = 0; i < cpi->common.mb_rows; i++ ) -- { -- for ( j = 0; j < cpi->common.mb_cols; j++ ) -- { -- //seg_map[(i*cpi->common.mb_cols) + j] = (j % 2) + ((i%2)* 2); -- //if ( j < cpi->common.mb_cols/2 ) -- -- // Segment 1 around the edge else 0 -- if ( (i == 0) || (j == 0) || (i == (cpi->common.mb_rows-1)) || (j == (cpi->common.mb_cols-1)) ) -- seg_map[(i*cpi->common.mb_cols) + j] = 1; -- //else if ( (i < 2) || (j < 2) || (i > (cpi->common.mb_rows-3)) || (j > (cpi->common.mb_cols-3)) ) -- // seg_map[(i*cpi->common.mb_cols) + j] = 2; -- //else if ( (i < 5) || (j < 5) || (i > (cpi->common.mb_rows-6)) || (j > (cpi->common.mb_cols-6)) ) -- // seg_map[(i*cpi->common.mb_cols) + j] = 3; -- else -- seg_map[(i*cpi->common.mb_cols) + j] = 0; -- } -- }*/ -- - // Set the segmentation Map - set_segmentation_map(cpi, seg_map); - -@@ -453,103 +440,78 @@ static void segmentation_test_function(VP8_COMP *cpi) - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - - // Delete sementation map -- vpx_free(seg_map); -+ vpx_free(seg_map); - - seg_map = 0; -- - } - --// A simple function to cyclically refresh the background at a lower Q -+/* A simple function to cyclically refresh the background at a lower Q */ - static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) - { -- unsigned char *seg_map; -+ unsigned char *seg_map = cpi->segmentation_map; - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; - int i; - int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; - int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; - -- // Create a temporary map for segmentation data. -- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); -+ cpi->cyclic_refresh_q = Q / 2; - -- cpi->cyclic_refresh_q = Q; -+ // Set every macroblock to be eligible for update. -+ // For key frame this will reset seg map to 0. -+ vpx_memset(cpi->segmentation_map, 0, mbs_in_frame); - -- for (i = Q; i > 0; i--) -- { -- if (vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*(Q + 128)) / 64)) -- //if ( vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*((2*Q)+96))/64) ) -- { -- break; -- } -- } -- -- cpi->cyclic_refresh_q = i; -- -- // Only update for inter frames - if (cpi->common.frame_type != KEY_FRAME) - { -- // Cycle through the macro_block rows -- // MB loop to set local segmentation map -- for (i = cpi->cyclic_refresh_mode_index; i < mbs_in_frame; i++) -+ /* Cycle through the macro_block rows */ -+ /* MB loop to set local segmentation map */ -+ i = cpi->cyclic_refresh_mode_index; -+ assert(i < mbs_in_frame); -+ do - { -- // If the MB is as a candidate for clean up then mark it for possible boost/refresh (segment 1) -- // The segment id may get reset to 0 later if the MB gets coded anything other than last frame 0,0 -- // as only (last frame 0,0) MBs are eligable for refresh : that is to say Mbs likely to be background blocks. -- if (cpi->cyclic_refresh_map[i] == 0) -- { -- seg_map[i] = 1; -- } -- else -- { -- seg_map[i] = 0; -- -- // Skip blocks that have been refreshed recently anyway. 
-- if (cpi->cyclic_refresh_map[i] < 0) -- //cpi->cyclic_refresh_map[i] = cpi->cyclic_refresh_map[i] / 16; -- cpi->cyclic_refresh_map[i]++; -- } -- -- -- if (block_count > 0) -- block_count--; -- else -- break; -+ /* If the MB is as a candidate for clean up then mark it for -+ * possible boost/refresh (segment 1) The segment id may get -+ * reset to 0 later if the MB gets coded anything other than -+ * last frame 0,0 as only (last frame 0,0) MBs are eligable for -+ * refresh : that is to say Mbs likely to be background blocks. -+ */ -+ if (cpi->cyclic_refresh_map[i] == 0) -+ { -+ seg_map[i] = 1; -+ block_count --; -+ } -+ else if (cpi->cyclic_refresh_map[i] < 0) -+ cpi->cyclic_refresh_map[i]++; -+ -+ i++; -+ if (i == mbs_in_frame) -+ i = 0; - - } -+ while(block_count && i != cpi->cyclic_refresh_mode_index); - -- // If we have gone through the frame reset to the start - cpi->cyclic_refresh_mode_index = i; -- -- if (cpi->cyclic_refresh_mode_index >= mbs_in_frame) -- cpi->cyclic_refresh_mode_index = 0; - } - -- // Set the segmentation Map -- set_segmentation_map(cpi, seg_map); -- -- // Activate segmentation. -+ /* Activate segmentation. */ -+ cpi->mb.e_mbd.update_mb_segmentation_map = 1; -+ cpi->mb.e_mbd.update_mb_segmentation_data = 1; - enable_segmentation(cpi); - -- // Set up the quant segment data -+ /* Set up the quant segment data */ - feature_data[MB_LVL_ALT_Q][0] = 0; - feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); - feature_data[MB_LVL_ALT_Q][2] = 0; - feature_data[MB_LVL_ALT_Q][3] = 0; - -- // Set up the loop segment data -+ /* Set up the loop segment data */ - feature_data[MB_LVL_ALT_LF][0] = 0; - feature_data[MB_LVL_ALT_LF][1] = lf_adjustment; - feature_data[MB_LVL_ALT_LF][2] = 0; - feature_data[MB_LVL_ALT_LF][3] = 0; - -- // Initialise the feature data structure -- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 -+ /* Initialise the feature data structure */ - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - -- // Delete sementation map -- vpx_free(seg_map); -- -- seg_map = 0; -- - } - - static void set_default_lf_deltas(VP8_COMP *cpi) -@@ -560,16 +522,21 @@ static void set_default_lf_deltas(VP8_COMP *cpi) - vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - -- // Test of ref frame deltas -+ /* Test of ref frame deltas */ - cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; - cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0; - cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2; - cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2; - -- cpi->mb.e_mbd.mode_lf_deltas[0] = 4; // BPRED -- cpi->mb.e_mbd.mode_lf_deltas[1] = -2; // Zero -- cpi->mb.e_mbd.mode_lf_deltas[2] = 2; // New mv -- cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv -+ cpi->mb.e_mbd.mode_lf_deltas[0] = 4; /* BPRED */ -+ -+ if(cpi->oxcf.Mode == MODE_REALTIME) -+ cpi->mb.e_mbd.mode_lf_deltas[1] = -12; /* Zero */ -+ else -+ cpi->mb.e_mbd.mode_lf_deltas[1] = -2; /* Zero */ -+ -+ cpi->mb.e_mbd.mode_lf_deltas[2] = 2; /* New mv */ -+ cpi->mb.e_mbd.mode_lf_deltas[3] = 4; /* Split mv */ - } - - /* Convenience macros for mapping speed and mode into a continuous -@@ -669,17 +636,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) - int last_improved_quant = sf->improved_quant; - int ref_frames; - -- // Initialise default mode frequency sampling variables -+ /* Initialise default mode frequency sampling variables */ - for (i = 0; i < MAX_MODES; i ++) - { - cpi->mode_check_freq[i] = 0; -- 
cpi->mode_test_hit_counts[i] = 0; - cpi->mode_chosen_counts[i] = 0; - } - -- cpi->mbs_tested_so_far = 0; -+ cpi->mb.mbs_tested_so_far = 0; - -- // best quality defaults -+ /* best quality defaults */ - sf->RD = 1; - sf->search_method = NSTEP; - sf->improved_quant = 1; -@@ -697,17 +663,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->improved_mv_pred = 1; - -- // default thresholds to 0 -+ /* default thresholds to 0 */ - for (i = 0; i < MAX_MODES; i++) - sf->thresh_mult[i] = 0; - - /* Count enabled references */ - ref_frames = 1; -- if (cpi->ref_frame_flags & VP8_LAST_FLAG) -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) - ref_frames++; -- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) - ref_frames++; -- if (cpi->ref_frame_flags & VP8_ALT_FLAG) -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) - ref_frames++; - - /* Convert speed to continuous range, with clamping */ -@@ -779,7 +745,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) - switch (Mode) - { - #if !(CONFIG_REALTIME_ONLY) -- case 0: // best quality mode -+ case 0: /* best quality mode */ - sf->first_step = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - break; -@@ -800,8 +766,9 @@ void vp8_set_speed_features(VP8_COMP *cpi) - sf->improved_quant = 0; - sf->improved_dct = 0; - -- // Only do recode loop on key frames, golden frames and -- // alt ref frames -+ /* Only do recode loop on key frames, golden frames and -+ * alt ref frames -+ */ - sf->recode_loop = 2; - - } -@@ -809,14 +776,14 @@ void vp8_set_speed_features(VP8_COMP *cpi) - if (Speed > 3) - { - sf->auto_filter = 1; -- sf->recode_loop = 0; // recode loop off -- sf->RD = 0; // Turn rd off -+ sf->recode_loop = 0; /* recode loop off */ -+ sf->RD = 0; /* Turn rd off */ - - } - - if (Speed > 4) - { -- sf->auto_filter = 0; // Faster selection of loop filter -+ sf->auto_filter = 0; /* Faster selection of loop filter */ - } - - break; -@@ -839,7 +806,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) - } - - if (Speed > 2) -- sf->auto_filter = 0; // Faster selection of loop filter -+ sf->auto_filter = 0; /* Faster selection of loop filter */ - - if (Speed > 3) - { -@@ -849,7 +816,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) - - if (Speed > 4) - { -- sf->auto_filter = 0; // Faster selection of loop filter -+ sf->auto_filter = 0; /* Faster selection of loop filter */ - sf->search_method = HEX; - sf->iterative_sub_pixel = 0; - } -@@ -870,16 +837,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) - - for (i = 0; i < min; i++) - { -- sum += cpi->error_bins[i]; -+ sum += cpi->mb.error_bins[i]; - } - - total_skip = sum; - sum = 0; - -- // i starts from 2 to make sure thresh started from 2048 -+ /* i starts from 2 to make sure thresh started from 2048 */ - for (; i < 1024; i++) - { -- sum += cpi->error_bins[i]; -+ sum += cpi->mb.error_bins[i]; - - if (10 * sum >= (unsigned int)(cpi->Speed - 6)*(total_mbs - total_skip)) - break; -@@ -930,16 +897,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) - cm->filter_type = SIMPLE_LOOPFILTER; - } - -- // This has a big hit on quality. Last resort -+ /* This has a big hit on quality. Last resort */ - if (Speed >= 15) - sf->half_pixel_search = 0; - -- vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins)); -+ vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); - - }; /* switch */ - -- // Slow quant, dct and trellis not worthwhile for first pass -- // so make sure they are always turned off. 
-+ /* Slow quant, dct and trellis not worthwhile for first pass -+ * so make sure they are always turned off. -+ */ - if ( cpi->pass == 1 ) - { - sf->improved_quant = 0; -@@ -1107,27 +1075,46 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) - CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); - } - -- // Data used for real time vc mode to see if gf needs refreshing -- cpi->inter_zz_count = 0; -- cpi->gf_bad_count = 0; -- cpi->gf_update_recommended = 0; -+ /* Data used for real time vc mode to see if gf needs refreshing */ -+ cpi->zeromv_count = 0; - - -- // Structures used to minitor GF usage -+ /* Structures used to monitor GF usage */ - vpx_free(cpi->gf_active_flags); - CHECK_MEM_ERROR(cpi->gf_active_flags, -- vpx_calloc(1, cm->mb_rows * cm->mb_cols)); -+ vpx_calloc(sizeof(*cpi->gf_active_flags), -+ cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - - vpx_free(cpi->mb_activity_map); - CHECK_MEM_ERROR(cpi->mb_activity_map, -- vpx_calloc(sizeof(unsigned int), -+ vpx_calloc(sizeof(*cpi->mb_activity_map), - cm->mb_rows * cm->mb_cols)); - -- vpx_free(cpi->mb_norm_activity_map); -- CHECK_MEM_ERROR(cpi->mb_norm_activity_map, -- vpx_calloc(sizeof(unsigned int), -- cm->mb_rows * cm->mb_cols)); -+ /* allocate memory for storing last frame's MVs for MV prediction. */ -+ vpx_free(cpi->lfmv); -+ CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), -+ sizeof(*cpi->lfmv))); -+ vpx_free(cpi->lf_ref_frame_sign_bias); -+ CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, -+ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), -+ sizeof(*cpi->lf_ref_frame_sign_bias))); -+ vpx_free(cpi->lf_ref_frame); -+ CHECK_MEM_ERROR(cpi->lf_ref_frame, -+ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), -+ sizeof(*cpi->lf_ref_frame))); -+ -+ /* Create the encoder segmentation map and set all entries to 0 */ -+ vpx_free(cpi->segmentation_map); -+ CHECK_MEM_ERROR(cpi->segmentation_map, -+ vpx_calloc(cm->mb_rows * cm->mb_cols, -+ sizeof(*cpi->segmentation_map))); -+ cpi->cyclic_refresh_mode_index = 0; -+ vpx_free(cpi->active_map); -+ CHECK_MEM_ERROR(cpi->active_map, -+ vpx_calloc(cm->mb_rows * cm->mb_cols, -+ sizeof(*cpi->active_map))); -+ vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols)); - - #if CONFIG_MULTITHREAD - if (width < 640) -@@ -1138,15 +1125,22 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) - cpi->mt_sync_range = 8; - else - cpi->mt_sync_range = 16; -+ -+ if (cpi->oxcf.multi_threaded > 1) -+ { -+ vpx_free(cpi->mt_current_mb_col); -+ CHECK_MEM_ERROR(cpi->mt_current_mb_col, -+ vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); -+ } -+ - #endif - - vpx_free(cpi->tplist); -- -- CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); -+ CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows)); - } - - --// Quant MOD -+/* Quant MOD */ - static const int q_trans[] = - { - 0, 1, 2, 3, 4, 5, 7, 8, -@@ -1168,7 +1162,7 @@ int vp8_reverse_trans(int x) - return i; - - return 63; --}; -+} - void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) - { - if(framerate < .1) -@@ -1182,16 +1176,16 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) - cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * - cpi->oxcf.two_pass_vbrmin_section / 100); - -- // Set Maximum gf/arf interval -+ /* Set Maximum gf/arf interval */ - cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); - - if(cpi->max_gf_interval < 12) - cpi->max_gf_interval = 12; - -- // Extended interval for genuinely static scenes 
-+ /* Extended interval for genuinely static scenes */ - cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - -- // Special conditions when altr ref frame enabled in lagged compress mode -+ /* Special conditions when altr ref frame enabled in lagged compress mode */ - if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) - { - if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) -@@ -1213,7 +1207,7 @@ rescale(int val, int num, int denom) - int64_t llden = denom; - int64_t llval = val; - -- return llval * llnum / llden; -+ return (int)(llval * llnum / llden); - } - - -@@ -1225,7 +1219,6 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->auto_gold = 1; - cpi->auto_adjust_gold_quantizer = 1; -- cpi->goldfreq = 7; - - cm->version = oxcf->Version; - vp8_setup_version(cm); -@@ -1244,15 +1237,15 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->ref_frame_rate = cpi->frame_rate; - -- // change includes all joint functionality -+ /* change includes all joint functionality */ - vp8_change_config(cpi, oxcf); - -- // Initialize active best and worst q and average q values. -+ /* Initialize active best and worst q and average q values. */ - cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; - cpi->active_best_quality = cpi->oxcf.best_allowed_q; - cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; - -- // Initialise the starting buffer levels -+ /* Initialise the starting buffer levels */ - cpi->buffer_level = cpi->oxcf.starting_buffer_level; - cpi->bits_off_target = cpi->oxcf.starting_buffer_level; - -@@ -1264,7 +1257,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->total_actual_bits = 0; - cpi->total_target_vs_actual = 0; - -- // Temporal scalabilty -+ /* Temporal scalabilty */ - if (cpi->oxcf.number_of_layers > 1) - { - unsigned int i; -@@ -1274,7 +1267,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; - -- // Layer configuration -+ /* Layer configuration */ - lc->frame_rate = - cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; - lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; -@@ -1284,28 +1277,29 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; - - lc->starting_buffer_level = -- rescale(oxcf->starting_buffer_level, -+ rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); - - if (oxcf->optimal_buffer_level == 0) - lc->optimal_buffer_level = lc->target_bandwidth / 8; - else - lc->optimal_buffer_level = -- rescale(oxcf->optimal_buffer_level, -+ rescale((int)(oxcf->optimal_buffer_level), - lc->target_bandwidth, 1000); - - if (oxcf->maximum_buffer_size == 0) - lc->maximum_buffer_size = lc->target_bandwidth / 8; - else - lc->maximum_buffer_size = -- rescale(oxcf->maximum_buffer_size, -+ rescale((int)oxcf->maximum_buffer_size, - lc->target_bandwidth, 1000); - -- // Work out the average size of a frame within this layer -+ /* Work out the average size of a frame within this layer */ - if (i > 0) -- lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] - -- cpi->oxcf.target_bitrate[i-1]) * 1000 / -- (lc->frame_rate - prev_layer_frame_rate); -+ lc->avg_frame_size_for_layer = -+ (int)((cpi->oxcf.target_bitrate[i] - -+ cpi->oxcf.target_bitrate[i-1]) * 1000 / -+ (lc->frame_rate - prev_layer_frame_rate)); - - lc->active_worst_quality = cpi->oxcf.worst_allowed_q; - lc->active_best_quality = cpi->oxcf.best_allowed_q; -@@ -1321,7 +1315,7 @@ static void 
init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - lc->rate_correction_factor = 1.0; - lc->key_frame_rate_correction_factor = 1.0; - lc->gf_rate_correction_factor = 1.0; -- lc->inter_frame_target = 0.0; -+ lc->inter_frame_target = 0; - - prev_layer_frame_rate = lc->frame_rate; - } -@@ -1358,32 +1352,29 @@ static void update_layer_contexts (VP8_COMP *cpi) - lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; - - lc->starting_buffer_level = rescale( -- oxcf->starting_buffer_level_in_ms, -+ (int)oxcf->starting_buffer_level_in_ms, - lc->target_bandwidth, 1000); - - if (oxcf->optimal_buffer_level == 0) - lc->optimal_buffer_level = lc->target_bandwidth / 8; - else - lc->optimal_buffer_level = rescale( -- oxcf->optimal_buffer_level_in_ms, -+ (int)oxcf->optimal_buffer_level_in_ms, - lc->target_bandwidth, 1000); - - if (oxcf->maximum_buffer_size == 0) - lc->maximum_buffer_size = lc->target_bandwidth / 8; - else - lc->maximum_buffer_size = rescale( -- oxcf->maximum_buffer_size_in_ms, -+ (int)oxcf->maximum_buffer_size_in_ms, - lc->target_bandwidth, 1000); - -- // Work out the average size of a frame within this layer -+ /* Work out the average size of a frame within this layer */ - if (i > 0) -- lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] - -- oxcf->target_bitrate[i-1]) * 1000 / -- (lc->frame_rate - prev_layer_frame_rate); -- -- lc->active_worst_quality = oxcf->worst_allowed_q; -- lc->active_best_quality = oxcf->best_allowed_q; -- lc->avg_frame_qindex = oxcf->worst_allowed_q; -+ lc->avg_frame_size_for_layer = -+ (int)((oxcf->target_bitrate[i] - -+ oxcf->target_bitrate[i-1]) * 1000 / -+ (lc->frame_rate - prev_layer_frame_rate)); - - prev_layer_frame_rate = lc->frame_rate; - } -@@ -1514,10 +1505,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->baseline_gf_interval = - cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL; - -- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; -+ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; - -- //cpi->use_golden_frame_only = 0; -- //cpi->use_last_frame_only = 0; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 1; - cm->refresh_entropy_probs = 1; -@@ -1539,11 +1528,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; - } - -- // At the moment the first order values may not be > MAXQ -+ /* At the moment the first order values may not be > MAXQ */ - if (cpi->oxcf.fixed_q > MAXQ) - cpi->oxcf.fixed_q = MAXQ; - -- // local file playback mode == really big buffer -+ /* local file playback mode == really big buffer */ - if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) - { - cpi->oxcf.starting_buffer_level = 60000; -@@ -1554,41 +1543,41 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->oxcf.maximum_buffer_size_in_ms = 240000; - } - -- // Convert target bandwidth from Kbit/s to Bit/s -+ /* Convert target bandwidth from Kbit/s to Bit/s */ - cpi->oxcf.target_bandwidth *= 1000; - - cpi->oxcf.starting_buffer_level = -- rescale(cpi->oxcf.starting_buffer_level, -+ rescale((int)cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - -- // Set or reset optimal and maximum buffer levels. -+ /* Set or reset optimal and maximum buffer levels. 
*/ - if (cpi->oxcf.optimal_buffer_level == 0) - cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; - else - cpi->oxcf.optimal_buffer_level = -- rescale(cpi->oxcf.optimal_buffer_level, -+ rescale((int)cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - - if (cpi->oxcf.maximum_buffer_size == 0) - cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; - else - cpi->oxcf.maximum_buffer_size = -- rescale(cpi->oxcf.maximum_buffer_size, -+ rescale((int)cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); - -- // Set up frame rate and related parameters rate control values. -+ /* Set up frame rate and related parameters rate control values. */ - vp8_new_frame_rate(cpi, cpi->frame_rate); - -- // Set absolute upper and lower quality limits -+ /* Set absolute upper and lower quality limits */ - cpi->worst_quality = cpi->oxcf.worst_allowed_q; - cpi->best_quality = cpi->oxcf.best_allowed_q; - -- // active values should only be modified if out of new range -+ /* active values should only be modified if out of new range */ - if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) - { - cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; - } -- // less likely -+ /* less likely */ - else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) - { - cpi->active_worst_quality = cpi->oxcf.best_allowed_q; -@@ -1597,7 +1586,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - { - cpi->active_best_quality = cpi->oxcf.best_allowed_q; - } -- // less likely -+ /* less likely */ - else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) - { - cpi->active_best_quality = cpi->oxcf.worst_allowed_q; -@@ -1607,14 +1596,9 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->cq_target_quality = cpi->oxcf.cq_level; - -- // Only allow dropped frames in buffered mode -+ /* Only allow dropped frames in buffered mode */ - cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode; - -- if (!cm->use_bilinear_mc_filter) -- cm->mcomp_filter_type = SIXTAP; -- else -- cm->mcomp_filter_type = BILINEAR; -- - cpi->target_bandwidth = cpi->oxcf.target_bandwidth; - - -@@ -1627,7 +1611,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - * correct. 
- */ - -- // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) -+ /* VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) */ - if (cpi->oxcf.Sharpness > 7) - cpi->oxcf.Sharpness = 7; - -@@ -1641,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - Scale2Ratio(cm->horiz_scale, &hr, &hs); - Scale2Ratio(cm->vert_scale, &vr, &vs); - -- // always go to the next whole number -+ /* always go to the next whole number */ - cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; - cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; - } -@@ -1655,6 +1639,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cm->yv12_fb[cm->lst_fb_idx].y_height || - cm->yv12_fb[cm->lst_fb_idx].y_width == 0) - { -+ dealloc_raw_frame_buffers(cpi); - alloc_raw_frame_buffers(cpi); - vp8_alloc_compressor_data(cpi); - } -@@ -1667,16 +1652,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->Speed = cpi->oxcf.cpu_used; - -- // force to allowlag to 0 if lag_in_frames is 0; -+ /* force to allowlag to 0 if lag_in_frames is 0; */ - if (cpi->oxcf.lag_in_frames == 0) - { - cpi->oxcf.allow_lag = 0; - } -- // Limit on lag buffers as these are not currently dynamically allocated -+ /* Limit on lag buffers as these are not currently dynamically allocated */ - else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) - cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - -- // YX Temp -+ /* YX Temp */ - cpi->alt_ref_source = NULL; - cpi->is_src_frame_alt_ref = 0; - -@@ -1693,7 +1678,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - #endif - - #if 0 -- // Experimental RD Code -+ /* Experimental RD Code */ - cpi->frame_distortion = 0; - cpi->last_frame_distortion = 0; - #endif -@@ -1728,7 +1713,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - VP8_COMMON *cm; - - cpi = vpx_memalign(32, sizeof(VP8_COMP)); -- // Check that the CPI instance is valid -+ /* Check that the CPI instance is valid */ - if (!cpi) - return 0; - -@@ -1762,14 +1747,15 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->prob_gf_coded = 128; - cpi->prob_intra_coded = 63; - -- // Prime the recent reference frame usage counters. -- // Hereafter they will be maintained as a sort of moving average -+ /* Prime the recent reference frame usage counters. -+ * Hereafter they will be maintained as a sort of moving average -+ */ - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - -- // Set reference frame sign bias for ALTREF frame to 1 (for now) -+ /* Set reference frame sign bias for ALTREF frame to 1 (for now) */ - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - - cpi->twopass.gf_decay_rate = 0; -@@ -1779,21 +1765,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->alt_is_last = 0 ; - cpi->gold_is_alt = 0 ; - -- // allocate memory for storing last frame's MVs for MV prediction. 
-- CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv))); -- CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); -- CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); -- -- // Create the encoder segmentation map and set all entries to 0 -- CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); -- CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); -- vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); - cpi->active_map_enabled = 0; - - #if 0 -- // Experimental code for lagged and one pass -- // Initialise one_pass GF frames stats -- // Update stats used for GF selection -+ /* Experimental code for lagged and one pass */ -+ /* Initialise one_pass GF frames stats */ -+ /* Update stats used for GF selection */ - if (cpi->pass == 0) - { - cpi->one_pass_frame_index = 0; -@@ -1813,10 +1790,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - } - #endif - -- // Should we use the cyclic refresh method. -- // Currently this is tied to error resilliant mode -+ /* Should we use the cyclic refresh method. -+ * Currently this is tied to error resilliant mode -+ */ - cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode; -- cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 40; -+ cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5; - cpi->cyclic_refresh_mode_index = 0; - cpi->cyclic_refresh_q = 32; - -@@ -1827,9 +1805,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - else - cpi->cyclic_refresh_map = (signed char *) NULL; - -- // Test function for segmentation -- //segmentation_test_function( cpi); -- - #ifdef ENTROPY_STATS - init_context_counters(); - #endif -@@ -1837,7 +1812,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - /*Initialize the feed-forward activity masking.*/ - cpi->activity_avg = 90<<12; - -- cpi->frames_since_key = 8; // Give a sensible default for the first frame. -+ /* Give a sensible default for the first frame. 
*/ -+ cpi->frames_since_key = 8; - cpi->key_frame_frequency = cpi->oxcf.key_freq; - cpi->this_key_frame_forced = 0; - cpi->next_key_frame_forced = 0; -@@ -1880,10 +1856,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - - #endif - --#ifndef LLONG_MAX --#define LLONG_MAX 9223372036854775807LL --#endif -- cpi->first_time_stamp_ever = LLONG_MAX; -+ cpi->first_time_stamp_ever = 0x7FFFFFFF; - - cpi->frames_till_gf_update_due = 0; - cpi->key_frame_count = 1; -@@ -1894,22 +1867,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->total_byte_count = 0; - - cpi->drop_frame = 0; -- cpi->drop_count = 0; -- cpi->max_drop_count = 0; -- cpi->max_consec_dropped_frames = 4; - - cpi->rate_correction_factor = 1.0; - cpi->key_frame_rate_correction_factor = 1.0; - cpi->gf_rate_correction_factor = 1.0; - cpi->twopass.est_max_qcorrection_factor = 1.0; - -- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1]; -- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1]; -- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1]; -- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1]; -- -- cal_mvsadcosts(cpi->mb.mvsadcost); -- - for (i = 0; i < KEY_FRAME_CONTEXT; i++) - { - cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; -@@ -1935,7 +1898,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - else if (cpi->pass == 2) - { - size_t packet_sz = sizeof(FIRSTPASS_STATS); -- int packets = oxcf->two_pass_stats_in.sz / packet_sz; -+ int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; -@@ -1948,17 +1911,16 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - - if (cpi->compressor_speed == 2) - { -- cpi->cpu_freq = 0; //vp8_get_processor_freq(); - cpi->avg_encode_time = 0; - cpi->avg_pick_mode_time = 0; - } - - vp8_set_speed_features(cpi); - -- // Set starting values of RD threshold multipliers (128 = *1) -+ /* Set starting values of RD threshold multipliers (128 = *1) */ - for (i = 0; i < MAX_MODES; i++) - { -- cpi->rd_thresh_mult[i] = 128; -+ cpi->mb.rd_thresh_mult[i] = 128; - } - - #ifdef ENTROPY_STATS -@@ -1966,7 +1928,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - #endif - - #if CONFIG_MULTITHREAD -- vp8cx_create_encoder_threads(cpi); -+ if(vp8cx_create_encoder_threads(cpi)) -+ { -+ vp8_remove_compressor(&cpi); -+ return 0; -+ } - #endif - - cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16; -@@ -2031,11 +1997,14 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->diamond_search_sad = vp8_diamond_search_sad; - cpi->refining_search_sad = vp8_refining_search_sad; - -- // make sure frame 1 is okay -- cpi->error_bins[0] = cpi->common.MBs; -+ /* make sure frame 1 is okay */ -+ cpi->mb.error_bins[0] = cpi->common.MBs; - -- //vp8cx_init_quantizer() is first called here. Add check in vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only called later -- //when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame. -+ /* vp8cx_init_quantizer() is first called here. Add check in -+ * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only -+ * called later when needed. This will avoid unnecessary calls of -+ * vp8cx_init_quantizer() for every frame. 
-+ */ - vp8cx_init_quantizer(cpi); - - vp8_loop_filter_init(cm); -@@ -2043,13 +2012,33 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->common.error.setjmp = 0; - - #if CONFIG_MULTI_RES_ENCODING -+ - /* Calculate # of MBs in a row in lower-resolution level image. */ - if (cpi->oxcf.mr_encoder_id > 0) - vp8_cal_low_res_mb_cols(cpi); -+ - #endif - -- return cpi; -+ /* setup RD costs to MACROBLOCK struct */ -+ -+ cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1]; -+ cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1]; -+ cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1]; -+ cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1]; - -+ cal_mvsadcosts(cpi->mb.mvsadcost); -+ -+ cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost; -+ cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost; -+ cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs; -+ cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs; -+ cpi->mb.token_costs = cpi->rd_costs.token_costs; -+ -+ /* setup block ptrs & offsets */ -+ vp8_setup_block_ptrs(&cpi->mb); -+ vp8_setup_block_dptrs(&cpi->mb.e_mbd); -+ -+ return cpi; - } - - -@@ -2099,7 +2088,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - - fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" - "GLPsnrP\tVPXSSIM\t\n"); -- for (i=0; ioxcf.number_of_layers; i++) -+ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++) - { - double dr = (double)cpi->bytes_in_layer[i] * - 8.0 / 1000.0 / time_encoded; -@@ -2150,7 +2139,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - - fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t" - "Time(us)\n"); -- for (i=0; ioxcf.number_of_layers; i++) -+ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++) - { - double dr = (double)cpi->bytes_in_layer[i] * - 8.0 / 1000.0 / time_encoded; -@@ -2204,7 +2193,6 @@ void vp8_remove_compressor(VP8_COMP **ptr) - fprintf(f, "%5d", frames_at_speed[i]); - - fprintf(f, "\n"); -- //fprintf(f, "%10d PM %10d %10d %10d EF %10d %10d %10d\n", cpi->Speed, cpi->avg_pick_mode_time, (tot_pm/cnt_pm), cnt_pm, cpi->avg_encode_time, 0, 0); - fclose(f); - } - -@@ -2266,7 +2254,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - for (i = 0; i < 10; i++) - { - -- fprintf(fmode, " { //Above Mode : %d\n", i); -+ fprintf(fmode, " { /* Above Mode : %d */\n", i); - - for (j = 0; j < 10; j++) - { -@@ -2281,7 +2269,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]); - } - -- fprintf(fmode, "}, // left_mode %d\n", j); -+ fprintf(fmode, "}, /* left_mode %d */\n", j); - - } - -@@ -2459,7 +2447,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) - - for (i = 0; i < 4; i++) - pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0, -- pkt.data.psnr.sse[i]); -+ (double)(pkt.data.psnr.sse[i])); - - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); - } -@@ -2482,28 +2470,28 @@ int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags) - cpi->common.refresh_alt_ref_frame = 0; - cpi->common.refresh_last_frame = 0; - -- if (ref_frame_flags & VP8_LAST_FLAG) -+ if (ref_frame_flags & VP8_LAST_FRAME) - cpi->common.refresh_last_frame = 1; - -- if (ref_frame_flags & VP8_GOLD_FLAG) -+ if (ref_frame_flags & VP8_GOLD_FRAME) - cpi->common.refresh_golden_frame = 1; - -- if (ref_frame_flags & VP8_ALT_FLAG) -+ if (ref_frame_flags & VP8_ALTR_FRAME) - cpi->common.refresh_alt_ref_frame = 1; - - return 0; - } - --int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+int vp8_get_reference(VP8_COMP *cpi, 
enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &cpi->common; - int ref_fb_idx; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else - return -1; -@@ -2512,17 +2500,17 @@ int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CO - - return 0; - } --int vp8_set_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+int vp8_set_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &cpi->common; - - int ref_fb_idx; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else - return -1; -@@ -2583,7 +2571,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- // are we resizing the image -+ /* are we resizing the image */ - if (cm->horiz_scale != 0 || cm->vert_scale != 0) - { - #if CONFIG_SPATIAL_RESAMPLING -@@ -2611,51 +2599,57 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - } - - --static void resize_key_frame(VP8_COMP *cpi) -+static int resize_key_frame(VP8_COMP *cpi) - { - #if CONFIG_SPATIAL_RESAMPLING - VP8_COMMON *cm = &cpi->common; - -- // Do we need to apply resampling for one pass cbr. -- // In one pass this is more limited than in two pass cbr -- // The test and any change is only made one per key frame sequence -+ /* Do we need to apply resampling for one pass cbr. -+ * In one pass this is more limited than in two pass cbr -+ * The test and any change is only made one per key frame sequence -+ */ - if (cpi->oxcf.allow_spatial_resampling && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) - { - int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs); - int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs); - int new_width, new_height; - -- // If we are below the resample DOWN watermark then scale down a notch. -+ /* If we are below the resample DOWN watermark then scale down a -+ * notch. -+ */ - if (cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) - { - cm->horiz_scale = (cm->horiz_scale < ONETWO) ? cm->horiz_scale + 1 : ONETWO; - cm->vert_scale = (cm->vert_scale < ONETWO) ? cm->vert_scale + 1 : ONETWO; - } -- // Should we now start scaling back up -+ /* Should we now start scaling back up */ - else if (cpi->buffer_level > (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)) - { - cm->horiz_scale = (cm->horiz_scale > NORMAL) ? cm->horiz_scale - 1 : NORMAL; - cm->vert_scale = (cm->vert_scale > NORMAL) ? 
cm->vert_scale - 1 : NORMAL; - } - -- // Get the new hieght and width -+ /* Get the new hieght and width */ - Scale2Ratio(cm->horiz_scale, &hr, &hs); - Scale2Ratio(cm->vert_scale, &vr, &vs); - new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; - new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; - -- // If the image size has changed we need to reallocate the buffers -- // and resample the source image -+ /* If the image size has changed we need to reallocate the buffers -+ * and resample the source image -+ */ - if ((cm->Width != new_width) || (cm->Height != new_height)) - { - cm->Width = new_width; - cm->Height = new_height; - vp8_alloc_compressor_data(cpi); - scale_and_extend_source(cpi->un_scaled_source, cpi); -+ return 1; - } - } - - #endif -+ return 0; - } - - -@@ -2663,34 +2657,35 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- // Select an interval before next GF or altref -+ /* Select an interval before next GF or altref */ - if (!cpi->auto_gold) -- cpi->frames_till_gf_update_due = cpi->goldfreq; -+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - - if ((cpi->pass != 2) && cpi->frames_till_gf_update_due) - { - cpi->current_gf_interval = cpi->frames_till_gf_update_due; - -- // Set the bits per frame that we should try and recover in subsequent inter frames -- // to account for the extra GF spend... note that his does not apply for GF updates -- // that occur coincident with a key frame as the extra cost of key frames is dealt -- // with elsewhere. -- -+ /* Set the bits per frame that we should try and recover in -+ * subsequent inter frames to account for the extra GF spend... -+ * note that his does not apply for GF updates that occur -+ * coincident with a key frame as the extra cost of key frames is -+ * dealt with elsewhere. -+ */ - cpi->gf_overspend_bits += cpi->projected_frame_size; - cpi->non_gf_bitrate_adjustment = cpi->gf_overspend_bits / cpi->frames_till_gf_update_due; - } - -- // Update data structure that monitors level of reference to last GF -+ /* Update data structure that monitors level of reference to last GF */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - -- // this frame refreshes means next frames don't unless specified by user -+ /* this frame refreshes means next frames don't unless specified by user */ - cpi->common.frames_since_golden = 0; - -- // Clear the alternate reference update pending flag. -+ /* Clear the alternate reference update pending flag. */ - cpi->source_alt_ref_pending = 0; - -- // Set the alternate refernce frame active flag -+ /* Set the alternate refernce frame active flag */ - cpi->source_alt_ref_active = 1; - - -@@ -2699,25 +2694,29 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- // Update the Golden frame usage counts. -+ /* Update the Golden frame usage counts. */ - if (cm->refresh_golden_frame) - { -- // Select an interval before next GF -+ /* Select an interval before next GF */ - if (!cpi->auto_gold) -- cpi->frames_till_gf_update_due = cpi->goldfreq; -+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - - if ((cpi->pass != 2) && (cpi->frames_till_gf_update_due > 0)) - { - cpi->current_gf_interval = cpi->frames_till_gf_update_due; - -- // Set the bits per frame that we should try and recover in subsequent inter frames -- // to account for the extra GF spend... 
note that his does not apply for GF updates -- // that occur coincident with a key frame as the extra cost of key frames is dealt -- // with elsewhere. -+ /* Set the bits per frame that we should try and recover in -+ * subsequent inter frames to account for the extra GF spend... -+ * note that his does not apply for GF updates that occur -+ * coincident with a key frame as the extra cost of key frames -+ * is dealt with elsewhere. -+ */ - if ((cm->frame_type != KEY_FRAME) && !cpi->source_alt_ref_active) - { -- // Calcluate GF bits to be recovered -- // Projected size - av frame bits available for inter frames for clip as a whole -+ /* Calcluate GF bits to be recovered -+ * Projected size - av frame bits available for inter -+ * frames for clip as a whole -+ */ - cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->inter_frame_target); - } - -@@ -2725,32 +2724,25 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - - } - -- // Update data structure that monitors level of reference to last GF -+ /* Update data structure that monitors level of reference to last GF */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - -- // this frame refreshes means next frames don't unless specified by user -+ /* this frame refreshes means next frames don't unless specified by -+ * user -+ */ - cm->refresh_golden_frame = 0; - cpi->common.frames_since_golden = 0; - -- //if ( cm->frame_type == KEY_FRAME ) -- //{ - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; -- //} -- //else -- //{ -- // // Carry a potrtion of count over to begining of next gf sequence -- // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5; -- // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5; -- // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5; -- // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5; -- //} -- -- // ******** Fixed Q test code only ************ -- // If we are going to use the ALT reference for the next group of frames set a flag to say so. -+ -+ /* ******** Fixed Q test code only ************ */ -+ /* If we are going to use the ALT reference for the next group of -+ * frames set a flag to say so. 
-+ */ - if (cpi->oxcf.fixed_q >= 0 && - cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) - { -@@ -2761,14 +2753,14 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - if (!cpi->source_alt_ref_pending) - cpi->source_alt_ref_active = 0; - -- // Decrement count down till next gf -+ /* Decrement count down till next gf */ - if (cpi->frames_till_gf_update_due > 0) - cpi->frames_till_gf_update_due--; - - } - else if (!cpi->common.refresh_alt_ref_frame) - { -- // Decrement count down till next gf -+ /* Decrement count down till next gf */ - if (cpi->frames_till_gf_update_due > 0) - cpi->frames_till_gf_update_due--; - -@@ -2779,21 +2771,26 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - - if (cpi->common.frames_since_golden > 1) - { -- cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME]; -- cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME]; -- cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]; -- cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; -+ cpi->recent_ref_frame_usage[INTRA_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]; -+ cpi->recent_ref_frame_usage[LAST_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[LAST_FRAME]; -+ cpi->recent_ref_frame_usage[GOLDEN_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME]; -+ cpi->recent_ref_frame_usage[ALTREF_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; - } - } - } - --// This function updates the reference frame probability estimates that --// will be used during mode selection -+/* This function updates the reference frame probability estimates that -+ * will be used during mode selection -+ */ - static void update_rd_ref_frame_probs(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- const int *const rfct = cpi->count_mb_ref_frame_usage; -+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; - const int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - -@@ -2810,7 +2807,9 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) - cpi->prob_gf_coded = 128; - } - -- // update reference frame costs since we can do better than what we got last frame. -+ /* update reference frame costs since we can do better than what we got -+ * last frame. 
-+ */ - if (cpi->oxcf.number_of_layers == 1) - { - if (cpi->common.refresh_alt_ref_frame) -@@ -2841,7 +2840,7 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) - } - - --// 1 = key, 0 = inter -+/* 1 = key, 0 = inter */ - static int decide_key_frame(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; -@@ -2853,43 +2852,22 @@ static int decide_key_frame(VP8_COMP *cpi) - if (cpi->Speed > 11) - return 0; - -- // Clear down mmx registers -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers */ -+ vp8_clear_system_state(); - - if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) - { -- double change = 1.0 * abs((int)(cpi->intra_error - cpi->last_intra_error)) / (1 + cpi->last_intra_error); -- double change2 = 1.0 * abs((int)(cpi->prediction_error - cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); -+ double change = 1.0 * abs((int)(cpi->mb.intra_error - -+ cpi->last_intra_error)) / (1 + cpi->last_intra_error); -+ double change2 = 1.0 * abs((int)(cpi->mb.prediction_error - -+ cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); - double minerror = cm->MBs * 256; - --#if 0 -- -- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 -- && cpi->prediction_error > minerror -- && (change > .25 || change2 > .25)) -- { -- FILE *f = fopen("intra_inter.stt", "a"); -- -- if (cpi->prediction_error <= 0) -- cpi->prediction_error = 1; -- -- fprintf(f, "%d %d %d %d %14.4f\n", -- cm->current_video_frame, -- (int) cpi->prediction_error, -- (int) cpi->intra_error, -- (int)((10 * cpi->intra_error) / cpi->prediction_error), -- change); -- -- fclose(f); -- } -- --#endif -- -- cpi->last_intra_error = cpi->intra_error; -- cpi->last_prediction_error = cpi->prediction_error; -+ cpi->last_intra_error = cpi->mb.intra_error; -+ cpi->last_prediction_error = cpi->mb.prediction_error; - -- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 -- && cpi->prediction_error > minerror -+ if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15 -+ && cpi->mb.prediction_error > minerror - && (change > .25 || change2 > .25)) - { - /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/ -@@ -2900,7 +2878,7 @@ static int decide_key_frame(VP8_COMP *cpi) - - } - -- // If the following are true we might as well code a key frame -+ /* If the following are true we might as well code a key frame */ - if (((cpi->this_frame_percent_intra == 100) && - (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra + 2))) || - ((cpi->this_frame_percent_intra > 95) && -@@ -2908,9 +2886,12 @@ static int decide_key_frame(VP8_COMP *cpi) - { - code_key_frame = 1; - } -- // in addition if the following are true and this is not a golden frame then code a key frame -- // Note that on golden frames there often seems to be a pop in intra useage anyway hence this -- // restriction is designed to prevent spurious key frames. The Intra pop needs to be investigated. -+ /* in addition if the following are true and this is not a golden frame -+ * then code a key frame Note that on golden frames there often seems -+ * to be a pop in intra useage anyway hence this restriction is -+ * designed to prevent spurious key frames. The Intra pop needs to be -+ * investigated. 
-+ */ - else if (((cpi->this_frame_percent_intra > 60) && - (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra * 2))) || - ((cpi->this_frame_percent_intra > 75) && -@@ -2942,7 +2923,7 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, - void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) - { - -- // write the frame -+ /* write the frame */ - FILE *yframe; - int i; - char filename[255]; -@@ -2970,10 +2951,11 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) - fclose(yframe); - } - #endif --// return of 0 means drop frame -+/* return of 0 means drop frame */ - --// Function to test for conditions that indeicate we should loop --// back and recode a frame. -+/* Function to test for conditions that indeicate we should loop -+ * back and recode a frame. -+ */ - static int recode_loop_test( VP8_COMP *cpi, - int high_limit, int low_limit, - int q, int maxq, int minq ) -@@ -2981,32 +2963,33 @@ static int recode_loop_test( VP8_COMP *cpi, - int force_recode = 0; - VP8_COMMON *cm = &cpi->common; - -- // Is frame recode allowed at all -- // Yes if either recode mode 1 is selected or mode two is selcted -- // and the frame is a key frame. golden frame or alt_ref_frame -+ /* Is frame recode allowed at all -+ * Yes if either recode mode 1 is selected or mode two is selcted -+ * and the frame is a key frame. golden frame or alt_ref_frame -+ */ - if ( (cpi->sf.recode_loop == 1) || - ( (cpi->sf.recode_loop == 2) && - ( (cm->frame_type == KEY_FRAME) || - cm->refresh_golden_frame || - cm->refresh_alt_ref_frame ) ) ) - { -- // General over and under shoot tests -+ /* General over and under shoot tests */ - if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) || - ((cpi->projected_frame_size < low_limit) && (q > minq)) ) - { - force_recode = 1; - } -- // Special Constrained quality tests -+ /* Special Constrained quality tests */ - else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) - { -- // Undershoot and below auto cq level -+ /* Undershoot and below auto cq level */ - if ( (q > cpi->cq_target_quality) && - (cpi->projected_frame_size < - ((cpi->this_frame_target * 7) >> 3))) - { - force_recode = 1; - } -- // Severe undershoot and between auto and user cq level -+ /* Severe undershoot and between auto and user cq level */ - else if ( (q > cpi->oxcf.cq_level) && - (cpi->projected_frame_size < cpi->min_frame_bandwidth) && - (cpi->active_best_quality > cpi->oxcf.cq_level)) -@@ -3020,21 +3003,28 @@ static int recode_loop_test( VP8_COMP *cpi, - return force_recode; - } - --static void update_reference_frames(VP8_COMMON *cm) -+static void update_reference_frames(VP8_COMP *cpi) - { -+ VP8_COMMON *cm = &cpi->common; - YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb; - -- // At this point the new frame has been encoded. -- // If any buffer copy / swapping is signaled it should be done here. -+ /* At this point the new frame has been encoded. -+ * If any buffer copy / swapping is signaled it should be done here. 
-+ */ - - if (cm->frame_type == KEY_FRAME) - { -- yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG | VP8_ALT_FLAG ; -+ yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME | VP8_ALTR_FRAME ; - -- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; -+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - - cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; -+ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; -+#endif - } - else /* For non key frames */ - { -@@ -3042,9 +3032,13 @@ static void update_reference_frames(VP8_COMMON *cm) - { - assert(!cm->copy_buffer_to_arf); - -- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALT_FLAG; -- cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALTR_FRAME; -+ cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - cm->alt_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; -+#endif - } - else if (cm->copy_buffer_to_arf) - { -@@ -3054,18 +3048,28 @@ static void update_reference_frames(VP8_COMMON *cm) - { - if(cm->alt_fb_idx != cm->lst_fb_idx) - { -- yv12_fb[cm->lst_fb_idx].flags |= VP8_ALT_FLAG; -- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ yv12_fb[cm->lst_fb_idx].flags |= VP8_ALTR_FRAME; -+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - cm->alt_fb_idx = cm->lst_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[ALTREF_FRAME] = -+ cpi->current_ref_frames[LAST_FRAME]; -+#endif - } - } - else /* if (cm->copy_buffer_to_arf == 2) */ - { - if(cm->alt_fb_idx != cm->gld_fb_idx) - { -- yv12_fb[cm->gld_fb_idx].flags |= VP8_ALT_FLAG; -- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ yv12_fb[cm->gld_fb_idx].flags |= VP8_ALTR_FRAME; -+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - cm->alt_fb_idx = cm->gld_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[ALTREF_FRAME] = -+ cpi->current_ref_frames[GOLDEN_FRAME]; -+#endif - } - } - } -@@ -3074,9 +3078,13 @@ static void update_reference_frames(VP8_COMMON *cm) - { - assert(!cm->copy_buffer_to_gf); - -- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG; -- cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME; -+ cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; - cm->gld_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; -+#endif - } - else if (cm->copy_buffer_to_gf) - { -@@ -3086,18 +3094,28 @@ static void update_reference_frames(VP8_COMMON *cm) - { - if(cm->gld_fb_idx != cm->lst_fb_idx) - { -- yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FLAG; -- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -+ yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FRAME; -+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; - cm->gld_fb_idx = cm->lst_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = -+ cpi->current_ref_frames[LAST_FRAME]; -+#endif - } - } - else /* if (cm->copy_buffer_to_gf == 2) */ - { - if(cm->alt_fb_idx != cm->gld_fb_idx) - { -- yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FLAG; -- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -+ yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FRAME; -+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; - cm->gld_fb_idx = cm->alt_fb_idx; -+ -+#if 
CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = -+ cpi->current_ref_frames[ALTREF_FRAME]; -+#endif - } - } - } -@@ -3105,14 +3123,71 @@ static void update_reference_frames(VP8_COMMON *cm) - - if (cm->refresh_last_frame) - { -- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FLAG; -- cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FLAG; -+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FRAME; -+ cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FRAME; - cm->lst_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame; -+#endif - } -+ -+#if CONFIG_TEMPORAL_DENOISING -+ if (cpi->oxcf.noise_sensitivity) -+ { -+ /* we shouldn't have to keep multiple copies as we know in advance which -+ * buffer we should start - for now to get something up and running -+ * I've chosen to copy the buffers -+ */ -+ if (cm->frame_type == KEY_FRAME) -+ { -+ int i; -+ vp8_yv12_copy_frame( -+ cpi->Source, -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); -+ -+ vp8_yv12_extend_frame_borders( -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); -+ -+ for (i = 2; i < MAX_REF_FRAMES - 1; i++) -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME], -+ &cpi->denoiser.yv12_running_avg[i]); -+ } -+ else /* For non key frames */ -+ { -+ vp8_yv12_extend_frame_borders( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME]); -+ -+ if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf) -+ { -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], -+ &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]); -+ } -+ if (cm->refresh_golden_frame || cm->copy_buffer_to_gf) -+ { -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], -+ &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]); -+ } -+ if(cm->refresh_last_frame) -+ { -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); -+ } -+ } -+ -+ } -+#endif -+ - } - - void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) - { -+ const FRAME_TYPE frame_type = cm->frame_type; -+ - if (cm->no_lpf) - { - cm->filter_level = 0; -@@ -3130,6 +3205,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) - else - vp8cx_pick_filter_level(cpi->Source, cpi); - -+ if (cm->filter_level > 0) -+ { -+ vp8cx_set_alt_lf_level(cpi, cm->filter_level); -+ } -+ - vpx_usec_timer_mark(&timer); - cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); - } -@@ -3141,17 +3221,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) - - if (cm->filter_level > 0) - { -- vp8cx_set_alt_lf_level(cpi, cm->filter_level); -- vp8_loop_filter_frame(cm, &cpi->mb.e_mbd); -+ vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type); - } - - vp8_yv12_extend_frame_borders(cm->frame_to_show); --#if CONFIG_TEMPORAL_DENOISING -- if (cpi->oxcf.noise_sensitivity) -- { -- vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg); -- } --#endif -+ - } - - static void encode_frame_to_data_rate -@@ -3184,13 +3258,14 @@ static void encode_frame_to_data_rate - int undershoot_seen = 0; - #endif - -- int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100; -+ int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark * -+ cpi->oxcf.optimal_buffer_level / 100); - int drop_mark75 = drop_mark * 2 / 3; - int drop_mark50 = drop_mark / 4; - int drop_mark25 = drop_mark / 8; - - -- // Clear down mmx registers to allow floating point in what follows -+ /* Clear down mmx registers to allow floating point in what follows */ - 
vp8_clear_system_state(); - - #if CONFIG_MULTITHREAD -@@ -3202,108 +3277,125 @@ static void encode_frame_to_data_rate - } - #endif - -- // Test code for segmentation of gf/arf (0,0) -- //segmentation_test_function( cpi); -- - if(cpi->force_next_frame_intra) - { - cm->frame_type = KEY_FRAME; /* delayed intra frame */ - cpi->force_next_frame_intra = 0; - } - -- // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth -+ /* For an alt ref frame in 2 pass we skip the call to the second pass -+ * function that sets the target bandwidth -+ */ - #if !(CONFIG_REALTIME_ONLY) - - if (cpi->pass == 2) - { - if (cpi->common.refresh_alt_ref_frame) - { -- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame -- cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate -+ /* Per frame bit target for the alt ref frame */ -+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; -+ /* per second target bitrate */ -+ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * -+ cpi->output_frame_rate); - } - } - else - #endif - cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate); - -- // Default turn off buffer to buffer copying -+ /* Default turn off buffer to buffer copying */ - cm->copy_buffer_to_gf = 0; - cm->copy_buffer_to_arf = 0; - -- // Clear zbin over-quant value and mode boost values. -- cpi->zbin_over_quant = 0; -- cpi->zbin_mode_boost = 0; -+ /* Clear zbin over-quant value and mode boost values. */ -+ cpi->mb.zbin_over_quant = 0; -+ cpi->mb.zbin_mode_boost = 0; - -- // Enable or disable mode based tweaking of the zbin -- // For 2 Pass Only used where GF/ARF prediction quality -- // is above a threshold -- cpi->zbin_mode_boost_enabled = 1; -+ /* Enable or disable mode based tweaking of the zbin -+ * For 2 Pass Only used where GF/ARF prediction quality -+ * is above a threshold -+ */ -+ cpi->mb.zbin_mode_boost_enabled = 1; - if (cpi->pass == 2) - { - if ( cpi->gfu_boost <= 400 ) - { -- cpi->zbin_mode_boost_enabled = 0; -+ cpi->mb.zbin_mode_boost_enabled = 0; - } - } - -- // Current default encoder behaviour for the altref sign bias -+ /* Current default encoder behaviour for the altref sign bias */ - if (cpi->source_alt_ref_active) - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - else - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; - -- // Check to see if a key frame is signalled -- // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass. -+ /* Check to see if a key frame is signalled -+ * For two pass with auto key frame enabled cm->frame_type may already -+ * be set, but not for one pass. -+ */ - if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) - { -- // Key frame from VFW/auto-keyframe/first frame -+ /* Key frame from VFW/auto-keyframe/first frame */ - cm->frame_type = KEY_FRAME; - } - -- // Set default state for segment and mode based loop filter update flags -- cpi->mb.e_mbd.update_mb_segmentation_map = 0; -- cpi->mb.e_mbd.update_mb_segmentation_data = 0; -- cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; -+#if CONFIG_MULTI_RES_ENCODING -+ /* In multi-resolution encoding, frame_type is decided by lowest-resolution -+ * encoder. Same frame_type is adopted while encoding at other resolution. 
-+ */ -+ if (cpi->oxcf.mr_encoder_id) -+ { -+ LOWER_RES_FRAME_INFO* low_res_frame_info -+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ -+ cm->frame_type = low_res_frame_info->frame_type; - -- // Set various flags etc to special state if it is a key frame -+ if(cm->frame_type != KEY_FRAME) -+ { -+ cpi->mr_low_res_mv_avail = 1; -+ cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped); -+ -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) -+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME] -+ == low_res_frame_info->low_res_ref_frames[LAST_FRAME]); -+ -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) -+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME] -+ == low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]); -+ -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) -+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME] -+ == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); -+ } -+ } -+#endif -+ -+ /* Set various flags etc to special state if it is a key frame */ - if (cm->frame_type == KEY_FRAME) - { - int i; - -- // Reset the loop filter deltas and segmentation map -+ // Set the loop filter deltas and segmentation map update - setup_features(cpi); - -- // If segmentation is enabled force a map update for key frames -- if (cpi->mb.e_mbd.segmentation_enabled) -- { -- cpi->mb.e_mbd.update_mb_segmentation_map = 1; -- cpi->mb.e_mbd.update_mb_segmentation_data = 1; -- } -- -- // The alternate reference frame cannot be active for a key frame -+ /* The alternate reference frame cannot be active for a key frame */ - cpi->source_alt_ref_active = 0; - -- // Reset the RD threshold multipliers to default of * 1 (128) -+ /* Reset the RD threshold multipliers to default of * 1 (128) */ - for (i = 0; i < MAX_MODES; i++) - { -- cpi->rd_thresh_mult[i] = 128; -+ cpi->mb.rd_thresh_mult[i] = 128; - } - } - -- // Test code for segmentation -- //if ( (cm->frame_type == KEY_FRAME) || ((cm->current_video_frame % 2) == 0)) -- //if ( (cm->current_video_frame % 2) == 0 ) -- // enable_segmentation(cpi); -- //else -- // disable_segmentation(cpi); -- - #if 0 -- // Experimental code for lagged compress and one pass -- // Initialise one_pass GF frames stats -- // Update stats used for GF selection -- //if ( cpi->pass == 0 ) -+ /* Experimental code for lagged compress and one pass -+ * Initialise one_pass GF frames stats -+ * Update stats used for GF selection -+ */ - { - cpi->one_pass_frame_index = cm->current_video_frame % MAX_LAG_BUFFERS; - -@@ -3323,8 +3415,9 @@ static void encode_frame_to_data_rate - - if (cpi->drop_frames_allowed) - { -- // The reset to decimation 0 is only done here for one pass. -- // Once it is set two pass leaves decimation on till the next kf. -+ /* The reset to decimation 0 is only done here for one pass. -+ * Once it is set two pass leaves decimation on till the next kf. -+ */ - if ((cpi->buffer_level > drop_mark) && (cpi->decimation_factor > 0)) - cpi->decimation_factor --; - -@@ -3343,14 +3436,17 @@ static void encode_frame_to_data_rate - { - cpi->decimation_factor = 1; - } -- //vpx_log("Encoder: Decimation Factor: %d \n",cpi->decimation_factor); - } - -- // The following decimates the frame rate according to a regular pattern (i.e. to 1/2 or 2/3 frame rate) -- // This can be used to help prevent buffer under-run in CBR mode. Alternatively it might be desirable in -- // some situations to drop frame rate but throw more bits at each frame. 
-- // -- // Note that dropping a key frame can be problematic if spatial resampling is also active -+ /* The following decimates the frame rate according to a regular -+ * pattern (i.e. to 1/2 or 2/3 frame rate) This can be used to help -+ * prevent buffer under-run in CBR mode. Alternatively it might be -+ * desirable in some situations to drop frame rate but throw more bits -+ * at each frame. -+ * -+ * Note that dropping a key frame can be problematic if spatial -+ * resampling is also active -+ */ - if (cpi->decimation_factor > 0) - { - switch (cpi->decimation_factor) -@@ -3366,8 +3462,10 @@ static void encode_frame_to_data_rate - break; - } - -- // Note that we should not throw out a key frame (especially when spatial resampling is enabled). -- if ((cm->frame_type == KEY_FRAME)) // && cpi->oxcf.allow_spatial_resampling ) -+ /* Note that we should not throw out a key frame (especially when -+ * spatial resampling is enabled). -+ */ -+ if ((cm->frame_type == KEY_FRAME)) - { - cpi->decimation_count = cpi->decimation_factor; - } -@@ -3379,6 +3477,10 @@ static void encode_frame_to_data_rate - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; - -+#if CONFIG_MULTI_RES_ENCODING -+ vp8_store_drop_frame_info(cpi); -+#endif -+ - cm->current_video_frame++; - cpi->frames_since_key++; - -@@ -3392,7 +3494,9 @@ static void encode_frame_to_data_rate - { - unsigned int i; - -- // Propagate bits saved by dropping the frame to higher layers -+ /* Propagate bits saved by dropping the frame to higher -+ * layers -+ */ - for (i=cpi->current_layer+1; ioxcf.number_of_layers; i++) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; -@@ -3408,24 +3512,32 @@ static void encode_frame_to_data_rate - else - cpi->decimation_count = cpi->decimation_factor; - } -+ else -+ cpi->decimation_count = 0; - -- // Decide how big to make the frame -+ /* Decide how big to make the frame */ - if (!vp8_pick_frame_size(cpi)) - { -+ /*TODO: 2 drop_frame and return code could be put together. */ -+#if CONFIG_MULTI_RES_ENCODING -+ vp8_store_drop_frame_info(cpi); -+#endif - cm->current_video_frame++; - cpi->frames_since_key++; - return; - } - -- // Reduce active_worst_allowed_q for CBR if our buffer is getting too full. -- // This has a knock on effect on active best quality as well. -- // For CBR if the buffer reaches its maximum level then we can no longer -- // save up bits for later frames so we might as well use them up -- // on the current frame. -+ /* Reduce active_worst_allowed_q for CBR if our buffer is getting too full. -+ * This has a knock on effect on active best quality as well. -+ * For CBR if the buffer reaches its maximum level then we can no longer -+ * save up bits for later frames so we might as well use them up -+ * on the current frame. 
-+ */ - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && - (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) - { -- int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4 -+ /* Max adjustment is 1/4 */ -+ int Adjustment = cpi->active_worst_quality / 4; - - if (Adjustment) - { -@@ -3433,10 +3545,16 @@ static void encode_frame_to_data_rate - - if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size) - { -- buff_lvl_step = (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment; -+ buff_lvl_step = (int) -+ ((cpi->oxcf.maximum_buffer_size - -+ cpi->oxcf.optimal_buffer_level) / -+ Adjustment); - - if (buff_lvl_step) -- Adjustment = (cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step; -+ Adjustment = (int) -+ ((cpi->buffer_level - -+ cpi->oxcf.optimal_buffer_level) / -+ buff_lvl_step); - else - Adjustment = 0; - } -@@ -3448,8 +3566,9 @@ static void encode_frame_to_data_rate - } - } - -- // Set an active best quality and if necessary active worst quality -- // There is some odd behavior for one pass here that needs attention. -+ /* Set an active best quality and if necessary active worst quality -+ * There is some odd behavior for one pass here that needs attention. -+ */ - if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) - { - vp8_clear_system_state(); -@@ -3465,9 +3584,10 @@ static void encode_frame_to_data_rate - else - cpi->active_best_quality = kf_high_motion_minq[Q]; - -- // Special case for key frames forced because we have reached -- // the maximum key frame interval. Here force the Q to a range -- // based on the ambient Q to reduce the risk of popping -+ /* Special case for key frames forced because we have reached -+ * the maximum key frame interval. Here force the Q to a range -+ * based on the ambient Q to reduce the risk of popping -+ */ - if ( cpi->this_key_frame_forced ) - { - if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8) -@@ -3476,7 +3596,7 @@ static void encode_frame_to_data_rate - cpi->active_best_quality = cpi->avg_frame_qindex >> 2; - } - } -- // One pass more conservative -+ /* One pass more conservative */ - else - cpi->active_best_quality = kf_high_motion_minq[Q]; - } -@@ -3484,16 +3604,17 @@ static void encode_frame_to_data_rate - else if (cpi->oxcf.number_of_layers==1 && - (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)) - { -- // Use the lower of cpi->active_worst_quality and recent -- // average Q as basis for GF/ARF Q limit unless last frame was -- // a key frame. -+ /* Use the lower of cpi->active_worst_quality and recent -+ * average Q as basis for GF/ARF Q limit unless last frame was -+ * a key frame. -+ */ - if ( (cpi->frames_since_key > 1) && - (cpi->avg_frame_qindex < cpi->active_worst_quality) ) - { - Q = cpi->avg_frame_qindex; - } - -- // For constrained quality dont allow Q less than the cq level -+ /* For constrained quality dont allow Q less than the cq level */ - if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality) ) - { -@@ -3509,14 +3630,14 @@ static void encode_frame_to_data_rate - else - cpi->active_best_quality = gf_mid_motion_minq[Q]; - -- // Constrained quality use slightly lower active best. -+ /* Constrained quality use slightly lower active best. 
*/ - if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) - { - cpi->active_best_quality = - cpi->active_best_quality * 15/16; - } - } -- // One pass more conservative -+ /* One pass more conservative */ - else - cpi->active_best_quality = gf_high_motion_minq[Q]; - } -@@ -3524,14 +3645,16 @@ static void encode_frame_to_data_rate - { - cpi->active_best_quality = inter_minq[Q]; - -- // For the constant/constrained quality mode we dont want -- // q to fall below the cq level. -+ /* For the constant/constrained quality mode we dont want -+ * q to fall below the cq level. -+ */ - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (cpi->active_best_quality < cpi->cq_target_quality) ) - { -- // If we are strongly undershooting the target rate in the last -- // frames then use the user passed in cq value not the auto -- // cq value. -+ /* If we are strongly undershooting the target rate in the last -+ * frames then use the user passed in cq value not the auto -+ * cq value. -+ */ - if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth ) - cpi->active_best_quality = cpi->oxcf.cq_level; - else -@@ -3539,26 +3662,33 @@ static void encode_frame_to_data_rate - } - } - -- // If CBR and the buffer is as full then it is reasonable to allow -- // higher quality on the frames to prevent bits just going to waste. -+ /* If CBR and the buffer is as full then it is reasonable to allow -+ * higher quality on the frames to prevent bits just going to waste. -+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- // Note that the use of >= here elliminates the risk of a devide -- // by 0 error in the else if clause -+ /* Note that the use of >= here elliminates the risk of a devide -+ * by 0 error in the else if clause -+ */ - if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size) - cpi->active_best_quality = cpi->best_quality; - - else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level) - { -- int Fraction = ((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) / (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level); -- int min_qadjustment = ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128; -+ int Fraction = (int) -+ (((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) -+ / (cpi->oxcf.maximum_buffer_size - -+ cpi->oxcf.optimal_buffer_level)); -+ int min_qadjustment = ((cpi->active_best_quality - -+ cpi->best_quality) * Fraction) / 128; - - cpi->active_best_quality -= min_qadjustment; - } - } - } -- // Make sure constrained quality mode limits are adhered to for the first -- // few frames of one pass encodes -+ /* Make sure constrained quality mode limits are adhered to for the first -+ * few frames of one pass encodes -+ */ - else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) - { - if ( (cm->frame_type == KEY_FRAME) || -@@ -3572,7 +3702,7 @@ static void encode_frame_to_data_rate - } - } - -- // Clip the active best and worst quality values to limits -+ /* Clip the active best and worst quality values to limits */ - if (cpi->active_worst_quality > cpi->worst_quality) - cpi->active_worst_quality = cpi->worst_quality; - -@@ -3582,14 +3712,14 @@ static void encode_frame_to_data_rate - if ( cpi->active_worst_quality < cpi->active_best_quality ) - cpi->active_worst_quality = cpi->active_best_quality; - -- // Determine initial Q to try -+ /* Determine initial Q to try */ - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - - #if !(CONFIG_REALTIME_ONLY) - -- // Set highest allowed value for Zbin over quant -+ /* Set highest allowed value 
for Zbin over quant */ - if (cm->frame_type == KEY_FRAME) -- zbin_oq_high = 0; //ZBIN_OQ_MAX/16 -+ zbin_oq_high = 0; - else if ((cpi->oxcf.number_of_layers == 1) && ((cm->refresh_alt_ref_frame || - (cm->refresh_golden_frame && !cpi->source_alt_ref_active)))) - { -@@ -3599,15 +3729,21 @@ static void encode_frame_to_data_rate - zbin_oq_high = ZBIN_OQ_MAX; - #endif - -- // Setup background Q adjustment for error resilient mode. -- // For multi-layer encodes only enable this for the base layer. -- if (cpi->cyclic_refresh_mode_enabled && (cpi->current_layer==0)) -+ /* Setup background Q adjustment for error resilient mode. -+ * For multi-layer encodes only enable this for the base layer. -+ */ -+ if (cpi->cyclic_refresh_mode_enabled) -+ { -+ if (cpi->current_layer==0) - cyclic_background_refresh(cpi, Q, 0); -+ else -+ disable_segmentation(cpi); -+ } - - vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - - #if !(CONFIG_REALTIME_ONLY) -- // Limit Q range for the adaptive loop. -+ /* Limit Q range for the adaptive loop. */ - bottom_index = cpi->active_best_quality; - top_index = cpi->active_worst_quality; - q_low = cpi->active_best_quality; -@@ -3652,11 +3788,11 @@ static void encode_frame_to_data_rate - - if (cm->frame_type == KEY_FRAME) - { -- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); -+ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); - } - else - { -- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); -+ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); - - src = cpi->Source->y_buffer; - -@@ -3675,16 +3811,11 @@ static void encode_frame_to_data_rate - - do - { -- vp8_clear_system_state(); //__asm emms; -- -- /* -- if(cpi->is_src_frame_alt_ref) -- Q = 127; -- */ -+ vp8_clear_system_state(); - - vp8_set_quantizer(cpi, Q); - -- // setup skip prob for costing in mode/mv decision -+ /* setup skip prob for costing in mode/mv decision */ - if (cpi->common.mb_no_coeff_skip) - { - cpi->prob_skip_false = cpi->base_skip_false_prob[Q]; -@@ -3728,7 +3859,9 @@ static void encode_frame_to_data_rate - */ - } - -- //as this is for cost estimate, let's make sure it does not go extreme eitehr way -+ /* as this is for cost estimate, let's make sure it does not -+ * go extreme eitehr way -+ */ - if (cpi->prob_skip_false < 5) - cpi->prob_skip_false = 5; - -@@ -3754,7 +3887,22 @@ static void encode_frame_to_data_rate - - if (cm->frame_type == KEY_FRAME) - { -- resize_key_frame(cpi); -+ if(resize_key_frame(cpi)) -+ { -+ /* If the frame size has changed, need to reset Q, quantizer, -+ * and background refresh. 
-+ */ -+ Q = vp8_regulate_q(cpi, cpi->this_frame_target); -+ if (cpi->cyclic_refresh_mode_enabled) -+ { -+ if (cpi->current_layer==0) -+ cyclic_background_refresh(cpi, Q, 0); -+ else -+ disable_segmentation(cpi); -+ } -+ vp8_set_quantizer(cpi, Q); -+ } -+ - vp8_setup_key_frame(cpi); - } - -@@ -3773,7 +3921,7 @@ static void encode_frame_to_data_rate - - if (cm->refresh_entropy_probs == 0) - { -- // save a copy for later refresh -+ /* save a copy for later refresh */ - vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); - } - -@@ -3781,61 +3929,52 @@ static void encode_frame_to_data_rate - - vp8_update_coef_probs(cpi); - -- // transform / motion compensation build reconstruction frame -- // +pack coef partitions -+ /* transform / motion compensation build reconstruction frame -+ * +pack coef partitions -+ */ - vp8_encode_frame(cpi); - - /* cpi->projected_frame_size is not needed for RT mode */ - } - #else -- // transform / motion compensation build reconstruction frame -+ /* transform / motion compensation build reconstruction frame */ - vp8_encode_frame(cpi); - - cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); - cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; - #endif -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - -- // Test to see if the stats generated for this frame indicate that we should have coded a key frame -- // (assuming that we didn't)! -- if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) -- { -- int key_frame_decision = decide_key_frame(cpi); -+ /* Test to see if the stats generated for this frame indicate that -+ * we should have coded a key frame (assuming that we didn't)! -+ */ - -- if (cpi->compressor_speed == 2) -- { -- /* we don't do re-encoding in realtime mode -- * if key frame is decided then we force it on next frame */ -- cpi->force_next_frame_intra = key_frame_decision; -- } -+ if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME -+ && cpi->compressor_speed != 2) -+ { - #if !(CONFIG_REALTIME_ONLY) -- else if (key_frame_decision) -+ if (decide_key_frame(cpi)) - { -- // Reset all our sizing numbers and recode -+ /* Reset all our sizing numbers and recode */ - cm->frame_type = KEY_FRAME; - - vp8_pick_frame_size(cpi); - -- // Clear the Alt reference frame active flag when we have a key frame -+ /* Clear the Alt reference frame active flag when we have -+ * a key frame -+ */ - cpi->source_alt_ref_active = 0; - -- // Reset the loop filter deltas and segmentation map -+ // Set the loop filter deltas and segmentation map update - setup_features(cpi); - -- // If segmentation is enabled force a map update for key frames -- if (cpi->mb.e_mbd.segmentation_enabled) -- { -- cpi->mb.e_mbd.update_mb_segmentation_map = 1; -- cpi->mb.e_mbd.update_mb_segmentation_data = 1; -- } -- - vp8_restore_coding_context(cpi); - - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - - vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - -- // Limit Q range for the adaptive loop. -+ /* Limit Q range for the adaptive loop. */ - bottom_index = cpi->active_best_quality; - top_index = cpi->active_worst_quality; - q_low = cpi->active_best_quality; -@@ -3854,7 +3993,7 @@ static void encode_frame_to_data_rate - if (frame_over_shoot_limit == 0) - frame_over_shoot_limit = 1; - -- // Are we are overshooting and up against the limit of active max Q. -+ /* Are we are overshooting and up against the limit of active max Q. 
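Context for the hunk that follows: when the encoder overshoots while already sitting at the active max Q, it relaxes that ceiling step by step on the assumption that one Q step buys roughly 4% of frame size. A minimal standalone sketch of that heuristic (illustrative names, not part of the patch):

    /* Walk the active worst quality upward while the projected frame size
     * still overshoots; each step is assumed to be worth ~4% of frame size. */
    static int relax_active_worst(int active_worst, int worst_limit,
                                  int projected_size, int over_shoot_limit)
    {
        int over_size_percent =
            ((projected_size - over_shoot_limit) * 100) / over_shoot_limit;

        while ((active_worst < worst_limit) && (over_size_percent > 0))
        {
            active_worst++;
            over_size_percent = (int)(over_size_percent * 0.96);
        }
        return active_worst;
    }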
*/ - if (((cpi->pass != 2) || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) && - (Q == cpi->active_worst_quality) && - (cpi->active_worst_quality < cpi->worst_quality) && -@@ -3862,50 +4001,52 @@ static void encode_frame_to_data_rate - { - int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; - -- // If so is there any scope for relaxing it -+ /* If so is there any scope for relaxing it */ - while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) - { - cpi->active_worst_quality++; -- -- over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size. -+ /* Assume 1 qstep = about 4% on frame size. */ -+ over_size_percent = (int)(over_size_percent * 0.96); - } - #if !(CONFIG_REALTIME_ONLY) - top_index = cpi->active_worst_quality; - #endif -- // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop. -+ /* If we have updated the active max Q do not call -+ * vp8_update_rate_correction_factors() this loop. -+ */ - active_worst_qchanged = 1; - } - else - active_worst_qchanged = 0; - - #if !(CONFIG_REALTIME_ONLY) -- // Special case handling for forced key frames -+ /* Special case handling for forced key frames */ - if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) - { - int last_q = Q; - int kf_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx]); - -- // The key frame is not good enough -+ /* The key frame is not good enough */ - if ( kf_err > ((cpi->ambient_err * 7) >> 3) ) - { -- // Lower q_high -+ /* Lower q_high */ - q_high = (Q > q_low) ? (Q - 1) : q_low; - -- // Adjust Q -+ /* Adjust Q */ - Q = (q_high + q_low) >> 1; - } -- // The key frame is much better than the previous frame -+ /* The key frame is much better than the previous frame */ - else if ( kf_err < (cpi->ambient_err >> 1) ) - { -- // Raise q_low -+ /* Raise q_low */ - q_low = (Q < q_high) ? (Q + 1) : q_high; - -- // Adjust Q -+ /* Adjust Q */ - Q = (q_high + q_low + 1) >> 1; - } - -- // Clamp Q to upper and lower limits: -+ /* Clamp Q to upper and lower limits: */ - if (Q > q_high) - Q = q_high; - else if (Q < q_low) -@@ -3914,7 +4055,9 @@ static void encode_frame_to_data_rate - Loop = Q != last_q; - } - -- // Is the projected frame size out of range and are we allowed to attempt to recode. -+ /* Is the projected frame size out of range and are we allowed -+ * to attempt to recode. -+ */ - else if ( recode_loop_test( cpi, - frame_over_shoot_limit, frame_under_shoot_limit, - Q, top_index, bottom_index ) ) -@@ -3922,45 +4065,57 @@ static void encode_frame_to_data_rate - int last_q = Q; - int Retries = 0; - -- // Frame size out of permitted range: -- // Update correction factor & compute new Q to try... -+ /* Frame size out of permitted range. Update correction factor -+ * & compute new Q to try... -+ */ - -- // Frame is too large -+ /* Frame is too large */ - if (cpi->projected_frame_size > cpi->this_frame_target) - { -- //if ( cpi->zbin_over_quant == 0 ) -- q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value -+ /* Raise Qlow as to at least the current value */ -+ q_low = (Q < q_high) ? (Q + 1) : q_high; - -- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low -- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? 
(cpi->zbin_over_quant + 1) : zbin_oq_high; -+ /* If we are using over quant do the same for zbin_oq_low */ -+ if (cpi->mb.zbin_over_quant > 0) -+ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? -+ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; - -- //if ( undershoot_seen || (Q == MAXQ) ) - if (undershoot_seen) - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 1); - - Q = (q_high + q_low + 1) / 2; - -- // Adjust cpi->zbin_over_quant (only allowed when Q is max) -+ /* Adjust cpi->zbin_over_quant (only allowed when Q -+ * is max) -+ */ - if (Q < MAXQ) -- cpi->zbin_over_quant = 0; -+ cpi->mb.zbin_over_quant = 0; - else - { -- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; -- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; -+ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? -+ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; -+ cpi->mb.zbin_over_quant = -+ (zbin_oq_high + zbin_oq_low) / 2; - } - } - else - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 0); - - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - -- while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) -+ while (((Q < q_low) || -+ (cpi->mb.zbin_over_quant < zbin_oq_low)) && -+ (Retries < 10)) - { - vp8_update_rate_correction_factors(cpi, 0); - Q = vp8_regulate_q(cpi, cpi->this_frame_target); -@@ -3970,47 +4125,60 @@ static void encode_frame_to_data_rate - - overshoot_seen = 1; - } -- // Frame is too small -+ /* Frame is too small */ - else - { -- if (cpi->zbin_over_quant == 0) -- q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant -- else // else lower zbin_oq_high -- zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low; -+ if (cpi->mb.zbin_over_quant == 0) -+ /* Lower q_high if not using over quant */ -+ q_high = (Q > q_low) ? (Q - 1) : q_low; -+ else -+ /* else lower zbin_oq_high */ -+ zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ? -+ (cpi->mb.zbin_over_quant - 1) : zbin_oq_low; - - if (overshoot_seen) - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 1); - - Q = (q_high + q_low) / 2; - -- // Adjust cpi->zbin_over_quant (only allowed when Q is max) -+ /* Adjust cpi->zbin_over_quant (only allowed when Q -+ * is max) -+ */ - if (Q < MAXQ) -- cpi->zbin_over_quant = 0; -+ cpi->mb.zbin_over_quant = 0; - else -- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; -+ cpi->mb.zbin_over_quant = -+ (zbin_oq_high + zbin_oq_low) / 2; - } - else - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 0); - - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - -- // Special case reset for qlow for constrained quality. 
-- // This should only trigger where there is very substantial -- // undershoot on a frame and the auto cq level is above -- // the user passsed in value. -+ /* Special case reset for qlow for constrained quality. -+ * This should only trigger where there is very substantial -+ * undershoot on a frame and the auto cq level is above -+ * the user passsed in value. -+ */ - if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < q_low) ) - { - q_low = Q; - } - -- while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) -+ while (((Q > q_high) || -+ (cpi->mb.zbin_over_quant > zbin_oq_high)) && -+ (Retries < 10)) - { - vp8_update_rate_correction_factors(cpi, 0); - Q = vp8_regulate_q(cpi, cpi->this_frame_target); -@@ -4021,14 +4189,16 @@ static void encode_frame_to_data_rate - undershoot_seen = 1; - } - -- // Clamp Q to upper and lower limits: -+ /* Clamp Q to upper and lower limits: */ - if (Q > q_high) - Q = q_high; - else if (Q < q_low) - Q = q_low; - -- // Clamp cpi->zbin_over_quant -- cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->zbin_over_quant; -+ /* Clamp cpi->zbin_over_quant */ -+ cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ? -+ zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ? -+ zbin_oq_high : cpi->mb.zbin_over_quant; - - Loop = Q != last_q; - } -@@ -4051,30 +4221,20 @@ static void encode_frame_to_data_rate - while (Loop == 1); - - #if 0 -- // Experimental code for lagged and one pass -- // Update stats used for one pass GF selection -- { -- /* -- int frames_so_far; -- double frame_intra_error; -- double frame_coded_error; -- double frame_pcnt_inter; -- double frame_pcnt_motion; -- double frame_mvr; -- double frame_mvr_abs; -- double frame_mvc; -- double frame_mvc_abs; -- */ -- -+ /* Experimental code for lagged and one pass -+ * Update stats used for one pass GF selection -+ */ -+ { - cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_coded_error = (double)cpi->prediction_error; - cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_intra_error = (double)cpi->intra_error; - cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_pcnt_inter = (double)(100 - cpi->this_frame_percent_intra) / 100.0; - } - #endif - -- // Special case code to reduce pulsing when key frames are forced at a -- // fixed interval. Note the reconstruction error if it is the frame before -- // the force key frame -+ /* Special case code to reduce pulsing when key frames are forced at a -+ * fixed interval. Note the reconstruction error if it is the frame before -+ * the force key frame -+ */ - if ( cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0) ) - { - cpi->ambient_err = vp8_calc_ss_err(cpi->Source, -@@ -4113,13 +4273,38 @@ static void encode_frame_to_data_rate - } - } - -+ /* Count last ref frame 0,0 usage on current encoded frame. */ -+ { -+ int mb_row; -+ int mb_col; -+ /* Point to beginning of MODE_INFO arrays. */ -+ MODE_INFO *tmp = cm->mi; -+ -+ cpi->zeromv_count = 0; -+ -+ if(cm->frame_type != KEY_FRAME) -+ { -+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) -+ { -+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col ++) -+ { -+ if(tmp->mbmi.mode == ZEROMV) -+ cpi->zeromv_count++; -+ tmp++; -+ } -+ tmp++; -+ } -+ } -+ } -+ - #if CONFIG_MULTI_RES_ENCODING - vp8_cal_dissimilarity(cpi); - #endif - -- // Update the GF useage maps. -- // This is done after completing the compression of a frame when all -- // modes etc. 
are finalized but before loop filter -+ /* Update the GF useage maps. -+ * This is done after completing the compression of a frame when all -+ * modes etc. are finalized but before loop filter -+ */ - if (cpi->oxcf.number_of_layers == 1) - vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); - -@@ -4134,9 +4319,10 @@ static void encode_frame_to_data_rate - } - #endif - -- // For inter frames the current default behavior is that when -- // cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer -- // This is purely an encoder decision at present. -+ /* For inter frames the current default behavior is that when -+ * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer -+ * This is purely an encoder decision at present. -+ */ - if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame) - cm->copy_buffer_to_arf = 2; - else -@@ -4147,7 +4333,8 @@ static void encode_frame_to_data_rate - #if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) - { -- sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ -+ /* start loopfilter in separate thread */ -+ sem_post(&cpi->h_event_start_lpf); - cpi->b_lpf_running = 1; - } - else -@@ -4156,7 +4343,7 @@ static void encode_frame_to_data_rate - vp8_loopfilter_frame(cpi, cm); - } - -- update_reference_frames(cm); -+ update_reference_frames(cpi); - - #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) - if (cpi->oxcf.error_resilient_mode) -@@ -4171,7 +4358,7 @@ static void encode_frame_to_data_rate - sem_wait(&cpi->h_event_end_lpf); - #endif - -- // build the bitstream -+ /* build the bitstream */ - vp8_pack_bitstream(cpi, dest, dest_end, size); - - #if CONFIG_MULTITHREAD -@@ -4187,7 +4374,7 @@ static void encode_frame_to_data_rate - * needed in motion search besides loopfilter */ - cm->last_frame_type = cm->frame_type; - -- // Update rate control heuristics -+ /* Update rate control heuristics */ - cpi->total_byte_count += (*size); - cpi->projected_frame_size = (*size) << 3; - -@@ -4208,18 +4395,21 @@ static void encode_frame_to_data_rate - vp8_adjust_key_frame_context(cpi); - } - -- // Keep a record of ambient average Q. -+ /* Keep a record of ambient average Q. */ - if (cm->frame_type != KEY_FRAME) - cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; - -- // Keep a record from which we can calculate the average Q excluding GF updates and key frames -+ /* Keep a record from which we can calculate the average Q excluding -+ * GF updates and key frames -+ */ - if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || - (!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame))) - { - cpi->ni_frames++; - -- // Calculate the average Q for normal inter frames (not key or GFU -- // frames). -+ /* Calculate the average Q for normal inter frames (not key or GFU -+ * frames). -+ */ - if ( cpi->pass == 2 ) - { - cpi->ni_tot_qi += Q; -@@ -4227,81 +4417,62 @@ static void encode_frame_to_data_rate - } - else - { -- // Damp value for first few frames -+ /* Damp value for first few frames */ - if (cpi->ni_frames > 150 ) - { - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); - } -- // For one pass, early in the clip ... average the current frame Q -- // value with the worstq entered by the user as a dampening measure -+ /* For one pass, early in the clip ... 
average the current frame Q -+ * value with the worstq entered by the user as a dampening measure -+ */ - else - { - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; - } - -- // If the average Q is higher than what was used in the last frame -- // (after going through the recode loop to keep the frame size within range) -- // then use the last frame value - 1. -- // The -1 is designed to stop Q and hence the data rate, from progressively -- // falling away during difficult sections, but at the same time reduce the number of -- // itterations around the recode loop. -+ /* If the average Q is higher than what was used in the last -+ * frame (after going through the recode loop to keep the frame -+ * size within range) then use the last frame value - 1. The -1 -+ * is designed to stop Q and hence the data rate, from -+ * progressively falling away during difficult sections, but at -+ * the same time reduce the number of itterations around the -+ * recode loop. -+ */ - if (Q > cpi->ni_av_qi) - cpi->ni_av_qi = Q - 1; - } - } - --#if 0 -- -- // If the frame was massively oversize and we are below optimal buffer level drop next frame -- if ((cpi->drop_frames_allowed) && -- (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && -- (cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && -- (cpi->projected_frame_size > (4 * cpi->this_frame_target))) -- { -- cpi->drop_frame = 1; -- } -- --#endif -- -- // Set the count for maximum consecutive dropped frames based upon the ratio of -- // this frame size to the target average per frame bandwidth. -- // (cpi->av_per_frame_bandwidth > 0) is just a sanity check to prevent / 0. -- if (cpi->drop_frames_allowed && (cpi->av_per_frame_bandwidth > 0)) -- { -- cpi->max_drop_count = cpi->projected_frame_size / cpi->av_per_frame_bandwidth; -- -- if (cpi->max_drop_count > cpi->max_consec_dropped_frames) -- cpi->max_drop_count = cpi->max_consec_dropped_frames; -- } -- -- // Update the buffer level variable. -- // Non-viewable frames are a special case and are treated as pure overhead. -+ /* Update the buffer level variable. */ -+ /* Non-viewable frames are a special case and are treated as pure overhead. */ - if ( !cm->show_frame ) - cpi->bits_off_target -= cpi->projected_frame_size; - else - cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; - -- // Clip the buffer level to the maximum specified buffer size -+ /* Clip the buffer level to the maximum specified buffer size */ - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; - -- // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. -+ /* Rolling monitors of whether we are over or underspending used to -+ * help regulate min and Max Q in two pass. 
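The rolling monitors referenced here are plain weighted moving averages, 3/4 for the short window and 31/32 for the long one, with rounding. A self-contained sketch with illustrative names:

    /* Short- and long-window rolling averages of per-frame bits, used to
     * judge sustained over- or under-spend. */
    static void update_rolling_monitors(int *rolling, int *long_rolling,
                                        int this_frame_bits)
    {
        *rolling      = ((*rolling * 3)       + this_frame_bits + 2)  / 4;
        *long_rolling = ((*long_rolling * 31) + this_frame_bits + 16) / 32;
    }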
-+ */ - cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; - cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4; - cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32; - cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; - -- // Actual bits spent -+ /* Actual bits spent */ - cpi->total_actual_bits += cpi->projected_frame_size; - -- // Debug stats -+ /* Debug stats */ - cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); - - cpi->buffer_level = cpi->bits_off_target; - -- // Propagate values to higher temporal layers -+ /* Propagate values to higher temporal layers */ - if (cpi->oxcf.number_of_layers > 1) - { - unsigned int i; -@@ -4309,12 +4480,13 @@ static void encode_frame_to_data_rate - for (i=cpi->current_layer+1; ioxcf.number_of_layers; i++) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; -- int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate -- - cpi->projected_frame_size; -+ int bits_off_for_this_layer = -+ (int)(lc->target_bandwidth / lc->frame_rate - -+ cpi->projected_frame_size); - - lc->bits_off_target += bits_off_for_this_layer; - -- // Clip buffer level to maximum buffer size for the layer -+ /* Clip buffer level to maximum buffer size for the layer */ - if (lc->bits_off_target > lc->maximum_buffer_size) - lc->bits_off_target = lc->maximum_buffer_size; - -@@ -4324,7 +4496,9 @@ static void encode_frame_to_data_rate - } - } - -- // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames -+ /* Update bits left to the kf and gf groups to account for overshoot -+ * or undershoot on these frames -+ */ - if (cm->frame_type == KEY_FRAME) - { - cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; -@@ -4357,7 +4531,7 @@ static void encode_frame_to_data_rate - cpi->last_skip_false_probs[0] = cpi->prob_skip_false; - cpi->last_skip_probs_q[0] = cm->base_qindex; - -- //update the baseline -+ /* update the baseline */ - cpi->base_skip_false_prob[cm->base_qindex] = cpi->prob_skip_false; - - } -@@ -4367,7 +4541,7 @@ static void encode_frame_to_data_rate - { - FILE *f = fopen("tmp.stt", "a"); - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" -@@ -4383,7 +4557,6 @@ static void encode_frame_to_data_rate - cpi->active_best_quality, cpi->active_worst_quality, - cpi->ni_av_qi, cpi->cq_target_quality, - cpi->zbin_over_quant, -- //cpi->avg_frame_qindex, cpi->zbin_over_quant, - cm->refresh_golden_frame, cm->refresh_alt_ref_frame, - cm->frame_type, cpi->gfu_boost, - cpi->twopass.est_max_qcorrection_factor, -@@ -4406,7 +4579,6 @@ static void encode_frame_to_data_rate - cpi->active_best_quality, cpi->active_worst_quality, - cpi->ni_av_qi, cpi->cq_target_quality, - cpi->zbin_over_quant, -- //cpi->avg_frame_qindex, cpi->zbin_over_quant, - cm->refresh_golden_frame, cm->refresh_alt_ref_frame, - cm->frame_type, cpi->gfu_boost, - cpi->twopass.est_max_qcorrection_factor, -@@ -4436,10 +4608,6 @@ static void encode_frame_to_data_rate - - #endif - -- // If this was a kf or Gf note the Q -- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame) -- cm->last_kf_gf_q = cm->base_qindex; -- - if (cm->refresh_golden_frame == 
1) - cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; - else -@@ -4451,49 +4619,55 @@ static void encode_frame_to_data_rate - cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF; - - -- if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed -+ if (cm->refresh_last_frame & cm->refresh_golden_frame) -+ /* both refreshed */ - cpi->gold_is_last = 1; -- else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other -+ else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) -+ /* 1 refreshed but not the other */ - cpi->gold_is_last = 0; - -- if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed -+ if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) -+ /* both refreshed */ - cpi->alt_is_last = 1; -- else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other -+ else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) -+ /* 1 refreshed but not the other */ - cpi->alt_is_last = 0; - -- if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed -+ if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) -+ /* both refreshed */ - cpi->gold_is_alt = 1; -- else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other -+ else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) -+ /* 1 refreshed but not the other */ - cpi->gold_is_alt = 0; - -- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; -+ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; - - if (cpi->gold_is_last) -- cpi->ref_frame_flags &= ~VP8_GOLD_FLAG; -+ cpi->ref_frame_flags &= ~VP8_GOLD_FRAME; - - if (cpi->alt_is_last) -- cpi->ref_frame_flags &= ~VP8_ALT_FLAG; -+ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; - - if (cpi->gold_is_alt) -- cpi->ref_frame_flags &= ~VP8_ALT_FLAG; -+ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; - - - if (!cpi->oxcf.error_resilient_mode) - { - if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) -- // Update the alternate reference frame stats as appropriate. -+ /* Update the alternate reference frame stats as appropriate. */ - update_alt_ref_frame_stats(cpi); - else -- // Update the Golden frame stats as appropriate. -+ /* Update the Golden frame stats as appropriate. */ - update_golden_frame_stats(cpi); - } - - if (cm->frame_type == KEY_FRAME) - { -- // Tell the caller that the frame was coded as a key frame -+ /* Tell the caller that the frame was coded as a key frame */ - *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; - -- // As this frame is a key frame the next defaults to an inter frame. -+ /* As this frame is a key frame the next defaults to an inter frame. */ - cm->frame_type = INTER_FRAME; - - cpi->last_frame_percent_intra = 100; -@@ -4505,20 +4679,24 @@ static void encode_frame_to_data_rate - cpi->last_frame_percent_intra = cpi->this_frame_percent_intra; - } - -- // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas. -+ /* Clear the one shot update flags for segmentation map and mode/ref -+ * loop filter deltas. 
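The gold_is_last/alt_is_last/gold_is_alt bookkeeping above exists so the mode search can skip reference buffers that currently hold identical frames. A reduced sketch of the masking idea; the flag names are illustrative stand-ins, not the VP8_*_FRAME constants used in the hunk:

    #define REF_LAST 0x1
    #define REF_GOLD 0x2
    #define REF_ALT  0x4

    /* Drop duplicated references from the search mask so identical buffers
     * are not searched twice. */
    static int usable_ref_mask(int gold_is_last, int alt_is_last, int gold_is_alt)
    {
        int mask = REF_LAST | REF_GOLD | REF_ALT;

        if (gold_is_last) mask &= ~REF_GOLD;
        if (alt_is_last)  mask &= ~REF_ALT;
        if (gold_is_alt)  mask &= ~REF_ALT;
        return mask;
    }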
-+ */ - cpi->mb.e_mbd.update_mb_segmentation_map = 0; - cpi->mb.e_mbd.update_mb_segmentation_data = 0; - cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; - - -- // Dont increment frame counters if this was an altref buffer update not a real frame -+ /* Dont increment frame counters if this was an altref buffer update -+ * not a real frame -+ */ - if (cm->show_frame) - { - cm->current_video_frame++; - cpi->frames_since_key++; - } - -- // reset to normal state now that we are done. -+ /* reset to normal state now that we are done. */ - - - -@@ -4534,67 +4712,11 @@ static void encode_frame_to_data_rate - } - #endif - -- // DEBUG -- //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); -+ /* DEBUG */ -+ /* vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); */ - - - } -- -- --static void check_gf_quality(VP8_COMP *cpi) --{ -- VP8_COMMON *cm = &cpi->common; -- int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols); -- int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols); -- int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols); -- -- // Gf refresh is not currently being signalled -- if (cpi->gf_update_recommended == 0) -- { -- if (cpi->common.frames_since_golden > 7) -- { -- // Low use of gf -- if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15)) -- { -- // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate -- if (last_ref_zz_useage >= 25) -- { -- cpi->gf_bad_count ++; -- -- if (cpi->gf_bad_count >= 8) // Check that the condition is stable -- { -- cpi->gf_update_recommended = 1; -- cpi->gf_bad_count = 0; -- } -- } -- else -- cpi->gf_bad_count = 0; // Restart count as the background is not stable enough -- } -- else -- cpi->gf_bad_count = 0; // Gf useage has picked up so reset count -- } -- } -- // If the signal is set but has not been read should we cancel it. -- else if (last_ref_zz_useage < 15) -- { -- cpi->gf_update_recommended = 0; -- cpi->gf_bad_count = 0; -- } -- --#if 0 -- { -- FILE *f = fopen("gfneeded.stt", "a"); -- fprintf(f, "%10d %10d %10d %10d %10ld \n", -- cm->current_video_frame, -- cpi->common.frames_since_golden, -- gf_active_pct, gf_ref_usage_pct, -- cpi->gf_update_recommended); -- fclose(f); -- } -- --#endif --} -- - #if !(CONFIG_REALTIME_ONLY) - static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags) - { -@@ -4614,7 +4736,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, - } - #endif - --//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. -+/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ - #if HAVE_NEON - extern void vp8_push_neon(int64_t *store); - extern void vp8_pop_neon(int64_t *store); -@@ -4721,7 +4843,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - cpi->source = NULL; - - #if !(CONFIG_REALTIME_ONLY) -- // Should we code an alternate reference frame -+ /* Should we code an alternate reference frame */ - if (cpi->oxcf.error_resilient_mode == 0 && - cpi->oxcf.play_alternate && - cpi->source_alt_ref_pending) -@@ -4742,7 +4864,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 0; - cm->show_frame = 0; -- cpi->source_alt_ref_pending = 0; // Clear Pending alt Ref flag. -+ /* Clear Pending alt Ref flag. 
*/ -+ cpi->source_alt_ref_pending = 0; - cpi->is_src_frame_alt_ref = 0; - } - } -@@ -4814,7 +4937,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - cpi->last_end_time_stamp_seen = cpi->source->ts_start; - } - -- // adjust frame rates based on timestamps given -+ /* adjust frame rates based on timestamps given */ - if (cm->show_frame) - { - int64_t this_duration; -@@ -4832,9 +4955,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; - last_duration = cpi->last_end_time_stamp_seen - - cpi->last_time_stamp_seen; -- // do a step update if the duration changes by 10% -+ /* do a step update if the duration changes by 10% */ - if (last_duration) -- step = ((this_duration - last_duration) * 10 / last_duration); -+ step = (int)(((this_duration - last_duration) * -+ 10 / last_duration)); - } - - if (this_duration) -@@ -4849,7 +4973,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - * frame rate. If we haven't seen 1 second yet, then average - * over the whole interval seen. - */ -- interval = cpi->source->ts_end - cpi->first_time_stamp_ever; -+ interval = (double)(cpi->source->ts_end - -+ cpi->first_time_stamp_ever); - if(interval > 10000000.0) - interval = 10000000; - -@@ -4862,9 +4987,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->oxcf.number_of_layers > 1) - { -- int i; -+ unsigned int i; - -- // Update frame rates for each layer -+ /* Update frame rates for each layer */ - for (i=0; ioxcf.number_of_layers; i++) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; -@@ -4886,7 +5011,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - update_layer_contexts (cpi); - -- // Restore layer specific context & set frame rate -+ /* Restore layer specific context & set frame rate */ - layer = cpi->oxcf.layer_id[ - cm->current_video_frame % cpi->oxcf.periodicity]; - restore_layer_context (cpi, layer); -@@ -4895,12 +5020,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->compressor_speed == 2) - { -- if (cpi->oxcf.number_of_layers == 1) -- check_gf_quality(cpi); - vpx_usec_timer_start(&tsctimer); - vpx_usec_timer_start(&ticktimer); - } - -+ cpi->lf_zeromv_pct = (cpi->zeromv_count * 100)/cm->MBs; -+ - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING - { - int i; -@@ -4924,11 +5049,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - } - #endif - -- // start with a 0 size frame -+ /* start with a 0 size frame */ - *size = 0; - -- // Clear down mmx registers -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers */ -+ vp8_clear_system_state(); - - cm->frame_type = INTER_FRAME; - cm->frame_flags = *frame_flags; -@@ -4937,7 +5062,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cm->refresh_alt_ref_frame) - { -- //cm->refresh_golden_frame = 1; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 0; - } -@@ -4982,7 +5106,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - vpx_usec_timer_mark(&tsctimer); - vpx_usec_timer_mark(&ticktimer); - -- duration = vpx_usec_timer_elapsed(&ticktimer); -+ duration = (int)(vpx_usec_timer_elapsed(&ticktimer)); - duration2 = (unsigned int)((double)duration / 2); - - if (cm->frame_type != KEY_FRAME) -@@ -4995,7 +5119,6 @@ int 
vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (duration2) - { -- //if(*frame_flags!=1) - { - - if (cpi->avg_pick_mode_time == 0) -@@ -5012,8 +5135,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); - } - -- // Save the contexts separately for alt ref, gold and last. -- // (TODO jbb -> Optimize this with pointers to avoid extra copies. ) -+ /* Save the contexts separately for alt ref, gold and last. */ -+ /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */ - if(cm->refresh_alt_ref_frame) - vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); - -@@ -5023,12 +5146,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - if(cm->refresh_last_frame) - vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); - -- // if its a dropped frame honor the requests on subsequent frames -+ /* if its a dropped frame honor the requests on subsequent frames */ - if (*size > 0) - { - cpi->droppable = !frame_is_reference(cpi); - -- // return to normal state -+ /* return to normal state */ - cm->refresh_entropy_probs = 1; - cm->refresh_alt_ref_frame = 0; - cm->refresh_golden_frame = 0; -@@ -5037,7 +5160,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - } - -- // Save layer specific state -+ /* Save layer specific state */ - if (cpi->oxcf.number_of_layers > 1) - save_layer_context (cpi); - -@@ -5062,14 +5185,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->b_calculate_psnr) - { -- double ye,ue,ve; -+ uint64_t ye,ue,ve; - double frame_psnr; - YV12_BUFFER_CONFIG *orig = cpi->Source; - YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - int y_samples = orig->y_height * orig->y_width ; - int uv_samples = orig->uv_height * orig->uv_width ; - int t_samples = y_samples + 2 * uv_samples; -- int64_t sq_error, sq_error2; -+ double sq_error, sq_error2; - - ye = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height); -@@ -5080,13 +5203,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height); - -- sq_error = ye + ue + ve; -+ sq_error = (double)(ye + ue + ve); - - frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error); - -- cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye); -- cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue); -- cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve); -+ cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye); -+ cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue); -+ cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve); - cpi->total_sq_error += sq_error; - cpi->total += frame_psnr; - #if CONFIG_POSTPROC -@@ -5095,7 +5218,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - double frame_psnr2, frame_ssim2 = 0; - double weight = 0; - -- vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); -+ vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); - vp8_clear_system_state(); - - ye = calc_plane_error(orig->y_buffer, orig->y_stride, -@@ -5107,13 +5230,16 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - 
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height); - -- sq_error2 = ye + ue + ve; -+ sq_error2 = (double)(ye + ue + ve); - - frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2); - -- cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye); -- cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue); -- cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve); -+ cpi->totalp_y += vp8_mse2psnr(y_samples, -+ 255.0, (double)ye); -+ cpi->totalp_u += vp8_mse2psnr(uv_samples, -+ 255.0, (double)ue); -+ cpi->totalp_v += vp8_mse2psnr(uv_samples, -+ 255.0, (double)ve); - cpi->total_sq_error2 += sq_error2; - cpi->totalp += frame_psnr2; - -@@ -5125,7 +5251,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->oxcf.number_of_layers > 1) - { -- int i; -+ unsigned int i; - - for (i=cpi->current_layer; - ioxcf.number_of_layers; i++) -@@ -5153,7 +5279,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->oxcf.number_of_layers > 1) - { -- int i; -+ unsigned int i; - - for (i=cpi->current_layer; - ioxcf.number_of_layers; i++) -@@ -5251,7 +5377,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla - ret = -1; - } - --#endif //!CONFIG_POSTPROC -+#endif - vp8_clear_system_state(); - return ret; - } -@@ -5260,29 +5386,53 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla - int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]) - { - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; -+ int internal_delta_q[MAX_MB_SEGMENTS]; -+ const int range = 63; -+ int i; - -+ // This method is currently incompatible with the cyclic refresh method -+ if ( cpi->cyclic_refresh_mode_enabled ) -+ return -1; -+ -+ // Check number of rows and columns match - if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) - return -1; - -+ // Range check the delta Q values and convert the external Q range values -+ // to internal ones. -+ if ( (abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) || -+ (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range) ) -+ return -1; -+ -+ // Range check the delta lf values -+ if ( (abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) || -+ (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range) ) -+ return -1; -+ - if (!map) - { - disable_segmentation(cpi); - return 0; - } - -- // Set the segmentation Map -+ // Translate the external delta q values to internal values. -+ for ( i = 0; i < MAX_MB_SEGMENTS; i++ ) -+ internal_delta_q[i] = -+ ( delta_q[i] >= 0 ) ? q_trans[delta_q[i]] : -q_trans[-delta_q[i]]; -+ -+ /* Set the segmentation Map */ - set_segmentation_map(cpi, map); - -- // Activate segmentation. -+ /* Activate segmentation. 
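For orientation, the ROI path above is normally exercised from the application through the public encoder control rather than by calling vp8_set_roimap() directly. A sketch of that usage, assuming the vpx_roi_map_t layout from vpx/vp8cx.h and omitting most error handling:

    #include <stdlib.h>
    #include <string.h>
    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Mark every macroblock as segment 0 except the ones the caller flags,
     * then hand the map to the encoder via VP8E_SET_ROI_MAP. */
    static vpx_codec_err_t set_simple_roi(vpx_codec_ctx_t *codec,
                                          unsigned int mb_rows,
                                          unsigned int mb_cols)
    {
        vpx_roi_map_t roi;
        vpx_codec_err_t res;

        memset(&roi, 0, sizeof(roi));
        roi.rows = mb_rows;
        roi.cols = mb_cols;
        roi.roi_map = calloc(mb_rows * mb_cols, 1);
        if (!roi.roi_map)
            return VPX_CODEC_MEM_ERROR;

        roi.delta_q[1] = -10;        /* segment 1: better quality, within the
                                      * +/-63 range checked in the hunk above */
        roi.delta_lf[1] = 0;
        roi.static_threshold[1] = 0;
        /* ... set roi.roi_map[mb] = 1 for the macroblocks of interest ... */

        res = vpx_codec_control(codec, VP8E_SET_ROI_MAP, &roi);
        free(roi.roi_map);
        return res;
    }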
*/ - enable_segmentation(cpi); - -- // Set up the quant segment data -- feature_data[MB_LVL_ALT_Q][0] = delta_q[0]; -- feature_data[MB_LVL_ALT_Q][1] = delta_q[1]; -- feature_data[MB_LVL_ALT_Q][2] = delta_q[2]; -- feature_data[MB_LVL_ALT_Q][3] = delta_q[3]; -+ /* Set up the quant segment data */ -+ feature_data[MB_LVL_ALT_Q][0] = internal_delta_q[0]; -+ feature_data[MB_LVL_ALT_Q][1] = internal_delta_q[1]; -+ feature_data[MB_LVL_ALT_Q][2] = internal_delta_q[2]; -+ feature_data[MB_LVL_ALT_Q][3] = internal_delta_q[3]; - -- // Set up the loop segment data s -+ /* Set up the loop segment data s */ - feature_data[MB_LVL_ALT_LF][0] = delta_lf[0]; - feature_data[MB_LVL_ALT_LF][1] = delta_lf[1]; - feature_data[MB_LVL_ALT_LF][2] = delta_lf[2]; -@@ -5293,8 +5443,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne - cpi->segment_encode_breakout[2] = threshold[2]; - cpi->segment_encode_breakout[3] = threshold[3]; - -- // Initialise the feature data structure -- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 -+ /* Initialise the feature data structure */ - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - - return 0; -@@ -5316,7 +5465,6 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns - } - else - { -- //cpi->active_map_enabled = 0; - return -1 ; - } - } -@@ -5346,7 +5494,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; - -- // Loop through the Y plane raw and reconstruction data summing (square differences) -+ /* Loop through the Y plane raw and reconstruction data summing -+ * (square differences) -+ */ - for (i = 0; i < source->y_height; i += 16) - { - for (j = 0; j < source->y_width; j += 16) -diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h -index 900141b..fb8ad35 100644 ---- a/vp8/encoder/onyx_int.h -+++ b/vp8/encoder/onyx_int.h -@@ -25,6 +25,7 @@ - #include "vp8/common/threading.h" - #include "vpx_ports/mem.h" - #include "vpx/internal/vpx_codec_internal.h" -+#include "vpx/vp8.h" - #include "mcomp.h" - #include "vp8/common/findnearmv.h" - #include "lookahead.h" -@@ -32,7 +33,6 @@ - #include "vp8/encoder/denoising.h" - #endif - --//#define SPEEDSTATS 1 - #define MIN_GF_INTERVAL 4 - #define DEFAULT_GF_INTERVAL 7 - -@@ -43,7 +43,7 @@ - #define AF_THRESH 25 - #define AF_THRESH2 100 - #define ARF_DECAY_THRESH 12 --#define MAX_MODES 20 -+ - - #define MIN_THRESHMULT 32 - #define MAX_THRESHMULT 512 -@@ -73,7 +73,6 @@ typedef struct - int mvcosts[2][MVvals+1]; - - #ifdef MODE_STATS -- // Stats - int y_modes[5]; - int uv_modes[4]; - int b_modes[10]; -@@ -232,22 +231,22 @@ enum - - typedef struct - { -- // Layer configuration -+ /* Layer configuration */ - double frame_rate; - int target_bandwidth; - -- // Layer specific coding parameters -- int starting_buffer_level; -- int optimal_buffer_level; -- int maximum_buffer_size; -- int starting_buffer_level_in_ms; -- int optimal_buffer_level_in_ms; -- int maximum_buffer_size_in_ms; -+ /* Layer specific coding parameters */ -+ int64_t starting_buffer_level; -+ int64_t optimal_buffer_level; -+ int64_t maximum_buffer_size; -+ int64_t starting_buffer_level_in_ms; -+ int64_t optimal_buffer_level_in_ms; -+ int64_t maximum_buffer_size_in_ms; - - int avg_frame_size_for_layer; - -- int buffer_level; -- int bits_off_target; -+ int64_t buffer_level; -+ int64_t bits_off_target; - - int64_t total_actual_bits; - int total_target_vs_actual; -@@ -307,7 +306,7 @@ typedef struct 
VP8_COMP - - MACROBLOCK mb; - VP8_COMMON common; -- vp8_writer bc[9]; // one boolcoder for each partition -+ vp8_writer bc[9]; /* one boolcoder for each partition */ - - VP8_CONFIG oxcf; - -@@ -321,16 +320,20 @@ typedef struct VP8_COMP - YV12_BUFFER_CONFIG scaled_source; - YV12_BUFFER_CONFIG *last_frame_unscaled_source; - -- int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref -- int source_alt_ref_active; // an alt ref frame has been encoded and is usable -- -- int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame -+ /* frame in src_buffers has been identified to be encoded as an alt ref */ -+ int source_alt_ref_pending; -+ /* an alt ref frame has been encoded and is usable */ -+ int source_alt_ref_active; -+ /* source of frame to encode is an exact copy of an alt ref frame */ -+ int is_src_frame_alt_ref; - -- int gold_is_last; // golden frame same as last frame ( short circuit gold searches) -- int alt_is_last; // Alt reference frame same as last ( short circuit altref search) -- int gold_is_alt; // don't do both alt and gold search ( just do gold). -+ /* golden frame same as last frame ( short circuit gold searches) */ -+ int gold_is_last; -+ /* Alt reference frame same as last ( short circuit altref search) */ -+ int alt_is_last; -+ /* don't do both alt and gold search ( just do gold). */ -+ int gold_is_alt; - -- //int refresh_alt_ref_frame; - YV12_BUFFER_CONFIG pick_lf_lvl_frame; - - TOKENEXTRA *tok; -@@ -342,55 +345,62 @@ typedef struct VP8_COMP - unsigned int this_key_frame_forced; - unsigned int next_key_frame_forced; - -- // Ambient reconstruction err target for force key frames -+ /* Ambient reconstruction err target for force key frames */ - int ambient_err; - - unsigned int mode_check_freq[MAX_MODES]; -- unsigned int mode_test_hit_counts[MAX_MODES]; - unsigned int mode_chosen_counts[MAX_MODES]; -- unsigned int mbs_tested_so_far; - -- int rd_thresh_mult[MAX_MODES]; - int rd_baseline_thresh[MAX_MODES]; -- int rd_threshes[MAX_MODES]; - - int RDMULT; - int RDDIV ; - - CODING_CONTEXT coding_context; - -- // Rate targetting variables -- int64_t prediction_error; -+ /* Rate targetting variables */ - int64_t last_prediction_error; -- int64_t intra_error; - int64_t last_intra_error; - - int this_frame_target; - int projected_frame_size; -- int last_q[2]; // Separate values for Intra/Inter -+ int last_q[2]; /* Separate values for Intra/Inter */ - - double rate_correction_factor; - double key_frame_rate_correction_factor; - double gf_rate_correction_factor; - -- int frames_till_gf_update_due; // Count down till next GF -- int current_gf_interval; // GF interval chosen when we coded the last GF -+ /* Count down till next GF */ -+ int frames_till_gf_update_due; -+ -+ /* GF interval chosen when we coded the last GF */ -+ int current_gf_interval; - -- int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative) -+ /* Total bits overspent becasue of GF boost (cumulative) */ -+ int gf_overspend_bits; - -- int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF -+ /* Used in the few frames following a GF to recover the extra bits -+ * spent in that GF -+ */ -+ int non_gf_bitrate_adjustment; - -- int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames -- int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame. 
-+ /* Extra bits spent on key frames that need to be recovered */ -+ int kf_overspend_bits; -+ -+ /* Current number of bit s to try and recover on each inter frame. */ -+ int kf_bitrate_adjustment; - int max_gf_interval; - int baseline_gf_interval; -- int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames -+ int active_arnr_frames; - - int64_t key_frame_count; - int prior_key_frame_distance[KEY_FRAME_CONTEXT]; -- int per_frame_bandwidth; // Current section per frame bandwidth target -- int av_per_frame_bandwidth; // Average frame size target for clip -- int min_frame_bandwidth; // Minimum allocation that should be used for any frame -+ /* Current section per frame bandwidth target */ -+ int per_frame_bandwidth; -+ /* Average frame size target for clip */ -+ int av_per_frame_bandwidth; -+ /* Minimum allocation that should be used for any frame */ -+ int min_frame_bandwidth; - int inter_frame_target; - double output_frame_rate; - int64_t last_time_stamp_seen; -@@ -402,12 +412,6 @@ typedef struct VP8_COMP - int ni_frames; - int avg_frame_qindex; - -- int zbin_over_quant; -- int zbin_mode_boost; -- int zbin_mode_boost_enabled; -- int last_zbin_over_quant; -- int last_zbin_mode_boost; -- - int64_t total_byte_count; - - int buffered_mode; -@@ -415,7 +419,7 @@ typedef struct VP8_COMP - double frame_rate; - double ref_frame_rate; - int64_t buffer_level; -- int bits_off_target; -+ int64_t bits_off_target; - - int rolling_target_bits; - int rolling_actual_bits; -@@ -424,7 +428,7 @@ typedef struct VP8_COMP - int long_rolling_actual_bits; - - int64_t total_actual_bits; -- int total_target_vs_actual; // debug stats -+ int total_target_vs_actual; /* debug stats */ - - int worst_quality; - int active_worst_quality; -@@ -433,22 +437,9 @@ typedef struct VP8_COMP - - int cq_target_quality; - -- int drop_frames_allowed; // Are we permitted to drop frames? -- int drop_frame; // Drop this frame? -- int drop_count; // How many frames have we dropped? -- int max_drop_count; // How many frames should we drop? -- int max_consec_dropped_frames; // Limit number of consecutive frames that can be dropped. -- -- -- int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ -- int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ -+ int drop_frames_allowed; /* Are we permitted to drop frames? */ -+ int drop_frame; /* Drop this frame? 
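A note on the int widening above: the buffer-level fields are configured in milliseconds and converted to bits against the target bandwidth, and that product can exceed 32 bits at high bitrates. The motivation is inferred from the type change, not stated in the patch. A minimal sketch of the conversion, with illustrative names:

    #include <stdint.h>

    /* Convert a buffer duration in milliseconds to bits; the intermediate
     * product needs 64 bits once target bitrates get large. */
    static int64_t buffer_ms_to_bits(int64_t buffer_ms, int target_bandwidth_bps)
    {
        return (buffer_ms * target_bandwidth_bps) / 1000;
    }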
*/ - -- unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ -- -- unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ -- -- //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more -- //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation - vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - -@@ -462,7 +453,7 @@ typedef struct VP8_COMP - struct vpx_codec_pkt_list *output_pkt_list; - - #if 0 -- // Experimental code for lagged and one pass -+ /* Experimental code for lagged and one pass */ - ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; - int one_pass_frame_index; - #endif -@@ -470,17 +461,14 @@ typedef struct VP8_COMP - int decimation_factor; - int decimation_count; - -- // for real time encoding -- int avg_encode_time; //microsecond -- int avg_pick_mode_time; //microsecond -+ /* for real time encoding */ -+ int avg_encode_time; /* microsecond */ -+ int avg_pick_mode_time; /* microsecond */ - int Speed; -- unsigned int cpu_freq; //Mhz - int compressor_speed; - -- int interquantizer; - int auto_gold; - int auto_adjust_gold_quantizer; -- int goldfreq; - int auto_worst_q; - int cpu_used; - int pass; -@@ -494,29 +482,28 @@ typedef struct VP8_COMP - int last_skip_probs_q[3]; - int recent_ref_frame_usage[MAX_REF_FRAMES]; - -- int count_mb_ref_frame_usage[MAX_REF_FRAMES]; - int this_frame_percent_intra; - int last_frame_percent_intra; - - int ref_frame_flags; - - SPEED_FEATURES sf; -- int error_bins[1024]; - -- // Data used for real time conferencing mode to help determine if it would be good to update the gf -- int inter_zz_count; -- int gf_bad_count; -- int gf_update_recommended; -- int skip_true_count; -+ /* Count ZEROMV on all reference frames. */ -+ int zeromv_count; -+ int lf_zeromv_pct; - - unsigned char *segmentation_map; -- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) -- int segment_encode_breakout[MAX_MB_SEGMENTS]; // segment threashold for encode breakout -+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; -+ int segment_encode_breakout[MAX_MB_SEGMENTS]; - - unsigned char *active_map; - unsigned int active_map_enabled; -- // Video conferencing cyclic refresh mode flags etc -- // This is a mode designed to clean up the background over time in live encoding scenarious. It uses segmentation -+ -+ /* Video conferencing cyclic refresh mode flags. This is a mode -+ * designed to clean up the background over time in live encoding -+ * scenarious. It uses segmentation. 
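The cyclic refresh mode described here works by forcing a bounded, rotating set of macroblocks into a dedicated segment each frame so the background is gradually re-coded. A heavily simplified sketch of that marking pass; the real cyclic_background_refresh() additionally manages the segment's Q delta and previously marked blocks:

    /* Advance a rotating index through the macroblock map, placing up to
     * max_mbs_per_frame blocks into the "refresh" segment (id 1). */
    static void mark_refresh_mbs(signed char *seg_map, int mbs_in_frame,
                                 int *refresh_index, int max_mbs_per_frame)
    {
        int marked;

        for (marked = 0; marked < max_mbs_per_frame; marked++)
        {
            seg_map[*refresh_index] = 1;
            *refresh_index = (*refresh_index + 1) % mbs_in_frame;
        }
    }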
-+ */ - int cyclic_refresh_mode_enabled; - int cyclic_refresh_mode_max_mbs_perframe; - int cyclic_refresh_mode_index; -@@ -524,7 +511,7 @@ typedef struct VP8_COMP - signed char *cyclic_refresh_map; - - #if CONFIG_MULTITHREAD -- // multithread data -+ /* multithread data */ - int * mt_current_mb_col; - int mt_sync_range; - int b_multi_threaded; -@@ -538,7 +525,7 @@ typedef struct VP8_COMP - ENCODETHREAD_DATA *en_thread_data; - LPFTHREAD_DATA lpf_thread_data; - -- //events -+ /* events */ - sem_t *h_event_start_encoding; - sem_t h_event_end_encoding; - sem_t h_event_start_lpf; -@@ -549,7 +536,6 @@ typedef struct VP8_COMP - unsigned int partition_sz[MAX_PARTITIONS]; - unsigned char *partition_d[MAX_PARTITIONS]; - unsigned char *partition_d_end[MAX_PARTITIONS]; -- // end of multithread data - - - fractional_mv_step_fp *find_fractional_mv_step; -@@ -557,10 +543,10 @@ typedef struct VP8_COMP - vp8_refining_search_fn_t refining_search_sad; - vp8_diamond_search_fn_t diamond_search_sad; - vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; -- unsigned int time_receive_data; -- unsigned int time_compress_data; -- unsigned int time_pick_lpf; -- unsigned int time_encode_mb_row; -+ uint64_t time_receive_data; -+ uint64_t time_compress_data; -+ uint64_t time_pick_lpf; -+ uint64_t time_encode_mb_row; - - int base_skip_false_prob[128]; - -@@ -594,16 +580,16 @@ typedef struct VP8_COMP - int gf_decay_rate; - int static_scene_max_gf_interval; - int kf_bits; -- int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only -- -- // Projected total bits available for a key frame group of frames -+ /* Remaining error from uncoded frames in a gf group. */ -+ int gf_group_error_left; -+ /* Projected total bits available for a key frame group of frames */ - int64_t kf_group_bits; -- -- // Error score of frames still to be coded in kf group -+ /* Error score of frames still to be coded in kf group */ - int64_t kf_group_error_left; -- -- int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF -- int gf_bits; // Bits for the golden frame or ARF - 2 pass only -+ /* Projected Bits available for a group including 1 GF or ARF */ -+ int gf_group_bits; -+ /* Bits for the golden frame or ARF */ -+ int gf_bits; - int alt_extra_bits; - double est_max_qcorrection_factor; - } twopass; -@@ -641,24 +627,25 @@ typedef struct VP8_COMP - #endif - int b_calculate_psnr; - -- // Per MB activity measurement -+ /* Per MB activity measurement */ - unsigned int activity_avg; - unsigned int * mb_activity_map; -- int * mb_norm_activity_map; - -- // Record of which MBs still refer to last golden frame either -- // directly or through 0,0 -+ /* Record of which MBs still refer to last golden frame either -+ * directly or through 0,0 -+ */ - unsigned char *gf_active_flags; - int gf_active_count; - - int output_partition; - -- //Store last frame's MV info for next frame MV prediction -+ /* Store last frame's MV info for next frame MV prediction */ - int_mv *lfmv; - int *lf_ref_frame_sign_bias; - int *lf_ref_frame; - -- int force_next_frame_intra; /* force next frame to intra when kf_auto says so */ -+ /* force next frame to intra when kf_auto says so */ -+ int force_next_frame_intra; - - int droppable; - -@@ -666,7 +653,7 @@ typedef struct VP8_COMP - VP8_DENOISER denoiser; - #endif - -- // Coding layer state variables -+ /* Coding layer state variables */ - unsigned int current_layer; - LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; - -@@ -687,17 +674,29 @@ typedef struct 
VP8_COMP - #if CONFIG_MULTI_RES_ENCODING - /* Number of MBs per row at lower-resolution level */ - int mr_low_res_mb_cols; -+ /* Indicate if lower-res mv info is available */ -+ unsigned char mr_low_res_mv_avail; -+ /* The frame number of each reference frames */ -+ unsigned int current_ref_frames[MAX_REF_FRAMES]; - #endif - -+ struct rd_costs_struct -+ { -+ int mvcosts[2][MVvals+1]; -+ int mvsadcosts[2][MVfpvals+1]; -+ int mbmode_cost[2][MB_MODE_COUNT]; -+ int intra_uv_mode_cost[2][MB_MODE_COUNT]; -+ int bmode_costs[10][10][10]; -+ int inter_bmode_costs[B_MODE_COUNT]; -+ int token_costs[BLOCK_TYPES][COEF_BANDS] -+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -+ } rd_costs; - } VP8_COMP; - --void control_data_rate(VP8_COMP *cpi); -- --void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size); -- --int rd_cost_intra_mb(MACROBLOCKD *x); -+void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, -+ unsigned char *dest_end, unsigned long *size); - --void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **); -+void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **); - - void vp8_set_speed_features(VP8_COMP *cpi); - -diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c -index dafb645..673de2b 100644 ---- a/vp8/encoder/pickinter.c -+++ b/vp8/encoder/pickinter.c -@@ -61,7 +61,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, - } - - --static int get_inter_mbpred_error(MACROBLOCK *mb, -+int vp8_get_inter_mbpred_error(MACROBLOCK *mb, - const vp8_variance_fn_ptr_t *vfp, - unsigned int *sse, - int_mv this_mv) -@@ -132,7 +132,7 @@ static int pick_intra4x4block( - MACROBLOCK *x, - int ib, - B_PREDICTION_MODE *best_mode, -- unsigned int *mode_costs, -+ const int *mode_costs, - - int *bestrate, - int *bestdistortion) -@@ -141,20 +141,24 @@ static int pick_intra4x4block( - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - int dst_stride = x->e_mbd.dst.y_stride; -- unsigned char *base_dst = x->e_mbd.dst.y_buffer; -+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; - B_PREDICTION_MODE mode; -- int best_rd = INT_MAX; // 1<<30 -+ int best_rd = INT_MAX; - int rate; - int distortion; - -- for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++) -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ unsigned char top_left = Above[-1]; -+ -+ for (mode = B_DC_PRED; mode <= B_HE_PRED; mode++) - { - int this_rd; - - rate = mode_costs[mode]; -- vp8_intra4x4_predict -- (base_dst + b->offset, dst_stride, -- mode, b->predictor, 16); -+ -+ vp8_intra4x4_predict(Above, yleft, dst_stride, mode, -+ b->predictor, 16, top_left); - distortion = get_prediction_error(be, b); - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - -@@ -167,7 +171,7 @@ static int pick_intra4x4block( - } - } - -- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); -+ b->bmi.as_mode = *best_mode; - vp8_encode_intra4x4block(x, ib); - return best_rd; - } -@@ -185,7 +189,7 @@ static int pick_intra4x4mby_modes - int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; - int error; - int distortion = 0; -- unsigned int *bmode_costs; -+ const int *bmode_costs; - - intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); - -@@ -214,8 +218,9 @@ static int pick_intra4x4mby_modes - distortion += d; - mic->bmi[i].as_mode = best_mode; - -- // Break out case where we have already exceeded best so far value -- // that was passed in -+ /* Break out case where we have already exceeded best so far value -+ 
* that was passed in -+ */ - if (distortion > *best_dist) - break; - } -@@ -384,15 +389,16 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb) - - } - --static void update_mvcount(VP8_COMP *cpi, MACROBLOCKD *xd, int_mv *best_ref_mv) -+static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) - { -+ MACROBLOCKD *xd = &x->e_mbd; - /* Split MV modes currently not supported when RD is nopt enabled, - * therefore, only need to modify MVcount in NEWMV mode. */ - if (xd->mode_info_context->mbmi.mode == NEWMV) - { -- cpi->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - -+ x->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; -- cpi->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - -+ x->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - } - } -@@ -405,10 +411,9 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, - MB_PREDICTION_MODE *parent_mode, - int_mv *parent_ref_mv, int mb_row, int mb_col) - { -- LOWER_RES_INFO* store_mode_info -- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ LOWER_RES_MB_INFO* store_mode_info -+ = ((LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info)->mb_info; - unsigned int parent_mb_index; -- //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col]; - - /* Consider different down_sampling_factor. */ - { -@@ -440,7 +445,6 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, - /* Consider different down_sampling_factor. - * The result can be rounded to be more precise, but it takes more time. - */ -- //int round = cpi->oxcf.mr_down_sampling_factor.den/2; - (*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row - *cpi->oxcf.mr_down_sampling_factor.num - /cpi->oxcf.mr_down_sampling_factor.den; -@@ -455,10 +459,18 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, - - static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) - { -- if (sse < x->encode_breakout) -+ MACROBLOCKD *xd = &x->e_mbd; -+ -+ unsigned int threshold = (xd->block[0].dequant[1] -+ * xd->block[0].dequant[1] >>4); -+ -+ if(threshold < x->encode_breakout) -+ threshold = x->encode_breakout; -+ -+ if (sse < threshold ) - { -- // Check u and v to make sure skip is ok -- int sse2 = 0; -+ /* Check u and v to make sure skip is ok */ -+ unsigned int sse2 = 0; - - sse2 = VP8_UVSSE(x); - -@@ -469,7 +481,8 @@ static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) - } - } - --static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x) -+static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, -+ VP8_COMP *cpi, MACROBLOCK *x, int rd_adj) - { - MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; - int_mv mv = x->e_mbd.mode_info_context->mbmi.mv; -@@ -486,16 +499,70 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, V - - if((this_mode != NEWMV) || - !(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1) -- *distortion2 = get_inter_mbpred_error(x, -+ *distortion2 = vp8_get_inter_mbpred_error(x, - &cpi->fn_ptr[BLOCK_16X16], - sse, mv); - - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2); - -+ /* Adjust rd to bias to ZEROMV */ -+ if(this_mode == ZEROMV) -+ { -+ /* Bias to ZEROMV on LAST_FRAME reference when it is available. 
*/ -+ if ((cpi->ref_frame_flags & VP8_LAST_FRAME & -+ cpi->common.refresh_last_frame) -+ && x->e_mbd.mode_info_context->mbmi.ref_frame != LAST_FRAME) -+ rd_adj = 100; -+ -+ // rd_adj <= 100 -+ this_rd = ((int64_t)this_rd) * rd_adj / 100; -+ } -+ - check_for_encode_breakout(*sse, x); - return this_rd; - } - -+static void calculate_zeromv_rd_adjustment(VP8_COMP *cpi, MACROBLOCK *x, -+ int *rd_adjustment) -+{ -+ MODE_INFO *mic = x->e_mbd.mode_info_context; -+ int_mv mv_l, mv_a, mv_al; -+ int local_motion_check = 0; -+ -+ if (cpi->lf_zeromv_pct > 40) -+ { -+ /* left mb */ -+ mic -= 1; -+ mv_l = mic->mbmi.mv; -+ -+ if (mic->mbmi.ref_frame != INTRA_FRAME) -+ if( abs(mv_l.as_mv.row) < 8 && abs(mv_l.as_mv.col) < 8) -+ local_motion_check++; -+ -+ /* above-left mb */ -+ mic -= x->e_mbd.mode_info_stride; -+ mv_al = mic->mbmi.mv; -+ -+ if (mic->mbmi.ref_frame != INTRA_FRAME) -+ if( abs(mv_al.as_mv.row) < 8 && abs(mv_al.as_mv.col) < 8) -+ local_motion_check++; -+ -+ /* above mb */ -+ mic += 1; -+ mv_a = mic->mbmi.mv; -+ -+ if (mic->mbmi.ref_frame != INTRA_FRAME) -+ if( abs(mv_a.as_mv.row) < 8 && abs(mv_a.as_mv.col) < 8) -+ local_motion_check++; -+ -+ if (((!x->e_mbd.mb_to_top_edge || !x->e_mbd.mb_to_left_edge) -+ && local_motion_check >0) || local_motion_check >2 ) -+ *rd_adjustment = 80; -+ else if (local_motion_check > 0) -+ *rd_adjustment = 90; -+ } -+} -+ - void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int recon_uvoffset, int *returnrate, - int *returndistortion, int *returnintra, int mb_row, -@@ -513,7 +580,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - MB_PREDICTION_MODE this_mode; - int num00; - int mdcounts[4]; -- int best_rd = INT_MAX; // 1 << 30; -+ int best_rd = INT_MAX; -+ int rd_adjustment = 100; - int best_intra_rd = INT_MAX; - int mode_index; - int rate; -@@ -523,14 +591,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int best_mode_index = 0; - unsigned int sse = INT_MAX, best_rd_sse = INT_MAX; - #if CONFIG_TEMPORAL_DENOISING -- unsigned int zero_mv_sse = 0, best_sse = INT_MAX; -+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX; - #endif - - int_mv mvp; - - int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - int saddone=0; -- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) -+ /* search range got from mv_pred(). It uses step_param levels. (0-7) */ -+ int sr=0; - - unsigned char *plane[4][3]; - int ref_frame_map[4]; -@@ -539,12 +608,39 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - #if CONFIG_MULTI_RES_ENCODING - int dissim = INT_MAX; - int parent_ref_frame = 0; -+ int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail; - int_mv parent_ref_mv; - MB_PREDICTION_MODE parent_mode = 0; - -- if (cpi->oxcf.mr_encoder_id) -+ if (parent_ref_valid) -+ { -+ int parent_ref_flag; -+ - get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame, - &parent_mode, &parent_ref_mv, mb_row, mb_col); -+ -+ /* TODO(jkoleszar): The references available (ref_frame_flags) to the -+ * lower res encoder should match those available to this encoder, but -+ * there seems to be a situation where this mismatch can happen in the -+ * case of frame dropping and temporal layers. For example, -+ * GOLD being disallowed in ref_frame_flags, but being returned as -+ * parent_ref_frame. -+ * -+ * In this event, take the conservative approach of disabling the -+ * lower res info for this MB. 
-+ */ -+ parent_ref_flag = 0; -+ if (parent_ref_frame == LAST_FRAME) -+ parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME); -+ else if (parent_ref_frame == GOLDEN_FRAME) -+ parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME); -+ else if (parent_ref_frame == ALTREF_FRAME) -+ parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME); -+ -+ //assert(!parent_ref_frame || parent_ref_flag); -+ if (parent_ref_frame && !parent_ref_flag) -+ parent_ref_valid = 0; -+ } - #endif - - mode_mv = mode_mv_sb[sign_bias]; -@@ -553,6 +649,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - - /* Setup search priorities */ -+#if CONFIG_MULTI_RES_ENCODING -+ if (parent_ref_valid && parent_ref_frame && dissim < 8) -+ { -+ ref_frame_map[0] = -1; -+ ref_frame_map[1] = parent_ref_frame; -+ ref_frame_map[2] = -1; -+ ref_frame_map[3] = -1; -+ } else -+#endif - get_reference_search_order(cpi, ref_frame_map); - - /* Check to see if there is at least 1 valid reference frame that we need -@@ -574,22 +679,29 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); - -- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame -+ /* Count of the number of MBs tested so far this frame */ -+ x->mbs_tested_so_far++; - - *returnintra = INT_MAX; - x->skip = 0; - - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - -- // if we encode a new mv this is important -- // find the best new motion vector -+ /* If the frame has big static background and current MB is in low -+ * motion area, its mode decision is biased to ZEROMV mode. -+ */ -+ calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment); -+ -+ /* if we encode a new mv this is important -+ * find the best new motion vector -+ */ - for (mode_index = 0; mode_index < MAX_MODES; mode_index++) - { - int frame_cost; - int this_rd = INT_MAX; - int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; - -- if (best_rd <= cpi->rd_threshes[mode_index]) -+ if (best_rd <= x->rd_threshes[mode_index]) - continue; - - if (this_ref_frame < 0) -@@ -597,23 +709,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; - --#if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id) -- { -- /* If parent MB is intra, child MB is intra. */ -- if (!parent_ref_frame && this_ref_frame) -- continue; -- -- /* If parent MB is inter, and it is unlikely there are multiple -- * objects in parent MB, we use parent ref frame as child MB's -- * ref frame. 
*/ -- if (parent_ref_frame && dissim < 8 -- && parent_ref_frame != this_ref_frame) -- continue; -- } --#endif -- -- // everything but intra -+ /* everything but intra */ - if (x->e_mbd.mode_info_context->mbmi.ref_frame) - { - x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; -@@ -628,7 +724,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - - #if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id) -+ if (parent_ref_valid) - { - if (vp8_mode_order[mode_index] == NEARESTMV && - mode_mv[NEARESTMV].as_int ==0) -@@ -638,7 +734,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - continue; - - if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV -- && best_ref_mv.as_int==0) //&& dissim==0 -+ && best_ref_mv.as_int==0) - continue; - else if(vp8_mode_order[mode_index] == NEWMV && dissim==0 - && best_ref_mv.as_int==parent_ref_mv.as_int) -@@ -650,22 +746,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - /* Check to see if the testing frequency for this mode is at its max - * If so then prevent it from being tested and increase the threshold - * for its testing */ -- if (cpi->mode_test_hit_counts[mode_index] && -+ if (x->mode_test_hit_counts[mode_index] && - (cpi->mode_check_freq[mode_index] > 1)) - { -- if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * -- cpi->mode_test_hit_counts[mode_index])) -+ if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * -+ x->mode_test_hit_counts[mode_index])) - { - /* Increase the threshold for coding this mode to make it less - * likely to be chosen */ -- cpi->rd_thresh_mult[mode_index] += 4; -+ x->rd_thresh_mult[mode_index] += 4; - -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - -- cpi->rd_threshes[mode_index] = -+ x->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * -- cpi->rd_thresh_mult[mode_index]; -+ x->rd_thresh_mult[mode_index]; - continue; - } - } -@@ -673,7 +769,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - /* We have now reached the point where we are going to test the current - * mode so increment the counter for the number of times it has been - * tested */ -- cpi->mode_test_hit_counts[mode_index] ++; -+ x->mode_test_hit_counts[mode_index] ++; - - rate2 = 0; - distortion2 = 0; -@@ -728,7 +824,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - case SPLITMV: - -- // Split MV modes currently not supported when RD is nopt enabled. -+ /* Split MV modes currently not supported when RD is not enabled. */ - break; - - case DC_PRED: -@@ -777,13 +873,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1; - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - step_param = cpi->sf.first_step + speed_adjust; - - #if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id) -+ /* If lower-res drops this frame, then higher-res encoder does -+ motion search without any previous knowledge. Also, since -+ last frame motion info is not stored, then we can not -+ use improved_mv_pred. 
*/ -+ if (cpi->oxcf.mr_encoder_id && !parent_ref_valid) -+ cpi->sf.improved_mv_pred = 0; -+ -+ if (parent_ref_valid && parent_ref_frame) - { -- // Use parent MV as predictor. Adjust search range accordingly. -+ /* Use parent MV as predictor. Adjust search range -+ * accordingly. -+ */ - mvp.as_int = parent_ref_mv.as_int; - mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3; - mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3; -@@ -808,7 +913,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - &near_sadidx[0]); - - sr += speed_adjust; -- //adjust search range according to sr from mv prediction -+ /* adjust search range according to sr from mv prediction */ - if(sr > step_param) - step_param = sr; - -@@ -823,7 +928,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - - #if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id && dissim <= 2 && -+ if (parent_ref_valid && parent_ref_frame && dissim <= 2 && - MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), - abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4) - { -@@ -860,7 +965,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - * change the behavior in lowest-resolution encoder. - * Will improve it later. - */ -- if (!cpi->oxcf.mr_encoder_id) -+ /* Set step_param to 0 to ensure large-range motion search -+ when encoder drops this frame at lower-resolution. -+ */ -+ if (!parent_ref_valid) - step_param = 0; - #endif - bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, -@@ -877,10 +985,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - x->mvcost, &best_ref_mv); - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // Further step/diamond searches as necessary -- n = 0; -- //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; -- -+ /* Further step/diamond searches as necessary */ - n = num00; - num00 = 0; - -@@ -927,7 +1032,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // mv cost; -+ /* mv cost; */ - rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, - cpi->mb.mvcost, 128); - } -@@ -954,7 +1059,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - x->e_mbd.mode_info_context->mbmi.mv.as_int = - mode_mv[this_mode].as_int; -- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); -+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, -+ rd_adjustment); - - break; - default: -@@ -964,31 +1070,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- // Store for later use by denoiser. -- if (this_mode == ZEROMV && -- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) -- { -- zero_mv_sse = sse; -- } -- -- // Store the best NEWMV in x for later use in the denoiser. -- // We are restricted to the LAST_FRAME since the denoiser only keeps -- // one filter state. -- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) -- { -- best_sse = sse; -- x->e_mbd.best_sse_inter_mode = NEWMV; -- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -- x->e_mbd.need_to_clamp_best_mvs = -- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -- } -+ -+ /* Store for later use by denoiser. 
*/ -+ if (this_mode == ZEROMV && sse < zero_mv_sse ) -+ { -+ zero_mv_sse = sse; -+ x->best_zeromv_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } -+ -+ /* Store the best NEWMV in x for later use in the denoiser. */ -+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -+ sse < best_sse) -+ { -+ best_sse = sse; -+ x->best_sse_inter_mode = NEWMV; -+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -+ x->need_to_clamp_best_mvs = -+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -+ x->best_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } - } - #endif - - if (this_rd < best_rd || x->skip) - { -- // Note index of best mode -+ /* Note index of best mode */ - best_mode_index = mode_index; - - *returnrate = rate2; -@@ -1001,12 +1109,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - /* Testing this mode gave rise to an improvement in best error - * score. Lower threshold a bit for next time - */ -- cpi->rd_thresh_mult[mode_index] = -- (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? -- cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; -- cpi->rd_threshes[mode_index] = -+ x->rd_thresh_mult[mode_index] = -+ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? -+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; -+ x->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * -- cpi->rd_thresh_mult[mode_index]; -+ x->rd_thresh_mult[mode_index]; - } - - /* If the mode did not help improve the best error case then raise the -@@ -1014,33 +1122,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - */ - else - { -- cpi->rd_thresh_mult[mode_index] += 4; -+ x->rd_thresh_mult[mode_index] += 4; - -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - -- cpi->rd_threshes[mode_index] = -+ x->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * -- cpi->rd_thresh_mult[mode_index]; -+ x->rd_thresh_mult[mode_index]; - } - - if (x->skip) - break; - } - -- // Reduce the activation RD thresholds for the best choice mode -+ /* Reduce the activation RD thresholds for the best choice mode */ - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) - { -- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3); -+ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3); - -- cpi->rd_thresh_mult[best_mode_index] = -- (cpi->rd_thresh_mult[best_mode_index] -+ x->rd_thresh_mult[best_mode_index] = -+ (x->rd_thresh_mult[best_mode_index] - >= (MIN_THRESHMULT + best_adjustment)) ? -- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : -+ x->rd_thresh_mult[best_mode_index] - best_adjustment : - MIN_THRESHMULT; -- cpi->rd_threshes[best_mode_index] = -+ x->rd_threshes[best_mode_index] = - (cpi->rd_baseline_thresh[best_mode_index] >> 7) * -- cpi->rd_thresh_mult[best_mode_index]; -+ x->rd_thresh_mult[best_mode_index]; - } - - -@@ -1052,43 +1160,54 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - this_rdbin = 1023; - } - -- cpi->error_bins[this_rdbin] ++; -+ x->error_bins[this_rdbin] ++; - } - - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- if (x->e_mbd.best_sse_inter_mode == DC_PRED) { -- // No best MV found. 
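/*
 * [Editor's illustrative aside -- not part of the patch above.]
 * The hunk here biases VP8's fast mode decision toward ZEROMV when the
 * frame is largely static: an adjustment of 80/90/100 percent, derived
 * from how many neighbouring macroblocks have near-zero motion, scales
 * the rate-distortion cost of the ZEROMV candidate.  The stand-alone
 * sketch below mirrors that idea with simplified, hypothetical names;
 * rd_cost() is only a stand-in for the RDCOST macro used in the diff.
 */
#include <stdint.h>

/* Simplified RD cost: rate weighted by the rate multiplier, plus distortion. */
static int64_t rd_cost(int rate, int distortion, int rdmult, int rddiv)
{
    return ((int64_t)rate * rdmult) / 128 + (int64_t)distortion * rddiv;
}

/* The more neighbours with near-zero motion, the stronger the ZEROMV bias
 * (100 = no bias, 80 = strongest), loosely following the hunk above. */
static int zeromv_rd_adjustment(int static_neighbours)
{
    if (static_neighbours > 2) return 80;
    if (static_neighbours > 0) return 90;
    return 100;
}

/* Apply the bias only to the ZEROMV candidate, as in "this_rd * rd_adj / 100". */
static int64_t biased_rd(int is_zeromv, int64_t rd, int adjustment)
{
    return is_zeromv ? rd * adjustment / 100 : rd;
}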
-- x->e_mbd.best_sse_inter_mode = best_mbmode.mode; -- x->e_mbd.best_sse_mv = best_mbmode.mv; -- x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; -- best_sse = best_rd_sse; -- } -- vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, -- recon_yoffset, recon_uvoffset); -- -- // Reevaluate ZEROMV after denoising. -- if (best_mbmode.ref_frame == INTRA_FRAME) -- { -- int this_rd = 0; -- rate2 = 0; -- distortion2 = 0; -- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; -- this_mode = ZEROMV; -- rate2 += vp8_cost_mv_ref(this_mode, mdcounts); -- x->e_mbd.mode_info_context->mbmi.mode = this_mode; -- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); -+ if (x->best_sse_inter_mode == DC_PRED) -+ { -+ /* No best MV found. */ -+ x->best_sse_inter_mode = best_mbmode.mode; -+ x->best_sse_mv = best_mbmode.mv; -+ x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; -+ x->best_reference_frame = best_mbmode.ref_frame; -+ best_sse = best_rd_sse; -+ } -+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, -+ recon_yoffset, recon_uvoffset); - -- if (this_rd < best_rd || x->skip) -+ -+ /* Reevaluate ZEROMV after denoising. */ -+ if (best_mbmode.ref_frame == INTRA_FRAME && -+ x->best_zeromv_reference_frame != INTRA_FRAME) - { -- vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, -- sizeof(MB_MODE_INFO)); -+ int this_rd = 0; -+ int this_ref_frame = x->best_zeromv_reference_frame; -+ rate2 = x->ref_frame_cost[this_ref_frame] + -+ vp8_cost_mv_ref(ZEROMV, mdcounts); -+ distortion2 = 0; -+ -+ /* set up the proper prediction buffers for the frame */ -+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; -+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; -+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; -+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; -+ -+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; -+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, -+ rd_adjustment); -+ -+ if (this_rd < best_rd) -+ { -+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, -+ sizeof(MB_MODE_INFO)); -+ } - } -- } -+ - } - #endif - -@@ -1122,11 +1241,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) - best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - -- update_mvcount(cpi, &x->e_mbd, &best_ref_mv); -+ update_mvcount(cpi, x, &best_ref_mv); - } - - --void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) -+void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) - { - int error4x4, error16x16 = INT_MAX; - int rate, best_rate = 0, distortion, best_sse; -diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h -index 3d83782..35011ca 100644 ---- a/vp8/encoder/pickinter.h -+++ b/vp8/encoder/pickinter.h -@@ -18,6 +18,10 @@ extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int recon_uvoffset, int *returnrate, - int *returndistortion, int *returnintra, - int mb_row, int mb_col); --extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); -+extern void vp8_pick_intra_mode(MACROBLOCK *x, int *rate); - -+extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb, -+ const 
vp8_variance_fn_ptr_t *vfp, -+ unsigned int *sse, -+ int_mv this_mv); - #endif -diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c -index 21af45a..4121349 100644 ---- a/vp8/encoder/picklpf.c -+++ b/vp8/encoder/picklpf.c -@@ -74,7 +74,9 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, - src += srcoffset; - dst += dstoffset; - -- // Loop through the Y plane raw and reconstruction data summing (square differences) -+ /* Loop through the Y plane raw and reconstruction data summing -+ * (square differences) -+ */ - for (i = 0; i < linestocopy; i += 16) - { - for (j = 0; j < source->y_width; j += 16) -@@ -92,7 +94,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, - return Total; - } - --// Enforce a minimum filter level based upon baseline Q -+/* Enforce a minimum filter level based upon baseline Q */ - static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) - { - int min_filter_level; -@@ -113,14 +115,15 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) - return min_filter_level; - } - --// Enforce a maximum filter level based upon baseline Q -+/* Enforce a maximum filter level based upon baseline Q */ - static int get_max_filter_level(VP8_COMP *cpi, int base_qindex) - { -- // PGW August 2006: Highest filter values almost always a bad idea -+ /* PGW August 2006: Highest filter values almost always a bad idea */ - -- // jbb chg: 20100118 - not so any more with this overquant stuff allow high values -- // with lots of intra coming in. -- int max_filter_level = MAX_LOOP_FILTER ;//* 3 / 4; -+ /* jbb chg: 20100118 - not so any more with this overquant stuff allow -+ * high values with lots of intra coming in. -+ */ -+ int max_filter_level = MAX_LOOP_FILTER; - (void)base_qindex; - - if (cpi->twopass.section_intra_rating > 8) -@@ -155,7 +158,9 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - cm->last_sharpness_level = cm->sharpness_level; - } - -- // Start the search at the previous frame filter level unless it is now out of range. -+ /* Start the search at the previous frame filter level unless it is -+ * now out of range. -+ */ - if (cm->filter_level < min_filter_level) - cm->filter_level = min_filter_level; - else if (cm->filter_level > max_filter_level) -@@ -164,7 +169,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - filt_val = cm->filter_level; - best_filt_val = filt_val; - -- // Get the err using the previous frame's filter value. -+ /* Get the err using the previous frame's filter value. */ - - /* Copy the unfiltered / processed recon buffer to the new buffer */ - vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); -@@ -174,17 +179,17 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - - filt_val -= 1 + (filt_val > 10); - -- // Search lower filter levels -+ /* Search lower filter levels */ - while (filt_val >= min_filter_level) - { -- // Apply the loop filter -+ /* Apply the loop filter */ - vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); - vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - -- // Get the err for filtered frame -+ /* Get the err for filtered frame */ - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); - -- // Update the best case record or exit loop. -+ /* Update the best case record or exit loop. 
*/ - if (filt_err < best_err) - { - best_err = filt_err; -@@ -193,32 +198,34 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - break; - -- // Adjust filter level -+ /* Adjust filter level */ - filt_val -= 1 + (filt_val > 10); - } - -- // Search up (note that we have already done filt_val = cm->filter_level) -+ /* Search up (note that we have already done filt_val = cm->filter_level) */ - filt_val = cm->filter_level + 1 + (filt_val > 10); - - if (best_filt_val == cm->filter_level) - { -- // Resist raising filter level for very small gains -+ /* Resist raising filter level for very small gains */ - best_err -= (best_err >> 10); - - while (filt_val < max_filter_level) - { -- // Apply the loop filter -+ /* Apply the loop filter */ - vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); - - vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - -- // Get the err for filtered frame -+ /* Get the err for filtered frame */ - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); - -- // Update the best case record or exit loop. -+ /* Update the best case record or exit loop. */ - if (filt_err < best_err) - { -- // Do not raise filter level if improvement is < 1 part in 4096 -+ /* Do not raise filter level if improvement is < 1 part -+ * in 4096 -+ */ - best_err = filt_err - (filt_err >> 10); - - best_filt_val = filt_val; -@@ -226,7 +233,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - break; - -- // Adjust filter level -+ /* Adjust filter level */ - filt_val += 1 + (filt_val > 10); - } - } -@@ -243,7 +250,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - cm->frame_to_show = saved_frame; - } - --// Stub function for now Alt LF not used -+/* Stub function for now Alt LF not used */ - void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val) - { - MACROBLOCKD *mbd = &cpi->mb.e_mbd; -@@ -266,12 +273,14 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - - int filter_step; - int filt_high = 0; -- int filt_mid = cm->filter_level; // Start search at previous frame filter level -+ /* Start search at previous frame filter level */ -+ int filt_mid = cm->filter_level; - int filt_low = 0; - int filt_best; - int filt_direction = 0; - -- int Bias = 0; // Bias against raising loop filter and in favor of lowering it -+ /* Bias against raising loop filter and in favor of lowering it */ -+ int Bias = 0; - - int ss_err[MAX_LOOP_FILTER + 1]; - -@@ -287,7 +296,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - cm->sharpness_level = cpi->oxcf.Sharpness; - -- // Start the search at the previous frame filter level unless it is now out of range. -+ /* Start the search at the previous frame filter level unless it is -+ * now out of range. -+ */ - filt_mid = cm->filter_level; - - if (filt_mid < min_filter_level) -@@ -295,10 +306,10 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else if (filt_mid > max_filter_level) - filt_mid = max_filter_level; - -- // Define the initial step size -+ /* Define the initial step size */ - filter_step = (filt_mid < 16) ? 
4 : filt_mid / 4; - -- // Get baseline error score -+ /* Get baseline error score */ - - /* Copy the unfiltered / processed recon buffer to the new buffer */ - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); -@@ -314,9 +325,8 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - - while (filter_step > 0) - { -- Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; //PGW change 12/12/06 for small images -+ Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; - -- // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value - if (cpi->twopass.section_intra_rating < 20) - Bias = Bias * cpi->twopass.section_intra_rating / 20; - -@@ -327,7 +337,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - { - if(ss_err[filt_low] == 0) - { -- // Get Low filter error score -+ /* Get Low filter error score */ - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); - vp8cx_set_alt_lf_level(cpi, filt_low); - vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); -@@ -338,10 +348,12 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - filt_err = ss_err[filt_low]; - -- // If value is close to the best so far then bias towards a lower loop filter value. -+ /* If value is close to the best so far then bias towards a -+ * lower loop filter value. -+ */ - if ((filt_err - Bias) < best_err) - { -- // Was it actually better than the previous best? -+ /* Was it actually better than the previous best? */ - if (filt_err < best_err) - best_err = filt_err; - -@@ -349,7 +361,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - } - } - -- // Now look at filt_high -+ /* Now look at filt_high */ - if ((filt_direction >= 0) && (filt_high != filt_mid)) - { - if(ss_err[filt_high] == 0) -@@ -364,7 +376,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - filt_err = ss_err[filt_high]; - -- // Was it better than the previous best? -+ /* Was it better than the previous best? 
*/ - if (filt_err < (best_err - Bias)) - { - best_err = filt_err; -@@ -372,7 +384,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - } - } - -- // Half the step distance if the best filter value was the same as last time -+ /* Half the step distance if the best filter value was the same -+ * as last time -+ */ - if (filt_best == filt_mid) - { - filter_step = filter_step / 2; -diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c -index 5119bb8..5bb49ad 100644 ---- a/vp8/encoder/psnr.c -+++ b/vp8/encoder/psnr.c -@@ -22,7 +22,7 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse) - if ((double)Mse > 0.0) - psnr = 10.0 * log10(Peak * Peak * Samples / Mse); - else -- psnr = MAX_PSNR; // Limit to prevent / 0 -+ psnr = MAX_PSNR; /* Limit to prevent / 0 */ - - if (psnr > MAX_PSNR) - psnr = MAX_PSNR; -diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c -index 766d2b2..33c8ef0 100644 ---- a/vp8/encoder/quantize.c -+++ b/vp8/encoder/quantize.c -@@ -44,21 +44,21 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) - z = coeff_ptr[rc]; - zbin = zbin_ptr[rc] ; - -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - - if (x >= zbin) - { - x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) -- >> quant_shift_ptr[rc]; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ >> quant_shift_ptr[rc]; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -+ eob = i; /* last nonzero coeffs */ - } - } - } -@@ -84,17 +84,17 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) - rc = vp8_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - -- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -+ eob = i; /* last nonzero coeffs */ - } - } - *d->eob = (char)(eob + 1); -@@ -132,22 +132,22 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - - zbin_boost_ptr ++; -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - - if (x >= zbin) - { - x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) -- >> quant_shift_ptr[rc]; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ >> quant_shift_ptr[rc]; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -- zbin_boost_ptr = 
b->zrun_zbin_boost; // reset zero runlength -+ eob = i; /* last nonzero coeffs */ -+ zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */ - } - } - } -@@ -240,26 +240,23 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) - rc = vp8_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - -- //if ( i == 0 ) -- // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; -- //else - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - - zbin_boost_ptr ++; -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - - if (x >= zbin) - { -- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -- zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength -+ eob = i; /* last nonzero coeffs */ -+ zbin_boost_ptr = &b->zrun_zbin_boost[0]; /* reset zrl */ - } - } - } -@@ -441,7 +438,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { -- // dc values -+ /* dc values */ - quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, -@@ -469,7 +466,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - cpi->common.UVdequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - -- // all the ac values = ; -+ /* all the ac values = ; */ - quant_val = vp8_ac_yquant(Q); - cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1, -@@ -536,7 +533,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { -- // dc values -+ /* dc values */ - quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - cpi->Y1quant[Q][0] = (1 << 16) / quant_val; - cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; -@@ -558,7 +555,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - cpi->common.UVdequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - -- // all the ac values = ; -+ /* all the ac values = ; */ - for (i = 1; i < 16; i++) - { - int rc = vp8_default_zig_zag1d[i]; -@@ -590,20 +587,20 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - - #define ZBIN_EXTRA_Y \ - (( cpi->common.Y1dequant[QIndex][1] * \ -- ( cpi->zbin_over_quant + \ -- cpi->zbin_mode_boost + \ -+ ( x->zbin_over_quant + \ -+ x->zbin_mode_boost + \ - x->act_zbin_adj ) ) >> 7) - - #define ZBIN_EXTRA_UV \ - (( cpi->common.UVdequant[QIndex][1] * \ -- ( cpi->zbin_over_quant + \ -- cpi->zbin_mode_boost + \ -+ ( x->zbin_over_quant + \ -+ x->zbin_mode_boost + \ - x->act_zbin_adj ) ) >> 7) - - #define ZBIN_EXTRA_Y2 \ - (( cpi->common.Y2dequant[QIndex][1] * \ -- ( (cpi->zbin_over_quant / 2) + \ -- cpi->zbin_mode_boost + \ -+ ( (x->zbin_over_quant / 2) + \ -+ x->zbin_mode_boost + \ - x->act_zbin_adj ) ) >> 7) - - void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) -@@ -613,18 +610,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - MACROBLOCKD *xd = &x->e_mbd; - int zbin_extra; - -- // Select 
the baseline MB Q index. -+ /* Select the baseline MB Q index. */ - if (xd->segmentation_enabled) - { -- // Abs Value -+ /* Abs Value */ - if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) -- - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; -- // Delta Value -+ /* Delta Value */ - else - { - QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; -- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range -+ /* Clamp to valid range */ -+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; - } - } - else -@@ -657,13 +654,13 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - * This will also require modifications to the x86 and neon assembly. - * */ - for (i = 0; i < 16; i++) -- x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex]; -+ x->e_mbd.block[i].dequant = xd->dequant_y1; - for (i = 16; i < 24; i++) -- x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex]; -- x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex]; -+ x->e_mbd.block[i].dequant = xd->dequant_uv; -+ x->e_mbd.block[24].dequant = xd->dequant_y2; - #endif - -- // Y -+ /* Y */ - zbin_extra = ZBIN_EXTRA_Y; - - for (i = 0; i < 16; i++) -@@ -677,7 +674,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - x->block[i].zbin_extra = (short)zbin_extra; - } - -- // UV -+ /* UV */ - zbin_extra = ZBIN_EXTRA_UV; - - for (i = 16; i < 24; i++) -@@ -691,7 +688,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - x->block[i].zbin_extra = (short)zbin_extra; - } - -- // Y2 -+ /* Y2 */ - zbin_extra = ZBIN_EXTRA_Y2; - - x->block[24].quant_fast = cpi->Y2quant_fast[QIndex]; -@@ -705,35 +702,35 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - /* save this macroblock QIndex for vp8_update_zbin_extra() */ - x->q_index = QIndex; - -- cpi->last_zbin_over_quant = cpi->zbin_over_quant; -- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; -+ x->last_zbin_over_quant = x->zbin_over_quant; -+ x->last_zbin_mode_boost = x->zbin_mode_boost; - x->last_act_zbin_adj = x->act_zbin_adj; - - - - } -- else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant -- || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost -+ else if(x->last_zbin_over_quant != x->zbin_over_quant -+ || x->last_zbin_mode_boost != x->zbin_mode_boost - || x->last_act_zbin_adj != x->act_zbin_adj) - { -- // Y -+ /* Y */ - zbin_extra = ZBIN_EXTRA_Y; - - for (i = 0; i < 16; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- // UV -+ /* UV */ - zbin_extra = ZBIN_EXTRA_UV; - - for (i = 16; i < 24; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- // Y2 -+ /* Y2 */ - zbin_extra = ZBIN_EXTRA_Y2; - x->block[24].zbin_extra = (short)zbin_extra; - -- cpi->last_zbin_over_quant = cpi->zbin_over_quant; -- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; -+ x->last_zbin_over_quant = x->zbin_over_quant; -+ x->last_zbin_mode_boost = x->zbin_mode_boost; - x->last_act_zbin_adj = x->act_zbin_adj; - } - } -@@ -744,19 +741,19 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) - int QIndex = x->q_index; - int zbin_extra; - -- // Y -+ /* Y */ - zbin_extra = ZBIN_EXTRA_Y; - - for (i = 0; i < 16; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- // UV -+ /* UV */ - zbin_extra = ZBIN_EXTRA_UV; - - for (i = 16; i < 24; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- 
// Y2 -+ /* Y2 */ - zbin_extra = ZBIN_EXTRA_Y2; - x->block[24].zbin_extra = (short)zbin_extra; - } -@@ -766,10 +763,10 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) - - void vp8cx_frame_init_quantizer(VP8_COMP *cpi) - { -- // Clear Zbin mode boost for default case -- cpi->zbin_mode_boost = 0; -+ /* Clear Zbin mode boost for default case */ -+ cpi->mb.zbin_mode_boost = 0; - -- // MB level quantizer setup -+ /* MB level quantizer setup */ - vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0); - } - -@@ -801,7 +798,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) - cm->y2dc_delta_q = new_delta_q; - - -- // Set Segment specific quatizers -+ /* Set Segment specific quatizers */ - mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0]; - mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1]; - mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2]; -diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c -index 472e85f..a399a38 100644 ---- a/vp8/encoder/ratectrl.c -+++ b/vp8/encoder/ratectrl.c -@@ -41,15 +41,16 @@ extern int inter_uv_modes[4]; - extern int inter_b_modes[10]; - #endif - --// Bits Per MB at different Q (Multiplied by 512) -+/* Bits Per MB at different Q (Multiplied by 512) */ - #define BPER_MB_NORMBITS 9 - --// Work in progress recalibration of baseline rate tables based on --// the assumption that bits per mb is inversely proportional to the --// quantizer value. -+/* Work in progress recalibration of baseline rate tables based on -+ * the assumption that bits per mb is inversely proportional to the -+ * quantizer value. -+ */ - const int vp8_bits_per_mb[2][QINDEX_RANGE] = - { -- // Intra case 450000/Qintra -+ /* Intra case 450000/Qintra */ - { - 1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000, - 409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705, -@@ -68,7 +69,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] = - 36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088, - 32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662, - }, -- // Inter case 285000/Qinter -+ /* Inter case 285000/Qinter */ - { - 712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090, - 237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000, -@@ -109,7 +110,7 @@ static const int kf_boost_qadjustment[QINDEX_RANGE] = - 220, 220, 220, 220, 220, 220, 220, 220, - }; - --//#define GFQ_ADJUSTMENT (Q+100) -+/* #define GFQ_ADJUSTMENT (Q+100) */ - #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] - const int vp8_gf_boost_qadjustment[QINDEX_RANGE] = - { -@@ -173,7 +174,7 @@ static const int kf_gf_boost_qlimits[QINDEX_RANGE] = - 600, 600, 600, 600, 600, 600, 600, 600, - }; - --// % adjustment to target kf size based on seperation from previous frame -+/* % adjustment to target kf size based on seperation from previous frame */ - static const int kf_boost_seperation_adjustment[16] = - { - 30, 40, 50, 55, 60, 65, 70, 75, -@@ -224,10 +225,11 @@ void vp8_save_coding_context(VP8_COMP *cpi) - { - CODING_CONTEXT *const cc = & cpi->coding_context; - -- // Stores a snapshot of key state variables which can subsequently be -- // restored with a call to vp8_restore_coding_context. These functions are -- // intended for use in a re-code loop in vp8_compress_frame where the -- // quantizer value is adjusted between loop iterations. -+ /* Stores a snapshot of key state variables which can subsequently be -+ * restored with a call to vp8_restore_coding_context. 
These functions are -+ * intended for use in a re-code loop in vp8_compress_frame where the -+ * quantizer value is adjusted between loop iterations. -+ */ - - cc->frames_since_key = cpi->frames_since_key; - cc->filter_level = cpi->common.filter_level; -@@ -235,18 +237,16 @@ void vp8_save_coding_context(VP8_COMP *cpi) - cc->frames_since_golden = cpi->common.frames_since_golden; - - vp8_copy(cc->mvc, cpi->common.fc.mvc); -- vp8_copy(cc->mvcosts, cpi->mb.mvcosts); -+ vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); - -- vp8_copy(cc->kf_ymode_prob, cpi->common.kf_ymode_prob); - vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob); -- vp8_copy(cc->kf_uv_mode_prob, cpi->common.kf_uv_mode_prob); - vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob); - -- vp8_copy(cc->ymode_count, cpi->ymode_count); -- vp8_copy(cc->uv_mode_count, cpi->uv_mode_count); -+ vp8_copy(cc->ymode_count, cpi->mb.ymode_count); -+ vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count); - - -- // Stats -+ /* Stats */ - #ifdef MODE_STATS - vp8_copy(cc->y_modes, y_modes); - vp8_copy(cc->uv_modes, uv_modes); -@@ -264,8 +264,9 @@ void vp8_restore_coding_context(VP8_COMP *cpi) - { - CODING_CONTEXT *const cc = & cpi->coding_context; - -- // Restore key state variables to the snapshot state stored in the -- // previous call to vp8_save_coding_context. -+ /* Restore key state variables to the snapshot state stored in the -+ * previous call to vp8_save_coding_context. -+ */ - - cpi->frames_since_key = cc->frames_since_key; - cpi->common.filter_level = cc->filter_level; -@@ -274,17 +275,15 @@ void vp8_restore_coding_context(VP8_COMP *cpi) - - vp8_copy(cpi->common.fc.mvc, cc->mvc); - -- vp8_copy(cpi->mb.mvcosts, cc->mvcosts); -+ vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts); - -- vp8_copy(cpi->common.kf_ymode_prob, cc->kf_ymode_prob); - vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob); -- vp8_copy(cpi->common.kf_uv_mode_prob, cc->kf_uv_mode_prob); - vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob); - -- vp8_copy(cpi->ymode_count, cc->ymode_count); -- vp8_copy(cpi->uv_mode_count, cc->uv_mode_count); -+ vp8_copy(cpi->mb.ymode_count, cc->ymode_count); -+ vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count); - -- // Stats -+ /* Stats */ - #ifdef MODE_STATS - vp8_copy(y_modes, cc->y_modes); - vp8_copy(uv_modes, cc->uv_modes); -@@ -301,36 +300,30 @@ void vp8_restore_coding_context(VP8_COMP *cpi) - - void vp8_setup_key_frame(VP8_COMP *cpi) - { -- // Setup for Key frame: -+ /* Setup for Key frame: */ - - vp8_default_coef_probs(& cpi->common); - -- -- vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob); -- - vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - { - int flag[2] = {1, 1}; - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); - } - -- vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc)); //initialize pre_mvc to all zero. -- -- // Make sure we initialize separate contexts for altref,gold, and normal. -- // TODO shouldn't need 3 different copies of structure to do this! -+ /* Make sure we initialize separate contexts for altref,gold, and normal. -+ * TODO shouldn't need 3 different copies of structure to do this! -+ */ - vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); - -- //cpi->common.filter_level = 0; // Reset every key frame. 
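/*
 * [Editor's illustrative aside -- not part of the patch above.]
 * vp8_save_coding_context() / vp8_restore_coding_context() snapshot
 * entropy and rate-control state so the encoder can re-code a frame at a
 * different quantizer and roll the state back between attempts.  Below is
 * a minimal sketch of that snapshot/re-code pattern using hypothetical
 * types and names, not the libvpx structures themselves.
 */
#include <string.h>

typedef struct
{
    int mv_costs[2][64];    /* placeholder for per-frame adaptive state */
    int mode_probs[10];
} coding_state;

static void save_state(coding_state *snap, const coding_state *live)
{
    memcpy(snap, live, sizeof(*snap));
}

static void restore_state(coding_state *live, const coding_state *snap)
{
    memcpy(live, snap, sizeof(*live));
}

/* Re-code loop: raise the quantizer and retry until the frame fits. */
static int encode_with_recode(coding_state *live, int target_size,
                              int (*encode_frame)(coding_state *, int q))
{
    coding_state snap;
    int q = 40;           /* arbitrary starting quantizer for the sketch */
    int size;

    for (;;)
    {
        save_state(&snap, live);           /* snapshot before the attempt */
        size = encode_frame(live, q);
        if (size <= target_size || q >= 63)
            break;
        restore_state(live, &snap);        /* roll back side effects */
        q++;                               /* coarser quantizer, fewer bits */
    }
    return size;
}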
- cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ; - -- // Provisional interval before next GF -+ /* Provisional interval before next GF */ - if (cpi->auto_gold) -- //cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - else -- cpi->frames_till_gf_update_due = cpi->goldfreq; -+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - - cpi->common.refresh_golden_frame = 1; - cpi->common.refresh_alt_ref_frame = 1; -@@ -355,12 +348,12 @@ static int estimate_bits_at_q(int frame_kind, int Q, int MBs, - - static void calc_iframe_target_size(VP8_COMP *cpi) - { -- // boost defaults to half second -+ /* boost defaults to half second */ - int kf_boost; -- int target; -+ uint64_t target; - -- // Clear down mmx registers to allow floating point in what follows -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers to allow floating point in what follows */ -+ vp8_clear_system_state(); - - if (cpi->oxcf.fixed_q >= 0) - { -@@ -371,10 +364,10 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - } - else if (cpi->pass == 2) - { -- // New Two pass RC -+ /* New Two pass RC */ - target = cpi->per_frame_bandwidth; - } -- // First Frame is a special case -+ /* First Frame is a special case */ - else if (cpi->common.current_video_frame == 0) - { - /* 1 Pass there is no information on which to base size so use -@@ -388,29 +381,29 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - } - else - { -- // if this keyframe was forced, use a more recent Q estimate -+ /* if this keyframe was forced, use a more recent Q estimate */ - int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) - ? cpi->avg_frame_qindex : cpi->ni_av_qi; - -- int initial_boost = 24; // Corresponds to: |2.5 * per_frame_bandwidth| -- // Boost depends somewhat on frame rate: only used for 1 layer case. -+ int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ -+ /* Boost depends somewhat on frame rate: only used for 1 layer case. */ - if (cpi->oxcf.number_of_layers == 1) { - kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16)); - } - else { -- // Initial factor: set target size to: |2.5 * per_frame_bandwidth|. -+ /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ - kf_boost = initial_boost; - } - -- // adjustment up based on q: this factor ranges from ~1.2 to 2.2. -+ /* adjustment up based on q: this factor ranges from ~1.2 to 2.2. */ - kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100; - -- // frame separation adjustment ( down) -+ /* frame separation adjustment ( down) */ - if (cpi->frames_since_key < cpi->output_frame_rate / 2) - kf_boost = (int)(kf_boost - * cpi->frames_since_key / (cpi->output_frame_rate / 2)); - -- // Minimal target size is |2* per_frame_bandwidth|. -+ /* Minimal target size is |2* per_frame_bandwidth|. */ - if (kf_boost < 16) - kf_boost = 16; - -@@ -427,10 +420,11 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - target = max_rate; - } - -- cpi->this_frame_target = target; -+ cpi->this_frame_target = (int)target; - -- // TODO: if we separate rate targeting from Q targetting, move this. -- // Reset the active worst quality to the baseline value for key frames. -+ /* TODO: if we separate rate targeting from Q targetting, move this. -+ * Reset the active worst quality to the baseline value for key frames. 
-+ */ - if (cpi->pass != 2) - cpi->active_worst_quality = cpi->worst_quality; - -@@ -439,9 +433,6 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - FILE *f; - - f = fopen("kf_boost.stt", "a"); -- //fprintf(f, " %8d %10d %10d %10d %10d %10d %10d\n", -- // cpi->common.current_video_frame, cpi->target_bandwidth, cpi->frames_to_key, kf_boost_qadjustment[cpi->ni_av_qi], cpi->kf_boost, (cpi->this_frame_target *100 / cpi->per_frame_bandwidth), cpi->this_frame_target ); -- - fprintf(f, " %8u %10d %10d %10d\n", - cpi->common.current_video_frame, cpi->gfu_boost, cpi->baseline_gf_interval, cpi->source_alt_ref_pending); - -@@ -451,14 +442,15 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - } - - --// Do the best we can to define the parameters for the next GF based on what --// information we have available. -+/* Do the best we can to define the parameters for the next GF based on what -+ * information we have available. -+ */ - static void calc_gf_params(VP8_COMP *cpi) - { - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int Boost = 0; - -- int gf_frame_useage = 0; // Golden frame useage since last GF -+ int gf_frame_useage = 0; /* Golden frame useage since last GF */ - int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + - cpi->recent_ref_frame_usage[LAST_FRAME] + - cpi->recent_ref_frame_usage[GOLDEN_FRAME] + -@@ -466,33 +458,30 @@ static void calc_gf_params(VP8_COMP *cpi) - - int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); - -- // Reset the last boost indicator -- //cpi->last_boost = 100; -- - if (tot_mbs) - gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; - - if (pct_gf_active > gf_frame_useage) - gf_frame_useage = pct_gf_active; - -- // Not two pass -+ /* Not two pass */ - if (cpi->pass != 2) - { -- // Single Pass lagged mode: TBD -+ /* Single Pass lagged mode: TBD */ - if (0) - { - } - -- // Single Pass compression: Has to use current and historical data -+ /* Single Pass compression: Has to use current and historical data */ - else - { - #if 0 -- // Experimental code -+ /* Experimental code */ - int index = cpi->one_pass_frame_index; - int frames_to_scan = (cpi->max_gf_interval <= MAX_LAG_BUFFERS) ? cpi->max_gf_interval : MAX_LAG_BUFFERS; - -+ /* ************** Experimental code - incomplete */ - /* -- // *************** Experimental code - incomplete - double decay_val = 1.0; - double IIAccumulator = 0.0; - double last_iiaccumulator = 0.0; -@@ -535,48 +524,51 @@ static void calc_gf_params(VP8_COMP *cpi) - #else - - /*************************************************************/ -- // OLD code -+ /* OLD code */ - -- // Adjust boost based upon ambient Q -+ /* Adjust boost based upon ambient Q */ - Boost = GFQ_ADJUSTMENT; - -- // Adjust based upon most recently measure intra useage -+ /* Adjust based upon most recently measure intra useage */ - Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100; - -- // Adjust gf boost based upon GF usage since last GF -+ /* Adjust gf boost based upon GF usage since last GF */ - Boost = Boost * gf_adjust_table[gf_frame_useage] / 100; - #endif - } - -- // golden frame boost without recode loop often goes awry. be safe by keeping numbers down. -+ /* golden frame boost without recode loop often goes awry. be -+ * safe by keeping numbers down. 
-+ */ - if (!cpi->sf.recode_loop) - { - if (cpi->compressor_speed == 2) - Boost = Boost / 2; - } - -- // Apply an upper limit based on Q for 1 pass encodes -+ /* Apply an upper limit based on Q for 1 pass encodes */ - if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0)) - Boost = kf_gf_boost_qlimits[Q]; - -- // Apply lower limits to boost. -+ /* Apply lower limits to boost. */ - else if (Boost < 110) - Boost = 110; - -- // Note the boost used -+ /* Note the boost used */ - cpi->last_boost = Boost; - - } - -- // Estimate next interval -- // This is updated once the real frame size/boost is known. -+ /* Estimate next interval -+ * This is updated once the real frame size/boost is known. -+ */ - if (cpi->oxcf.fixed_q == -1) - { -- if (cpi->pass == 2) // 2 Pass -+ if (cpi->pass == 2) /* 2 Pass */ - { - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - } -- else // 1 Pass -+ else /* 1 Pass */ - { - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - -@@ -602,10 +594,10 @@ static void calc_gf_params(VP8_COMP *cpi) - else - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - -- // ARF on or off -+ /* ARF on or off */ - if (cpi->pass != 2) - { -- // For now Alt ref is not allowed except in 2 pass modes. -+ /* For now Alt ref is not allowed except in 2 pass modes. */ - cpi->source_alt_ref_pending = 0; - - /*if ( cpi->oxcf.fixed_q == -1) -@@ -642,89 +634,34 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - min_frame_target = cpi->per_frame_bandwidth / 4; - - -- // Special alt reference frame case -+ /* Special alt reference frame case */ - if((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1)) - { - if (cpi->pass == 2) - { -- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame -+ /* Per frame bit target for the alt ref frame */ -+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; - cpi->this_frame_target = cpi->per_frame_bandwidth; - } - - /* One Pass ??? TBD */ -- /*else -- { -- int frames_in_section; -- int allocation_chunks; -- int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; -- int alt_boost; -- int max_arf_rate; -- -- alt_boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); -- alt_boost += (cpi->frames_till_gf_update_due * 50); -- -- // If alt ref is not currently active then we have a pottential double hit with GF and ARF so reduce the boost a bit. -- // A similar thing is done on GFs that preceed a arf update. -- if ( !cpi->source_alt_ref_active ) -- alt_boost = alt_boost * 3 / 4; -- -- frames_in_section = cpi->frames_till_gf_update_due+1; // Standard frames + GF -- allocation_chunks = (frames_in_section * 100) + alt_boost; -- -- // Normalize Altboost and allocations chunck down to prevent overflow -- while ( alt_boost > 1000 ) -- { -- alt_boost /= 2; -- allocation_chunks /= 2; -- } -- -- else -- { -- int bits_in_section; -- -- if ( cpi->kf_overspend_bits > 0 ) -- { -- Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; -- -- if ( Adjustment > (cpi->per_frame_bandwidth - min_frame_target) ) -- Adjustment = (cpi->per_frame_bandwidth - min_frame_target); -- -- cpi->kf_overspend_bits -= Adjustment; -- -- // Calculate an inter frame bandwidth target for the next few frames designed to recover -- // any extra bits spent on the key frame. 
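/*
 * [Editor's illustrative aside -- not part of the patch above.]
 * After an expensive key frame the rate control carries an "overspend"
 * debt and claws it back by trimming a bounded adjustment off the
 * per-frame bandwidth of the following inter frames, never dropping a
 * frame below its minimum target.  A stand-alone sketch of that recovery
 * step, with hypothetical names rather than the cpi->... fields:
 */
static int recover_kf_overspend(int per_frame_bandwidth, int min_frame_target,
                                int per_frame_payback, int *overspend_bits)
{
    int target = per_frame_bandwidth;

    if (*overspend_bits > 0)
    {
        /* Pay back at most the configured per-frame amount... */
        int adjustment = per_frame_payback < *overspend_bits
                             ? per_frame_payback : *overspend_bits;

        /* ...and never push the frame below its minimum size. */
        if (adjustment > per_frame_bandwidth - min_frame_target)
            adjustment = per_frame_bandwidth - min_frame_target;

        *overspend_bits -= adjustment;
        target = per_frame_bandwidth - adjustment;

        if (target < min_frame_target)
            target = min_frame_target;
    }
    return target;
}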
-- cpi->inter_frame_target = cpi->per_frame_bandwidth - Adjustment; -- if ( cpi->inter_frame_target < min_frame_target ) -- cpi->inter_frame_target = min_frame_target; -- } -- else -- cpi->inter_frame_target = cpi->per_frame_bandwidth; -- -- bits_in_section = cpi->inter_frame_target * frames_in_section; -- -- // Avoid loss of precision but avoid overflow -- if ( (bits_in_section>>7) > allocation_chunks ) -- cpi->this_frame_target = alt_boost * (bits_in_section / allocation_chunks); -- else -- cpi->this_frame_target = (alt_boost * bits_in_section) / allocation_chunks; -- } -- } -- */ - } - -- // Normal frames (gf,and inter) -+ /* Normal frames (gf,and inter) */ - else - { -- // 2 pass -+ /* 2 pass */ - if (cpi->pass == 2) - { - cpi->this_frame_target = cpi->per_frame_bandwidth; - } -- // 1 pass -+ /* 1 pass */ - else - { -- // Make rate adjustment to recover bits spent in key frame -- // Test to see if the key frame inter data rate correction should still be in force -+ /* Make rate adjustment to recover bits spent in key frame -+ * Test to see if the key frame inter data rate correction -+ * should still be in force -+ */ - if (cpi->kf_overspend_bits > 0) - { - Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; -@@ -734,8 +671,10 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - - cpi->kf_overspend_bits -= Adjustment; - -- // Calculate an inter frame bandwidth target for the next few frames designed to recover -- // any extra bits spent on the key frame. -+ /* Calculate an inter frame bandwidth target for the next -+ * few frames designed to recover any extra bits spent on -+ * the key frame. -+ */ - cpi->this_frame_target = cpi->per_frame_bandwidth - Adjustment; - - if (cpi->this_frame_target < min_frame_target) -@@ -744,7 +683,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - else - cpi->this_frame_target = cpi->per_frame_bandwidth; - -- // If appropriate make an adjustment to recover bits spent on a recent GF -+ /* If appropriate make an adjustment to recover bits spent on a -+ * recent GF -+ */ - if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) - { - int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; -@@ -756,11 +697,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - cpi->this_frame_target -= Adjustment; - } - -- // Apply small + and - boosts for non gf frames -+ /* Apply small + and - boosts for non gf frames */ - if ((cpi->last_boost > 150) && (cpi->frames_till_gf_update_due > 0) && - (cpi->current_gf_interval >= (MIN_GF_INTERVAL << 1))) - { -- // % Adjustment limited to the range 1% to 10% -+ /* % Adjustment limited to the range 1% to 10% */ - Adjustment = (cpi->last_boost - 100) >> 5; - - if (Adjustment < 1) -@@ -768,7 +709,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - else if (Adjustment > 10) - Adjustment = 10; - -- // Convert to bits -+ /* Convert to bits */ - Adjustment = (cpi->this_frame_target * Adjustment) / 100; - - if (Adjustment > (cpi->this_frame_target - min_frame_target)) -@@ -782,47 +723,53 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - } - } - -- // Sanity check that the total sum of adjustments is not above the maximum allowed -- // That is that having allowed for KF and GF penalties we have not pushed the -- // current interframe target to low. 
If the adjustment we apply here is not capable of recovering -- // all the extra bits we have spent in the KF or GF then the remainder will have to be recovered over -- // a longer time span via other buffer / rate control mechanisms. -+ /* Sanity check that the total sum of adjustments is not above the -+ * maximum allowed That is that having allowed for KF and GF penalties -+ * we have not pushed the current interframe target to low. If the -+ * adjustment we apply here is not capable of recovering all the extra -+ * bits we have spent in the KF or GF then the remainder will have to -+ * be recovered over a longer time span via other buffer / rate control -+ * mechanisms. -+ */ - if (cpi->this_frame_target < min_frame_target) - cpi->this_frame_target = min_frame_target; - - if (!cpi->common.refresh_alt_ref_frame) -- // Note the baseline target data rate for this inter frame. -+ /* Note the baseline target data rate for this inter frame. */ - cpi->inter_frame_target = cpi->this_frame_target; - -- // One Pass specific code -+ /* One Pass specific code */ - if (cpi->pass == 0) - { -- // Adapt target frame size with respect to any buffering constraints: -+ /* Adapt target frame size with respect to any buffering constraints: */ - if (cpi->buffered_mode) - { -- int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100; -+ int one_percent_bits = (int) -+ (1 + cpi->oxcf.optimal_buffer_level / 100); - - if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || - (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level)) - { - int percent_low = 0; - -- // Decide whether or not we need to adjust the frame data rate target. -- // -- // If we are are below the optimal buffer fullness level and adherence -- // to buffering constraints is important to the end usage then adjust -- // the per frame target. -+ /* Decide whether or not we need to adjust the frame data -+ * rate target. -+ * -+ * If we are are below the optimal buffer fullness level -+ * and adherence to buffering constraints is important to -+ * the end usage then adjust the per frame target. -+ */ - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && - (cpi->buffer_level < cpi->oxcf.optimal_buffer_level)) - { -- percent_low = -- (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / -- one_percent_bits; -+ percent_low = (int) -+ ((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / -+ one_percent_bits); - } -- // Are we overshooting the long term clip data rate... -+ /* Are we overshooting the long term clip data rate... */ - else if (cpi->bits_off_target < 0) - { -- // Adjust per frame data target downwards to compensate. -+ /* Adjust per frame data target downwards to compensate. */ - percent_low = (int)(100 * -cpi->bits_off_target / - (cpi->total_byte_count * 8)); - } -@@ -832,40 +779,46 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - else if (percent_low < 0) - percent_low = 0; - -- // lower the target bandwidth for this frame. -+ /* lower the target bandwidth for this frame. */ - cpi->this_frame_target -= - (cpi->this_frame_target * percent_low) / 200; - -- // Are we using allowing control of active_worst_allowed_q -- // according to buffer level. -+ /* Are we using allowing control of active_worst_allowed_q -+ * according to buffer level. -+ */ - if (cpi->auto_worst_q && cpi->ni_frames > 150) - { -- int critical_buffer_level; -- -- // For streaming applications the most important factor is -- // cpi->buffer_level as this takes into account the -- // specified short term buffering constraints. 
However, -- // hitting the long term clip data rate target is also -- // important. -+ int64_t critical_buffer_level; -+ -+ /* For streaming applications the most important factor is -+ * cpi->buffer_level as this takes into account the -+ * specified short term buffering constraints. However, -+ * hitting the long term clip data rate target is also -+ * important. -+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- // Take the smaller of cpi->buffer_level and -- // cpi->bits_off_target -+ /* Take the smaller of cpi->buffer_level and -+ * cpi->bits_off_target -+ */ - critical_buffer_level = - (cpi->buffer_level < cpi->bits_off_target) - ? cpi->buffer_level : cpi->bits_off_target; - } -- // For local file playback short term buffering constraints -- // are less of an issue -+ /* For local file playback short term buffering constraints -+ * are less of an issue -+ */ - else - { -- // Consider only how we are doing for the clip as a -- // whole -+ /* Consider only how we are doing for the clip as a -+ * whole -+ */ - critical_buffer_level = cpi->bits_off_target; - } - -- // Set the active worst quality based upon the selected -- // buffer fullness number. -+ /* Set the active worst quality based upon the selected -+ * buffer fullness number. -+ */ - if (critical_buffer_level < cpi->oxcf.optimal_buffer_level) - { - if ( critical_buffer_level > -@@ -877,15 +830,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - (critical_buffer_level - - (cpi->oxcf.optimal_buffer_level >> 2)); - -- // Step active worst quality down from -- // cpi->ni_av_qi when (critical_buffer_level == -- // cpi->optimal_buffer_level) to -- // cpi->worst_quality when -- // (critical_buffer_level == -- // cpi->optimal_buffer_level >> 2) -+ /* Step active worst quality down from -+ * cpi->ni_av_qi when (critical_buffer_level == -+ * cpi->optimal_buffer_level) to -+ * cpi->worst_quality when -+ * (critical_buffer_level == -+ * cpi->optimal_buffer_level >> 2) -+ */ - cpi->active_worst_quality = - cpi->worst_quality - -- ((qadjustment_range * above_base) / -+ (int)((qadjustment_range * above_base) / - (cpi->oxcf.optimal_buffer_level*3>>2)); - } - else -@@ -910,9 +864,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)) - { -- percent_high = (cpi->buffer_level -+ percent_high = (int)((cpi->buffer_level - - cpi->oxcf.optimal_buffer_level) -- / one_percent_bits; -+ / one_percent_bits); - } - else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) - { -@@ -928,11 +882,14 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - cpi->this_frame_target += (cpi->this_frame_target * - percent_high) / 200; - -- // Are we allowing control of active_worst_allowed_q according -- // to buffer level. -+ /* Are we allowing control of active_worst_allowed_q according -+ * to buffer level. 
-+ */ - if (cpi->auto_worst_q && cpi->ni_frames > 150) - { -- // When using the relaxed buffer model stick to the user specified value -+ /* When using the relaxed buffer model stick to the -+ * user specified value -+ */ - cpi->active_worst_quality = cpi->ni_av_qi; - } - else -@@ -941,26 +898,27 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - } - } - -- // Set active_best_quality to prevent quality rising too high -+ /* Set active_best_quality to prevent quality rising too high */ - cpi->active_best_quality = cpi->best_quality; - -- // Worst quality obviously must not be better than best quality -+ /* Worst quality obviously must not be better than best quality */ - if (cpi->active_worst_quality <= cpi->active_best_quality) - cpi->active_worst_quality = cpi->active_best_quality + 1; - - if(cpi->active_worst_quality > 127) - cpi->active_worst_quality = 127; - } -- // Unbuffered mode (eg. video conferencing) -+ /* Unbuffered mode (eg. video conferencing) */ - else - { -- // Set the active worst quality -+ /* Set the active worst quality */ - cpi->active_worst_quality = cpi->worst_quality; - } - -- // Special trap for constrained quality mode -- // "active_worst_quality" may never drop below cq level -- // for any frame type. -+ /* Special trap for constrained quality mode -+ * "active_worst_quality" may never drop below cq level -+ * for any frame type. -+ */ - if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && - cpi->active_worst_quality < cpi->cq_target_quality) - { -@@ -968,16 +926,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - } - } - -- // Test to see if we have to drop a frame -- // The auto-drop frame code is only used in buffered mode. -- // In unbufferd mode (eg vide conferencing) the descision to -- // code or drop a frame is made outside the codec in response to real -- // world comms or buffer considerations. -- if (cpi->drop_frames_allowed && cpi->buffered_mode && -+ /* Test to see if we have to drop a frame -+ * The auto-drop frame code is only used in buffered mode. -+ * In unbufferd mode (eg vide conferencing) the descision to -+ * code or drop a frame is made outside the codec in response to real -+ * world comms or buffer considerations. -+ */ -+ if (cpi->drop_frames_allowed && - (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && -- ((cpi->common.frame_type != KEY_FRAME))) //|| !cpi->oxcf.allow_spatial_resampling) ) -+ ((cpi->common.frame_type != KEY_FRAME))) - { -- // Check for a buffer underun-crisis in which case we have to drop a frame -+ /* Check for a buffer underun-crisis in which case we have to drop -+ * a frame -+ */ - if ((cpi->buffer_level < 0)) - { - #if 0 -@@ -988,41 +949,23 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - (cpi->buffer_level * 100) / cpi->oxcf.optimal_buffer_level); - fclose(f); - #endif -- //vpx_log("Decoder: Drop frame due to bandwidth: %d \n",cpi->buffer_level, cpi->av_per_frame_bandwidth); -- -- cpi->drop_frame = 1; -- } -- --#if 0 -- // Check for other drop frame crtieria (Note 2 pass cbr uses decimation on whole KF sections) -- else if ((cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && -- (cpi->drop_count < cpi->max_drop_count) && (cpi->pass == 0)) -- { - cpi->drop_frame = 1; -- } -- --#endif - -- if (cpi->drop_frame) -- { -- // Update the buffer level variable. -+ /* Update the buffer level variable. 
*/ - cpi->bits_off_target += cpi->av_per_frame_bandwidth; - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) -- cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; -+ cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size; - cpi->buffer_level = cpi->bits_off_target; - } -- else -- cpi->drop_count = 0; - } - -- // Adjust target frame size for Golden Frames: -+ /* Adjust target frame size for Golden Frames: */ - if (cpi->oxcf.error_resilient_mode == 0 && - (cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame) - { -- //int Boost = 0; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - -- int gf_frame_useage = 0; // Golden frame useage since last GF -+ int gf_frame_useage = 0; /* Golden frame useage since last GF */ - int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + - cpi->recent_ref_frame_usage[LAST_FRAME] + - cpi->recent_ref_frame_usage[GOLDEN_FRAME] + -@@ -1030,30 +973,29 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - - int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); - -- // Reset the last boost indicator -- //cpi->last_boost = 100; -- - if (tot_mbs) - gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; - - if (pct_gf_active > gf_frame_useage) - gf_frame_useage = pct_gf_active; - -- // Is a fixed manual GF frequency being used -+ /* Is a fixed manual GF frequency being used */ - if (cpi->auto_gold) - { -- // For one pass throw a GF if recent frame intra useage is low or the GF useage is high -+ /* For one pass throw a GF if recent frame intra useage is -+ * low or the GF useage is high -+ */ - if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) - cpi->common.refresh_golden_frame = 1; - -- // Two pass GF descision -+ /* Two pass GF descision */ - else if (cpi->pass == 2) - cpi->common.refresh_golden_frame = 1; - } - - #if 0 - -- // Debug stats -+ /* Debug stats */ - if (0) - { - FILE *f; -@@ -1070,7 +1012,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - { - #if 0 - -- if (0) // p_gw -+ if (0) - { - FILE *f; - -@@ -1086,16 +1028,20 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - calc_gf_params(cpi); - } - -- // If we are using alternate ref instead of gf then do not apply the boost -- // It will instead be applied to the altref update -- // Jims modified boost -+ /* If we are using alternate ref instead of gf then do not apply the -+ * boost It will instead be applied to the altref update Jims -+ * modified boost -+ */ - if (!cpi->source_alt_ref_active) - { - if (cpi->oxcf.fixed_q < 0) - { - if (cpi->pass == 2) - { -- cpi->this_frame_target = cpi->per_frame_bandwidth; // The spend on the GF is defined in the two pass code for two pass encodes -+ /* The spend on the GF is defined in the two pass -+ * code for two pass encodes -+ */ -+ cpi->this_frame_target = cpi->per_frame_bandwidth; - } - else - { -@@ -1104,14 +1050,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - int allocation_chunks = (frames_in_section * 100) + (Boost - 100); - int bits_in_section = cpi->inter_frame_target * frames_in_section; - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to -+ * prevent overflow -+ */ - while (Boost > 1000) - { - Boost /= 2; - allocation_chunks /= 2; - } - -- // Avoid loss of precision but avoid overflow -+ /* Avoid loss of precision but avoid overflow */ - if ((bits_in_section 
>> 7) > allocation_chunks) - cpi->this_frame_target = Boost * (bits_in_section / allocation_chunks); - else -@@ -1124,10 +1072,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - * cpi->last_boost) / 100; - - } -- // If there is an active ARF at this location use the minimum -- // bits on this frame even if it is a contructed arf. -- // The active maximum quantizer insures that an appropriate -- // number of bits will be spent if needed for contstructed ARFs. -+ /* If there is an active ARF at this location use the minimum -+ * bits on this frame even if it is a contructed arf. -+ * The active maximum quantizer insures that an appropriate -+ * number of bits will be spent if needed for contstructed ARFs. -+ */ - else - { - cpi->this_frame_target = 0; -@@ -1151,8 +1100,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - - int projected_size_based_on_q = 0; - -- // Clear down mmx registers to allow floating point in what follows -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers to allow floating point in what follows */ -+ vp8_clear_system_state(); - - if (cpi->common.frame_type == KEY_FRAME) - { -@@ -1160,23 +1109,26 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - } - else - { -- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) -+ if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ cpi->common.refresh_golden_frame)) - rate_correction_factor = cpi->gf_rate_correction_factor; - else - rate_correction_factor = cpi->rate_correction_factor; - } - -- // Work out how big we would have expected the frame to be at this Q given the current correction factor. -- // Stay in double to avoid int overflow when values are large -- //projected_size_based_on_q = ((int)(.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) >> BPER_MB_NORMBITS; -+ /* Work out how big we would have expected the frame to be at this Q -+ * given the current correction factor. Stay in double to avoid int -+ * overflow when values are large -+ */ - projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); - -- // Make some allowance for cpi->zbin_over_quant -- if (cpi->zbin_over_quant > 0) -+ /* Make some allowance for cpi->zbin_over_quant */ -+ if (cpi->mb.zbin_over_quant > 0) - { -- int Z = cpi->zbin_over_quant; -+ int Z = cpi->mb.zbin_over_quant; - double Factor = 0.99; -- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; -+ double factor_adjustment = 0.01 / 256.0; - - while (Z > 0) - { -@@ -1190,13 +1142,13 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - } - } - -- // Work out a size correction factor. -- //if ( cpi->this_frame_target > 0 ) -- // correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target; -+ /* Work out a size correction factor. 
*/ - if (projected_size_based_on_q > 0) - correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q; - -- // More heavily damped adjustment used if we have been oscillating either side of target -+ /* More heavily damped adjustment used if we have been oscillating -+ * either side of target -+ */ - switch (damp_var) - { - case 0: -@@ -1211,25 +1163,23 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - break; - } - -- //if ( (correction_factor > 102) && (Q < cpi->active_worst_quality) ) - if (correction_factor > 102) - { -- // We are not already at the worst allowable quality -+ /* We are not already at the worst allowable quality */ - correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - -- // Keep rate_correction_factor within limits -+ /* Keep rate_correction_factor within limits */ - if (rate_correction_factor > MAX_BPB_FACTOR) - rate_correction_factor = MAX_BPB_FACTOR; - } -- //else if ( (correction_factor < 99) && (Q > cpi->active_best_quality) ) - else if (correction_factor < 99) - { -- // We are not already at the best allowable quality -+ /* We are not already at the best allowable quality */ - correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - -- // Keep rate_correction_factor within limits -+ /* Keep rate_correction_factor within limits */ - if (rate_correction_factor < MIN_BPB_FACTOR) - rate_correction_factor = MIN_BPB_FACTOR; - } -@@ -1238,7 +1188,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - cpi->key_frame_rate_correction_factor = rate_correction_factor; - else - { -- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) -+ if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ cpi->common.refresh_golden_frame)) - cpi->gf_rate_correction_factor = rate_correction_factor; - else - cpi->rate_correction_factor = rate_correction_factor; -@@ -1250,8 +1202,8 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - { - int Q = cpi->active_worst_quality; - -- // Reset Zbin OQ value -- cpi->zbin_over_quant = 0; -+ /* Reset Zbin OQ value */ -+ cpi->mb.zbin_over_quant = 0; - - if (cpi->oxcf.fixed_q >= 0) - { -@@ -1261,11 +1213,13 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - { - Q = cpi->oxcf.key_q; - } -- else if (cpi->common.refresh_alt_ref_frame) -+ else if (cpi->oxcf.number_of_layers == 1 && -+ cpi->common.refresh_alt_ref_frame) - { - Q = cpi->oxcf.alt_q; - } -- else if (cpi->common.refresh_golden_frame) -+ else if (cpi->oxcf.number_of_layers == 1 && -+ cpi->common.refresh_golden_frame) - { - Q = cpi->oxcf.gold_q; - } -@@ -1279,20 +1233,25 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - int bits_per_mb_at_this_q; - double correction_factor; - -- // Select the appropriate correction factor based upon type of frame. -+ /* Select the appropriate correction factor based upon type of frame. 
*/ - if (cpi->common.frame_type == KEY_FRAME) - correction_factor = cpi->key_frame_rate_correction_factor; - else - { -- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) -+ if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ cpi->common.refresh_golden_frame)) - correction_factor = cpi->gf_rate_correction_factor; - else - correction_factor = cpi->rate_correction_factor; - } - -- // Calculate required scaling factor based on target frame size and size of frame produced using previous Q -+ /* Calculate required scaling factor based on target frame size and -+ * size of frame produced using previous Q -+ */ - if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) -- target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int -+ /* Case where we would overflow int */ -+ target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; - -@@ -1317,18 +1276,23 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - while (++i <= cpi->active_worst_quality); - - -- // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like -- // the RD multiplier and zero bin size. -+ /* If we are at MAXQ then enable Q over-run which seeks to claw -+ * back additional bits through things like the RD multiplier -+ * and zero bin size. -+ */ - if (Q >= MAXQ) - { - int zbin_oqmax; - - double Factor = 0.99; -- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; -+ double factor_adjustment = 0.01 / 256.0; - - if (cpi->common.frame_type == KEY_FRAME) -- zbin_oqmax = 0; //ZBIN_OQ_MAX/16 -- else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active)) -+ zbin_oqmax = 0; -+ else if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ (cpi->common.refresh_golden_frame && -+ !cpi->source_alt_ref_active))) - zbin_oqmax = 16; - else - zbin_oqmax = ZBIN_OQ_MAX; -@@ -1347,25 +1311,29 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - cpi->zbin_over_quant = (int)Oq; - }*/ - -- // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true. -- // The effect will be highly clip dependent and may well have sudden steps. -- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero -- // bin and hence decreasing the number of low magnitude non zero coefficients. -- while (cpi->zbin_over_quant < zbin_oqmax) -+ /* Each incrment in the zbin is assumed to have a fixed effect -+ * on bitrate. This is not of course true. The effect will be -+ * highly clip dependent and may well have sudden steps. The -+ * idea here is to acheive higher effective quantizers than the -+ * normal maximum by expanding the zero bin and hence -+ * decreasing the number of low magnitude non zero coefficients. 
-+ */ -+ while (cpi->mb.zbin_over_quant < zbin_oqmax) - { -- cpi->zbin_over_quant ++; -+ cpi->mb.zbin_over_quant ++; - -- if (cpi->zbin_over_quant > zbin_oqmax) -- cpi->zbin_over_quant = zbin_oqmax; -+ if (cpi->mb.zbin_over_quant > zbin_oqmax) -+ cpi->mb.zbin_over_quant = zbin_oqmax; - -- // Adjust bits_per_mb_at_this_q estimate -+ /* Adjust bits_per_mb_at_this_q estimate */ - bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); - Factor += factor_adjustment; - - if (Factor >= 0.999) - Factor = 0.999; - -- if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate -+ /* Break out if we get down to the target rate */ -+ if (bits_per_mb_at_this_q <= target_bits_per_mb) - break; - } - -@@ -1380,7 +1348,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) - { - int i; - -- // Average key frame frequency -+ /* Average key frame frequency */ - int av_key_frame_frequency = 0; - - /* First key frame at start of sequence is a special case. We have no -@@ -1431,11 +1399,11 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) - - void vp8_adjust_key_frame_context(VP8_COMP *cpi) - { -- // Clear down mmx registers to allow floating point in what follows -+ /* Clear down mmx registers to allow floating point in what follows */ - vp8_clear_system_state(); - -- // Do we have any key frame overspend to recover? -- // Two-pass overspend handled elsewhere. -+ /* Do we have any key frame overspend to recover? */ -+ /* Two-pass overspend handled elsewhere. */ - if ((cpi->pass != 2) - && (cpi->projected_frame_size > cpi->per_frame_bandwidth)) - { -@@ -1469,10 +1437,12 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) - - void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit) - { -- // Set-up bounds on acceptable frame size: -+ /* Set-up bounds on acceptable frame size: */ - if (cpi->oxcf.fixed_q >= 0) - { -- // Fixed Q scenario: frame size never outranges target (there is no target!) -+ /* Fixed Q scenario: frame size never outranges target -+ * (there is no target!) 
-+ */ - *frame_under_shoot_limit = 0; - *frame_over_shoot_limit = INT_MAX; - } -@@ -1494,18 +1464,22 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - } - else - { -- // For CBR take buffer fullness into account -+ /* For CBR take buffer fullness into account */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - if (cpi->buffer_level >= ((cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1)) - { -- // Buffer is too full so relax overshoot and tighten undershoot -+ /* Buffer is too full so relax overshoot and tighten -+ * undershoot -+ */ - *frame_over_shoot_limit = cpi->this_frame_target * 12 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 6 / 8; - } - else if (cpi->buffer_level <= (cpi->oxcf.optimal_buffer_level >> 1)) - { -- // Buffer is too low so relax undershoot and tighten overshoot -+ /* Buffer is too low so relax undershoot and tighten -+ * overshoot -+ */ - *frame_over_shoot_limit = cpi->this_frame_target * 10 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 4 / 8; - } -@@ -1515,11 +1489,13 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; - } - } -- // VBR and CQ mode -- // Note that tighter restrictions here can help quality but hurt encode speed -+ /* VBR and CQ mode */ -+ /* Note that tighter restrictions here can help quality -+ * but hurt encode speed -+ */ - else - { -- // Stron overshoot limit for constrained quality -+ /* Stron overshoot limit for constrained quality */ - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) - { - *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; -@@ -1534,9 +1510,10 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - } - } - -- // For very small rate targets where the fractional adjustment -- // (eg * 7/8) may be tiny make sure there is at least a minimum -- // range. -+ /* For very small rate targets where the fractional adjustment -+ * (eg * 7/8) may be tiny make sure there is at least a minimum -+ * range. 
-+ */ - *frame_over_shoot_limit += 200; - *frame_under_shoot_limit -= 200; - if ( *frame_under_shoot_limit < 0 ) -@@ -1546,7 +1523,7 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - } - - --// return of 0 means drop frame -+/* return of 0 means drop frame */ - int vp8_pick_frame_size(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; -@@ -1557,11 +1534,10 @@ int vp8_pick_frame_size(VP8_COMP *cpi) - { - calc_pframe_target_size(cpi); - -- // Check if we're dropping the frame: -+ /* Check if we're dropping the frame: */ - if (cpi->drop_frame) - { - cpi->drop_frame = 0; -- cpi->drop_count++; - return 0; - } - } -diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h -index d4f7796..c43f08d 100644 ---- a/vp8/encoder/ratectrl.h -+++ b/vp8/encoder/ratectrl.h -@@ -22,7 +22,7 @@ extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame); - extern void vp8_adjust_key_frame_context(VP8_COMP *cpi); - extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit); - --// return of 0 means drop frame -+/* return of 0 means drop frame */ - extern int vp8_pick_frame_size(VP8_COMP *cpi); - - #endif -diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c -index 2b706ba..ceb817c 100644 ---- a/vp8/encoder/rdopt.c -+++ b/vp8/encoder/rdopt.c -@@ -21,6 +21,7 @@ - #include "onyx_int.h" - #include "modecosts.h" - #include "encodeintra.h" -+#include "pickinter.h" - #include "vp8/common/entropymode.h" - #include "vp8/common/reconinter.h" - #include "vp8/common/reconintra4x4.h" -@@ -36,7 +37,6 @@ - #if CONFIG_TEMPORAL_DENOISING - #include "denoising.h" - #endif -- - extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); - - #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) -@@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] = - }; - - static void fill_token_costs( -- unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], -- const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] -+ int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], -+ const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] - ) - { - int i, j, k; -@@ -159,21 +159,26 @@ static void fill_token_costs( - for (i = 0; i < BLOCK_TYPES; i++) - for (j = 0; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) -- // check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1 -- if(k==0 && j>(i==0) ) -- vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2); -+ -+ /* check for pt=0 and band > 1 if block type 0 -+ * and 0 if blocktype 1 -+ */ -+ if (k == 0 && j > (i == 0)) -+ vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2); - else -- vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree); -+ vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree); - } - --static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0, -- }; -+static const int rd_iifactor[32] = -+{ -+ 4, 4, 3, 2, 1, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0 -+}; - - /* values are now correlated to quantizer */ --static int sad_per_bit16lut[QINDEX_RANGE] = -+static const int sad_per_bit16lut[QINDEX_RANGE] = - { - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -@@ -192,7 +197,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] = - 11, 11, 11, 11, 12, 12, 12, 12, - 12, 12, 13, 13, 13, 13, 14, 14 - }; --static int 
sad_per_bit4lut[QINDEX_RANGE] = -+static const int sad_per_bit4lut[QINDEX_RANGE] = - { - 2, 2, 2, 2, 2, 2, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, -@@ -218,30 +223,30 @@ void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) - cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; - } - --void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) -+void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) - { - int q; - int i; - double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; - double rdconst = 2.80; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - -- // Further tests required to see if optimum is different -- // for key frames, golden frames and arf frames. -- // if (cpi->common.refresh_golden_frame || -- // cpi->common.refresh_alt_ref_frame) -+ /* Further tests required to see if optimum is different -+ * for key frames, golden frames and arf frames. -+ */ - cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); - -- // Extend rate multiplier along side quantizer zbin increases -- if (cpi->zbin_over_quant > 0) -+ /* Extend rate multiplier along side quantizer zbin increases */ -+ if (cpi->mb.zbin_over_quant > 0) - { - double oq_factor; - double modq; - -- // Experimental code using the same basic equation as used for Q above -- // The units of cpi->zbin_over_quant are 1/128 of Q bin size -- oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant); -+ /* Experimental code using the same basic equation as used for Q above -+ * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size -+ */ -+ oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant); - modq = (int)((double)capped_q * oq_factor); - cpi->RDMULT = (int)(rdconst * (modq * modq)); - } -@@ -260,6 +265,11 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - - vp8_set_speed_features(cpi); - -+ for (i = 0; i < MAX_MODES; i++) -+ { -+ x->mode_test_hit_counts[i] = 0; -+ } -+ - q = (int)pow(Qvalue, 1.25); - - if (q < 8) -@@ -274,14 +284,14 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - { - if (cpi->sf.thresh_mult[i] < INT_MAX) - { -- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; -+ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; - } - else - { -- cpi->rd_threshes[i] = INT_MAX; -+ x->rd_threshes[i] = INT_MAX; - } - -- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; -+ cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; - } - } - else -@@ -292,19 +302,19 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - { - if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) - { -- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; -+ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; - } - else - { -- cpi->rd_threshes[i] = INT_MAX; -+ x->rd_threshes[i] = INT_MAX; - } - -- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; -+ cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; - } - } - - { -- // build token cost array for the type of frame we have now -+ /* build token cost array for the type of frame we have now */ - FRAME_CONTEXT *l = &cpi->lfc_n; - - if(cpi->common.refresh_alt_ref_frame) -@@ -323,12 +333,8 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - */ - - -- // TODO make these mode costs depend on last,alt or gold too. (jbb) -+ /* TODO make these mode costs depend on last,alt or gold too. 
(jbb) */ - vp8_init_mode_costs(cpi); -- -- // TODO figure onnnnuut why making mv cost frame type dependent didn't help (jbb) -- //vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) l->mvc, flags); -- - } - - } -@@ -353,14 +359,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) - - #endif - -- /* -- // this is done during parameter valid check -- if( cpi->oxcf.cpu_used > 16) -- cpi->oxcf.cpu_used = 16; -- if( cpi->oxcf.cpu_used < -16) -- cpi->oxcf.cpu_used = -16; -- */ -- - if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) - { - if (cpi->avg_pick_mode_time == 0) -@@ -387,10 +385,10 @@ void vp8_auto_select_speed(VP8_COMP *cpi) - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - -- // In real-time mode, cpi->speed is in [4, 16]. -- if (cpi->Speed < 4) //if ( cpi->Speed < 0 ) -+ /* In real-time mode, cpi->speed is in [4, 16]. */ -+ if (cpi->Speed < 4) - { -- cpi->Speed = 4; //cpi->Speed = 0; -+ cpi->Speed = 4; - } - } - } -@@ -546,7 +544,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, - if (c < 16) - cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; - -- pt = (c != !type); // is eob first coefficient; -+ pt = (c != !type); /* is eob first coefficient; */ - *a = *l = pt; - - return cost; -@@ -592,7 +590,7 @@ static void macro_block_yrd( MACROBLOCK *mb, - vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src), - mb->block[0].src_stride, mb->e_mbd.predictor, 16); - -- // Fdct and building the 2nd order block -+ /* Fdct and building the 2nd order block */ - for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) - { - mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32); -@@ -600,25 +598,25 @@ static void macro_block_yrd( MACROBLOCK *mb, - *Y2DCPtr++ = beptr->coeff[16]; - } - -- // 2nd order fdct -+ /* 2nd order fdct */ - mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); - -- // Quantization -+ /* Quantization */ - for (b = 0; b < 16; b++) - { - mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); - } - -- // DC predication and Quantization of 2nd Order block -+ /* DC predication and Quantization of 2nd Order block */ - mb->quantize_b(mb_y2, x_y2); - -- // Distortion -+ /* Distortion */ - d = vp8_mbblock_error(mb, 1) << 2; - d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff); - - *Distortion = (d >> 4); - -- // rate -+ /* rate */ - *Rate = vp8_rdcost_mby(mb); - } - -@@ -632,12 +630,11 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) - d[12] = p[12]; - } - static int rd_pick_intra4x4block( -- VP8_COMP *cpi, - MACROBLOCK *x, - BLOCK *be, - BLOCKD *b, - B_PREDICTION_MODE *best_mode, -- unsigned int *bmode_costs, -+ const int *bmode_costs, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - -@@ -660,7 +657,11 @@ static int rd_pick_intra4x4block( - DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); - int dst_stride = x->e_mbd.dst.y_stride; -- unsigned char *base_dst = x->e_mbd.dst.y_buffer; -+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; -+ -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ unsigned char top_left = Above[-1]; - - for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) - { -@@ -669,8 +670,8 @@ static int rd_pick_intra4x4block( - - rate = bmode_costs[mode]; - -- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, mode, -- b->predictor, 16); -+ vp8_intra4x4_predict(Above, yleft, 
dst_stride, mode, -+ b->predictor, 16, top_left); - vp8_subtract_b(be, b, 16); - x->short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b(be, b); -@@ -697,15 +698,14 @@ static int rd_pick_intra4x4block( - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); - } - } -- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); -+ b->bmi.as_mode = *best_mode; - -- vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, base_dst + b->offset, -- dst_stride); -+ vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride); - - return best_rd; - } - --static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, -+static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, - int *rate_y, int *Distortion, int best_rd) - { - MACROBLOCKD *const xd = &mb->e_mbd; -@@ -717,7 +717,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; -- unsigned int *bmode_costs; -+ const int *bmode_costs; - - vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); -@@ -745,7 +745,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - } - - total_rd += rd_pick_intra4x4block( -- cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, -+ mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, - ta + vp8_block2above[i], - tl + vp8_block2left[i], &r, &ry, &d); - -@@ -770,8 +770,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - } - - --static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, -- MACROBLOCK *x, -+static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, - int *Rate, - int *rate_y, - int *Distortion) -@@ -784,7 +783,7 @@ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, - int this_rd; - MACROBLOCKD *xd = &x->e_mbd; - -- //Y Search for 16x16 intra prediction mode -+ /* Y Search for 16x16 intra prediction mode */ - for (mode = DC_PRED; mode <= TM_PRED; mode++) - { - xd->mode_info_context->mbmi.mode = mode; -@@ -873,7 +872,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); - } - --static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) -+static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, -+ int *rate_tokenonly, int *distortion) - { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); -@@ -981,8 +981,9 @@ static int labels2mode( - m = ABOVE4X4; - else - { -- // the only time we should do costing for new motion vector or mode -- // is when we are on a new label (jbb May 08, 2007) -+ /* the only time we should do costing for new motion vector -+ * or mode is when we are on a new label (jbb May 08, 2007) -+ */ - switch (m = this_mode) - { - case NEW4X4 : -@@ -1001,7 +1002,7 @@ static int labels2mode( - break; - } - -- if (m == ABOVE4X4) // replace above with left if same -+ if (m == ABOVE4X4) /* replace above with left if same */ - { - int_mv left_mv; - -@@ -1062,9 +1063,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels - vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict); - vp8_subtract_b(be, bd, 16); - x->short_fdct4x4(be->src_diff, be->coeff, 32); -- -- // set to 0 no way to account for 2nd order DC so discount -- //be->coeff[0] = 0; - x->quantize_b(be, bd); - - 
distortion += vp8_block_error(be->coeff, bd->dqcoeff); -@@ -1095,8 +1093,8 @@ typedef struct - int mvthresh; - int *mdcounts; - -- int_mv sv_mvp[4]; // save 4 mvp from 8x8 -- int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 -+ int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */ -+ int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */ - - } BEST_SEG_INFO; - -@@ -1143,13 +1141,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - labels = vp8_mbsplits[segmentation]; - label_count = vp8_mbsplit_count[segmentation]; - -- // 64 makes this threshold really big effectively -- // making it so that we very rarely check mvs on -- // segments. setting this to 1 would make mv thresh -- // roughly equal to what it is for macroblocks -+ /* 64 makes this threshold really big effectively making it so that we -+ * very rarely check mvs on segments. setting this to 1 would make mv -+ * thresh roughly equal to what it is for macroblocks -+ */ - label_mv_thresh = 1 * bsi->mvthresh / label_count ; - -- // Segmentation method overheads -+ /* Segmentation method overheads */ - rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); - rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); - this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); -@@ -1162,7 +1160,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - B_PREDICTION_MODE mode_selected = ZERO4X4; - int bestlabelyrate = 0; - -- // search for the best motion vector on this segment -+ /* search for the best motion vector on this segment */ - for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++) - { - int this_rd; -@@ -1191,7 +1189,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - BLOCK *c; - BLOCKD *e; - -- // Is the best so far sufficiently good that we cant justify doing and new motion search. -+ /* Is the best so far sufficiently good that we cant justify -+ * doing a new motion search. -+ */ - if (best_label_rd < label_mv_thresh) - break; - -@@ -1206,7 +1206,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - step_param = bsi->sv_istep[i]; - } - -- // use previous block's result as next block's MV predictor. -+ /* use previous block's result as next block's MV -+ * predictor. -+ */ - if (segmentation == BLOCK_4X4 && i>0) - { - bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int; -@@ -1225,7 +1227,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3; - -- // find first label -+ /* find first label */ - n = vp8_mbsplit_offset[segmentation][i]; - - c = &x->block[n]; -@@ -1265,7 +1267,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - - sseshift = segmentation_to_sseshift[segmentation]; - -- // Should we do a full search (best quality only) -+ /* Should we do a full search (best quality only) */ - if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) - { - /* Check if mvp_full is within the range. 
*/ -@@ -1282,7 +1284,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - } - else - { -- // The full search result is actually worse so re-instate the previous best vector -+ /* The full search result is actually worse so -+ * re-instate the previous best vector -+ */ - e->bmi.mv.as_int = mode_mv[NEW4X4].as_int; - } - } -@@ -1302,7 +1306,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], - bsi->ref_mv, x->mvcost); - -- // Trap vectors that reach beyond the UMV borders -+ /* Trap vectors that reach beyond the UMV borders */ - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) - { -@@ -1354,7 +1358,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - bsi->segment_rd = this_segment_rd; - bsi->segment_num = segmentation; - -- // store everything needed to come back to this!! -+ /* store everything needed to come back to this!! */ - for (i = 0; i < 16; i++) - { - bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; -@@ -1516,7 +1520,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, - return bsi.segment_rd; - } - --//The improved MV prediction -+/* The improved MV prediction */ - void vp8_mv_pred - ( - VP8_COMP *cpi, -@@ -1550,7 +1554,9 @@ void vp8_mv_pred - near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; - near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; - -- // read in 3 nearby block's MVs from current frame as prediction candidates. -+ /* read in 3 nearby block's MVs from current frame as prediction -+ * candidates. -+ */ - if (above->mbmi.ref_frame != INTRA_FRAME) - { - near_mvs[vcnt].as_int = above->mbmi.mv.as_int; -@@ -1573,12 +1579,12 @@ void vp8_mv_pred - } - vcnt++; - -- // read in 5 nearby block's MVs from last frame. -+ /* read in 5 nearby block's MVs from last frame. 
*/ - if(cpi->common.last_frame_type != KEY_FRAME) - { - mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ; - -- // current in last frame -+ /* current in last frame */ - if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; -@@ -1587,7 +1593,7 @@ void vp8_mv_pred - } - vcnt++; - -- // above in last frame -+ /* above in last frame */ - if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int; -@@ -1596,7 +1602,7 @@ void vp8_mv_pred - } - vcnt++; - -- // left in last frame -+ /* left in last frame */ - if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int; -@@ -1605,7 +1611,7 @@ void vp8_mv_pred - } - vcnt++; - -- // right in last frame -+ /* right in last frame */ - if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int; -@@ -1614,7 +1620,7 @@ void vp8_mv_pred - } - vcnt++; - -- // below in last frame -+ /* below in last frame */ - if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int; -@@ -1655,7 +1661,9 @@ void vp8_mv_pred - mv.as_mv.col = mvy[vcnt/2]; - - find = 1; -- //sr is set to 0 to allow calling function to decide the search range. -+ /* sr is set to 0 to allow calling function to decide the search -+ * range. -+ */ - *sr = 0; - } - } -@@ -1667,33 +1675,36 @@ void vp8_mv_pred - - void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]) - { -- -- int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below -+ /* near_sad indexes: -+ * 0-cf above, 1-cf left, 2-cf aboveleft, -+ * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below -+ */ -+ int near_sad[8] = {0}; - BLOCK *b = &x->block[0]; - unsigned char *src_y_ptr = *(b->base_src); - -- //calculate sad for current frame 3 nearby MBs. -+ /* calculate sad for current frame 3 nearby MBs. */ - if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0) - { - near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; - }else if(xd->mb_to_top_edge==0) -- { //only has left MB for sad calculation. -+ { /* only has left MB for sad calculation. */ - near_sad[0] = near_sad[2] = INT_MAX; -- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); -+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); - }else if(xd->mb_to_left_edge ==0) -- { //only has left MB for sad calculation. -+ { /* only has left MB for sad calculation. 
*/ - near_sad[1] = near_sad[2] = INT_MAX; -- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); -+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); - }else - { -- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); -- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); -- near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff); -+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); -+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); -+ near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX); - } - - if(cpi->common.last_frame_type != KEY_FRAME) - { -- //calculate sad for last frame 5 nearby MBs. -+ /* calculate sad for last frame 5 nearby MBs. */ - unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; - int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; - -@@ -1703,14 +1714,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse - if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX; - - if(near_sad[4] != INT_MAX) -- near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff); -+ near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX); - if(near_sad[5] != INT_MAX) -- near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff); -- near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, 0x7fffffff); -+ near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX); -+ near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX); - if(near_sad[6] != INT_MAX) -- near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff); -+ near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX); - if(near_sad[7] != INT_MAX) -- near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff); -+ near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX); - } - - if(cpi->common.last_frame_type != KEY_FRAME) -@@ -1732,18 +1743,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) - { - if (x->partition_info->bmi[i].mode == NEW4X4) - { -- cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row -+ x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; -- cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col -+ x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - } - } 
- } - else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) - { -- cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row -+ x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; -- cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col -+ x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - } - } -@@ -1766,7 +1777,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4], - { - unsigned int sse; - unsigned int var; -- int threshold = (xd->block[0].dequant[1] -+ unsigned int threshold = (xd->block[0].dequant[1] - * xd->block[0].dequant[1] >>4); - - if(threshold < x->encode_breakout) -@@ -1784,8 +1795,8 @@ static int evaluate_inter_mode_rd(int mdcounts[4], - if ((sse - var < q2dc * q2dc >>4) || - (sse /2 > var && sse-var < 64)) - { -- // Check u and v to make sure skip is ok -- int sse2= VP8_UVSSE(x); -+ /* Check u and v to make sure skip is ok */ -+ unsigned int sse2 = VP8_UVSSE(x); - if (sse2 * 2 < threshold) - { - x->skip = 1; -@@ -1805,17 +1816,15 @@ static int evaluate_inter_mode_rd(int mdcounts[4], - } - - -- //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code -- -- // Add in the Mv/mode cost -+ /* Add in the Mv/mode cost */ - rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - -- // Y cost and distortion -+ /* Y cost and distortion */ - macro_block_yrd(x, &rd->rate_y, &distortion); - rd->rate2 += rd->rate_y; - rd->distortion2 += distortion; - -- // UV cost and distortion -+ /* UV cost and distortion */ - rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, - cpi->common.full_pixel); - rd->rate2 += rd->rate_uv; -@@ -1832,9 +1841,11 @@ static int calculate_final_rd_costs(int this_rd, - VP8_COMP *cpi, MACROBLOCK *x) - { - MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; -- // Where skip is allowable add in the default per mb cost for the no skip case. -- // where we then decide to skip we have to delete this and replace it with the -- // cost of signallying a skip -+ -+ /* Where skip is allowable add in the default per mb cost for the no -+ * skip case. 
where we then decide to skip we have to delete this and -+ * replace it with the cost of signalling a skip -+ */ - if (cpi->common.mb_no_coeff_skip) - { - *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); -@@ -1849,7 +1860,10 @@ static int calculate_final_rd_costs(int this_rd, - - if (!disable_skip) - { -- // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate -+ /* Test for the condition where skip block will be activated -+ * because there are no non zero coefficients and make any -+ * necessary adjustment for rate -+ */ - if (cpi->common.mb_no_coeff_skip) - { - int i; -@@ -1874,10 +1888,10 @@ static int calculate_final_rd_costs(int this_rd, - if (tteob == 0) - { - rd->rate2 -= (rd->rate_y + rd->rate_uv); -- //for best_yrd calculation -+ /* for best_yrd calculation */ - rd->rate_uv = 0; - -- // Back out no skip flag costing and add in skip flag costing -+ /* Back out no skip flag costing and add in skip flag costing */ - if (cpi->prob_skip_false) - { - int prob_skip_cost; -@@ -1889,7 +1903,7 @@ static int calculate_final_rd_costs(int this_rd, - } - } - } -- // Calculate the final RD estimate for this mode -+ /* Calculate the final RD estimate for this mode */ - this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); - if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame - == INTRA_FRAME) -@@ -1953,7 +1967,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int_mv mvp; - int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - int saddone=0; -- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) -+ /* search range got from mv_pred(). It uses step_param levels. (0-7) */ -+ int sr=0; - - unsigned char *plane[4][3]; - int ref_frame_map[4]; -@@ -1962,6 +1977,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex, - cpi->common.y1dc_delta_q); - -+#if CONFIG_TEMPORAL_DENOISING -+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX, -+ best_rd_sse = INT_MAX; -+#endif -+ - mode_mv = mode_mv_sb[sign_bias]; - best_ref_mv.as_int = 0; - best_mode.rd = INT_MAX; -@@ -1994,7 +2014,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); - - *returnintra = INT_MAX; -- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame -+ /* Count of the number of MBs tested so far this frame */ -+ x->mbs_tested_so_far++; - - x->skip = 0; - -@@ -2005,14 +2026,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int other_cost = 0; - int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; - -- // Test best rd so far against threshold for trying this mode. -- if (best_mode.rd <= cpi->rd_threshes[mode_index]) -+ /* Test best rd so far against threshold for trying this mode. 
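calculate_final_rd_costs ends by folding rate and distortion into one Lagrangian cost through RDCOST, with x->rdmult playing the role of lambda and x->rddiv rescaling distortion. A sketch of the usual fixed-point form (assumed, not copied from the tree):

    /* J = D*DM + lambda*R, with lambda carried as RM in 1/256 units (assumed). */
    #define RDCOST(RM, DM, R, D)  (((128 + (R) * (RM)) >> 8) + ((DM) * (D)))

    /* usage as in the hunk above:
     *   this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
     */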
*/ -+ if (best_mode.rd <= x->rd_threshes[mode_index]) - continue; - - if (this_ref_frame < 0) - continue; - -- // These variables hold are rolling total cost and distortion for this mode -+ /* These variables hold are rolling total cost and distortion for -+ * this mode -+ */ - rd.rate2 = 0; - rd.distortion2 = 0; - -@@ -2021,9 +2044,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - x->e_mbd.mode_info_context->mbmi.mode = this_mode; - x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; - -- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, -- // unless ARNR filtering is enabled in which case we want -- // an unfiltered alternative -+ /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame, -+ * unless ARNR filtering is enabled in which case we want -+ * an unfiltered alternative -+ */ - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) - { - if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) -@@ -2045,45 +2069,56 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - } - -- // Check to see if the testing frequency for this mode is at its max -- // If so then prevent it from being tested and increase the threshold for its testing -- if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) -+ /* Check to see if the testing frequency for this mode is at its -+ * max If so then prevent it from being tested and increase the -+ * threshold for its testing -+ */ -+ if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) - { -- if (cpi->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index]) -+ if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index]) - { -- // Increase the threshold for coding this mode to make it less likely to be chosen -- cpi->rd_thresh_mult[mode_index] += 4; -+ /* Increase the threshold for coding this mode to make it -+ * less likely to be chosen -+ */ -+ x->rd_thresh_mult[mode_index] += 4; - -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - -- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -+ x->rd_threshes[mode_index] = -+ (cpi->rd_baseline_thresh[mode_index] >> 7) * -+ x->rd_thresh_mult[mode_index]; - - continue; - } - } - -- // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested -- cpi->mode_test_hit_counts[mode_index] ++; -+ /* We have now reached the point where we are going to test the -+ * current mode so increment the counter for the number of times -+ * it has been tested -+ */ -+ x->mode_test_hit_counts[mode_index] ++; - -- // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise -- if (cpi->zbin_mode_boost_enabled) -+ /* Experimental code. Special case for gf and arf zeromv modes. 
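The gate above throttles how often an expensive mode is evaluated: once a mode has been tried as often as its check frequency allows, it is skipped for this macroblock and its RD threshold multiplier is inflated so it becomes even less attractive next time. A compact restatement with illustrative names (a sketch, not the tree's own helper):

    /* Returns nonzero if the mode should be skipped this time around. */
    static int throttle_mode(int mode, int mbs_tested_so_far,
                             const int *check_freq, int *hit_counts,
                             int *thresh_mult, int *threshes,
                             const int *baseline_thresh, int max_mult)
    {
        if (hit_counts[mode] && check_freq[mode] > 1 &&
            mbs_tested_so_far <= check_freq[mode] * hit_counts[mode]) {
            thresh_mult[mode] += 4;               /* make it even less likely  */
            if (thresh_mult[mode] > max_mult)
                thresh_mult[mode] = max_mult;
            threshes[mode] = (baseline_thresh[mode] >> 7) * thresh_mult[mode];
            return 1;                             /* caller skips this mode    */
        }
        hit_counts[mode]++;                       /* mode will be tested now   */
        return 0;
    }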
-+ * Increase zbin size to supress noise -+ */ -+ if (x->zbin_mode_boost_enabled) - { - if ( this_ref_frame == INTRA_FRAME ) -- cpi->zbin_mode_boost = 0; -+ x->zbin_mode_boost = 0; - else - { - if (vp8_mode_order[mode_index] == ZEROMV) - { - if (this_ref_frame != LAST_FRAME) -- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else -- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } - else if (vp8_mode_order[mode_index] == SPLITMV) -- cpi->zbin_mode_boost = 0; -+ x->zbin_mode_boost = 0; - else -- cpi->zbin_mode_boost = MV_ZBIN_BOOST; -+ x->zbin_mode_boost = MV_ZBIN_BOOST; - } - - vp8_update_zbin_extra(cpi, x); -@@ -2091,7 +2126,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - if(!uv_intra_done && this_ref_frame == INTRA_FRAME) - { -- rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, -+ rd_pick_intra_mbuv_mode(x, &uv_intra_rate, - &uv_intra_rate_tokenonly, - &uv_intra_distortion); - uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; -@@ -2113,9 +2148,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - { - int tmp_rd; - -- // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; -+ /* Note the rate value returned here includes the cost of -+ * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED] -+ */ - int distortion; -- tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd); -+ tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd); - rd.rate2 += rate; - rd.distortion2 += distortion; - -@@ -2140,8 +2177,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int this_rd_thresh; - int distortion; - -- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3]; -- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh; -+ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? -+ x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3]; -+ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? -+ x->rd_threshes[THR_NEW2] : this_rd_thresh; - - tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - best_mode.yrd, mdcounts, -@@ -2150,10 +2189,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - rd.rate2 += rate; - rd.distortion2 += distortion; - -- // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV -+ /* If even the 'Y' rd value of split is higher than best so far -+ * then dont bother looking at UV -+ */ - if (tmp_rd < best_mode.yrd) - { -- // Now work out UV cost and add it in -+ /* Now work out UV cost and add it in */ - rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel); - rd.rate2 += rd.rate_uv; - rd.distortion2 += rd.distortion_uv; -@@ -2225,7 +2266,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - mvp_full.as_mv.col = mvp.as_mv.col>>3; - mvp_full.as_mv.row = mvp.as_mv.row>>3; - -- // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. -+ /* Get intersection of UMV window and valid MV window to -+ * reduce # of checks in diamond search. 
-+ */ - if (x->mv_col_min < col_min ) - x->mv_col_min = col_min; - if (x->mv_col_max > col_max ) -@@ -2235,11 +2278,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - if (x->mv_row_max > row_max ) - x->mv_row_max = row_max; - -- //adjust search range according to sr from mv prediction -+ /* adjust search range according to sr from mv prediction */ - if(sr > step_param) - step_param = sr; - -- // Initial step/diamond search -+ /* Initial step/diamond search */ - { - bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv, - step_param, sadpb, &num00, -@@ -2247,7 +2290,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - x->mvcost, &best_ref_mv); - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - n = 0; - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - -@@ -2293,11 +2336,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - { - int search_range; - -- //It seems not a good way to set search_range. Need further investigation. -- //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); - search_range = 8; - -- //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); - thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb, - search_range, &cpi->fn_ptr[BLOCK_16X16], - x->mvcost, &best_ref_mv); -@@ -2330,24 +2370,31 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // Add the new motion vector cost to our rolling cost variable -+ /* Add the new motion vector cost to our rolling cost variable */ - rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); - } - - case NEARESTMV: - case NEARMV: -- // Clip "next_nearest" so that it does not extend to far out of image -+ /* Clip "next_nearest" so that it does not extend to far out -+ * of image -+ */ - vp8_clamp_mv2(&mode_mv[this_mode], xd); - -- // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode. -+ /* Do not bother proceeding if the vector (from newmv, nearest -+ * or near) is 0,0 as this should then be coded using the zeromv -+ * mode. -+ */ - if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) - continue; - - case ZEROMV: - -- // Trap vectors that reach beyond the UMV borders -- // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point -- // because of the lack of break statements in the previous two cases. -+ /* Trap vectors that reach beyond the UMV borders -+ * Note that ALL New MV, Nearest MV Near MV and Zero MV code -+ * drops through to this point because of the lack of break -+ * statements in the previous two cases. 
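The window intersection above narrows the search to motion vectors that are both inside the UMV border and valid for this block, so the diamond search never probes unusable positions. A tiny illustrative helper:

    /* Clamp an existing [lo, hi] range to its intersection with [other_lo, other_hi]. */
    static void intersect_range(int *lo, int *hi, int other_lo, int other_hi)
    {
        if (*lo < other_lo)
            *lo = other_lo;
        if (*hi > other_hi)
            *hi = other_hi;
    }

    /* e.g. intersect_range(&x->mv_col_min, &x->mv_col_max, col_min, col_max);
     *      and likewise for the row limits. */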
-+ */ - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) - continue; -@@ -2365,35 +2412,52 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - disable_skip, uv_intra_tteob, - intra_rd_penalty, cpi, x); - -- // Keep record of best intra distortion -+ /* Keep record of best intra distortion */ - if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && - (this_rd < best_mode.intra_rd) ) - { - best_mode.intra_rd = this_rd; - *returnintra = rd.distortion2 ; - } -- - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- // Store the best NEWMV in x for later use in the denoiser. -- // We are restricted to the LAST_FRAME since the denoiser only keeps -- // one filter state. -- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) -- { -- x->e_mbd.best_sse_inter_mode = NEWMV; -- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -- x->e_mbd.need_to_clamp_best_mvs = -- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -- } -+ unsigned int sse; -+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse, -+ mode_mv[this_mode]); -+ -+ if (sse < best_rd_sse) -+ best_rd_sse = sse; -+ -+ /* Store for later use by denoiser. */ -+ if (this_mode == ZEROMV && sse < zero_mv_sse ) -+ { -+ zero_mv_sse = sse; -+ x->best_zeromv_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } -+ -+ /* Store the best NEWMV in x for later use in the denoiser. */ -+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -+ sse < best_sse) -+ { -+ best_sse = sse; -+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse, -+ mode_mv[this_mode]); -+ x->best_sse_inter_mode = NEWMV; -+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -+ x->need_to_clamp_best_mvs = -+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -+ x->best_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } - } - #endif - -- // Did this mode help.. i.i is it the new best mode -+ /* Did this mode help.. i.i is it the new best mode */ - if (this_rd < best_mode.rd || x->skip) - { -- // Note index of best mode so far -+ /* Note index of best mode so far */ - best_mode_index = mode_index; - *returnrate = rd.rate2; - *returndistortion = rd.distortion2; -@@ -2406,95 +2470,103 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - update_best_mode(&best_mode, this_rd, &rd, other_cost, x); - - -- // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time -- cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; -- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -+ /* Testing this mode gave rise to an improvement in best error -+ * score. Lower threshold a bit for next time -+ */ -+ x->rd_thresh_mult[mode_index] = -+ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? -+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - } - -- // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around. -+ /* If the mode did not help improve the best error case then raise -+ * the threshold for testing that mode next time around. 
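Motion vectors in this code are stored in 1/8-pel units, so the border trap above shifts by 3 to compare whole pixels against the UMV window. A minimal restatement:

    /* Nonzero when a 1/8-pel MV (3 fractional bits), converted to full pixels,
     * leaves the allowed window; mirrors the fall-through trap above. */
    static int mv_outside_umv_window(int row_q3, int col_q3,
                                     int row_min, int row_max,
                                     int col_min, int col_max)
    {
        const int row = row_q3 >> 3;   /* 1/8-pel -> full-pel */
        const int col = col_q3 >> 3;
        return row < row_min || row > row_max || col < col_min || col > col_max;
    }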
-+ */ - else - { -- cpi->rd_thresh_mult[mode_index] += 4; -- -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ x->rd_thresh_mult[mode_index] += 4; - -- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - } -+ x->rd_threshes[mode_index] = -+ (cpi->rd_baseline_thresh[mode_index] >> 7) * -+ x->rd_thresh_mult[mode_index]; - - if (x->skip) - break; - - } - -- // Reduce the activation RD thresholds for the best choice mode -+ /* Reduce the activation RD thresholds for the best choice mode */ - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) - { -- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); -- -- cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; -- cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; -- -- // If we chose a split mode then reset the new MV thresholds as well -- /*if ( vp8_mode_order[best_mode_index] == SPLITMV ) -- { -- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4); -- cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT; -- cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV]; -- -- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4); -- cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT; -- cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG]; -- -- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4); -- cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT; -- cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA]; -- }*/ -- -+ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2); -+ -+ x->rd_thresh_mult[best_mode_index] = -+ (x->rd_thresh_mult[best_mode_index] >= -+ (MIN_THRESHMULT + best_adjustment)) ? -+ x->rd_thresh_mult[best_mode_index] - best_adjustment : -+ MIN_THRESHMULT; -+ x->rd_threshes[best_mode_index] = -+ (cpi->rd_baseline_thresh[best_mode_index] >> 7) * -+ x->rd_thresh_mult[best_mode_index]; - } - -- // Note how often each mode chosen as best -+ /* Note how often each mode chosen as best */ - cpi->mode_chosen_counts[best_mode_index] ++; - - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- if (x->e_mbd.best_sse_inter_mode == DC_PRED) { -- // No best MV found. -- x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode; -- x->e_mbd.best_sse_mv = best_mode.mbmode.mv; -- x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; -- } -- -- // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used? -- vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0, -- recon_yoffset, recon_uvoffset); -- // Reevalute ZEROMV if the current mode is INTRA. 
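Together with the per-mode throttle earlier, this forms a simple feedback loop on the RD activation thresholds: a mode that improves the best cost gets its multiplier lowered (tested more eagerly next time), one that does not gets it raised, and the winning mode receives an extra reduction after the search. A one-function sketch with illustrative names:

    /* mode_helped: nonzero if the mode just produced a new best RD cost. */
    static void adapt_mode_threshold(int mode, int mode_helped,
                                     int *thresh_mult, int *threshes,
                                     const int *baseline_thresh,
                                     int min_mult, int max_mult)
    {
        if (mode_helped)
            thresh_mult[mode] = (thresh_mult[mode] >= min_mult + 2)
                                    ? thresh_mult[mode] - 2 : min_mult;
        else if ((thresh_mult[mode] += 4) > max_mult)
            thresh_mult[mode] = max_mult;

        threshes[mode] = (baseline_thresh[mode] >> 7) * thresh_mult[mode];
    }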
-- if (best_mode.mbmode.ref_frame == INTRA_FRAME) -- { -- int this_rd = INT_MAX; -- int disable_skip = 0; -- int other_cost = 0; -- vpx_memset(&rd, 0, sizeof(rd)); -- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- rd.rate2 += x->ref_frame_cost[LAST_FRAME]; -- rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts); -- x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; -- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -- this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); -- this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, -- disable_skip, uv_intra_tteob, -- intra_rd_penalty, cpi, x); -- if (this_rd < best_mode.rd || x->skip) -+ if (x->best_sse_inter_mode == DC_PRED) - { -- // Note index of best mode so far -- best_mode_index = mode_index; -- *returnrate = rd.rate2; -- *returndistortion = rd.distortion2; -- update_best_mode(&best_mode, this_rd, &rd, other_cost, x); -+ /* No best MV found. */ -+ x->best_sse_inter_mode = best_mode.mbmode.mode; -+ x->best_sse_mv = best_mode.mbmode.mv; -+ x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; -+ x->best_reference_frame = best_mode.mbmode.ref_frame; -+ best_sse = best_rd_sse; -+ } -+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, -+ recon_yoffset, recon_uvoffset); -+ -+ -+ /* Reevaluate ZEROMV after denoising. */ -+ if (best_mode.mbmode.ref_frame == INTRA_FRAME && -+ x->best_zeromv_reference_frame != INTRA_FRAME) -+ { -+ int this_rd = INT_MAX; -+ int disable_skip = 0; -+ int other_cost = 0; -+ int this_ref_frame = x->best_zeromv_reference_frame; -+ rd.rate2 = x->ref_frame_cost[this_ref_frame] + -+ vp8_cost_mv_ref(ZEROMV, mdcounts); -+ rd.distortion2 = 0; -+ -+ /* set up the proper prediction buffers for the frame */ -+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; -+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; -+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; -+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; -+ -+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; -+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -+ -+ this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); -+ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, -+ disable_skip, uv_intra_tteob, -+ intra_rd_penalty, cpi, x); -+ if (this_rd < best_mode.rd || x->skip) -+ { -+ /* Note index of best mode so far */ -+ best_mode_index = mode_index; -+ *returnrate = rd.rate2; -+ *returndistortion = rd.distortion2; -+ update_best_mode(&best_mode, this_rd, &rd, other_cost, x); -+ } - } -- } -+ - } - #endif - -@@ -2512,7 +2584,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - - -- // macroblock modes -+ /* macroblock modes */ - vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); - - if (best_mode.mbmode.mode == B_PRED) -@@ -2539,7 +2611,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - rd_update_mvcount(cpi, x, &best_ref_mv); - } - --void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) -+void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) - { - int error4x4, error16x16; - int rate4x4, rate16x16 = 0, rateuv; -@@ -2551,15 +2623,13 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) - - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - -- rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); -+ 
rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); - rate = rateuv; - -- error16x16 = rd_pick_intra16x16mby_mode(cpi, x, -- &rate16x16, &rate16x16_tokenonly, -+ error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, - &dist16x16); - -- error4x4 = rd_pick_intra4x4mby_modes(cpi, x, -- &rate4x4, &rate4x4_tokenonly, -+ error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly, - &dist4x4, error16x16); - - if (error4x4 < error16x16) -diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h -index db939f9..1e11fa7 100644 ---- a/vp8/encoder/rdopt.h -+++ b/vp8/encoder/rdopt.h -@@ -65,9 +65,9 @@ static void insertsortsad(int arr[],int idx[], int len) - } - } - --extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); -+extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); - extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); --extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); -+extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); - - - static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, -@@ -86,15 +86,15 @@ static void get_predictor_pointers(const VP8_COMP *cpi, - unsigned int recon_yoffset, - unsigned int recon_uvoffset) - { -- if (cpi->ref_frame_flags & VP8_LAST_FLAG) -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) - get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx], - plane[LAST_FRAME], recon_yoffset, recon_uvoffset); - -- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) - get_plane_pointers(&cpi->common.yv12_fb[cpi->common.gld_fb_idx], - plane[GOLDEN_FRAME], recon_yoffset, recon_uvoffset); - -- if (cpi->ref_frame_flags & VP8_ALT_FLAG) -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) - get_plane_pointers(&cpi->common.yv12_fb[cpi->common.alt_fb_idx], - plane[ALTREF_FRAME], recon_yoffset, recon_uvoffset); - } -@@ -106,11 +106,11 @@ static void get_reference_search_order(const VP8_COMP *cpi, - int i=0; - - ref_frame_map[i++] = INTRA_FRAME; -- if (cpi->ref_frame_flags & VP8_LAST_FLAG) -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) - ref_frame_map[i++] = LAST_FRAME; -- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) - ref_frame_map[i++] = GOLDEN_FRAME; -- if (cpi->ref_frame_flags & VP8_ALT_FLAG) -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) - ref_frame_map[i++] = ALTREF_FRAME; - for(; i<4; i++) - ref_frame_map[i] = -1; -diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c -index fc0967d..37972e2 100644 ---- a/vp8/encoder/segmentation.c -+++ b/vp8/encoder/segmentation.c -@@ -22,22 +22,24 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) - - if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) - { -- // Reset Gf useage monitors -+ /* Reset Gf useage monitors */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - } - else - { -- // for each macroblock row in image -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - -- // If using golden then set GF active flag if not already set. 
-- // If using last frame 0,0 mode then leave flag as it is -- // else if using non 0,0 motion or intra modes then clear flag if it is currently set -+ /* If using golden then set GF active flag if not already set. -+ * If using last frame 0,0 mode then leave flag as it is -+ * else if using non 0,0 motion or intra modes then clear -+ * flag if it is currently set -+ */ - if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) - { - if (*(x->gf_active_ptr) == 0) -@@ -52,12 +54,12 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) - cpi->gf_active_count--; - } - -- x->gf_active_ptr++; // Step onto next entry -- this_mb_mode_info++; // skip to next mb -+ x->gf_active_ptr++; /* Step onto next entry */ -+ this_mb_mode_info++; /* skip to next mb */ - - } - -- // this is to account for the border -+ /* this is to account for the border */ - this_mb_mode_info++; - } - } -diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c -index 6c61b36..b83ae89 100644 ---- a/vp8/encoder/temporal_filter.c -+++ b/vp8/encoder/temporal_filter.c -@@ -30,8 +30,8 @@ - #include - #include - --#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering --#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering -+#define ALT_REF_MC_ENABLED 1 /* dis/enable MC in AltRef filtering */ -+#define ALT_REF_SUBPEL_ENABLED 1 /* dis/enable subpel in MC AltRef filtering */ - - #if VP8_TEMPORAL_ALT_REF - -@@ -50,7 +50,7 @@ static void vp8_temporal_filter_predictors_mb_c - int offset; - unsigned char *yptr, *uptr, *vptr; - -- // Y -+ /* Y */ - yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); - - if ((mv_row | mv_col) & 7) -@@ -63,7 +63,7 @@ static void vp8_temporal_filter_predictors_mb_c - vp8_copy_mem16x16(yptr, stride, &pred[0], 16); - } - -- // U & V -+ /* U & V */ - mv_row >>= 1; - mv_col >>= 1; - stride = (stride + 1) >> 1; -@@ -109,9 +109,10 @@ void vp8_temporal_filter_apply_c - int pixel_value = *frame2++; - - modifier = src_byte - pixel_value; -- // This is an integer approximation of: -- // float coeff = (3.0 * modifer * modifier) / pow(2, strength); -- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); -+ /* This is an integer approximation of: -+ * float coeff = (3.0 * modifer * modifier) / pow(2, strength); -+ * modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); -+ */ - modifier *= modifier; - modifier *= 3; - modifier += 1 << (strength - 1); -@@ -134,7 +135,6 @@ void vp8_temporal_filter_apply_c - } - - #if ALT_REF_MC_ENABLED --static int dummy_cost[2*mv_max+1]; - - static int vp8_temporal_filter_find_matching_mb_c - ( -@@ -155,10 +155,7 @@ static int vp8_temporal_filter_find_matching_mb_c - int_mv best_ref_mv1; - int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - -- int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; -- int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; -- -- // Save input state -+ /* Save input state */ - unsigned char **base_src = b->base_src; - int src = b->src; - int src_stride = b->src_stride; -@@ -170,7 +167,7 @@ static int vp8_temporal_filter_find_matching_mb_c - best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3; - best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3; - -- // Setup frame pointers -+ /* Setup frame pointers */ - b->base_src = &arf_frame->y_buffer; - b->src_stride = arf_frame->y_stride; - b->src = mb_offset; -@@ -179,7 +176,7 @@ static int vp8_temporal_filter_find_matching_mb_c - x->e_mbd.pre.y_stride = frame_ptr->y_stride; - d->offset = mb_offset; - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - if (cpi->Speed < 8) - { - step_param = cpi->sf.first_step + (cpi->Speed > 5); -@@ -189,29 +186,29 @@ static int vp8_temporal_filter_find_matching_mb_c - step_param = cpi->sf.first_step + 2; - } - -- /*cpi->sf.search_method == HEX*/ -- // TODO Check that the 16x16 vf & sdf are selected here -- bestsme = vp8_hex_search(x, b, d, -- &best_ref_mv1_full, &d->bmi.mv, -- step_param, -- sadpb, -- &cpi->fn_ptr[BLOCK_16X16], -- mvsadcost, mvcost, &best_ref_mv1); -+ /* TODO Check that the 16x16 vf & sdf are selected here */ -+ /* Ignore mv costing by sending NULL cost arrays */ -+ bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv, -+ step_param, sadpb, -+ &cpi->fn_ptr[BLOCK_16X16], -+ NULL, NULL, &best_ref_mv1); - - #if ALT_REF_SUBPEL_ENABLED -- // Try sub-pixel MC? -- //if (bestsme > error_thresh && bestsme < INT_MAX) -+ /* Try sub-pixel MC? */ - { - int distortion; - unsigned int sse; -+ /* Ignore mv costing by sending NULL cost array */ - bestsme = cpi->find_fractional_mv_step(x, b, d, -- &d->bmi.mv, &best_ref_mv1, -- x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], -- mvcost, &distortion, &sse); -+ &d->bmi.mv, -+ &best_ref_mv1, -+ x->errorperbit, -+ &cpi->fn_ptr[BLOCK_16X16], -+ NULL, &distortion, &sse); - } - #endif - -- // Save input state -+ /* Save input state */ - b->base_src = base_src; - b->src = src; - b->src_stride = src_stride; -@@ -246,7 +243,7 @@ static void vp8_temporal_filter_iterate_c - unsigned char *dst1, *dst2; - DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); - -- // Save input state -+ /* Save input state */ - unsigned char *y_buffer = mbd->pre.y_buffer; - unsigned char *u_buffer = mbd->pre.u_buffer; - unsigned char *v_buffer = mbd->pre.v_buffer; -@@ -254,16 +251,17 @@ static void vp8_temporal_filter_iterate_c - for (mb_row = 0; mb_row < mb_rows; mb_row++) - { - #if ALT_REF_MC_ENABLED -- // Source frames are extended to 16 pixels. This is different than -- // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) -- // A 6 tap filter is used for motion search. This requires 2 pixels -- // before and 3 pixels after. So the largest Y mv on a border would -- // then be 16 - 3. 
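The integer sequence above (square, times 3, add a rounding bias, shift by strength) replaces the float expression quoted in the comment. Assuming the usual continuation of this routine (shift, clamp to 16, invert), the per-pixel temporal-filter weight can be sketched as a standalone function:

    /* Approximates round(max(0, 16 - 3*d*d / 2^strength)) using integer math
     * only; the clamp-and-invert tail is assumed from context, not quoted above. */
    static int temporal_filter_pixel_weight(int src_byte, int pixel_value,
                                            int strength)
    {
        int modifier = src_byte - pixel_value;  /* d                        */
        modifier *= modifier;                   /* d^2                      */
        modifier *= 3;                          /* 3 * d^2                  */
        modifier += 1 << (strength - 1);        /* rounding bias            */
        modifier >>= strength;                  /* / 2^strength             */
        if (modifier > 16)
            modifier = 16;
        return 16 - modifier;                   /* big difference -> weight 0 */
    }

The resulting weight is then scaled by the per-frame filter_weight and accumulated into the accumulator/count arrays, which the normalization pass further down divides back out.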
The UV blocks are half the size of the Y and -- // therefore only extended by 8. The largest mv that a UV block -- // can support is 8 - 3. A UV mv is half of a Y mv. -- // (16 - 3) >> 1 == 6 which is greater than 8 - 3. -- // To keep the mv in play for both Y and UV planes the max that it -- // can be on a border is therefore 16 - 5. -+ /* Source frames are extended to 16 pixels. This is different than -+ * L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) -+ * A 6 tap filter is used for motion search. This requires 2 pixels -+ * before and 3 pixels after. So the largest Y mv on a border would -+ * then be 16 - 3. The UV blocks are half the size of the Y and -+ * therefore only extended by 8. The largest mv that a UV block -+ * can support is 8 - 3. A UV mv is half of a Y mv. -+ * (16 - 3) >> 1 == 6 which is greater than 8 - 3. -+ * To keep the mv in play for both Y and UV planes the max that it -+ * can be on a border is therefore 16 - 5. -+ */ - cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5)); - cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) - + (16 - 5); -@@ -285,36 +283,41 @@ static void vp8_temporal_filter_iterate_c - - for (frame = 0; frame < frame_count; frame++) - { -- int err = 0; -- - if (cpi->frames[frame] == NULL) - continue; - - mbd->block[0].bmi.mv.as_mv.row = 0; - mbd->block[0].bmi.mv.as_mv.col = 0; - -+ if (frame == alt_ref_index) -+ { -+ filter_weight = 2; -+ } -+ else -+ { -+ int err = 0; - #if ALT_REF_MC_ENABLED - #define THRESH_LOW 10000 - #define THRESH_HIGH 20000 -- -- // Find best match in this frame by MC -- err = vp8_temporal_filter_find_matching_mb_c -- (cpi, -- cpi->frames[alt_ref_index], -- cpi->frames[frame], -- mb_y_offset, -- THRESH_LOW); -- -+ /* Find best match in this frame by MC */ -+ err = vp8_temporal_filter_find_matching_mb_c -+ (cpi, -+ cpi->frames[alt_ref_index], -+ cpi->frames[frame], -+ mb_y_offset, -+ THRESH_LOW); - #endif -- // Assign higher weight to matching MB if it's error -- // score is lower. If not applying MC default behavior -- // is to weight all MBs equal. 
-- filter_weight = errframes[frame]->y_buffer + mb_y_offset, -@@ -325,7 +328,7 @@ static void vp8_temporal_filter_iterate_c - mbd->block[0].bmi.mv.as_mv.col, - predictor); - -- // Apply the filter (YUV) -+ /* Apply the filter (YUV) */ - vp8_temporal_filter_apply - (f->y_buffer + mb_y_offset, - f->y_stride, -@@ -358,7 +361,7 @@ static void vp8_temporal_filter_iterate_c - } - } - -- // Normalize filter output to produce AltRef frame -+ /* Normalize filter output to produce AltRef frame */ - dst1 = cpi->alt_ref_buffer.y_buffer; - stride = cpi->alt_ref_buffer.y_stride; - byte = mb_y_offset; -@@ -372,7 +375,7 @@ static void vp8_temporal_filter_iterate_c - - dst1[byte] = (unsigned char)pval; - -- // move to next pixel -+ /* move to next pixel */ - byte++; - } - -@@ -389,19 +392,19 @@ static void vp8_temporal_filter_iterate_c - { - int m=k+64; - -- // U -+ /* U */ - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= cpi->fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (unsigned char)pval; - -- // V -+ /* V */ - pval = accumulator[m] + (count[m] >> 1); - pval *= cpi->fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (unsigned char)pval; - -- // move to next pixel -+ /* move to next pixel */ - byte++; - } - -@@ -416,7 +419,7 @@ static void vp8_temporal_filter_iterate_c - mb_uv_offset += 8*(f->uv_stride-mb_cols); - } - -- // Restore input state -+ /* Restore input state */ - mbd->pre.y_buffer = y_buffer; - mbd->pre.u_buffer = u_buffer; - mbd->pre.v_buffer = v_buffer; -@@ -450,8 +453,7 @@ void vp8_temporal_filter_prepare_c - switch (blur_type) - { - case 1: -- ///////////////////////////////////////// -- // Backward Blur -+ /* Backward Blur */ - - frames_to_blur_backward = num_frames_backward; - -@@ -462,8 +464,7 @@ void vp8_temporal_filter_prepare_c - break; - - case 2: -- ///////////////////////////////////////// -- // Forward Blur -+ /* Forward Blur */ - - frames_to_blur_forward = num_frames_forward; - -@@ -475,8 +476,7 @@ void vp8_temporal_filter_prepare_c - - case 3: - default: -- ///////////////////////////////////////// -- // Center Blur -+ /* Center Blur */ - frames_to_blur_forward = num_frames_forward; - frames_to_blur_backward = num_frames_backward; - -@@ -486,7 +486,7 @@ void vp8_temporal_filter_prepare_c - if (frames_to_blur_backward > frames_to_blur_forward) - frames_to_blur_backward = frames_to_blur_forward; - -- // When max_frames is even we have 1 more frame backward than forward -+ /* When max_frames is even we have 1 more frame backward than forward */ - if (frames_to_blur_forward > (max_frames - 1) / 2) - frames_to_blur_forward = ((max_frames - 1) / 2); - -@@ -499,21 +499,7 @@ void vp8_temporal_filter_prepare_c - - start_frame = distance + frames_to_blur_forward; - --#ifdef DEBUGFWG -- // DEBUG FWG -- printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" -- , max_frames -- , num_frames_backward -- , num_frames_forward -- , frames_to_blur -- , frames_to_blur_backward -- , frames_to_blur_forward -- , cpi->source_encode_index -- , cpi->last_alt_ref_sei -- , start_frame); --#endif -- -- // Setup frame pointers, NULL indicates frame not included in filter -+ /* Setup frame pointers, NULL indicates frame not included in filter */ - vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *)); - for (frame = 0; frame < frames_to_blur; frame++) - { -diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c -index ef41fa8..3b5268b 100644 ---- a/vp8/encoder/tokenize.c -+++ b/vp8/encoder/tokenize.c -@@ -23,7 +23,7 @@ - #ifdef 
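The normalization loops above turn the accumulated weighted sums back into pixels without a hardware divide: fixed_divide[c] is assumed to hold 2^19 / c, so multiplying by it and shifting right by 19 performs a rounded division by the total weight. A scalar sketch:

    /* (acc + cnt/2) * (2^19 / cnt) >> 19  ~=  round(acc / cnt); fixed_divide[]
     * is assumed to be the precomputed reciprocal table used above. */
    static unsigned char normalize_filtered_pixel(unsigned int acc,
                                                  unsigned int cnt,
                                                  const unsigned int *fixed_divide)
    {
        unsigned int pval = acc + (cnt >> 1);   /* rounding                  */
        pval *= fixed_divide[cnt];              /* multiply by reciprocal    */
        pval >>= 19;                            /* scale back to pixel range */
        return (unsigned char)pval;
    }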
ENTROPY_STATS - _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - #endif --void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; -+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; - void vp8_fix_contexts(MACROBLOCKD *x); - - #include "dct_value_tokens.h" -@@ -102,11 +102,12 @@ static void fill_value_tokens() - - static void tokenize2nd_order_b - ( -- MACROBLOCKD *x, -+ MACROBLOCK *x, - TOKENEXTRA **tp, - VP8_COMP *cpi - ) - { -+ MACROBLOCKD *xd = &x->e_mbd; - int pt; /* near block/prev token context index */ - int c; /* start at DC */ - TOKENEXTRA *t = *tp;/* store tokens starting here */ -@@ -117,11 +118,11 @@ static void tokenize2nd_order_b - int band, rc, v, token; - int eob; - -- b = x->block + 24; -+ b = xd->block + 24; - qcoeff_ptr = b->qcoeff; -- a = (ENTROPY_CONTEXT *)x->above_context + 8; -- l = (ENTROPY_CONTEXT *)x->left_context + 8; -- eob = x->eobs[24]; -+ a = (ENTROPY_CONTEXT *)xd->above_context + 8; -+ l = (ENTROPY_CONTEXT *)xd->left_context + 8; -+ eob = xd->eobs[24]; - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - - if(!eob) -@@ -131,7 +132,7 @@ static void tokenize2nd_order_b - t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->skip_eob_node = 0; - -- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; - t++; - *tp = t; - *a = *l = 0; -@@ -145,7 +146,7 @@ static void tokenize2nd_order_b - - t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [1] [0] [pt] [token]; -+ ++x->coef_counts [1] [0] [pt] [token]; - pt = vp8_prev_token_class[token]; - t++; - c = 1; -@@ -164,7 +165,7 @@ static void tokenize2nd_order_b - - t->skip_eob_node = ((pt == 0)); - -- ++cpi->coef_counts [1] [band] [pt] [token]; -+ ++x->coef_counts [1] [band] [pt] [token]; - - pt = vp8_prev_token_class[token]; - t++; -@@ -177,7 +178,7 @@ static void tokenize2nd_order_b - - t->skip_eob_node = 0; - -- ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; - - t++; - } -@@ -189,12 +190,13 @@ static void tokenize2nd_order_b - - static void tokenize1st_order_b - ( -- MACROBLOCKD *x, -+ MACROBLOCK *x, - TOKENEXTRA **tp, - int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - VP8_COMP *cpi - ) - { -+ MACROBLOCKD *xd = &x->e_mbd; - unsigned int block; - const BLOCKD *b; - int pt; /* near block/prev token context index */ -@@ -207,15 +209,15 @@ static void tokenize1st_order_b - int band, rc, v; - int tmp1, tmp2; - -- b = x->block; -+ b = xd->block; - /* Luma */ - for (block = 0; block < 16; block++, b++) - { - tmp1 = vp8_block2above[block]; - tmp2 = vp8_block2left[block]; - qcoeff_ptr = b->qcoeff; -- a = (ENTROPY_CONTEXT *)x->above_context + tmp1; -- l = (ENTROPY_CONTEXT *)x->left_context + tmp2; -+ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; -+ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; - - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - -@@ -228,7 +230,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; - t->skip_eob_node = 0; - -- ++cpi->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; - t++; - *tp = t; - *a = *l = 0; -@@ -243,7 +245,7 @@ static void tokenize1st_order_b - - t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [type] [c] [pt] [token]; -+ ++x->coef_counts [type] [c] [pt] [token]; - pt = 
vp8_prev_token_class[token]; - t++; - c++; -@@ -261,7 +263,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - - t->skip_eob_node = (pt == 0); -- ++cpi->coef_counts [type] [band] [pt] [token]; -+ ++x->coef_counts [type] [band] [pt] [token]; - - pt = vp8_prev_token_class[token]; - t++; -@@ -273,7 +275,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - - t->skip_eob_node = 0; -- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; - - t++; - } -@@ -287,8 +289,8 @@ static void tokenize1st_order_b - tmp1 = vp8_block2above[block]; - tmp2 = vp8_block2left[block]; - qcoeff_ptr = b->qcoeff; -- a = (ENTROPY_CONTEXT *)x->above_context + tmp1; -- l = (ENTROPY_CONTEXT *)x->left_context + tmp2; -+ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; -+ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; - - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - -@@ -299,7 +301,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->skip_eob_node = 0; - -- ++cpi->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; - t++; - *tp = t; - *a = *l = 0; -@@ -314,7 +316,7 @@ static void tokenize1st_order_b - - t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [2] [0] [pt] [token]; -+ ++x->coef_counts [2] [0] [pt] [token]; - pt = vp8_prev_token_class[token]; - t++; - c = 1; -@@ -333,7 +335,7 @@ static void tokenize1st_order_b - - t->skip_eob_node = (pt == 0); - -- ++cpi->coef_counts [2] [band] [pt] [token]; -+ ++x->coef_counts [2] [band] [pt] [token]; - - pt = vp8_prev_token_class[token]; - t++; -@@ -346,7 +348,7 @@ static void tokenize1st_order_b - - t->skip_eob_node = 0; - -- ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; - - t++; - } -@@ -374,16 +376,18 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) - } - - --void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) -+void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) - { -+ MACROBLOCKD *xd = &x->e_mbd; - int plane_type; - int has_y2_block; - -- has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED -- && x->mode_info_context->mbmi.mode != SPLITMV); -+ has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED -+ && xd->mode_info_context->mbmi.mode != SPLITMV); - -- x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); -- if (x->mode_info_context->mbmi.mb_skip_coeff) -+ xd->mode_info_context->mbmi.mb_skip_coeff = -+ mb_is_skippable(xd, has_y2_block); -+ if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - if (!cpi->common.mb_no_coeff_skip) - { -@@ -391,8 +395,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) - } - else - { -- vp8_fix_contexts(x); -- cpi->skip_true_count++; -+ vp8_fix_contexts(xd); -+ x->skip_true_count++; - } - - return; -@@ -488,7 +492,8 @@ static void stuff2nd_order_b - TOKENEXTRA **tp, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, -- VP8_COMP *cpi -+ VP8_COMP *cpi, -+ MACROBLOCK *x - ) - { - int pt; /* near block/prev token context index */ -@@ -498,13 +503,12 @@ static void stuff2nd_order_b - t->Token = DCT_EOB_TOKEN; - t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; - ++t; - - *tp 
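Every token emitted in these loops is coded against a context pt derived from the left and above entropy contexts. The combining macro is assumed to reduce each neighbour to a "had nonzero coefficients" flag and add them, giving pt in {0, 1, 2}:

    /* Assumed form of VP8_COMBINEENTROPYCONTEXTS used throughout the hunks above. */
    #define COMBINE_ENTROPY_CONTEXTS(pt, a, l)  ((pt) = ((a) != 0) + ((l) != 0))

After a block is tokenized, *a and *l are rewritten (zero for an all-zero block), so the next block along each direction sees the updated context.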
= t; - pt = 0; - *a = *l = pt; -- - } - - static void stuff1st_order_b -@@ -513,7 +517,8 @@ static void stuff1st_order_b - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - int type, -- VP8_COMP *cpi -+ VP8_COMP *cpi, -+ MACROBLOCK *x - ) - { - int pt; /* near block/prev token context index */ -@@ -524,20 +529,21 @@ static void stuff1st_order_b - t->Token = DCT_EOB_TOKEN; - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; - ++t; - *tp = t; - pt = 0; /* 0 <-> all coeff data is zero */ - *a = *l = pt; -- - } -+ - static - void stuff1st_order_buv - ( - TOKENEXTRA **tp, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, -- VP8_COMP *cpi -+ VP8_COMP *cpi, -+ MACROBLOCK *x - ) - { - int pt; /* near block/prev token context index */ -@@ -547,38 +553,38 @@ void stuff1st_order_buv - t->Token = DCT_EOB_TOKEN; - t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; - ++t; - *tp = t; - pt = 0; /* 0 <-> all coeff data is zero */ - *a = *l = pt; -- - } - --void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) -+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) - { -- ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; -- ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; -+ MACROBLOCKD *xd = &x->e_mbd; -+ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context; -+ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context; - int plane_type; - int b; - plane_type = 3; -- if((x->mode_info_context->mbmi.mode != B_PRED -- && x->mode_info_context->mbmi.mode != SPLITMV)) -+ if((xd->mode_info_context->mbmi.mode != B_PRED -+ && xd->mode_info_context->mbmi.mode != SPLITMV)) - { - stuff2nd_order_b(t, -- A + vp8_block2above[24], L + vp8_block2left[24], cpi); -+ A + vp8_block2above[24], L + vp8_block2left[24], cpi, x); - plane_type = 0; - } - - for (b = 0; b < 16; b++) - stuff1st_order_b(t, - A + vp8_block2above[b], -- L + vp8_block2left[b], plane_type, cpi); -+ L + vp8_block2left[b], plane_type, cpi, x); - - for (b = 16; b < 24; b++) - stuff1st_order_buv(t, - A + vp8_block2above[b], -- L + vp8_block2left[b], cpi); -+ L + vp8_block2left[b], cpi, x); - - } - void vp8_fix_contexts(MACROBLOCKD *x) -diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm -index f07b030..6f188cb 100644 ---- a/vp8/encoder/x86/dct_mmx.asm -+++ b/vp8/encoder/x86/dct_mmx.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) --global sym(vp8_short_fdct4x4_mmx) -+global sym(vp8_short_fdct4x4_mmx) PRIVATE - sym(vp8_short_fdct4x4_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm -index 3d52a5d..d880ce0 100644 ---- a/vp8/encoder/x86/dct_sse2.asm -+++ b/vp8/encoder/x86/dct_sse2.asm -@@ -61,7 +61,7 @@ - %endmacro - - ;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) --global sym(vp8_short_fdct4x4_sse2) -+global sym(vp8_short_fdct4x4_sse2) PRIVATE - sym(vp8_short_fdct4x4_sse2): - - STACK_FRAME_CREATE -@@ -166,7 +166,7 @@ sym(vp8_short_fdct4x4_sse2): - STACK_FRAME_DESTROY - - ;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) --global sym(vp8_short_fdct8x4_sse2) -+global sym(vp8_short_fdct8x4_sse2) PRIVATE - sym(vp8_short_fdct8x4_sse2): - - 
STACK_FRAME_CREATE -diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c -new file mode 100644 -index 0000000..c1ac6c1 ---- /dev/null -+++ b/vp8/encoder/x86/denoising_sse2.c -@@ -0,0 +1,119 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "vp8/encoder/denoising.h" -+#include "vp8/common/reconinter.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+#include "vpx_rtcd.h" -+ -+#include -+ -+union sum_union { -+ __m128i v; -+ signed char e[16]; -+}; -+ -+int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, -+ YV12_BUFFER_CONFIG *running_avg, -+ MACROBLOCK *signal, unsigned int motion_magnitude, -+ int y_offset, int uv_offset) -+{ -+ unsigned char *sig = signal->thismb; -+ int sig_stride = 16; -+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; -+ int mc_avg_y_stride = mc_running_avg->y_stride; -+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset; -+ int avg_y_stride = running_avg->y_stride; -+ int r; -+ __m128i acc_diff = _mm_setzero_si128(); -+ const __m128i k_0 = _mm_setzero_si128(); -+ const __m128i k_4 = _mm_set1_epi8(4); -+ const __m128i k_8 = _mm_set1_epi8(8); -+ const __m128i k_16 = _mm_set1_epi8(16); -+ /* Modify each level's adjustment according to motion_magnitude. */ -+ const __m128i l3 = _mm_set1_epi8( -+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6); -+ /* Difference between level 3 and level 2 is 2. */ -+ const __m128i l32 = _mm_set1_epi8(2); -+ /* Difference between level 2 and level 1 is 1. */ -+ const __m128i l21 = _mm_set1_epi8(1); -+ -+ for (r = 0; r < 16; ++r) -+ { -+ /* Calculate differences */ -+ const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); -+ const __m128i v_mc_running_avg_y = _mm_loadu_si128( -+ (__m128i *)(&mc_running_avg_y[0])); -+ __m128i v_running_avg_y; -+ const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); -+ const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); -+ /* Obtain the sign. FF if diff is negative. */ -+ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); -+ /* Clamp absolute difference to 16 to be used to get mask. Doing this -+ * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */ -+ const __m128i clamped_absdiff = _mm_min_epu8( -+ _mm_or_si128(pdiff, ndiff), k_16); -+ /* Get masks for l2 l1 and l0 adjustments */ -+ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); -+ const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); -+ const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); -+ /* Get adjustments for l2, l1, and l0 */ -+ __m128i adj2 = _mm_and_si128(mask2, l32); -+ const __m128i adj1 = _mm_and_si128(mask1, l21); -+ const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); -+ __m128i adj, padj, nadj; -+ -+ /* Combine the adjustments and get absolute adjustments. */ -+ adj2 = _mm_add_epi8(adj2, adj1); -+ adj = _mm_sub_epi8(l3, adj2); -+ adj = _mm_andnot_si128(mask0, adj); -+ adj = _mm_or_si128(adj, adj0); -+ -+ /* Restore the sign and get positive and negative adjustments. 
*/ -+ padj = _mm_andnot_si128(diff_sign, adj); -+ nadj = _mm_and_si128(diff_sign, adj); -+ -+ /* Calculate filtered value. */ -+ v_running_avg_y = _mm_adds_epu8(v_sig, padj); -+ v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); -+ _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); -+ -+ /* Adjustments <=7, and each element in acc_diff can fit in signed -+ * char. -+ */ -+ acc_diff = _mm_adds_epi8(acc_diff, padj); -+ acc_diff = _mm_subs_epi8(acc_diff, nadj); -+ -+ /* Update pointers for next iteration. */ -+ sig += sig_stride; -+ mc_running_avg_y += mc_avg_y_stride; -+ running_avg_y += avg_y_stride; -+ } -+ -+ { -+ /* Compute the sum of all pixel differences of this MB. */ -+ union sum_union s; -+ int sum_diff = 0; -+ s.v = acc_diff; -+ sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5] -+ + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11] -+ + s.e[12] + s.e[13] + s.e[14] + s.e[15]; -+ -+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD) -+ { -+ return COPY_BLOCK; -+ } -+ } -+ -+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, -+ signal->thismb, sig_stride); -+ return FILTER_BLOCK; -+} -diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm -index 7ec7d60..fe26b18 100644 ---- a/vp8/encoder/x86/encodeopt.asm -+++ b/vp8/encoder/x86/encodeopt.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) --global sym(vp8_block_error_xmm) -+global sym(vp8_block_error_xmm) PRIVATE - sym(vp8_block_error_xmm): - push rbp - mov rbp, rsp -@@ -60,7 +60,7 @@ sym(vp8_block_error_xmm): - ret - - ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) --global sym(vp8_block_error_mmx) -+global sym(vp8_block_error_mmx) PRIVATE - sym(vp8_block_error_mmx): - push rbp - mov rbp, rsp -@@ -126,7 +126,7 @@ sym(vp8_block_error_mmx): - - - ;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); --global sym(vp8_mbblock_error_mmx_impl) -+global sym(vp8_mbblock_error_mmx_impl) PRIVATE - sym(vp8_mbblock_error_mmx_impl): - push rbp - mov rbp, rsp -@@ -203,7 +203,7 @@ sym(vp8_mbblock_error_mmx_impl): - - - ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); --global sym(vp8_mbblock_error_xmm_impl) -+global sym(vp8_mbblock_error_xmm_impl) PRIVATE - sym(vp8_mbblock_error_xmm_impl): - push rbp - mov rbp, rsp -@@ -273,7 +273,7 @@ sym(vp8_mbblock_error_xmm_impl): - - - ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); --global sym(vp8_mbuverror_mmx_impl) -+global sym(vp8_mbuverror_mmx_impl) PRIVATE - sym(vp8_mbuverror_mmx_impl): - push rbp - mov rbp, rsp -@@ -330,7 +330,7 @@ sym(vp8_mbuverror_mmx_impl): - - - ;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); --global sym(vp8_mbuverror_xmm_impl) -+global sym(vp8_mbuverror_xmm_impl) PRIVATE - sym(vp8_mbuverror_xmm_impl): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm -index 71efd56..f498927 100644 ---- a/vp8/encoder/x86/fwalsh_sse2.asm -+++ b/vp8/encoder/x86/fwalsh_sse2.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch) --global sym(vp8_short_walsh4x4_sse2) -+global sym(vp8_short_walsh4x4_sse2) PRIVATE - sym(vp8_short_walsh4x4_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm -index f29a54e..2864ce1 100644 ---- a/vp8/encoder/x86/quantize_mmx.asm -+++ 
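The vector code above implements, sixteen pixels at a time, a small piecewise adjustment of the signal toward the motion-compensated running average. A scalar restatement of the per-pixel rule (the level constants follow l3/l32/l21 above; names are illustrative):

    #include <stdlib.h>

    /* diff = mc_running_avg - sig for one pixel; returns the signed amount
     * added to sig.  Small differences are followed exactly, larger ones are
     * progressively damped. */
    static int denoiser_adjustment(int diff, int motion_is_small)
    {
        const int l3 = motion_is_small ? 7 : 6;   /* strongest step          */
        const int absdiff = abs(diff);
        int adj;

        if (absdiff < 4)
            adj = absdiff;          /* level 0: track the signal exactly     */
        else if (absdiff < 8)
            adj = l3 - 3;           /* level 1                               */
        else if (absdiff < 16)
            adj = l3 - 2;           /* level 2                               */
        else
            adj = l3;               /* level 3: clamp the step               */

        return diff > 0 ? adj : -adj;             /* restore the sign        */
    }

The adjustments are also summed over the whole macroblock; if the absolute sum exceeds SUM_DIFF_THRESHOLD the filtered result is rejected (COPY_BLOCK) instead of being written back as the new running average.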
b/vp8/encoder/x86/quantize_mmx.asm -@@ -15,7 +15,7 @@ - ; short *qcoeff_ptr,short *dequant_ptr, - ; short *scan_mask, short *round_ptr, - ; short *quant_ptr, short *dqcoeff_ptr); --global sym(vp8_fast_quantize_b_impl_mmx) -+global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE - sym(vp8_fast_quantize_b_impl_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm -index 7c249ff..724e54c 100644 ---- a/vp8/encoder/x86/quantize_sse2.asm -+++ b/vp8/encoder/x86/quantize_sse2.asm -@@ -16,7 +16,7 @@ - ; (BLOCK *b, | 0 - ; BLOCKD *d) | 1 - --global sym(vp8_regular_quantize_b_sse2) -+global sym(vp8_regular_quantize_b_sse2) PRIVATE - sym(vp8_regular_quantize_b_sse2): - push rbp - mov rbp, rsp -@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15 - ; (BLOCK *b, | 0 - ; BLOCKD *d) | 1 - --global sym(vp8_fast_quantize_b_sse2) -+global sym(vp8_fast_quantize_b_sse2) PRIVATE - sym(vp8_fast_quantize_b_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm -index 70eac0c..f0e5d40 100644 ---- a/vp8/encoder/x86/quantize_sse4.asm -+++ b/vp8/encoder/x86/quantize_sse4.asm -@@ -16,7 +16,7 @@ - ; (BLOCK *b, | 0 - ; BLOCKD *d) | 1 - --global sym(vp8_regular_quantize_b_sse4) -+global sym(vp8_regular_quantize_b_sse4) PRIVATE - sym(vp8_regular_quantize_b_sse4): - - %if ABI_IS_32BIT -diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm -index e698e90..dd526f4 100644 ---- a/vp8/encoder/x86/quantize_ssse3.asm -+++ b/vp8/encoder/x86/quantize_ssse3.asm -@@ -17,7 +17,7 @@ - ; BLOCKD *d) | 1 - ; - --global sym(vp8_fast_quantize_b_ssse3) -+global sym(vp8_fast_quantize_b_ssse3) PRIVATE - sym(vp8_fast_quantize_b_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm -index c6db3d1..5964a85 100644 ---- a/vp8/encoder/x86/ssim_opt.asm -+++ b/vp8/encoder/x86/ssim_opt.asm -@@ -61,7 +61,7 @@ - ; or pavgb At this point this is just meant to be first pass for calculating - ; all the parms needed for 16x16 ssim so we can play with dssim as distortion - ; in mode selection code. --global sym(vp8_ssim_parms_16x16_sse2) -+global sym(vp8_ssim_parms_16x16_sse2) PRIVATE - sym(vp8_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp -@@ -151,7 +151,7 @@ sym(vp8_ssim_parms_16x16_sse2): - ; or pavgb At this point this is just meant to be first pass for calculating - ; all the parms needed for 16x16 ssim so we can play with dssim as distortion - ; in mode selection code. 
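The recurring change in these assembly files appends PRIVATE to each global declaration; the macro is assumed to mark the symbol with hidden visibility so these internal helpers are no longer exported from the shared library. For illustration only, the C-level analogue on ELF targets:

    /* Hypothetical helper, shown only to illustrate hidden visibility; the asm
     * PRIVATE suffix is assumed to have the equivalent effect. */
    __attribute__((visibility("hidden")))
    void vp8_internal_helper(void);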
--global sym(vp8_ssim_parms_8x8_sse2) -+global sym(vp8_ssim_parms_8x8_sse2) PRIVATE - sym(vp8_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm -index 75e8aa3..794dd22 100644 ---- a/vp8/encoder/x86/subtract_mmx.asm -+++ b/vp8/encoder/x86/subtract_mmx.asm -@@ -14,7 +14,7 @@ - ;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, - ; short *diff, unsigned char *Predictor, - ; int pitch); --global sym(vp8_subtract_b_mmx_impl) -+global sym(vp8_subtract_b_mmx_impl) PRIVATE - sym(vp8_subtract_b_mmx_impl): - push rbp - mov rbp, rsp -@@ -75,7 +75,7 @@ sym(vp8_subtract_b_mmx_impl): - - ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, - ;unsigned char *pred, int pred_stride) --global sym(vp8_subtract_mby_mmx) -+global sym(vp8_subtract_mby_mmx) PRIVATE - sym(vp8_subtract_mby_mmx): - push rbp - mov rbp, rsp -@@ -150,7 +150,7 @@ sym(vp8_subtract_mby_mmx): - ; int src_stride, unsigned char *upred, - ; unsigned char *vpred, int pred_stride) - --global sym(vp8_subtract_mbuv_mmx) -+global sym(vp8_subtract_mbuv_mmx) PRIVATE - sym(vp8_subtract_mbuv_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm -index 008e9c7..a5d17f5 100644 ---- a/vp8/encoder/x86/subtract_sse2.asm -+++ b/vp8/encoder/x86/subtract_sse2.asm -@@ -14,7 +14,7 @@ - ;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, - ; short *diff, unsigned char *Predictor, - ; int pitch); --global sym(vp8_subtract_b_sse2_impl) -+global sym(vp8_subtract_b_sse2_impl) PRIVATE - sym(vp8_subtract_b_sse2_impl): - push rbp - mov rbp, rsp -@@ -73,7 +73,7 @@ sym(vp8_subtract_b_sse2_impl): - - ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, - ;unsigned char *pred, int pred_stride) --global sym(vp8_subtract_mby_sse2) -+global sym(vp8_subtract_mby_sse2) PRIVATE - sym(vp8_subtract_mby_sse2): - push rbp - mov rbp, rsp -@@ -146,7 +146,7 @@ sym(vp8_subtract_mby_sse2): - ;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, - ; int src_stride, unsigned char *upred, - ; unsigned char *vpred, int pred_stride) --global sym(vp8_subtract_mbuv_sse2) -+global sym(vp8_subtract_mbuv_sse2) PRIVATE - sym(vp8_subtract_mbuv_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm -index b97c694..ce9d983 100644 ---- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm -+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm -@@ -20,7 +20,7 @@ - ; int filter_weight, | 5 - ; unsigned int *accumulator, | 6 - ; unsigned short *count) | 7 --global sym(vp8_temporal_filter_apply_sse2) -+global sym(vp8_temporal_filter_apply_sse2) PRIVATE - sym(vp8_temporal_filter_apply_sse2): - - push rbp -diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk -index 3a7b146..a328f46 100644 ---- a/vp8/vp8_common.mk -+++ b/vp8/vp8_common.mk -@@ -30,7 +30,6 @@ VP8_COMMON_SRCS-yes += common/findnearmv.c - VP8_COMMON_SRCS-yes += common/generic/systemdependent.c - VP8_COMMON_SRCS-yes += common/idct_blk.c - VP8_COMMON_SRCS-yes += common/idctllm.c --VP8_COMMON_SRCS-yes += common/idctllm_test.cc - VP8_COMMON_SRCS-yes += common/alloccommon.h - VP8_COMMON_SRCS-yes += common/blockd.h - VP8_COMMON_SRCS-yes += common/common.h -@@ -85,7 +84,6 @@ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm - VP8_COMMON_SRCS-$(HAVE_MMX) += 
common/x86/idct_blk_mmx.c - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm --VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx_test.cc - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm -@@ -122,6 +120,14 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm - endif - - # common (c) -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/loopfilter_filters_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c -+ -+# common (c) - VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c - VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c - VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c -diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c -index 5fb74c4..eeac3a8 100644 ---- a/vp8/vp8_cx_iface.c -+++ b/vp8/vp8_cx_iface.c -@@ -9,6 +9,7 @@ - */ - - -+#include "vpx_rtcd.h" - #include "vpx/vpx_codec.h" - #include "vpx/internal/vpx_codec_internal.h" - #include "vpx_version.h" -@@ -22,7 +23,6 @@ - struct vp8_extracfg - { - struct vpx_codec_pkt_list *pkt_list; -- vp8e_encoding_mode encoding_mode; /** best, good, realtime */ - int cpu_used; /** available cpu percentage in 1/16*/ - unsigned int enable_auto_alt_ref; /** if encoder decides to uses alternate reference frame */ - unsigned int noise_sensitivity; -@@ -51,10 +51,8 @@ static const struct extraconfig_map extracfg_map[] = - { - NULL, - #if !(CONFIG_REALTIME_ONLY) -- VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */ - 0, /* cpu_used */ - #else -- VP8_REAL_TIME_ENCODING, /* Encoding Mode */ - 4, /* cpu_used */ - #endif - 0, /* enable_auto_alt_ref */ -@@ -88,7 +86,8 @@ struct vpx_codec_alg_priv - vpx_image_t preview_img; - unsigned int next_frame_flag; - vp8_postproc_cfg_t preview_ppcfg; -- vpx_codec_pkt_list_decl(64) pkt_list; // changed to accomendate the maximum number of lagged frames allowed -+ /* pkt_list size depends on the maximum number of lagged frames allowed. 
*/ -+ vpx_codec_pkt_list_decl(64) pkt_list; - unsigned int fixed_kf_cntr; - }; - -@@ -146,25 +145,39 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); - RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); - RANGE_CHECK_HI(cfg, g_threads, 64); --#if !(CONFIG_REALTIME_ONLY) -- RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); --#else -+#if CONFIG_REALTIME_ONLY - RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); -+#elif CONFIG_MULTI_RES_ENCODING -+ if (ctx->base.enc.total_encoders > 1) -+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); -+#else -+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); - #endif - RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ); - RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000); - RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); - RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); - RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); -- //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); -- RANGE_CHECK_BOOL(cfg, rc_resize_allowed); -+ -+/* TODO: add spatial re-sampling support and frame dropping in -+ * multi-res-encoder.*/ -+#if CONFIG_MULTI_RES_ENCODING -+ if (ctx->base.enc.total_encoders > 1) -+ RANGE_CHECK_HI(cfg, rc_resize_allowed, 0); -+#else -+ RANGE_CHECK_BOOL(cfg, rc_resize_allowed); -+#endif - RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); - RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); - RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); --#if !(CONFIG_REALTIME_ONLY) -- RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); --#else -+ -+#if CONFIG_REALTIME_ONLY - RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); -+#elif CONFIG_MULTI_RES_ENCODING -+ if (ctx->base.enc.total_encoders > 1) -+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); -+#else -+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); - #endif - - /* VP8 does not support a lower bound on the keyframe interval in -@@ -177,11 +190,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - - RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); - RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); --#if !(CONFIG_REALTIME_ONLY) -- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING); --#else -- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING); --#endif - - #if CONFIG_REALTIME_ONLY && !CONFIG_TEMPORAL_DENOISING - RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0); -@@ -189,7 +197,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); - #endif - -- RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION); -+ RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, -+ VP8_EIGHT_TOKENPARTITION); - RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); - RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); - RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); -@@ -203,7 +212,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - if (cfg->g_pass == VPX_RC_LAST_PASS) - { - size_t packet_sz = sizeof(FIRSTPASS_STATS); -- int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz; -+ int n_packets = (int)(cfg->rc_twopass_stats_in.sz / -+ packet_sz); - FIRSTPASS_STATS *stats; - - if (!cfg->rc_twopass_stats_in.buf) -@@ -227,7 +237,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - - if (cfg->ts_number_layers > 1) - { -- int i; -+ unsigned int i; - RANGE_CHECK_HI(cfg, ts_periodicity, 16); - - for (i=1; its_number_layers; i++) -@@ -299,7 +309,7 @@ static vpx_codec_err_t 
set_vp8e_config(VP8_CONFIG *oxcf, - break; - } - -- if (cfg.g_pass == VPX_RC_FIRST_PASS) -+ if (cfg.g_pass == VPX_RC_FIRST_PASS || cfg.g_pass == VPX_RC_ONE_PASS) - { - oxcf->allow_lag = 0; - oxcf->lag_in_frames = 0; -@@ -355,7 +365,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, - - oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO - && cfg.kf_min_dist != cfg.kf_max_dist; -- //oxcf->kf_min_dist = cfg.kf_min_dis; - oxcf->key_freq = cfg.kf_max_dist; - - oxcf->number_of_layers = cfg.ts_number_layers; -@@ -385,9 +394,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, - } - #endif - -- //oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile; -- //strcpy(oxcf->first_pass_file, cfg.g_firstpass_file); -- - oxcf->cpu_used = vp8_cfg.cpu_used; - oxcf->encode_breakout = vp8_cfg.static_thresh; - oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; -@@ -447,7 +453,7 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, - vpx_codec_err_t res; - - if (((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) -- && cfg->g_lag_in_frames > 1) -+ && (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS)) - ERROR("Cannot change width or height after initialization"); - - /* Prevent increasing lag_in_frames. This check is stricter than it needs -@@ -542,19 +548,27 @@ static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, - vpx_codec_err_t res = 0; - - #if CONFIG_MULTI_RES_ENCODING -+ LOWER_RES_FRAME_INFO *shared_mem_loc; - int mb_rows = ((cfg->g_w + 15) >>4); - int mb_cols = ((cfg->g_h + 15) >>4); - -- *mem_loc = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_INFO)); -- if(!(*mem_loc)) -+ shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO)); -+ if(!shared_mem_loc) -+ { -+ res = VPX_CODEC_MEM_ERROR; -+ } -+ -+ shared_mem_loc->mb_info = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_MB_INFO)); -+ if(!(shared_mem_loc->mb_info)) - { -- free(*mem_loc); - res = VPX_CODEC_MEM_ERROR; - } - else -+ { -+ *mem_loc = (void *)shared_mem_loc; - res = VPX_CODEC_OK; -+ } - #endif -- - return res; - } - -@@ -568,6 +582,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, - - struct VP8_COMP *optr; - -+ vpx_rtcd(); -+ - if (!ctx->priv) - { - priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); -@@ -616,15 +632,15 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, - return VPX_CODEC_MEM_ERROR; - } - -+ if(mr_cfg) -+ ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; -+ else -+ ctx->priv->enc.total_encoders = 1; -+ - res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); - - if (!res) - { -- if(mr_cfg) -- ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; -- else -- ctx->priv->enc.total_encoders = 1; -- - set_vp8e_config(&ctx->priv->alg_priv->oxcf, - ctx->priv->alg_priv->cfg, - ctx->priv->alg_priv->vp8_cfg, -@@ -647,7 +663,11 @@ static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) - #if CONFIG_MULTI_RES_ENCODING - /* Free multi-encoder shared memory */ - if (ctx->oxcf.mr_total_resolutions > 0 && (ctx->oxcf.mr_encoder_id == ctx->oxcf.mr_total_resolutions-1)) -+ { -+ LOWER_RES_FRAME_INFO *shared_mem_loc = (LOWER_RES_FRAME_INFO *)ctx->oxcf.mr_low_res_mode_info; -+ free(shared_mem_loc->mb_info); - free(ctx->oxcf.mr_low_res_mode_info); -+ } - #endif - - free(ctx->cx_data); -@@ -673,7 +693,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - yv12->uv_stride = img->stride[VPX_PLANE_U]; - - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; -- yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == 
VPX_IMG_FMT_VPXYV12); //REG_YUV = 0 -+ yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); - return res; - } - -@@ -733,6 +753,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - if (!ctx->cfg.rc_target_bitrate) - return res; - -+ if (!ctx->cfg.rc_target_bitrate) -+ return res; -+ - if (img) - res = validate_img(ctx, img); - -@@ -756,13 +779,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - int ref = 7; - - if (flags & VP8_EFLAG_NO_REF_LAST) -- ref ^= VP8_LAST_FLAG; -+ ref ^= VP8_LAST_FRAME; - - if (flags & VP8_EFLAG_NO_REF_GF) -- ref ^= VP8_GOLD_FLAG; -+ ref ^= VP8_GOLD_FRAME; - - if (flags & VP8_EFLAG_NO_REF_ARF) -- ref ^= VP8_ALT_FLAG; -+ ref ^= VP8_ALTR_FRAME; - - vp8_use_as_reference(ctx->cpi, ref); - } -@@ -774,13 +797,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - int upd = 7; - - if (flags & VP8_EFLAG_NO_UPD_LAST) -- upd ^= VP8_LAST_FLAG; -+ upd ^= VP8_LAST_FRAME; - - if (flags & VP8_EFLAG_NO_UPD_GF) -- upd ^= VP8_GOLD_FLAG; -+ upd ^= VP8_GOLD_FRAME; - - if (flags & VP8_EFLAG_NO_UPD_ARF) -- upd ^= VP8_ALT_FLAG; -+ upd ^= VP8_ALTR_FRAME; - - vp8_update_reference(ctx->cpi, upd); - } -@@ -869,15 +892,16 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; - - /* Add the frame packet to the list of returned packets. */ -- round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1; -+ round = (vpx_codec_pts_t)1000000 -+ * ctx->cfg.g_timebase.num / 2 - 1; - delta = (dst_end_time_stamp - dst_time_stamp); - pkt.kind = VPX_CODEC_CX_FRAME_PKT; - pkt.data.frame.pts = - (dst_time_stamp * ctx->cfg.g_timebase.den + round) - / ctx->cfg.g_timebase.num / 10000000; -- pkt.data.frame.duration = -- (delta * ctx->cfg.g_timebase.den + round) -- / ctx->cfg.g_timebase.num / 10000000; -+ pkt.data.frame.duration = (unsigned long) -+ ((delta * ctx->cfg.g_timebase.den + round) -+ / ctx->cfg.g_timebase.num / 10000000); - pkt.data.frame.flags = lib_flags << 16; - - if (lib_flags & FRAMEFLAGS_KEY) -@@ -887,10 +911,11 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - { - pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE; - -- // This timestamp should be as close as possible to the -- // prior PTS so that if a decoder uses pts to schedule when -- // to do this, we start right after last frame was decoded. -- // Invisible frames have no duration. -+ /* This timestamp should be as close as possible to the -+ * prior PTS so that if a decoder uses pts to schedule when -+ * to do this, we start right after last frame was decoded. -+ * Invisible frames have no duration. 
-+ */ - pkt.data.frame.pts = ((cpi->last_time_stamp_seen - * ctx->cfg.g_timebase.den + round) - / ctx->cfg.g_timebase.num / 10000000) + 1; -@@ -942,8 +967,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - cx_data += size; - cx_data_sz -= size; - } -- -- //printf("timestamp: %lld, duration: %d\n", pkt->data.frame.pts, pkt->data.frame.duration); - } - } - } -diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c -index 37773db..c13d697 100644 ---- a/vp8/vp8_dx_iface.c -+++ b/vp8/vp8_dx_iface.c -@@ -11,12 +11,19 @@ - - #include - #include -+#include "vpx_rtcd.h" - #include "vpx/vpx_decoder.h" - #include "vpx/vp8dx.h" - #include "vpx/internal/vpx_codec_internal.h" - #include "vpx_version.h" - #include "common/onyxd.h" - #include "decoder/onyxd_int.h" -+#include "common/alloccommon.h" -+#include "vpx_mem/vpx_mem.h" -+#if CONFIG_ERROR_CONCEALMENT -+#include "decoder/error_concealment.h" -+#endif -+#include "decoder/decoderthreading.h" - - #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) - #define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \ -@@ -69,7 +76,7 @@ struct vpx_codec_alg_priv - #endif - vpx_image_t img; - int img_setup; -- int img_avail; -+ void *user_priv; - }; - - static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags) -@@ -187,6 +194,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, - vpx_codec_err_t res = VPX_CODEC_OK; - (void) data; - -+ vpx_rtcd(); -+ - /* This function only allocates space for the vpx_codec_alg_priv_t - * structure. More memory may be required at the time the stream - * information becomes known. -@@ -341,16 +350,30 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - long deadline) - { - vpx_codec_err_t res = VPX_CODEC_OK; -- -- ctx->img_avail = 0; -+ unsigned int resolution_change = 0; -+ unsigned int w, h; - - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. 
- */ -- if (!ctx->si.h) -- res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); -+ w = ctx->si.w; -+ h = ctx->si.h; -+ -+ res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); -+ -+ if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) -+ { -+ /* the peek function returns an error for non keyframes, however for -+ * this case, it is not an error */ -+ res = VPX_CODEC_OK; -+ } -+ -+ if(!ctx->decoder_init && !ctx->si.is_kf) -+ res = VPX_CODEC_UNSUP_BITSTREAM; - -+ if ((ctx->si.h != h) || (ctx->si.w != w)) -+ resolution_change = 1; - - /* Perform deferred allocations, if required */ - if (!res && ctx->defer_alloc) -@@ -426,6 +449,122 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - - if (!res && ctx->pbi) - { -+ if(resolution_change) -+ { -+ VP8D_COMP *pbi = ctx->pbi; -+ VP8_COMMON *const pc = & pbi->common; -+ MACROBLOCKD *const xd = & pbi->mb; -+#if CONFIG_MULTITHREAD -+ int i; -+#endif -+ pc->Width = ctx->si.w; -+ pc->Height = ctx->si.h; -+ { -+ int prev_mb_rows = pc->mb_rows; -+ -+ if (setjmp(pbi->common.error.jmp)) -+ { -+ pbi->common.error.setjmp = 0; -+ /* same return value as used in vp8dx_receive_compressed_data */ -+ return -1; -+ } -+ -+ pbi->common.error.setjmp = 1; -+ -+ if (pc->Width <= 0) -+ { -+ pc->Width = w; -+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -+ "Invalid frame width"); -+ } -+ -+ if (pc->Height <= 0) -+ { -+ pc->Height = h; -+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -+ "Invalid frame height"); -+ } -+ -+ if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) -+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -+ "Failed to allocate frame buffers"); -+ -+ xd->pre = pc->yv12_fb[pc->lst_fb_idx]; -+ xd->dst = pc->yv12_fb[pc->new_fb_idx]; -+ -+#if CONFIG_MULTITHREAD -+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++) -+ { -+ pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; -+ vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); -+ } -+#endif -+ vp8_build_block_doffsets(&pbi->mb); -+ -+ /* allocate memory for last frame MODE_INFO array */ -+#if CONFIG_ERROR_CONCEALMENT -+ -+ if (pbi->ec_enabled) -+ { -+ /* old prev_mip was released by vp8_de_alloc_frame_buffers() -+ * called in vp8_alloc_frame_buffers() */ -+ pc->prev_mip = vpx_calloc( -+ (pc->mb_cols + 1) * (pc->mb_rows + 1), -+ sizeof(MODE_INFO)); -+ -+ if (!pc->prev_mip) -+ { -+ vp8_de_alloc_frame_buffers(pc); -+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -+ "Failed to allocate" -+ "last frame MODE_INFO array"); -+ } -+ -+ pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; -+ -+ if (vp8_alloc_overlap_lists(pbi)) -+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -+ "Failed to allocate overlap lists " -+ "for error concealment"); -+ } -+ -+#endif -+ -+#if CONFIG_MULTITHREAD -+ if (pbi->b_multithreaded_rd) -+ vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); -+#else -+ (void)prev_mb_rows; -+#endif -+ } -+ -+ pbi->common.error.setjmp = 0; -+ -+ /* required to get past the first get_free_fb() call */ -+ ctx->pbi->common.fb_idx_ref_cnt[0] = 0; -+ } -+ -+ ctx->user_priv = user_priv; -+ if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) -+ { -+ VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; -+ res = update_error_state(ctx, &pbi->common.error); -+ } -+ } -+ -+ return res; -+} -+ -+static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, -+ vpx_codec_iter_t *iter) -+{ -+ vpx_image_t *img = NULL; -+ -+ /* iter acts as a flip flop, so an image is only returned on the first -+ * call to get_frame. 
-+ */ -+ if (!(*iter)) -+ { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp8_ppflags_t flags = {0}; -@@ -451,34 +590,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - #endif - } - -- if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) -- { -- VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; -- res = update_error_state(ctx, &pbi->common.error); -- } -- -- if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) -+ if (0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) - { -- yuvconfig2image(&ctx->img, &sd, user_priv); -- ctx->img_avail = 1; -- } -- } -+ yuvconfig2image(&ctx->img, &sd, ctx->user_priv); - -- return res; --} -- --static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, -- vpx_codec_iter_t *iter) --{ -- vpx_image_t *img = NULL; -- -- if (ctx->img_avail) -- { -- /* iter acts as a flip flop, so an image is only returned on the first -- * call to get_frame. -- */ -- if (!(*iter)) -- { - img = &ctx->img; - *iter = img; - } -diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk -index 019edbd..0ae2f10 100644 ---- a/vp8/vp8cx.mk -+++ b/vp8/vp8cx.mk -@@ -22,16 +22,9 @@ ifeq ($(ARCH_ARM),yes) - include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk - endif - --VP8_CX_SRCS-yes += vp8_cx_iface.c -+VP8_CX_SRCS-yes += vp8cx.mk - --# encoder --#INCLUDES += algo/vpx_common/vpx_mem/include --#INCLUDES += common --#INCLUDES += common --#INCLUDES += common --#INCLUDES += algo/vpx_ref/cpu_id/include --#INCLUDES += common --#INCLUDES += encoder -+VP8_CX_SRCS-yes += vp8_cx_iface.c - - VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c - VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h -@@ -99,6 +92,14 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm -+ -+ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) -+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c -+ifeq ($(HAVE_SSE2),yes) -+vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 -+endif -+endif -+ - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c -diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk -index b16615d..b030ee5 100644 ---- a/vp8/vp8cx_arm.mk -+++ b/vp8/vp8cx_arm.mk -@@ -9,7 +9,7 @@ - ## - - --#VP8_CX_SRCS list is modified according to different platforms. 
-+VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk - - #File list for arm - # encoder -diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk -index 2cfd280..dd39190 100644 ---- a/vp8/vp8dx.mk -+++ b/vp8/vp8dx.mk -@@ -18,6 +18,8 @@ VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) - VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) - VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) - -+VP8_DX_SRCS-yes += vp8dx.mk -+ - VP8_DX_SRCS-yes += vp8_dx_iface.c - - # common -diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c -index 78f50c2..eae36a4 100644 ---- a/vp8_multi_resolution_encoder.c -+++ b/vp8_multi_resolution_encoder.c -@@ -164,7 +164,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - static void write_ivf_frame_header(FILE *outfile, -@@ -181,7 +181,7 @@ static void write_ivf_frame_header(FILE *outfile, - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - - int main(int argc, char **argv) -@@ -273,7 +273,7 @@ int main(int argc, char **argv) - cfg[0].g_w = width; - cfg[0].g_h = height; - cfg[0].g_threads = 1; /* number of threads used */ -- cfg[0].rc_dropframe_thresh = 0; -+ cfg[0].rc_dropframe_thresh = 30; - cfg[0].rc_end_usage = VPX_CBR; - cfg[0].rc_resize_allowed = 0; - cfg[0].rc_min_quantizer = 4; -@@ -283,13 +283,17 @@ int main(int argc, char **argv) - cfg[0].rc_buf_initial_sz = 500; - cfg[0].rc_buf_optimal_sz = 600; - cfg[0].rc_buf_sz = 1000; -- //cfg[0].rc_dropframe_thresh = 10; - cfg[0].g_error_resilient = 1; /* Enable error resilient mode */ - cfg[0].g_lag_in_frames = 0; - - /* Disable automatic keyframe placement */ -+ /* Note: These 3 settings are copied to all levels. But, except the lowest -+ * resolution level, all other levels are set to VPX_KF_DISABLED internally. 
-+ */ - //cfg[0].kf_mode = VPX_KF_DISABLED; -- cfg[0].kf_min_dist = cfg[0].kf_max_dist = 1000; -+ cfg[0].kf_mode = VPX_KF_AUTO; -+ cfg[0].kf_min_dist = 3000; -+ cfg[0].kf_max_dist = 3000; - - cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */ - cfg[0].g_timebase.num = 1; /* Set fps */ -@@ -361,6 +365,12 @@ int main(int argc, char **argv) - if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, static_thresh)) - die_codec(&codec[i], "Failed to set static threshold"); - } -+ /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */ -+ for ( i=0; i< NUM_ENCODERS; i++) -+ { -+ if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0)) -+ die_codec(&codec[i], "Failed to set noise_sensitivity"); -+ } - - frame_avail = 1; - got_data = 0; -@@ -405,8 +415,8 @@ int main(int argc, char **argv) - switch(pkt[i]->kind) { - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile[i], pkt[i]); -- if(fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz, -- outfile[i])); -+ (void) fwrite(pkt[i]->data.frame.buf, 1, -+ pkt[i]->data.frame.sz, outfile[i]); - break; - case VPX_CODEC_PSNR_PKT: - if (show_psnr) -diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c -index 4311b1a..06270fe 100644 ---- a/vp8_scalable_patterns.c -+++ b/vp8_scalable_patterns.c -@@ -93,7 +93,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - -@@ -111,10 +111,10 @@ static void write_ivf_frame_header(FILE *outfile, - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - --static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3}; -+static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; - - int main(int argc, char **argv) { - FILE *infile, *outfile[VPX_TS_MAX_LAYERS]; -@@ -129,8 +129,8 @@ int main(int argc, char **argv) { - int got_data; - int flags = 0; - int i; -- int pts = 0; // PTS starts at 0 -- int frame_duration = 1; // 1 timebase tick per frame -+ int pts = 0; /* PTS starts at 0 */ -+ int frame_duration = 1; /* 1 timebase tick per frame */ - - int layering_mode = 0; - int frames_in_layer[VPX_TS_MAX_LAYERS] = {0}; -@@ -138,7 +138,7 @@ int main(int argc, char **argv) { - int flag_periodicity; - int max_intra_size_pct; - -- // Check usage and arguments -+ /* Check usage and arguments */ - if (argc < 9) - die("Usage: %s " - " ... 
\n", argv[0]); -@@ -150,43 +150,43 @@ int main(int argc, char **argv) { - - if (!sscanf(argv[7], "%d", &layering_mode)) - die ("Invalid mode %s", argv[7]); -- if (layering_mode<0 || layering_mode>8) -- die ("Invalid mode (0..8) %s", argv[7]); -+ if (layering_mode<0 || layering_mode>11) -+ die ("Invalid mode (0..11) %s", argv[7]); - - if (argc != 8+mode_to_num_layers[layering_mode]) - die ("Invalid number of arguments"); - -- if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) -+ if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32)) - die ("Failed to allocate image", width, height); - - printf("Using %s\n",vpx_codec_iface_name(interface)); - -- // Populate encoder configuration -+ /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; - } - -- // Update the default configuration with our settings -+ /* Update the default configuration with our settings */ - cfg.g_w = width; - cfg.g_h = height; - -- // Timebase format e.g. 30fps: numerator=1, demoninator=30 -+ /* Timebase format e.g. 30fps: numerator=1, demoninator=30 */ - if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) - die ("Invalid timebase numerator %s", argv[5]); - if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) - die ("Invalid timebase denominator %s", argv[6]); - - for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) -- if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) -+ if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8])) - die ("Invalid data rate %s", argv[i]); - -- // Real time parameters -- cfg.rc_dropframe_thresh = 0; // 30 -+ /* Real time parameters */ -+ cfg.rc_dropframe_thresh = 0; - cfg.rc_end_usage = VPX_CBR; - cfg.rc_resize_allowed = 0; -- cfg.rc_min_quantizer = 8; -+ cfg.rc_min_quantizer = 2; - cfg.rc_max_quantizer = 56; - cfg.rc_undershoot_pct = 100; - cfg.rc_overshoot_pct = 15; -@@ -194,25 +194,44 @@ int main(int argc, char **argv) { - cfg.rc_buf_optimal_sz = 600; - cfg.rc_buf_sz = 1000; - -- // Enable error resilient mode -+ /* Enable error resilient mode */ - cfg.g_error_resilient = 1; - cfg.g_lag_in_frames = 0; - cfg.kf_mode = VPX_KF_DISABLED; - -- // Disable automatic keyframe placement -- cfg.kf_min_dist = cfg.kf_max_dist = 1000; -+ /* Disable automatic keyframe placement */ -+ cfg.kf_min_dist = cfg.kf_max_dist = 3000; - -- // Temporal scaling parameters: -- // NOTE: The 3 prediction frames cannot be used interchangeably due to -- // differences in the way they are handled throughout the code. The -- // frames should be allocated to layers in the order LAST, GF, ARF. -- // Other combinations work, but may produce slightly inferior results. -+ /* Default setting for bitrate: used in special case of 1 layer (case 0). */ -+ cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; -+ -+ /* Temporal scaling parameters: */ -+ /* NOTE: The 3 prediction frames cannot be used interchangeably due to -+ * differences in the way they are handled throughout the code. The -+ * frames should be allocated to layers in the order LAST, GF, ARF. -+ * Other combinations work, but may produce slightly inferior results. -+ */ - switch (layering_mode) - { -- - case 0: - { -- // 2-layers, 2-frame period -+ /* 1-layer */ -+ int ids[1] = {0}; -+ cfg.ts_number_layers = 1; -+ cfg.ts_periodicity = 1; -+ cfg.ts_rate_decimator[0] = 1; -+ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); -+ -+ flag_periodicity = cfg.ts_periodicity; -+ -+ // Update L only. 
-+ layer_flags[0] = VPX_EFLAG_FORCE_KF | -+ VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -+ break; -+ } -+ case 1: -+ { -+ /* 2-layers, 2-frame period */ - int ids[2] = {0,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 2; -@@ -222,14 +241,14 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - #if 1 -- // 0=L, 1=GF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_REF_ARF; - #else -- // 0=L, 1=GF, Intra-layer prediction disabled -+ /* 0=L, 1=GF, Intra-layer prediction disabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; -@@ -239,9 +258,9 @@ int main(int argc, char **argv) { - break; - } - -- case 1: -+ case 2: - { -- // 2-layers, 3-frame period -+ /* 2-layers, 3-frame period */ - int ids[3] = {0,1,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 3; -@@ -251,7 +270,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -262,9 +281,9 @@ int main(int argc, char **argv) { - break; - } - -- case 2: -+ case 3: - { -- // 3-layers, 6-frame period -+ /* 3-layers, 6-frame period */ - int ids[6] = {0,2,2,1,2,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 6; -@@ -275,7 +294,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -288,9 +307,9 @@ int main(int argc, char **argv) { - break; - } - -- case 3: -+ case 4: - { -- // 3-layers, 4-frame period -+ /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; -@@ -301,7 +320,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -315,9 +334,9 @@ int main(int argc, char **argv) { - break; - } - -- case 4: -+ case 5: - { -- // 3-layers, 4-frame period -+ /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; -@@ -328,8 +347,9 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, -- // disabled in layer 2 -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, -+ * disabled in layer 2 -+ */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -342,9 +362,9 @@ int main(int argc, char **argv) { - break; - } - -- case 5: -+ case 6: - { -- // 3-layers, 4-frame period -+ /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 
4; -@@ -355,7 +375,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -366,11 +386,11 @@ int main(int argc, char **argv) { - break; - } - -- case 6: -+ case 7: - { -- // NOTE: Probably of academic interest only -+ /* NOTE: Probably of academic interest only */ - -- // 5-layers, 16-frame period -+ /* 5-layers, 16-frame period */ - int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; - cfg.ts_number_layers = 5; - cfg.ts_periodicity = 16; -@@ -405,9 +425,9 @@ int main(int argc, char **argv) { - break; - } - -- case 7: -+ case 8: - { -- // 2-layers -+ /* 2-layers, with sync point at first frame of layer 1. */ - int ids[2] = {0,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 2; -@@ -417,30 +437,49 @@ int main(int argc, char **argv) { - - flag_periodicity = 8; - -- // 0=L, 1=GF -+ /* 0=L, 1=GF */ -+ // ARF is used as predictor for all frames, and is only updated on -+ // key frame. Sync point every 8 frames. -+ -+ // Layer 0: predict from L and ARF, update L and G. - layer_flags[0] = VPX_EFLAG_FORCE_KF | -- VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -- layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; -- layer_flags[2] = -- layer_flags[4] = -- layer_flags[6] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -- layer_flags[3] = -- layer_flags[5] = VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; -- layer_flags[7] = VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | -- VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_ARF; -+ -+ // Layer 1: sync point: predict from L and ARF, and update G. -+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ARF; -+ -+ // Layer 0, predict from L and ARF, update L. -+ layer_flags[2] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF; -+ -+ // Layer 1: predict from L, G and ARF, and update G. -+ layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; -+ -+ // Layer 0 -+ layer_flags[4] = layer_flags[2]; -+ -+ // Layer 1 -+ layer_flags[5] = layer_flags[3]; -+ -+ // Layer 0 -+ layer_flags[6] = layer_flags[4]; -+ -+ // Layer 1 -+ layer_flags[7] = layer_flags[5]; - break; - } - -- case 8: -- default: -+ case 9: - { -- // 3-layers -+ /* 3-layers */ -+ // Sync points for layer 1 and 2 every 8 frames. -+ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; -@@ -451,7 +490,7 @@ int main(int argc, char **argv) { - - flag_periodicity = 8; - -- // 0=L, 1=GF, 2=ARF -+ /* 0=L, 1=GF, 2=ARF */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -470,13 +509,109 @@ int main(int argc, char **argv) { - VP8_EFLAG_NO_UPD_ENTROPY; - break; - } -+ case 10: -+ { -+ // 3-layers structure where ARF is used as predictor for all frames, -+ // and is only updated on key frame. -+ // Sync points for layer 1 and 2 every 8 frames. 
-+ -+ int ids[4] = {0,2,1,2}; -+ cfg.ts_number_layers = 3; -+ cfg.ts_periodicity = 4; -+ cfg.ts_rate_decimator[0] = 4; -+ cfg.ts_rate_decimator[1] = 2; -+ cfg.ts_rate_decimator[2] = 1; -+ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); -+ -+ flag_periodicity = 8; -+ -+ /* 0=L, 1=GF, 2=ARF */ -+ -+ // Layer 0: predict from L and ARF; update L and G. -+ layer_flags[0] = VPX_EFLAG_FORCE_KF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF; -+ -+ // Layer 2: sync point: predict from L and ARF; update none. -+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ENTROPY; -+ -+ // Layer 1: sync point: predict from L and ARF; update G. -+ layer_flags[2] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[3] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ENTROPY; -+ -+ // Layer 0: predict from L and ARF; update L. -+ layer_flags[4] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[5] = layer_flags[3]; -+ -+ // Layer 1: predict from L, G, ARF; update G. -+ layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[7] = layer_flags[3]; -+ break; -+ } -+ case 11: -+ default: -+ { -+ // 3-layers structure as in case 10, but no sync/refresh points for -+ // layer 1 and 2. -+ -+ int ids[4] = {0,2,1,2}; -+ cfg.ts_number_layers = 3; -+ cfg.ts_periodicity = 4; -+ cfg.ts_rate_decimator[0] = 4; -+ cfg.ts_rate_decimator[1] = 2; -+ cfg.ts_rate_decimator[2] = 1; -+ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); -+ -+ flag_periodicity = 8; -+ -+ /* 0=L, 1=GF, 2=ARF */ -+ -+ // Layer 0: predict from L and ARF; update L. -+ layer_flags[0] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF; -+ layer_flags[4] = layer_flags[0]; -+ -+ // Layer 1: predict from L, G, ARF; update G. -+ layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST; -+ layer_flags[6] = layer_flags[2]; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[1] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ENTROPY; -+ layer_flags[3] = layer_flags[1]; -+ layer_flags[5] = layer_flags[1]; -+ layer_flags[7] = layer_flags[1]; -+ break; -+ } - } - -- // Open input file -+ /* Open input file */ - if(!(infile = fopen(argv[1], "rb"))) - die("Failed to open %s for reading", argv[1]); - -- // Open an output file for each stream -+ /* Open an output file for each stream */ - for (i=0; i(_tokenPartitions)); - - frame_avail = 1; - while (frame_avail || got_data) { -@@ -517,8 +651,8 @@ int main(int argc, char **argv) { - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - -- // Reset KF flag -- if (layering_mode != 6) -+ /* Reset KF flag */ -+ if (layering_mode != 7) - layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; - - got_data = 0; -@@ -530,29 +664,25 @@ int main(int argc, char **argv) { - idata.frame.buf, 1, pkt->data.frame.sz, -- outfile[i])); -+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -+ outfile[i]); - frames_in_layer[i]++; - } - break; - default: - break; - } -- printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT -- && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? 
"K":"."); -- fflush (stdout); - } - frame_cnt++; - pts += frame_duration; - } -- printf ("\n"); - fclose (infile); - - printf ("Processed %d frames.\n",frame_cnt-1); - if (vpx_codec_destroy(&codec)) - die_codec (&codec, "Failed to destroy codec"); - -- // Try to rewrite the output file headers with the actual frame count -+ /* Try to rewrite the output file headers with the actual frame count */ - for (i=0; inum; - mr_cfg.mr_down_sampling_factor.den = dsf->den; - -+ /* Force Key-frame synchronization. Namely, encoder at higher -+ * resolution always use the same frame_type chosen by the -+ * lowest-resolution encoder. -+ */ -+ if(mr_cfg.mr_encoder_id) -+ cfg->kf_mode = VPX_KF_DISABLED; -+ - ctx->iface = iface; - ctx->name = iface->name; - ctx->priv = NULL; -@@ -126,8 +133,20 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, - - if (res) - { -- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; -+ const char *error_detail = -+ ctx->priv ? ctx->priv->err_detail : NULL; -+ /* Destroy current ctx */ -+ ctx->err_detail = error_detail; - vpx_codec_destroy(ctx); -+ -+ /* Destroy already allocated high-level ctx */ -+ while (i) -+ { -+ ctx--; -+ ctx->err_detail = error_detail; -+ vpx_codec_destroy(ctx); -+ i--; -+ } - } - - if (ctx->priv) -diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h -index 0af631c..a3c95d2 100644 ---- a/vpx/vp8cx.h -+++ b/vpx/vp8cx.h -@@ -204,8 +204,8 @@ typedef struct vpx_roi_map - unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */ - unsigned int rows; /**< number of rows */ - unsigned int cols; /**< number of cols */ -- int delta_q[4]; /**< quantizer delta [-64, 64] off baseline for regions with id between 0 and 3*/ -- int delta_lf[4]; /**< loop filter strength delta [-32, 32] for regions with id between 0 and 3 */ -+ int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/ -+ int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */ - unsigned int static_threshold[4];/**< threshold for region to be treated as static */ - } vpx_roi_map_t; - -@@ -234,18 +234,6 @@ typedef struct vpx_scaling_mode - VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */ - } vpx_scaling_mode_t; - --/*!\brief VP8 encoding mode -- * -- * This defines VP8 encoding mode -- * -- */ --typedef enum --{ -- VP8_BEST_QUALITY_ENCODING, -- VP8_GOOD_QUALITY_ENCODING, -- VP8_REAL_TIME_ENCODING --} vp8e_encoding_mode; -- - /*!\brief VP8 token partition mode - * - * This defines VP8 partitioning mode for compressed data, i.e., the number of -@@ -298,12 +286,12 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int) --VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions) -+VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */ - - VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int) --VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning) -+VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ - VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int) - - VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) -diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h -index d92e165..243b7a5 100644 ---- a/vpx/vpx_codec.h -+++ b/vpx/vpx_codec.h -@@ 
-49,15 +49,22 @@ extern "C" { - #ifndef DEPRECATED - #if defined(__GNUC__) && __GNUC__ - #define DEPRECATED __attribute__ ((deprecated)) --#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ - #elif defined(_MSC_VER) - #define DEPRECATED --#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ - #else - #define DEPRECATED --#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ - #endif -+#endif /* DEPRECATED */ -+ -+#ifndef DECLSPEC_DEPRECATED -+#if defined(__GNUC__) && __GNUC__ -+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ -+#elif defined(_MSC_VER) -+#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ -+#else -+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ - #endif -+#endif /* DECLSPEC_DEPRECATED */ - - /*!\brief Decorator indicating a function is potentially unused */ - #ifdef UNUSED -diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk -index 427fd0f..ffa123f 100644 ---- a/vpx/vpx_codec.mk -+++ b/vpx/vpx_codec.mk -@@ -11,6 +11,21 @@ - - API_EXPORTS += exports - -+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h -+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h -+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h -+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h -+ -+API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h -+API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h -+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h -+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h -+ -+API_DOC_SRCS-yes += vpx_codec.h -+API_DOC_SRCS-yes += vpx_decoder.h -+API_DOC_SRCS-yes += vpx_encoder.h -+API_DOC_SRCS-yes += vpx_image.h -+ - API_SRCS-yes += src/vpx_decoder.c - API_SRCS-yes += vpx_decoder.h - API_SRCS-yes += src/vpx_encoder.c -@@ -23,3 +38,4 @@ API_SRCS-yes += vpx_codec.mk - API_SRCS-yes += vpx_codec_impl_bottom.h - API_SRCS-yes += vpx_codec_impl_top.h - API_SRCS-yes += vpx_image.h -+API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h -diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h -index 7992cc4..1ccf1c5 100644 ---- a/vpx/vpx_decoder.h -+++ b/vpx/vpx_decoder.h -@@ -113,6 +113,10 @@ extern "C" { - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * -+ * If the library was configured with --disable-multithread, this call -+ * is not thread safe and should be guarded with a lock if being used -+ * in a multithreaded context. -+ * - * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags - * parameter), the storage pointed to by the cfg parameter must be - * kept readable and stable until all memory maps have been set. -diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h -index 239036e..67d9033 100644 ---- a/vpx/vpx_encoder.h -+++ b/vpx/vpx_encoder.h -@@ -655,6 +655,10 @@ extern "C" { - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * -+ * If the library was configured with --disable-multithread, this call -+ * is not thread safe and should be guarded with a lock if being used -+ * in a multithreaded context. -+ * - * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags - * parameter), the storage pointed to by the cfg parameter must be - * kept readable and stable until all memory maps have been set. -diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c -index ebe428d..8ff95a1 100644 ---- a/vpx_ports/arm_cpudetect.c -+++ b/vpx_ports/arm_cpudetect.c -@@ -32,8 +32,33 @@ static int arm_cpu_env_mask(void) - return env && *env ? 
(int)strtol(env, NULL, 0) : ~0; - } - -+#if !CONFIG_RUNTIME_CPU_DETECT - --#if defined(_MSC_VER) -+int arm_cpu_caps(void) -+{ -+ /* This function should actually be a no-op. There is no way to adjust any of -+ * these because the RTCD tables do not exist: the functions are called -+ * statically */ -+ int flags; -+ int mask; -+ if (!arm_cpu_env_flags(&flags)) -+ { -+ return flags; -+ } -+ mask = arm_cpu_env_mask(); -+#if HAVE_EDSP -+ flags |= HAS_EDSP; -+#endif /* HAVE_EDSP */ -+#if HAVE_MEDIA -+ flags |= HAS_MEDIA; -+#endif /* HAVE_MEDIA */ -+#if HAVE_NEON -+ flags |= HAS_NEON; -+#endif /* HAVE_NEON */ -+ return flags & mask; -+} -+ -+#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ - /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ - #define WIN32_LEAN_AND_MEAN - #define WIN32_EXTRA_LEAN -@@ -52,7 +77,7 @@ int arm_cpu_caps(void) - * instructions via their assembled hex code. - * All of these instructions should be essentially nops. - */ --#if defined(HAVE_EDSP) -+#if HAVE_EDSP - if (mask & HAS_EDSP) - { - __try -@@ -66,7 +91,7 @@ int arm_cpu_caps(void) - /*Ignore exception.*/ - } - } --#if defined(HAVE_MEDIA) -+#if HAVE_MEDIA - if (mask & HAS_MEDIA) - __try - { -@@ -79,7 +104,7 @@ int arm_cpu_caps(void) - /*Ignore exception.*/ - } - } --#if defined(HAVE_NEON) -+#if HAVE_NEON - if (mask & HAS_NEON) - { - __try -@@ -93,14 +118,13 @@ int arm_cpu_caps(void) - /*Ignore exception.*/ - } - } --#endif --#endif --#endif -+#endif /* HAVE_NEON */ -+#endif /* HAVE_MEDIA */ -+#endif /* HAVE_EDSP */ - return flags & mask; - } - --#elif defined(__linux__) --#if defined(__ANDROID__) -+#elif defined(__ANDROID__) /* end _MSC_VER */ - #include - - int arm_cpu_caps(void) -@@ -115,19 +139,20 @@ int arm_cpu_caps(void) - mask = arm_cpu_env_mask(); - features = android_getCpuFeatures(); - --#if defined(HAVE_EDSP) -+#if HAVE_EDSP - flags |= HAS_EDSP; --#endif --#if defined(HAVE_MEDIA) -+#endif /* HAVE_EDSP */ -+#if HAVE_MEDIA - flags |= HAS_MEDIA; --#endif --#if defined(HAVE_NEON) -+#endif /* HAVE_MEDIA */ -+#if HAVE_NEON - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - flags |= HAS_NEON; --#endif -+#endif /* HAVE_NEON */ - return flags & mask; - } --#else // !defined(__ANDROID__) -+ -+#elif defined(__linux__) /* end __ANDROID__ */ - #include - - int arm_cpu_caps(void) -@@ -153,27 +178,27 @@ int arm_cpu_caps(void) - char buf[512]; - while (fgets(buf, 511, fin) != NULL) - { --#if defined(HAVE_EDSP) || defined(HAVE_NEON) -+#if HAVE_EDSP || HAVE_NEON - if (memcmp(buf, "Features", 8) == 0) - { - char *p; --#if defined(HAVE_EDSP) -+#if HAVE_EDSP - p=strstr(buf, " edsp"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) - { - flags |= HAS_EDSP; - } --#if defined(HAVE_NEON) -+#if HAVE_NEON - p = strstr(buf, " neon"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) - { - flags |= HAS_NEON; - } --#endif --#endif -+#endif /* HAVE_NEON */ -+#endif /* HAVE_EDSP */ - } --#endif --#if defined(HAVE_MEDIA) -+#endif /* HAVE_EDSP || HAVE_NEON */ -+#if HAVE_MEDIA - if (memcmp(buf, "CPU architecture:",17) == 0){ - int version; - version = atoi(buf+17); -@@ -182,37 +207,13 @@ int arm_cpu_caps(void) - flags |= HAS_MEDIA; - } - } --#endif -+#endif /* HAVE_MEDIA */ - } - fclose(fin); - } - return flags & mask; - } --#endif // defined(__linux__) --#elif !CONFIG_RUNTIME_CPU_DETECT -- --int arm_cpu_caps(void) --{ -- int flags; -- int mask; -- if (!arm_cpu_env_flags(&flags)) -- { -- return flags; -- } -- mask = arm_cpu_env_mask(); --#if defined(HAVE_EDSP) -- flags |= HAS_EDSP; --#endif --#if defined(HAVE_MEDIA) 
-- flags |= HAS_MEDIA; --#endif --#if defined(HAVE_NEON) -- flags |= HAS_NEON; --#endif -- return flags & mask; --} -- --#else -+#else /* end __linux__ */ - #error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ -- "available for your platform. Reconfigure without --enable-runtime-cpu-detect." -+ "available for your platform. Reconfigure with --disable-runtime-cpu-detect." - #endif -diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h -index d3b4fc7..7b6ae4a 100644 ---- a/vpx_ports/asm_offsets.h -+++ b/vpx_ports/asm_offsets.h -@@ -19,11 +19,11 @@ - static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} - - #if INLINE_ASM --#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)); -+#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)) - #define BEGIN int main(void) { - #define END return 0; } - #else --#define DEFINE(sym, val) int sym = val; -+#define DEFINE(sym, val) const int sym = val - #define BEGIN - #define END - #endif -diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm -index 306e235..efad1a5 100644 ---- a/vpx_ports/emms.asm -+++ b/vpx_ports/emms.asm -@@ -12,14 +12,14 @@ - %include "vpx_ports/x86_abi_support.asm" - - section .text -- global sym(vpx_reset_mmx_state) -+global sym(vpx_reset_mmx_state) PRIVATE - sym(vpx_reset_mmx_state): - emms - ret - - - %ifidn __OUTPUT_FORMAT__,x64 --global sym(vpx_winx64_fldcw) -+global sym(vpx_winx64_fldcw) PRIVATE - sym(vpx_winx64_fldcw): - sub rsp, 8 - mov [rsp], rcx ; win x64 specific -@@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw): - ret - - --global sym(vpx_winx64_fstcw) -+global sym(vpx_winx64_fstcw) PRIVATE - sym(vpx_winx64_fstcw): - sub rsp, 8 - fstcw [rsp] -diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h -index 0e52368..dec28d5 100644 ---- a/vpx_ports/mem_ops.h -+++ b/vpx_ports/mem_ops.h -@@ -145,27 +145,27 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) - - #undef mem_get_sbe16 - #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) --mem_get_s_generic(be, 16); -+mem_get_s_generic(be, 16) - - #undef mem_get_sbe24 - #define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) --mem_get_s_generic(be, 24); -+mem_get_s_generic(be, 24) - - #undef mem_get_sbe32 - #define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) --mem_get_s_generic(be, 32); -+mem_get_s_generic(be, 32) - - #undef mem_get_sle16 - #define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) --mem_get_s_generic(le, 16); -+mem_get_s_generic(le, 16) - - #undef mem_get_sle24 - #define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) --mem_get_s_generic(le, 24); -+mem_get_s_generic(le, 24) - - #undef mem_get_sle32 - #define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) --mem_get_s_generic(le, 32); -+mem_get_s_generic(le, 32) - - #undef mem_put_be16 - #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) -diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h -index 0fbba65..fca653a 100644 ---- a/vpx_ports/mem_ops_aligned.h -+++ b/vpx_ports/mem_ops_aligned.h -@@ -99,51 +99,51 @@ - - #undef mem_get_be16_aligned - #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) --mem_get_be_aligned_generic(16); -+mem_get_be_aligned_generic(16) - - #undef mem_get_be32_aligned - #define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) --mem_get_be_aligned_generic(32); -+mem_get_be_aligned_generic(32) - - #undef mem_get_le16_aligned - #define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) --mem_get_le_aligned_generic(16); -+mem_get_le_aligned_generic(16) - 
- #undef mem_get_le32_aligned - #define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) --mem_get_le_aligned_generic(32); -+mem_get_le_aligned_generic(32) - - #undef mem_get_sbe16_aligned - #define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) --mem_get_sbe_aligned_generic(16); -+mem_get_sbe_aligned_generic(16) - - #undef mem_get_sbe32_aligned - #define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) --mem_get_sbe_aligned_generic(32); -+mem_get_sbe_aligned_generic(32) - - #undef mem_get_sle16_aligned - #define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) --mem_get_sle_aligned_generic(16); -+mem_get_sle_aligned_generic(16) - - #undef mem_get_sle32_aligned - #define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) --mem_get_sle_aligned_generic(32); -+mem_get_sle_aligned_generic(32) - - #undef mem_put_be16_aligned - #define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) --mem_put_be_aligned_generic(16); -+mem_put_be_aligned_generic(16) - - #undef mem_put_be32_aligned - #define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) --mem_put_be_aligned_generic(32); -+mem_put_be_aligned_generic(32) - - #undef mem_put_le16_aligned - #define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) --mem_put_le_aligned_generic(16); -+mem_put_le_aligned_generic(16) - - #undef mem_put_le32_aligned - #define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) --mem_put_le_aligned_generic(32); -+mem_put_le_aligned_generic(32) - - #undef mem_get_ne_aligned_generic - #undef mem_get_se_aligned_generic -diff --git a/vpx_ports/vpx_ports.mk b/vpx_ports/vpx_ports.mk -new file mode 100644 -index 0000000..e6cb52f ---- /dev/null -+++ b/vpx_ports/vpx_ports.mk -@@ -0,0 +1,26 @@ -+## -+## Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+## -+## Use of this source code is governed by a BSD-style license -+## that can be found in the LICENSE file in the root of the source -+## tree. An additional intellectual property rights grant can be found -+## in the file PATENTS. All contributing project authors may -+## be found in the AUTHORS file in the root of the source tree. -+## -+ -+ -+PORTS_SRCS-yes += vpx_ports.mk -+ -+PORTS_SRCS-$(BUILD_LIBVPX) += asm_offsets.h -+PORTS_SRCS-$(BUILD_LIBVPX) += mem.h -+PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h -+ -+ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) -+PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm -+PORTS_SRCS-$(BUILD_LIBVPX) += x86.h -+PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm -+PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c -+endif -+ -+PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c -+PORTS_SRCS-$(ARCH_ARM) += arm.h -diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h -index 1341c7f..9dd8c4b 100644 ---- a/vpx_ports/x86.h -+++ b/vpx_ports/x86.h -@@ -162,7 +162,7 @@ x86_readtsc(void) - return tsc; - #else - #if ARCH_X86_64 -- return __rdtsc(); -+ return (unsigned int)__rdtsc(); - #else - __asm rdtsc; - #endif -diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm -index cef6a0b..0c9fe37 100644 ---- a/vpx_ports/x86_abi_support.asm -+++ b/vpx_ports/x86_abi_support.asm -@@ -88,12 +88,41 @@ - %define sym(x) x - %elifidn __OUTPUT_FORMAT__,elf64 - %define sym(x) x -+%elifidn __OUTPUT_FORMAT__,elfx32 -+%define sym(x) x - %elifidn __OUTPUT_FORMAT__,x64 - %define sym(x) x - %else - %define sym(x) _ %+ x - %endif - -+; PRIVATE -+; Macro for the attribute to hide a global symbol for the target ABI. 
-+; This is only active if CHROMIUM is defined. -+; -+; Chromium doesn't like exported global symbols due to symbol clashing with -+; plugins among other things. -+; -+; Requires Chromium's patched copy of yasm: -+; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 -+; http://www.tortall.net/projects/yasm/ticket/236 -+; -+%ifdef CHROMIUM -+ %ifidn __OUTPUT_FORMAT__,elf32 -+ %define PRIVATE :hidden -+ %elifidn __OUTPUT_FORMAT__,elf64 -+ %define PRIVATE :hidden -+ %elifidn __OUTPUT_FORMAT__,elfx32 -+ %define PRIVATE :hidden -+ %elifidn __OUTPUT_FORMAT__,x64 -+ %define PRIVATE -+ %else -+ %define PRIVATE :private_extern -+ %endif -+%else -+ %define PRIVATE -+%endif -+ - ; arg() - ; Return the address specification of the given argument - ; -@@ -181,7 +210,16 @@ - %endmacro - %endif - %endif -- %define HIDDEN_DATA(x) x -+ -+ %ifdef CHROMIUM -+ %ifidn __OUTPUT_FORMAT__,macho32 -+ %define HIDDEN_DATA(x) x:private_extern -+ %else -+ %define HIDDEN_DATA(x) x -+ %endif -+ %else -+ %define HIDDEN_DATA(x) x -+ %endif - %else - %macro GET_GOT 1 - %endmacro -@@ -189,6 +227,9 @@ - %ifidn __OUTPUT_FORMAT__,elf64 - %define WRT_PLT wrt ..plt - %define HIDDEN_DATA(x) x:data hidden -+ %elifidn __OUTPUT_FORMAT__,elfx32 -+ %define WRT_PLT wrt ..plt -+ %define HIDDEN_DATA(x) x:data hidden - %else - %define HIDDEN_DATA(x) x - %endif -@@ -330,5 +371,8 @@ section .text - %elifidn __OUTPUT_FORMAT__,elf64 - section .note.GNU-stack noalloc noexec nowrite progbits - section .text -+%elifidn __OUTPUT_FORMAT__,elfx32 -+section .note.GNU-stack noalloc noexec nowrite progbits -+section .text - %endif - -diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c -index 7529fc6..eabd495 100644 ---- a/vpx_scale/arm/neon/yv12extend_arm.c -+++ b/vpx_scale/arm/neon/yv12extend_arm.c -@@ -8,18 +8,14 @@ - * be found in the AUTHORS file in the root of the source tree. 
- */ - -+#include "./vpx_rtcd.h" - --#include "vpx_scale/yv12config.h" --#include "vpx_mem/vpx_mem.h" --#include "vpx_scale/vpxscale.h" -+extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc, -+ struct yv12_buffer_config *dst_ybc); - --extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, -- YV12_BUFFER_CONFIG *dst_ybc); -+void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc, -+ struct yv12_buffer_config *dst_ybc) { -+ vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); - --void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, -- YV12_BUFFER_CONFIG *dst_ybc) --{ -- vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); -- -- vp8_yv12_extend_frame_borders_neon(dst_ybc); -+ vp8_yv12_extend_frame_borders_neon(dst_ybc); - } -diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c -index 4468e9d..c116740 100644 ---- a/vpx_scale/generic/bicubic_scaler.c -+++ b/vpx_scale/generic/bicubic_scaler.c -@@ -46,245 +46,229 @@ static float a = -0.6; - // 3 2 - // C0 = a*t - a*t - // --static short c0_fixed(unsigned int t) --{ -- // put t in Q16 notation -- unsigned short v1, v2; -- -- // Q16 -- v1 = (a_i * t) >> 16; -- v1 = (v1 * t) >> 16; -- -- // Q16 -- v2 = (a_i * t) >> 16; -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- -- // Q12 -- return -((v1 - v2) >> 4); -+static short c0_fixed(unsigned int t) { -+ // put t in Q16 notation -+ unsigned short v1, v2; -+ -+ // Q16 -+ v1 = (a_i * t) >> 16; -+ v1 = (v1 * t) >> 16; -+ -+ // Q16 -+ v2 = (a_i * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ -+ // Q12 -+ return -((v1 - v2) >> 4); - } - - // 2 3 - // C1 = a*t + (3-2*a)*t - (2-a)*t - // --static short c1_fixed(unsigned int t) --{ -- unsigned short v1, v2, v3; -- unsigned short two, three; -- -- // Q16 -- v1 = (a_i * t) >> 16; -- -- // Q13 -- two = 2 << 13; -- v2 = two - (a_i >> 3); -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- -- // Q13 -- three = 3 << 13; -- v3 = three - (2 * (a_i >> 3)); -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -- -- // Q12 -- return (((v1 >> 3) - v2 + v3) >> 1); -+static short c1_fixed(unsigned int t) { -+ unsigned short v1, v2, v3; -+ unsigned short two, three; -+ -+ // Q16 -+ v1 = (a_i * t) >> 16; -+ -+ // Q13 -+ two = 2 << 13; -+ v2 = two - (a_i >> 3); -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ -+ // Q13 -+ three = 3 << 13; -+ v3 = three - (2 * (a_i >> 3)); -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ -+ // Q12 -+ return (((v1 >> 3) - v2 + v3) >> 1); - - } - - // 2 3 - // C2 = 1 - (3-a)*t + (2-a)*t - // --static short c2_fixed(unsigned int t) --{ -- unsigned short v1, v2, v3; -- unsigned short two, three; -- -- // Q13 -- v1 = 1 << 13; -- -- // Q13 -- three = 3 << 13; -- v2 = three - (a_i >> 3); -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- -- // Q13 -- two = 2 << 13; -- v3 = two - (a_i >> 3); -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -- -- // Q12 -- return (v1 - v2 + v3) >> 1; -+static short c2_fixed(unsigned int t) { -+ unsigned short v1, v2, v3; -+ unsigned short two, three; -+ -+ // Q13 -+ v1 = 1 << 13; -+ -+ // Q13 -+ three = 3 << 13; -+ v2 = three - (a_i >> 3); -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ -+ // Q13 -+ two = 2 << 13; -+ v3 = two - (a_i >> 3); -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ -+ // Q12 -+ return (v1 - v2 + v3) >> 1; - } - - // 2 3 - // C3 = a*t - 2*a*t + a*t - // --static short c3_fixed(unsigned int t) --{ -- int v1, 
v2, v3; -+static short c3_fixed(unsigned int t) { -+ int v1, v2, v3; - -- // Q16 -- v1 = (a_i * t) >> 16; -+ // Q16 -+ v1 = (a_i * t) >> 16; - -- // Q15 -- v2 = 2 * (a_i >> 1); -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -+ // Q15 -+ v2 = 2 * (a_i >> 1); -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; - -- // Q16 -- v3 = (a_i * t) >> 16; -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -+ // Q16 -+ v3 = (a_i * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; - -- // Q12 -- return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); -+ // Q12 -+ return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); - } - #else - // 3 2 - // C0 = -a*t + a*t - // --float C0(float t) --{ -- return -a * t * t * t + a * t * t; -+float C0(float t) { -+ return -a * t * t * t + a * t * t; - } - - // 2 3 - // C1 = -a*t + (2*a+3)*t - (a+2)*t - // --float C1(float t) --{ -- return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -+float C1(float t) { -+ return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; - } - - // 2 3 - // C2 = 1 - (a+3)*t + (a+2)*t - // --float C2(float t) --{ -- return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -+float C2(float t) { -+ return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; - } - - // 2 3 - // C3 = a*t - 2*a*t + a*t - // --float C3(float t) --{ -- return a * t * t * t - 2.0f * a * t * t + a * t; -+float C3(float t) { -+ return a * t * t * t - 2.0f * a * t * t + a * t; - } - #endif - - #if 0 --int compare_real_fixed() --{ -- int i, errors = 0; -- float mult = 1.0 / 10000.0; -- unsigned int fixed_mult = mult * 4294967296;//65536; -- unsigned int phase_offset_int; -- float phase_offset_real; -- -- for (i = 0; i < 10000; i++) -- { -- int fixed0, fixed1, fixed2, fixed3, fixed_total; -- int real0, real1, real2, real3, real_total; -- -- phase_offset_real = (float)i * mult; -- phase_offset_int = (fixed_mult * i) >> 16; -+int compare_real_fixed() { -+ int i, errors = 0; -+ float mult = 1.0 / 10000.0; -+ unsigned int fixed_mult = mult * 4294967296;// 65536; -+ unsigned int phase_offset_int; -+ float phase_offset_real; -+ -+ for (i = 0; i < 10000; i++) { -+ int fixed0, fixed1, fixed2, fixed3, fixed_total; -+ int real0, real1, real2, real3, real_total; -+ -+ phase_offset_real = (float)i * mult; -+ phase_offset_int = (fixed_mult * i) >> 16; - // phase_offset_int = phase_offset_real * 65536; - -- fixed0 = c0_fixed(phase_offset_int); -- real0 = C0(phase_offset_real) * 4096.0; -+ fixed0 = c0_fixed(phase_offset_int); -+ real0 = C0(phase_offset_real) * 4096.0; - -- if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) -- errors++; -+ if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) -+ errors++; - -- fixed1 = c1_fixed(phase_offset_int); -- real1 = C1(phase_offset_real) * 4096.0; -+ fixed1 = c1_fixed(phase_offset_int); -+ real1 = C1(phase_offset_real) * 4096.0; - -- if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) -- errors++; -+ if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) -+ errors++; - -- fixed2 = c2_fixed(phase_offset_int); -- real2 = C2(phase_offset_real) * 4096.0; -+ fixed2 = c2_fixed(phase_offset_int); -+ real2 = C2(phase_offset_real) * 4096.0; - -- if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) -- errors++; -+ if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) -+ errors++; - -- fixed3 = c3_fixed(phase_offset_int); -- real3 = C3(phase_offset_real) * 4096.0; -+ fixed3 = c3_fixed(phase_offset_int); -+ real3 = 
C3(phase_offset_real) * 4096.0; - -- if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) -- errors++; -+ if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) -+ errors++; - -- fixed_total = fixed0 + fixed1 + fixed2 + fixed3; -- real_total = real0 + real1 + real2 + real3; -+ fixed_total = fixed0 + fixed1 + fixed2 + fixed3; -+ real_total = real0 + real1 + real2 + real3; - -- if ((fixed_total > 4097) || (fixed_total < 4094)) -- errors ++; -+ if ((fixed_total > 4097) || (fixed_total < 4094)) -+ errors++; - -- if ((real_total > 4097) || (real_total < 4095)) -- errors ++; -- } -+ if ((real_total > 4097) || (real_total < 4095)) -+ errors++; -+ } - -- return errors; -+ return errors; - } - #endif - - // Find greatest common denominator between two integers. Method used here is - // slow compared to Euclid's algorithm, but does not require any division. --int gcd(int a, int b) --{ -- // Problem with this algorithm is that if a or b = 0 this function -- // will never exit. Don't want to return 0 because any computation -- // that was based on a common denoninator and tried to reduce by -- // dividing by 0 would fail. Best solution that could be thought of -- // would to be fail by returing a 1; -- if (a <= 0 || b <= 0) -- return 1; -- -- while (a != b) -- { -- if (b > a) -- b = b - a; -- else -- { -- int tmp = a;//swap large and -- a = b; //small -- b = tmp; -- } -+int gcd(int a, int b) { -+ // Problem with this algorithm is that if a or b = 0 this function -+ // will never exit. Don't want to return 0 because any computation -+ // that was based on a common denoninator and tried to reduce by -+ // dividing by 0 would fail. Best solution that could be thought of -+ // would to be fail by returing a 1; -+ if (a <= 0 || b <= 0) -+ return 1; -+ -+ while (a != b) { -+ if (b > a) -+ b = b - a; -+ else { -+ int tmp = a;// swap large and -+ a = b; // small -+ b = tmp; - } -+ } - -- return b; -+ return b; - } - --void bicubic_coefficient_init() --{ -- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -- g_first_time = 0; -+void bicubic_coefficient_init() { -+ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -+ g_first_time = 0; - } - --void bicubic_coefficient_destroy() --{ -- if (!g_first_time) -- { -- vpx_free(g_b_scaler.l_w); -+void bicubic_coefficient_destroy() { -+ if (!g_first_time) { -+ vpx_free(g_b_scaler.l_w); - -- vpx_free(g_b_scaler.l_h); -+ vpx_free(g_b_scaler.l_h); - -- vpx_free(g_b_scaler.l_h_uv); -+ vpx_free(g_b_scaler.l_h_uv); - -- vpx_free(g_b_scaler.c_w); -+ vpx_free(g_b_scaler.c_w); - -- vpx_free(g_b_scaler.c_h); -+ vpx_free(g_b_scaler.c_h); - -- vpx_free(g_b_scaler.c_h_uv); -+ vpx_free(g_b_scaler.c_h_uv); - -- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -- } -+ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -+ } - } - - // Create the coeffients that will be used for the cubic interpolation. -@@ -292,311 +276,294 @@ void bicubic_coefficient_destroy() - // regimes the phase offsets will be different. There are 4 coefficents - // for each point, two on each side. The layout is that there are the - // 4 coefficents for each phase in the array and then the next phase. 
--int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) --{ -- int i; -+int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) { -+ int i; - #ifdef FIXED_POINT -- int phase_offset_int; -- unsigned int fixed_mult; -- int product_val = 0; -+ int phase_offset_int; -+ unsigned int fixed_mult; -+ int product_val = 0; - #else -- float phase_offset; -+ float phase_offset; - #endif -- int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; -+ int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; - -- if (g_first_time) -- bicubic_coefficient_init(); -+ if (g_first_time) -+ bicubic_coefficient_init(); - - -- // check to see if the coefficents have already been set up correctly -- if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) -- && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) -- return 0; -+ // check to see if the coefficents have already been set up correctly -+ if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) -+ && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) -+ return 0; - -- g_b_scaler.in_width = in_width; -- g_b_scaler.in_height = in_height; -- g_b_scaler.out_width = out_width; -- g_b_scaler.out_height = out_height; -+ g_b_scaler.in_width = in_width; -+ g_b_scaler.in_height = in_height; -+ g_b_scaler.out_width = out_width; -+ g_b_scaler.out_height = out_height; - -- // Don't want to allow crazy scaling, just try and prevent a catastrophic -- // failure here. Want to fail after setting the member functions so if -- // if the scaler is called the member functions will not scale. -- if (out_width <= 0 || out_height <= 0) -- return -1; -+ // Don't want to allow crazy scaling, just try and prevent a catastrophic -+ // failure here. Want to fail after setting the member functions so if -+ // if the scaler is called the member functions will not scale. -+ if (out_width <= 0 || out_height <= 0) -+ return -1; - -- // reduce in/out width and height ratios using the gcd -- gcd_w = gcd(out_width, in_width); -- gcd_h = gcd(out_height, in_height); -- gcd_h_uv = gcd(out_height, in_height / 2); -+ // reduce in/out width and height ratios using the gcd -+ gcd_w = gcd(out_width, in_width); -+ gcd_h = gcd(out_height, in_height); -+ gcd_h_uv = gcd(out_height, in_height / 2); - -- // the numerator width and height are to be saved in -- // globals so they can be used during the scaling process -- // without having to be recalculated. -- g_b_scaler.nw = out_width / gcd_w; -- d_w = in_width / gcd_w; -+ // the numerator width and height are to be saved in -+ // globals so they can be used during the scaling process -+ // without having to be recalculated. 
-+ g_b_scaler.nw = out_width / gcd_w; -+ d_w = in_width / gcd_w; - -- g_b_scaler.nh = out_height / gcd_h; -- d_h = in_height / gcd_h; -+ g_b_scaler.nh = out_height / gcd_h; -+ d_h = in_height / gcd_h; - -- g_b_scaler.nh_uv = out_height / gcd_h_uv; -- d_h_uv = (in_height / 2) / gcd_h_uv; -+ g_b_scaler.nh_uv = out_height / gcd_h_uv; -+ d_h_uv = (in_height / 2) / gcd_h_uv; - -- // allocate memory for the coefficents -- vpx_free(g_b_scaler.l_w); -+ // allocate memory for the coefficents -+ vpx_free(g_b_scaler.l_w); - -- vpx_free(g_b_scaler.l_h); -+ vpx_free(g_b_scaler.l_h); - -- vpx_free(g_b_scaler.l_h_uv); -+ vpx_free(g_b_scaler.l_h_uv); - -- g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); -- g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); -- g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); -+ g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); -+ g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); -+ g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); - -- vpx_free(g_b_scaler.c_w); -+ vpx_free(g_b_scaler.c_w); - -- vpx_free(g_b_scaler.c_h); -+ vpx_free(g_b_scaler.c_h); - -- vpx_free(g_b_scaler.c_h_uv); -+ vpx_free(g_b_scaler.c_h_uv); - -- g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); -- g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); -- g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); -+ g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); -+ g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); -+ g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); - -- g_b_scaler.hbuf = g_hbuf; -- g_b_scaler.hbuf_uv = g_hbuf_uv; -+ g_b_scaler.hbuf = g_hbuf; -+ g_b_scaler.hbuf_uv = g_hbuf_uv; - -- // Set up polyphase filter taps. This needs to be done before -- // the scaling because of the floating point math required. The -- // coefficients are multiplied by 2^12 so that fixed point math -- // can be used in the main scaling loop. -+ // Set up polyphase filter taps. This needs to be done before -+ // the scaling because of the floating point math required. The -+ // coefficients are multiplied by 2^12 so that fixed point math -+ // can be used in the main scaling loop. 
- #ifdef FIXED_POINT -- fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; -+ fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; - -- product_val = 0; -+ product_val = 0; - -- for (i = 0; i < g_b_scaler.nw; i++) -- { -- if (product_val > g_b_scaler.nw) -- product_val -= g_b_scaler.nw; -+ for (i = 0; i < g_b_scaler.nw; i++) { -+ if (product_val > g_b_scaler.nw) -+ product_val -= g_b_scaler.nw; - -- phase_offset_int = (fixed_mult * product_val) >> 16; -+ phase_offset_int = (fixed_mult * product_val) >> 16; - -- g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int); -- g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int); -- g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int); -- g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int); - -- product_val += d_w; -- } -+ product_val += d_w; -+ } - - -- fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; -+ fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; - -- product_val = 0; -+ product_val = 0; - -- for (i = 0; i < g_b_scaler.nh; i++) -- { -- if (product_val > g_b_scaler.nh) -- product_val -= g_b_scaler.nh; -+ for (i = 0; i < g_b_scaler.nh; i++) { -+ if (product_val > g_b_scaler.nh) -+ product_val -= g_b_scaler.nh; - -- phase_offset_int = (fixed_mult * product_val) >> 16; -+ phase_offset_int = (fixed_mult * product_val) >> 16; - -- g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int); -- g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int); -- g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int); -- g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int); - -- product_val += d_h; -- } -+ product_val += d_h; -+ } - -- fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; -+ fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; - -- product_val = 0; -+ product_val = 0; - -- for (i = 0; i < g_b_scaler.nh_uv; i++) -- { -- if (product_val > g_b_scaler.nh_uv) -- product_val -= g_b_scaler.nh_uv; -+ for (i = 0; i < g_b_scaler.nh_uv; i++) { -+ if (product_val > g_b_scaler.nh_uv) -+ product_val -= g_b_scaler.nh_uv; - -- phase_offset_int = (fixed_mult * product_val) >> 16; -+ phase_offset_int = (fixed_mult * product_val) >> 16; - -- g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int); -- g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int); -- g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int); -- g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int); - -- product_val += d_h_uv; -- } -+ product_val += d_h_uv; -+ } - - #else - -- for (i = 0; i < g_nw; i++) -- { -- phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; -- g_c_w[i*4] = (C3(phase_offset) * 4096.0); -- g_c_w[i*4+1] = (C2(phase_offset) * 4096.0); -- g_c_w[i*4+2] = (C1(phase_offset) * 4096.0); -- g_c_w[i*4+3] = (C0(phase_offset) * 4096.0); -- } -- -- for (i = 0; i < g_nh; i++) -- { -- phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; -- g_c_h[i*4] = (C0(phase_offset) * 4096.0); -- 
g_c_h[i*4+1] = (C1(phase_offset) * 4096.0); -- g_c_h[i*4+2] = (C2(phase_offset) * 4096.0); -- g_c_h[i*4+3] = (C3(phase_offset) * 4096.0); -- } -- -- for (i = 0; i < g_nh_uv; i++) -- { -- phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; -- g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0); -- g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0); -- g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0); -- g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0); -- } -+ for (i = 0; i < g_nw; i++) { -+ phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; -+ g_c_w[i * 4] = (C3(phase_offset) * 4096.0); -+ g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0); -+ g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0); -+ g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0); -+ } -+ -+ for (i = 0; i < g_nh; i++) { -+ phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; -+ g_c_h[i * 4] = (C0(phase_offset) * 4096.0); -+ g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0); -+ g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0); -+ g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0); -+ } -+ -+ for (i = 0; i < g_nh_uv; i++) { -+ phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; -+ g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0); -+ g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0); -+ g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0); -+ g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0); -+ } - - #endif - -- // Create an array that corresponds input lines to output lines. -- // This doesn't require floating point math, but it does require -- // a division and because hardware division is not present that -- // is a call. -- for (i = 0; i < out_width; i++) -- { -- g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; -+ // Create an array that corresponds input lines to output lines. -+ // This doesn't require floating point math, but it does require -+ // a division and because hardware division is not present that -+ // is a call. -+ for (i = 0; i < out_width; i++) { -+ g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; - -- if ((g_b_scaler.l_w[i] + 2) <= in_width) -- g_b_scaler.max_usable_out_width = i; -+ if ((g_b_scaler.l_w[i] + 2) <= in_width) -+ g_b_scaler.max_usable_out_width = i; - -- } -+ } - -- for (i = 0; i < out_height + 1; i++) -- { -- g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; -- g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; -- } -+ for (i = 0; i < out_height + 1; i++) { -+ g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; -+ g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; -+ } - -- return 0; -+ return 0; - } - - int bicubic_scale(int in_width, int in_height, int in_stride, - int out_width, int out_height, int out_stride, -- unsigned char *input_image, unsigned char *output_image) --{ -- short *RESTRICT l_w, * RESTRICT l_h; -- short *RESTRICT c_w, * RESTRICT c_h; -- unsigned char *RESTRICT ip, * RESTRICT op; -- unsigned char *RESTRICT hbuf; -- int h, w, lw, lh; -- int temp_sum; -- int phase_offset_w, phase_offset_h; -- -- c_w = g_b_scaler.c_w; -- c_h = g_b_scaler.c_h; -- -- op = output_image; -- -- l_w = g_b_scaler.l_w; -- l_h = g_b_scaler.l_h; -- -- phase_offset_h = 0; -- -- for (h = 0; h < out_height; h++) -- { -- // select the row to work on -- lh = l_h[h]; -- ip = input_image + (in_stride * lh); -- -- // vp8_filter the row vertically into an temporary buffer. -- // If the phase offset == 0 then all the multiplication -- // is going to result in the output equalling the input. -- // So instead point the temporary buffer to the input. 
-- // Also handle the boundry condition of not being able to -- // filter that last lines. -- if (phase_offset_h && (lh < in_height - 2)) -- { -- hbuf = g_b_scaler.hbuf; -- -- for (w = 0; w < in_width; w++) -- { -- temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride]; -- temp_sum += c_h[phase_offset_h*4+2] * ip[w]; -- temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride]; -- temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride]; -- -- hbuf[w] = temp_sum >> 12; -- } -- } -- else -- hbuf = ip; -- -- // increase the phase offset for the next time around. -- if (++phase_offset_h >= g_b_scaler.nh) -- phase_offset_h = 0; -- -- // now filter and expand it horizontally into the final -- // output buffer -+ unsigned char *input_image, unsigned char *output_image) { -+ short *RESTRICT l_w, * RESTRICT l_h; -+ short *RESTRICT c_w, * RESTRICT c_h; -+ unsigned char *RESTRICT ip, * RESTRICT op; -+ unsigned char *RESTRICT hbuf; -+ int h, w, lw, lh; -+ int temp_sum; -+ int phase_offset_w, phase_offset_h; -+ -+ c_w = g_b_scaler.c_w; -+ c_h = g_b_scaler.c_h; -+ -+ op = output_image; -+ -+ l_w = g_b_scaler.l_w; -+ l_h = g_b_scaler.l_h; -+ -+ phase_offset_h = 0; -+ -+ for (h = 0; h < out_height; h++) { -+ // select the row to work on -+ lh = l_h[h]; -+ ip = input_image + (in_stride * lh); -+ -+ // vp8_filter the row vertically into an temporary buffer. -+ // If the phase offset == 0 then all the multiplication -+ // is going to result in the output equalling the input. -+ // So instead point the temporary buffer to the input. -+ // Also handle the boundry condition of not being able to -+ // filter that last lines. -+ if (phase_offset_h && (lh < in_height - 2)) { -+ hbuf = g_b_scaler.hbuf; -+ -+ for (w = 0; w < in_width; w++) { -+ temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride]; -+ temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w]; -+ temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride]; -+ temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride]; -+ -+ hbuf[w] = temp_sum >> 12; -+ } -+ } else -+ hbuf = ip; -+ -+ // increase the phase offset for the next time around. 
-+ if (++phase_offset_h >= g_b_scaler.nh) -+ phase_offset_h = 0; -+ -+ // now filter and expand it horizontally into the final -+ // output buffer -+ phase_offset_w = 0; -+ -+ for (w = 0; w < out_width; w++) { -+ // get the index to use to expand the image -+ lw = l_w[w]; -+ -+ temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1]; -+ temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw]; -+ temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1]; -+ temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2]; -+ temp_sum = temp_sum >> 12; -+ -+ if (++phase_offset_w >= g_b_scaler.nw) - phase_offset_w = 0; - -- for (w = 0; w < out_width; w++) -- { -- // get the index to use to expand the image -- lw = l_w[w]; -- -- temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1]; -- temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw]; -- temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1]; -- temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2]; -- temp_sum = temp_sum >> 12; -+ // boundry conditions -+ if ((lw + 2) >= in_width) -+ temp_sum = hbuf[lw]; - -- if (++phase_offset_w >= g_b_scaler.nw) -- phase_offset_w = 0; -+ if (lw == 0) -+ temp_sum = hbuf[0]; - -- // boundry conditions -- if ((lw + 2) >= in_width) -- temp_sum = hbuf[lw]; -- -- if (lw == 0) -- temp_sum = hbuf[0]; -- -- op[w] = temp_sum; -- } -- -- op += out_stride; -+ op[w] = temp_sum; - } - -- return 0; -+ op += out_stride; -+ } -+ -+ return 0; - } - --void bicubic_scale_frame_reset() --{ -- g_b_scaler.out_width = 0; -- g_b_scaler.out_height = 0; -+void bicubic_scale_frame_reset() { -+ g_b_scaler.out_width = 0; -+ g_b_scaler.out_height = 0; - } - - void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, -- int new_width, int new_height) --{ -+ int new_width, int new_height) { - -- dst->y_width = new_width; -- dst->y_height = new_height; -- dst->uv_width = new_width / 2; -- dst->uv_height = new_height / 2; -+ dst->y_width = new_width; -+ dst->y_height = new_height; -+ dst->uv_width = new_width / 2; -+ dst->uv_height = new_height / 2; - -- dst->y_stride = dst->y_width; -- dst->uv_stride = dst->uv_width; -+ dst->y_stride = dst->y_width; -+ dst->uv_stride = dst->uv_width; - -- bicubic_scale(src->y_width, src->y_height, src->y_stride, -- new_width, new_height, dst->y_stride, -- src->y_buffer, dst->y_buffer); -+ bicubic_scale(src->y_width, src->y_height, src->y_stride, -+ new_width, new_height, dst->y_stride, -+ src->y_buffer, dst->y_buffer); - -- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -- new_width / 2, new_height / 2, dst->uv_stride, -- src->u_buffer, dst->u_buffer); -+ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -+ new_width / 2, new_height / 2, dst->uv_stride, -+ src->u_buffer, dst->u_buffer); - -- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -- new_width / 2, new_height / 2, dst->uv_stride, -- src->v_buffer, dst->v_buffer); -+ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -+ new_width / 2, new_height / 2, dst->uv_stride, -+ src->v_buffer, dst->v_buffer); - } -diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c -index 9beb162..60c21fb 100644 ---- a/vpx_scale/generic/gen_scalers.c -+++ b/vpx_scale/generic/gen_scalers.c -@@ -34,47 +34,42 @@ - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ --void vp8_horizontal_line_4_5_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 4; i += 4) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char) a; -- des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -- c = src[2] * 154; -- a = src[3]; -- des [2] = (unsigned char)((b * 102 + c + 128) >> 8); -- des [3] = (unsigned char)((c + 102 * a + 128) >> 8); -- b = src[4]; -- des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); -- -- src += 4; -- des += 5; -- } -- -+void vp8_horizontal_line_4_5_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 4; i += 4) { - a = src[0]; - b = src[1]; -- des [0] = (unsigned char)(a); -+ des [0] = (unsigned char) a; - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); -- des [4] = (unsigned char)(a); -+ b = src[4]; -+ des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); -+ -+ src += 4; -+ des += 5; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -+ c = src[2] * 154; -+ a = src[3]; -+ des [2] = (unsigned char)((b * 102 + c + 128) >> 8); -+ des [3] = (unsigned char)((c + 102 * a + 128) >> 8); -+ des [4] = (unsigned char)(a); - - } - -@@ -97,31 +92,31 @@ void vp8_horizontal_line_4_5_scale_c - * the current band. 
- * - ****************************************************************************/ --void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d; -- unsigned char *des = dest; -+void vp8_vertical_band_4_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; - -- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -+ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - -- c = des[dest_pitch*2] * 154; -- d = des[dest_pitch*3]; -+ c = des[dest_pitch * 2] * 154; -+ d = des[dest_pitch * 3]; - -- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - -- /* First line in next band */ -- a = des [dest_pitch * 5]; -- des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); -+ /* First line in next band */ -+ a = des [dest_pitch * 5]; -+ des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); - -- des ++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -144,30 +139,30 @@ void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d; -- unsigned char *des = dest; -+void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; ++i) -- { -- a = des[0]; -- b = des[dest_pitch]; -+ for (i = 0; i < dest_width; ++i) { -+ a = des[0]; -+ b = des[dest_pitch]; - -- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -+ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - -- c = des[dest_pitch*2] * 154; -- d = des[dest_pitch*3]; -+ c = des[dest_pitch * 2] * 154; -+ d = des[dest_pitch * 3]; - -- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - -- /* No other line for interplation of this line, so .. */ -- des[dest_pitch*4] = (unsigned char) d; -+ /* No other line for interplation of this line, so .. 
*/ -+ des[dest_pitch * 4] = (unsigned char) d; - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -190,40 +185,35 @@ void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_p - * - * - ****************************************************************************/ --void vp8_horizontal_line_2_3_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 2; i += 2) -- { -- a = src[0]; -- b = src[1]; -- c = src[2]; -- -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -- -- src += 2; -- des += 3; -- } -- -+void vp8_horizontal_line_2_3_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 2; i += 2) { - a = src[0]; - b = src[1]; -+ c = src[2]; -+ - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [2] = (unsigned char)(b); -+ des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -+ -+ src += 2; -+ des += 3; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -+ des [2] = (unsigned char)(b); - } - - -@@ -246,22 +236,22 @@ void vp8_horizontal_line_2_3_scale_c - * the current band. - * - ****************************************************************************/ --void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -- c = des[dest_pitch*3]; -- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [dest_pitch*2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -- -- des++; -- } -+void vp8_vertical_band_2_3_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ c = des[dest_pitch * 3]; -+ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -+ -+ des++; -+ } - } - - /**************************************************************************** -@@ -284,21 +274,21 @@ void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. 
- * - ****************************************************************************/ --void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; ++i) -- { -- a = des [0]; -- b = des [dest_pitch]; -- -- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [dest_pitch*2] = (unsigned char)(b); -- des++; -- } -+void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; ++i) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ -+ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)(b); -+ des++; -+ } - } - - /**************************************************************************** -@@ -321,49 +311,44 @@ void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_p - * - * - ****************************************************************************/ --void vp8_horizontal_line_3_5_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 3; i += 3) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -- -- c = src[2] ; -- des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -- des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -- -- a = src[3]; -- des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -- -- src += 3; -- des += 5; -- } -- -+void vp8_horizontal_line_3_5_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 3; i += 3) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); -- - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -- c = src[2] ; -+ -+ c = src[2]; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - -- des [4] = (unsigned char)(c); -+ a = src[3]; -+ des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -+ -+ src += 3; -+ des += 5; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ -+ des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -+ c = src[2]; -+ des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -+ des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -+ -+ des [4] = (unsigned char)(c); - } - - /**************************************************************************** -@@ -385,28 +370,28 @@ void vp8_horizontal_line_3_5_scale_c - * the current band. 
- * - ****************************************************************************/ --void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -- des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -- -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -- -- /* First line in next band... */ -- a = des [dest_pitch * 5]; -- des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -- -- des++; -- } -+void vp8_vertical_band_3_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -+ -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -+ -+ /* First line in next band... */ -+ a = des [dest_pitch * 5]; -+ des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -+ -+ des++; -+ } - } - - /**************************************************************************** -@@ -429,28 +414,28 @@ void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -+void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; ++i) -- { -- a = des [0]; -- b = des [dest_pitch]; -+ for (i = 0; i < dest_width; ++i) { -+ a = des [0]; -+ b = des [dest_pitch]; - -- des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -+ des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - -- /* No other line for interplation of this line, so .. */ -- des [ dest_pitch * 4 ] = (unsigned char)(c) ; -+ /* No other line for interplation of this line, so .. 
*/ -+ des [ dest_pitch * 4 ] = (unsigned char)(c); - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -473,46 +458,41 @@ void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_p - * - * - ****************************************************************************/ --void vp8_horizontal_line_3_4_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 3; i += 3) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -- -- c = src[2]; -- des [2] = (unsigned char)((b + c + 1) >> 1); -- -- a = src[3]; -- des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -- -- src += 3; -- des += 4; -- } -- -+void vp8_horizontal_line_3_4_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 3; i += 3) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - -- c = src[2] ; -+ c = src[2]; - des [2] = (unsigned char)((b + c + 1) >> 1); -- des [3] = (unsigned char)(c); -+ -+ a = src[3]; -+ des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -+ -+ src += 3; -+ des += 4; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -+ -+ c = src[2]; -+ des [2] = (unsigned char)((b + c + 1) >> 1); -+ des [3] = (unsigned char)(c); - } - - /**************************************************************************** -@@ -534,27 +514,27 @@ void vp8_horizontal_line_3_4_scale_c - * the current band. - * - ****************************************************************************/ --void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -- -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); -- -- /* First line in next band... */ -- a = des [dest_pitch*4]; -- des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -- -- des++; -- } -+void vp8_vertical_band_3_4_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -+ -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); -+ -+ /* First line in next band... 
*/ -+ a = des [dest_pitch * 4]; -+ des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -+ -+ des++; -+ } - } - - /**************************************************************************** -@@ -577,27 +557,27 @@ void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -+void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; ++i) -- { -- a = des [0]; -- b = des [dest_pitch]; -+ for (i = 0; i < dest_width; ++i) { -+ a = des [0]; -+ b = des [dest_pitch]; - -- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -+ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); - -- /* No other line for interplation of this line, so .. */ -- des [dest_pitch*3] = (unsigned char)(c); -+ /* No other line for interplation of this line, so .. */ -+ des [dest_pitch * 3] = (unsigned char)(c); - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -619,34 +599,29 @@ void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_p - * SPECIAL NOTES : None. - * - ****************************************************************************/ --void vp8_horizontal_line_1_2_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 1; i += 1) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a + b + 1) >> 1); -- src += 1; -- des += 2; -- } -- -+void vp8_horizontal_line_1_2_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 1; i += 1) { - a = src[0]; -+ b = src[1]; - des [0] = (unsigned char)(a); -- des [1] = (unsigned char)(a); -+ des [1] = (unsigned char)((a + b + 1) >> 1); -+ src += 1; -+ des += 2; -+ } -+ -+ a = src[0]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)(a); - } - - /**************************************************************************** -@@ -668,21 +643,21 @@ void vp8_horizontal_line_1_2_scale_c - * the current band. 
- * - ****************************************************************************/ --void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b; -- unsigned char *des = dest; -+void vp8_vertical_band_1_2_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch * 2]; -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch * 2]; - -- des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); -+ des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -705,16 +680,16 @@ void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; ++i) -- { -- des[dest_pitch] = des[0]; -- des++; -- } -+void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; ++i) { -+ des[dest_pitch] = des[0]; -+ des++; -+ } - } - - -@@ -740,67 +715,64 @@ void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_p - * SPECIAL NOTES : None. - * - ****************************************************************************/ --void vp8_horizontal_line_5_4_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width; i += 5) -- { -- a = src[0]; -- b = src[1]; -- c = src[2]; -- d = src[3]; -- e = src[4]; -- -- des[0] = (unsigned char) a; -- des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -- des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -- des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); -- -- src += 5; -- des += 4; -- } -+void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width; i += 5) { -+ a = src[0]; -+ b = src[1]; -+ c = src[2]; -+ d = src[3]; -+ e = src[4]; -+ -+ des[0] = (unsigned char) a; -+ des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -+ des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -+ des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); -+ -+ src += 5; -+ des += 4; -+ } - } - - - - --void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- unsigned char *src = source; -+void vp8_vertical_band_5_4_scale_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ 
unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ unsigned char *src = source; - -- for (i = 0; i < dest_width; i++) -- { -+ for (i = 0; i < dest_width; i++) { - -- a = src[0 * src_pitch]; -- b = src[1 * src_pitch]; -- c = src[2 * src_pitch]; -- d = src[3 * src_pitch]; -- e = src[4 * src_pitch]; -+ a = src[0 * src_pitch]; -+ b = src[1 * src_pitch]; -+ c = src[2 * src_pitch]; -+ d = src[3 * src_pitch]; -+ e = src[4 * src_pitch]; - -- des[0 * dest_pitch] = (unsigned char) a; -- des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -- des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -- des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); -+ des[0 * dest_pitch] = (unsigned char) a; -+ des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -+ des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -+ des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); - -- src ++; -- des ++; -+ src++; -+ des++; - -- } -+ } - } - - -@@ -824,63 +796,60 @@ void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch - * - * - ****************************************************************************/ --void vp8_horizontal_line_5_3_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c, d , e; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width; i += 5) -- { -- a = src[0]; -- b = src[1]; -- c = src[2]; -- d = src[3]; -- e = src[4]; -- -- des[0] = (unsigned char) a; -- des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -- des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); -- -- src += 5; -- des += 3; -- } -+void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width; i += 5) { -+ a = src[0]; -+ b = src[1]; -+ c = src[2]; -+ d = src[3]; -+ e = src[4]; -+ -+ des[0] = (unsigned char) a; -+ des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -+ des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); -+ -+ src += 5; -+ des += 3; -+ } - - } - --void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- unsigned char *src = source; -+void vp8_vertical_band_5_3_scale_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ unsigned char *src = source; - -- for (i = 0; i < dest_width; i++) -- { -+ for (i = 0; i < dest_width; i++) { - -- a = src[0 * src_pitch]; -- b = src[1 * src_pitch]; -- c = src[2 * src_pitch]; -- d = src[3 * src_pitch]; -- e = src[4 * src_pitch]; -+ a = src[0 * src_pitch]; -+ b = src[1 * src_pitch]; -+ c = src[2 * src_pitch]; -+ d = src[3 * src_pitch]; -+ e = src[4 * src_pitch]; - -- des[0 * dest_pitch] = (unsigned char) a; -- des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -- des[2 * 
dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); -+ des[0 * dest_pitch] = (unsigned char) a; -+ des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -+ des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); - -- src ++; -- des ++; -+ src++; -+ des++; - -- } -+ } - } - - /**************************************************************************** -@@ -902,55 +871,52 @@ void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch - * SPECIAL NOTES : None. - * - ****************************************************************************/ --void vp8_horizontal_line_2_1_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width; i += 2) -- { -- a = src[0]; -- des [0] = (unsigned char)(a); -- src += 2; -- des += 1; -- } -- -- -- --} --void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- (void) dest_pitch; -- (void) src_pitch; -- vpx_memcpy(dest, source, dest_width); -+void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width; i += 2) { -+ a = src[0]; -+ des [0] = (unsigned char)(a); -+ src += 2; -+ des += 1; -+ } - } - --void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- int i; -- int temp; -- int width = dest_width; -- -- (void) dest_pitch; -- -- for (i = 0; i < width; i++) -- { -- temp = 8; -- temp += source[i-(int)src_pitch] * 3; -- temp += source[i] * 10; -- temp += source[i+src_pitch] * 3; -- temp >>= 4 ; -- dest[i] = (unsigned char)(temp); -- } -+void vp8_vertical_band_2_1_scale_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ (void) dest_pitch; -+ (void) src_pitch; -+ vpx_memcpy(dest, source, dest_width); -+} - -+void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ int i; -+ int temp; -+ int width = dest_width; -+ -+ (void) dest_pitch; -+ -+ for (i = 0; i < width; i++) { -+ temp = 8; -+ temp += source[i - (int)src_pitch] * 3; -+ temp += source[i] * 10; -+ temp += source[i + src_pitch] * 3; -+ temp >>= 4; -+ dest[i] = (unsigned char)(temp); -+ } - } -diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c -index c02e4ff..7de85ca 100644 ---- a/vpx_scale/generic/vpxscale.c -+++ b/vpx_scale/generic/vpxscale.c -@@ -20,23 +20,22 @@ - /**************************************************************************** - * Header Files - ****************************************************************************/ --#include "vpx_rtcd.h" -+#include "./vpx_rtcd.h" - #include "vpx_mem/vpx_mem.h" - #include "vpx_scale/yv12config.h" - #include "vpx_scale/scale_mode.h" - --typedef struct --{ -- int expanded_frame_width; -- int expanded_frame_height; -+typedef struct { -+ int expanded_frame_width; -+ int 
expanded_frame_height; - -- int HScale; -- int HRatio; -- int VScale; -- int VRatio; -+ int HScale; -+ int HRatio; -+ int VScale; -+ int VRatio; - -- YV12_BUFFER_CONFIG *src_yuv_config; -- YV12_BUFFER_CONFIG *dst_yuv_config; -+ YV12_BUFFER_CONFIG *src_yuv_config; -+ YV12_BUFFER_CONFIG *dst_yuv_config; - - } SCALE_VARS; - -@@ -60,15 +59,14 @@ typedef struct - ****************************************************************************/ - static - void horizontal_line_copy( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -- -- duck_memcpy(dest, source, source_width); -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void) dest_width; -+ -+ duck_memcpy(dest, source, source_width); - } - /**************************************************************************** - * -@@ -90,16 +88,15 @@ void horizontal_line_copy( - ****************************************************************************/ - static - void null_scale( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- (void) dest; -- (void) dest_pitch; -- (void) dest_width; -- -- return; -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ (void) dest; -+ (void) dest_pitch; -+ (void) dest_width; -+ -+ return; - } - - /**************************************************************************** -@@ -127,35 +124,33 @@ void null_scale( - static - void scale1d_2t1_i - ( -- const unsigned char *source, -- int source_step, -- unsigned int source_scale, -- unsigned int source_length, -- unsigned char *dest, -- int dest_step, -- unsigned int dest_scale, -- unsigned int dest_length --) --{ -- unsigned int i, j; -- unsigned int temp; -- int source_pitch = source_step; -- (void) source_length; -- (void) source_scale; -- (void) dest_scale; -- -- source_step *= 2; -- dest[0] = source[0]; -- -- for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) -- { -- temp = 8; -- temp += 3 * source[j-source_pitch]; -- temp += 10 * source[j]; -- temp += 3 * source[j+source_pitch]; -- temp >>= 4; -- dest[i] = (char)(temp); -- } -+ const unsigned char *source, -+ int source_step, -+ unsigned int source_scale, -+ unsigned int source_length, -+ unsigned char *dest, -+ int dest_step, -+ unsigned int dest_scale, -+ unsigned int dest_length -+) { -+ unsigned int i, j; -+ unsigned int temp; -+ int source_pitch = source_step; -+ (void) source_length; -+ (void) source_scale; -+ (void) dest_scale; -+ -+ source_step *= 2; -+ dest[0] = source[0]; -+ -+ for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) { -+ temp = 8; -+ temp += 3 * source[j - source_pitch]; -+ temp += 10 * source[j]; -+ temp += 3 * source[j + source_pitch]; -+ temp >>= 4; -+ dest[i] = (char)(temp); -+ } - } - - /**************************************************************************** -@@ -183,27 +178,26 @@ void scale1d_2t1_i - static - void scale1d_2t1_ps - ( -- const unsigned char *source, -- int source_step, -- unsigned int source_scale, -- unsigned int source_length, -- unsigned char *dest, -- int dest_step, -- unsigned int dest_scale, -- unsigned int dest_length --) --{ -- unsigned int i, j; -- -- (void) source_length; -- (void) source_scale; -- (void) dest_scale; -- -- source_step *= 2; -- j = 0; -- -- for (i = 0; i < dest_length * dest_step; i += dest_step, j += 
source_step) -- dest[i] = source[j]; -+ const unsigned char *source, -+ int source_step, -+ unsigned int source_scale, -+ unsigned int source_length, -+ unsigned char *dest, -+ int dest_step, -+ unsigned int dest_scale, -+ unsigned int dest_length -+) { -+ unsigned int i, j; -+ -+ (void) source_length; -+ (void) source_scale; -+ (void) dest_scale; -+ -+ source_step *= 2; -+ j = 0; -+ -+ for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) -+ dest[i] = source[j]; - } - /**************************************************************************** - * -@@ -230,45 +224,42 @@ void scale1d_2t1_ps - static - void scale1d_c - ( -- const unsigned char *source, -- int source_step, -- unsigned int source_scale, -- unsigned int source_length, -- unsigned char *dest, -- int dest_step, -- unsigned int dest_scale, -- unsigned int dest_length --) --{ -- unsigned int i; -- unsigned int round_value = dest_scale / 2; -- unsigned int left_modifier = dest_scale; -- unsigned int right_modifier = 0; -- unsigned char left_pixel = *source; -- unsigned char right_pixel = *(source + source_step); -- -- (void) source_length; -- -- /* These asserts are needed if there are boundary issues... */ -- /*assert ( dest_scale > source_scale );*/ -- /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ -- -- for (i = 0; i < dest_length * dest_step; i += dest_step) -- { -- dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); -- -- right_modifier += source_scale; -- -- while (right_modifier > dest_scale) -- { -- right_modifier -= dest_scale; -- source += source_step; -- left_pixel = *source; -- right_pixel = *(source + source_step); -- } -- -- left_modifier = dest_scale - right_modifier; -+ const unsigned char *source, -+ int source_step, -+ unsigned int source_scale, -+ unsigned int source_length, -+ unsigned char *dest, -+ int dest_step, -+ unsigned int dest_scale, -+ unsigned int dest_length -+) { -+ unsigned int i; -+ unsigned int round_value = dest_scale / 2; -+ unsigned int left_modifier = dest_scale; -+ unsigned int right_modifier = 0; -+ unsigned char left_pixel = *source; -+ unsigned char right_pixel = *(source + source_step); -+ -+ (void) source_length; -+ -+ /* These asserts are needed if there are boundary issues... 
*/ -+ /*assert ( dest_scale > source_scale );*/ -+ /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ -+ -+ for (i = 0; i < dest_length * dest_step; i += dest_step) { -+ dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); -+ -+ right_modifier += source_scale; -+ -+ while (right_modifier > dest_scale) { -+ right_modifier -= dest_scale; -+ source += source_step; -+ left_pixel = *source; -+ right_pixel = *(source + source_step); - } -+ -+ left_modifier = dest_scale - right_modifier; -+ } - } - - /**************************************************************************** -@@ -304,246 +295,221 @@ void scale1d_c - static - void Scale2D - ( -- /*const*/ -- unsigned char *source, -- int source_pitch, -- unsigned int source_width, -- unsigned int source_height, -- unsigned char *dest, -- int dest_pitch, -- unsigned int dest_width, -- unsigned int dest_height, -- unsigned char *temp_area, -- unsigned char temp_area_height, -- unsigned int hscale, -- unsigned int hratio, -- unsigned int vscale, -- unsigned int vratio, -- unsigned int interlaced --) --{ -- /*unsigned*/ -- int i, j, k; -- int bands; -- int dest_band_height; -- int source_band_height; -- -- typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, -- unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); -- -- Scale1D Scale1Dv = scale1d_c; -- Scale1D Scale1Dh = scale1d_c; -- -- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; -- -- int ratio_scalable = 1; -- int interpolation = 0; -- -- unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */ -- unsigned char *line_src; -- -- -- source_base = (unsigned char *)source; -- -- if (source_pitch < 0) -- { -- int offset; -- -- offset = (source_height - 1); -- offset *= source_pitch; -- -- source_base += offset; -- } -- -- /* find out the ratio for each direction */ -- switch (hratio * 10 / hscale) -- { -+ /*const*/ -+ unsigned char *source, -+ int source_pitch, -+ unsigned int source_width, -+ unsigned int source_height, -+ unsigned char *dest, -+ int dest_pitch, -+ unsigned int dest_width, -+ unsigned int dest_height, -+ unsigned char *temp_area, -+ unsigned char temp_area_height, -+ unsigned int hscale, -+ unsigned int hratio, -+ unsigned int vscale, -+ unsigned int vratio, -+ unsigned int interlaced -+) { -+ /*unsigned*/ -+ int i, j, k; -+ int bands; -+ int dest_band_height; -+ int source_band_height; -+ -+ typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, -+ unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); -+ -+ Scale1D Scale1Dv = scale1d_c; -+ Scale1D Scale1Dh = scale1d_c; -+ -+ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -+ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; -+ -+ int ratio_scalable = 1; -+ int interpolation = 0; -+ -+ unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? 
source : (source + ((source_height-1) * source_pitch))); */ -+ unsigned char *line_src; -+ -+ -+ source_base = (unsigned char *)source; -+ -+ if (source_pitch < 0) { -+ int offset; -+ -+ offset = (source_height - 1); -+ offset *= source_pitch; -+ -+ source_base += offset; -+ } -+ -+ /* find out the ratio for each direction */ -+ switch (hratio * 10 / hscale) { - case 8: -- /* 4-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_5_4_scale; -- break; -+ /* 4-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_5_4_scale; -+ break; - case 6: -- /* 3-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_5_3_scale; -- break; -+ /* 3-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_5_3_scale; -+ break; - case 5: -- /* 1-2 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_2_1_scale; -- break; -+ /* 1-2 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_2_1_scale; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -- } -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } - -- switch (vratio * 10 / vscale) -- { -+ switch (vratio * 10 / vscale) { - case 8: -- /* 4-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_5_4_scale; -- source_band_height = 5; -- dest_band_height = 4; -- break; -+ /* 4-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_5_4_scale; -+ source_band_height = 5; -+ dest_band_height = 4; -+ break; - case 6: -- /* 3-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_5_3_scale; -- source_band_height = 5; -- dest_band_height = 3; -- break; -+ /* 3-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_5_3_scale; -+ source_band_height = 5; -+ dest_band_height = 3; -+ break; - case 5: -- /* 1-2 Scale in vertical direction */ -+ /* 1-2 Scale in vertical direction */ - -- if (interlaced) -- { -- /* if the content is interlaced, point sampling is used */ -- vert_band_scale = vp8_vertical_band_2_1_scale; -- } -- else -- { -+ if (interlaced) { -+ /* if the content is interlaced, point sampling is used */ -+ vert_band_scale = vp8_vertical_band_2_1_scale; -+ } else { - -- interpolation = 1; -- /* if the content is progressive, interplo */ -- vert_band_scale = vp8_vertical_band_2_1_scale_i; -+ interpolation = 1; -+ /* if the content is progressive, interplo */ -+ vert_band_scale = vp8_vertical_band_2_1_scale_i; - -- } -+ } - -- source_band_height = 2; -- dest_band_height = 1; -- break; -+ source_band_height = 2; -+ dest_band_height = 1; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } -+ -+ if (ratio_scalable) { -+ if (source_height == dest_height) { -+ /* for each band of the image */ -+ for (k = 0; k < (int)dest_height; k++) { -+ horiz_line_scale(source, source_width, dest, dest_width); -+ source += source_pitch; -+ dest += dest_pitch; -+ } -+ -+ return; - } - -- if (ratio_scalable) -- { -- if (source_height == dest_height) -- { -- /* for each band of the image */ -- for (k = 0; k < (int)dest_height; k++) -- { -- horiz_line_scale(source, source_width, dest, dest_width); -- source += 
source_pitch; -- dest += dest_pitch; -- } -- -- return; -- } -- -- if (interpolation) -- { -- if (source < source_base) -- source = source_base; -- -- horiz_line_scale(source, source_width, temp_area, dest_width); -- } -- -- for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) -- { -- /* scale one band horizontally */ -- for (i = 0; i < source_band_height; i++) -- { -- /* Trap case where we could read off the base of the source buffer */ -- -- line_src = (unsigned char *)source + i * source_pitch; -- -- if (line_src < source_base) -- line_src = source_base; -- -- horiz_line_scale(line_src, source_width, -- temp_area + (i + 1)*dest_pitch, dest_width); -- } -- -- /* Vertical scaling is in place */ -- vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); -- -- if (interpolation) -- vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); -- -- /* Next band... */ -- source += (unsigned long) source_band_height * source_pitch; -- dest += (unsigned long) dest_band_height * dest_pitch; -- } -- -- return; -+ if (interpolation) { -+ if (source < source_base) -+ source = source_base; -+ -+ horiz_line_scale(source, source_width, temp_area, dest_width); - } - -- if (hscale == 2 && hratio == 1) -- Scale1Dh = scale1d_2t1_ps; -+ for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) { -+ /* scale one band horizontally */ -+ for (i = 0; i < source_band_height; i++) { -+ /* Trap case where we could read off the base of the source buffer */ - -- if (vscale == 2 && vratio == 1) -- { -- if (interlaced) -- Scale1Dv = scale1d_2t1_ps; -- else -- Scale1Dv = scale1d_2t1_i; -- } -+ line_src = (unsigned char *)source + i * source_pitch; - -- if (source_height == dest_height) -- { -- /* for each band of the image */ -- for (k = 0; k < (int)dest_height; k++) -- { -- Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); -- source += source_pitch; -- dest += dest_pitch; -- } -- -- return; -- } -+ if (line_src < source_base) -+ line_src = source_base; -+ -+ horiz_line_scale(line_src, source_width, -+ temp_area + (i + 1)*dest_pitch, dest_width); -+ } -+ -+ /* Vertical scaling is in place */ -+ vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); - -- if (dest_height > source_height) -- { -- dest_band_height = temp_area_height - 1; -- source_band_height = dest_band_height * source_height / dest_height; -+ if (interpolation) -+ vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); -+ -+ /* Next band... 
*/ -+ source += (unsigned long) source_band_height * source_pitch; -+ dest += (unsigned long) dest_band_height * dest_pitch; - } -+ -+ return; -+ } -+ -+ if (hscale == 2 && hratio == 1) -+ Scale1Dh = scale1d_2t1_ps; -+ -+ if (vscale == 2 && vratio == 1) { -+ if (interlaced) -+ Scale1Dv = scale1d_2t1_ps; - else -- { -- source_band_height = temp_area_height - 1; -- dest_band_height = source_band_height * vratio / vscale; -+ Scale1Dv = scale1d_2t1_i; -+ } -+ -+ if (source_height == dest_height) { -+ /* for each band of the image */ -+ for (k = 0; k < (int)dest_height; k++) { -+ Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); -+ source += source_pitch; -+ dest += dest_pitch; - } - -- /* first row needs to be done so that we can stay one row ahead for vertical zoom */ -- Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); -+ return; -+ } -+ -+ if (dest_height > source_height) { -+ dest_band_height = temp_area_height - 1; -+ source_band_height = dest_band_height * source_height / dest_height; -+ } else { -+ source_band_height = temp_area_height - 1; -+ dest_band_height = source_band_height * vratio / vscale; -+ } - -- /* for each band of the image */ -- bands = (dest_height + dest_band_height - 1) / dest_band_height; -- -- for (k = 0; k < bands; k++) -- { -- /* scale one band horizontally */ -- for (i = 1; i < source_band_height + 1; i++) -- { -- if (k * source_band_height + i < (int) source_height) -- { -- Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, -- temp_area + i * dest_pitch, 1, hratio, dest_width); -- } -- else /* Duplicate the last row */ -- { -- /* copy temp_area row 0 over from last row in the past */ -- duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); -- } -- } -- -- /* scale one band vertically */ -- for (j = 0; j < (int)dest_width; j++) -- { -- Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, -- &dest[j], dest_pitch, vratio, dest_band_height); -- } -+ /* first row needs to be done so that we can stay one row ahead for vertical zoom */ -+ Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); - -+ /* for each band of the image */ -+ bands = (dest_height + dest_band_height - 1) / dest_band_height; -+ -+ for (k = 0; k < bands; k++) { -+ /* scale one band horizontally */ -+ for (i = 1; i < source_band_height + 1; i++) { -+ if (k * source_band_height + i < (int) source_height) { -+ Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, -+ temp_area + i * dest_pitch, 1, hratio, dest_width); -+ } else { /* Duplicate the last row */ - /* copy temp_area row 0 over from last row in the past */ -- duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); -+ duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); -+ } -+ } - -- /* move to the next band */ -- source += source_band_height * source_pitch; -- dest += dest_band_height * dest_pitch; -+ /* scale one band vertically */ -+ for (j = 0; j < (int)dest_width; j++) { -+ Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, -+ &dest[j], dest_pitch, vratio, dest_band_height); - } -+ -+ /* copy temp_area row 0 over from last row in the past */ -+ duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); -+ -+ /* move to the next band */ -+ source += source_band_height * source_pitch; -+ dest += dest_band_height * dest_pitch; -+ } - } - - 
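For orientation while reading the Scale2D hunk above: the banded path picks its kernels purely from the integer ratio (hratio * 10 / hscale yields 8 for 4:5, 6 for 3:5 and 5 for 1:2 scaling), and the progressive 1:2 path filters each output row from three source rows with a 3-10-3 kernel in 4 fractional bits. A minimal standalone C sketch of those two calculations follows (illustration only, not part of the patch; the sample values are made up):

#include <stdio.h>

/* Map a scale ratio to the banded kernel the same way Scale2D does:
 * (hratio * 10) / hscale == 8 -> 5-to-4, 6 -> 5-to-3, 5 -> 2-to-1. */
static const char *pick_kernel(unsigned int hscale, unsigned int hratio) {
  switch (hratio * 10 / hscale) {
    case 8: return "5->4 band (weights 192/64, 128/128, 64/192)";
    case 6: return "5->3 band (weights 85/171, 171/85)";
    case 5: return "2->1 band (point sample, or 3-10-3 when progressive)";
    default: return "ratio not handled by the banded path";
  }
}

/* The interpolating 2:1 kernel: weight the rows 3-10-3, round, shift by 4. */
static unsigned char scale_2_1_i(unsigned char above, unsigned char cur,
                                 unsigned char below) {
  int temp = 8;                       /* rounding term for the >> 4 below */
  temp += 3 * above;
  temp += 10 * cur;
  temp += 3 * below;
  return (unsigned char)(temp >> 4);
}

int main(void) {
  printf("%s\n", pick_kernel(5, 4));            /* selects the 5->4 band  */
  printf("%d\n", scale_2_1_i(100, 120, 140));   /* prints 120             */
  return 0;
}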
/**************************************************************************** -@@ -572,57 +538,56 @@ void Scale2D - ****************************************************************************/ - void vp8_scale_frame - ( -- YV12_BUFFER_CONFIG *src, -- YV12_BUFFER_CONFIG *dst, -- unsigned char *temp_area, -- unsigned char temp_height, -- unsigned int hscale, -- unsigned int hratio, -- unsigned int vscale, -- unsigned int vratio, -- unsigned int interlaced --) --{ -- int i; -- int dw = (hscale - 1 + src->y_width * hratio) / hscale; -- int dh = (vscale - 1 + src->y_height * vratio) / vscale; -- -- /* call our internal scaling routines!! */ -- Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, -- (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, -- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -- -- if (dw < (int)dst->y_width) -- for (i = 0; i < dh; i++) -- duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1); -- -- if (dh < (int)dst->y_height) -- for (i = dh - 1; i < (int)dst->y_height; i++) -- duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); -- -- Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, -- (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, -- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -- -- if (dw / 2 < (int)dst->uv_width) -- for (i = 0; i < dst->uv_height; i++) -- duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); -- -- if (dh / 2 < (int)dst->uv_height) -- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -- duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -- -- Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, -- (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, -- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -- -- if (dw / 2 < (int)dst->uv_width) -- for (i = 0; i < dst->uv_height; i++) -- duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); -- -- if (dh / 2 < (int) dst->uv_height) -- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -- duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -+ YV12_BUFFER_CONFIG *src, -+ YV12_BUFFER_CONFIG *dst, -+ unsigned char *temp_area, -+ unsigned char temp_height, -+ unsigned int hscale, -+ unsigned int hratio, -+ unsigned int vscale, -+ unsigned int vratio, -+ unsigned int interlaced -+) { -+ int i; -+ int dw = (hscale - 1 + src->y_width * hratio) / hscale; -+ int dh = (vscale - 1 + src->y_height * vratio) / vscale; -+ -+ /* call our internal scaling routines!! 
*/ -+ Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, -+ (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, -+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -+ -+ if (dw < (int)dst->y_width) -+ for (i = 0; i < dh; i++) -+ duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); -+ -+ if (dh < (int)dst->y_height) -+ for (i = dh - 1; i < (int)dst->y_height; i++) -+ duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); -+ -+ Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, -+ (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, -+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -+ -+ if (dw / 2 < (int)dst->uv_width) -+ for (i = 0; i < dst->uv_height; i++) -+ duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); -+ -+ if (dh / 2 < (int)dst->uv_height) -+ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -+ duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -+ -+ Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, -+ (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, -+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -+ -+ if (dw / 2 < (int)dst->uv_width) -+ for (i = 0; i < dst->uv_height; i++) -+ duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); -+ -+ if (dh / 2 < (int) dst->uv_height) -+ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -+ duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); - } - /**************************************************************************** - * -@@ -651,183 +616,177 @@ void vp8_scale_frame - static - int any_ratio_2d_scale - ( -- SCALE_VARS *si, -- const unsigned char *source, -- int source_pitch, -- unsigned int source_width, -- unsigned int source_height, -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width, -- unsigned int dest_height --) --{ -- unsigned int i, k; -- unsigned int src_band_height = 0; -- unsigned int dest_band_height = 0; -- -- /* suggested scale factors */ -- int hs = si->HScale; -- int hr = si->HRatio; -- int vs = si->VScale; -- int vr = si->VRatio; -- -- /* assume the ratios are scalable instead of should be centered */ -- int ratio_scalable = 1; -- -- const unsigned char *source_base = ((source_pitch >= 0) ? 
source : (source + ((source_height - 1) * source_pitch))); -- const unsigned char *line_src; -- -- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -- void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -- -- (void) si; -- -- /* find out the ratio for each direction */ -- switch (hr * 30 / hs) -- { -+ SCALE_VARS *si, -+ const unsigned char *source, -+ int source_pitch, -+ unsigned int source_width, -+ unsigned int source_height, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width, -+ unsigned int dest_height -+) { -+ unsigned int i, k; -+ unsigned int src_band_height = 0; -+ unsigned int dest_band_height = 0; -+ -+ /* suggested scale factors */ -+ int hs = si->HScale; -+ int hr = si->HRatio; -+ int vs = si->VScale; -+ int vr = si->VRatio; -+ -+ /* assume the ratios are scalable instead of should be centered */ -+ int ratio_scalable = 1; -+ -+ const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); -+ const unsigned char *line_src; -+ -+ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -+ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -+ void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -+ -+ (void) si; -+ -+ /* find out the ratio for each direction */ -+ switch (hr * 30 / hs) { - case 24: -- /* 4-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_4_5_scale; -- break; -+ /* 4-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_4_5_scale; -+ break; - case 22: -- /* 3-4 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_3_4_scale; -- break; -+ /* 3-4 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_3_4_scale; -+ break; - - case 20: -- /* 4-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_2_3_scale; -- break; -+ /* 4-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_2_3_scale; -+ break; - case 18: -- /* 3-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_3_5_scale; -- break; -+ /* 3-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_3_5_scale; -+ break; - case 15: -- /* 1-2 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_1_2_scale; -- break; -+ /* 1-2 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_1_2_scale; -+ break; - case 30: -- /* no scale in Width direction */ -- horiz_line_scale = horizontal_line_copy; -- break; -+ /* no scale in Width direction */ -+ horiz_line_scale = horizontal_line_copy; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -- } -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } - -- switch (vr * 30 / vs) -- { -+ switch (vr * 30 / vs) { - case 24: -- /* 4-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_4_5_scale; -- last_vert_band_scale = vp8_last_vertical_band_4_5_scale; -- src_band_height = 4; -- dest_band_height = 5; -- break; -+ /* 4-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_4_5_scale; -+ last_vert_band_scale = vp8_last_vertical_band_4_5_scale; -+ src_band_height = 
4; -+ dest_band_height = 5; -+ break; - case 22: -- /* 3-4 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_3_4_scale; -- last_vert_band_scale = vp8_last_vertical_band_3_4_scale; -- src_band_height = 3; -- dest_band_height = 4; -- break; -+ /* 3-4 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_3_4_scale; -+ last_vert_band_scale = vp8_last_vertical_band_3_4_scale; -+ src_band_height = 3; -+ dest_band_height = 4; -+ break; - case 20: -- /* 2-3 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_2_3_scale; -- last_vert_band_scale = vp8_last_vertical_band_2_3_scale; -- src_band_height = 2; -- dest_band_height = 3; -- break; -+ /* 2-3 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_2_3_scale; -+ last_vert_band_scale = vp8_last_vertical_band_2_3_scale; -+ src_band_height = 2; -+ dest_band_height = 3; -+ break; - case 18: -- /* 3-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_3_5_scale; -- last_vert_band_scale = vp8_last_vertical_band_3_5_scale; -- src_band_height = 3; -- dest_band_height = 5; -- break; -+ /* 3-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_3_5_scale; -+ last_vert_band_scale = vp8_last_vertical_band_3_5_scale; -+ src_band_height = 3; -+ dest_band_height = 5; -+ break; - case 15: -- /* 1-2 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_1_2_scale; -- last_vert_band_scale = vp8_last_vertical_band_1_2_scale; -- src_band_height = 1; -- dest_band_height = 2; -- break; -+ /* 1-2 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_1_2_scale; -+ last_vert_band_scale = vp8_last_vertical_band_1_2_scale; -+ src_band_height = 1; -+ dest_band_height = 2; -+ break; - case 30: -- /* no scale in Width direction */ -- vert_band_scale = null_scale; -- last_vert_band_scale = null_scale; -- src_band_height = 4; -- dest_band_height = 4; -- break; -+ /* no scale in Width direction */ -+ vert_band_scale = null_scale; -+ last_vert_band_scale = null_scale; -+ src_band_height = 4; -+ dest_band_height = 4; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -- } -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } - -- if (ratio_scalable == 0) -- return ratio_scalable; -+ if (ratio_scalable == 0) -+ return ratio_scalable; - -- horiz_line_scale(source, source_width, dest, dest_width); -+ horiz_line_scale(source, source_width, dest, dest_width); - -- /* except last band */ -- for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) -- { -- /* scale one band horizontally */ -- for (i = 1; i < src_band_height; i++) -- { -- /* Trap case where we could read off the base of the source buffer */ -- line_src = source + i * source_pitch; -+ /* except last band */ -+ for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) { -+ /* scale one band horizontally */ -+ for (i = 1; i < src_band_height; i++) { -+ /* Trap case where we could read off the base of the source buffer */ -+ line_src = source + i * source_pitch; - -- if (line_src < source_base) -- line_src = source_base; -+ if (line_src < source_base) -+ line_src = source_base; - -- horiz_line_scale(line_src, source_width, -- dest + i * dest_pitch, dest_width); -- } -+ horiz_line_scale(line_src, source_width, -+ dest + i * dest_pitch, dest_width); -+ } - -- /* first 
line of next band */ -- /* Trap case where we could read off the base of the source buffer */ -- line_src = source + src_band_height * source_pitch; -+ /* first line of next band */ -+ /* Trap case where we could read off the base of the source buffer */ -+ line_src = source + src_band_height * source_pitch; - -- if (line_src < source_base) -- line_src = source_base; -+ if (line_src < source_base) -+ line_src = source_base; - -- horiz_line_scale(line_src, source_width, -- dest + dest_band_height * dest_pitch, -- dest_width); -+ horiz_line_scale(line_src, source_width, -+ dest + dest_band_height * dest_pitch, -+ dest_width); - -- /* Vertical scaling is in place */ -- vert_band_scale(dest, dest_pitch, dest_width); -+ /* Vertical scaling is in place */ -+ vert_band_scale(dest, dest_pitch, dest_width); - -- /* Next band... */ -- source += src_band_height * source_pitch; -- dest += dest_band_height * dest_pitch; -- } -+ /* Next band... */ -+ source += src_band_height * source_pitch; -+ dest += dest_band_height * dest_pitch; -+ } - -- /* scale one band horizontally */ -- for (i = 1; i < src_band_height; i++) -- { -- /* Trap case where we could read off the base of the source buffer */ -- line_src = source + i * source_pitch; -+ /* scale one band horizontally */ -+ for (i = 1; i < src_band_height; i++) { -+ /* Trap case where we could read off the base of the source buffer */ -+ line_src = source + i * source_pitch; - -- if (line_src < source_base) -- line_src = source_base; -+ if (line_src < source_base) -+ line_src = source_base; - -- horiz_line_scale(line_src, source_width, -- dest + i * dest_pitch, -- dest_width); -- } -+ horiz_line_scale(line_src, source_width, -+ dest + i * dest_pitch, -+ dest_width); -+ } - -- /* Vertical scaling is in place */ -- last_vert_band_scale(dest, dest_pitch, dest_width); -+ /* Vertical scaling is in place */ -+ last_vert_band_scale(dest, dest_pitch, dest_width); - -- return ratio_scalable; -+ return ratio_scalable; - } - - /**************************************************************************** -@@ -849,70 +808,69 @@ int any_ratio_2d_scale - * - ****************************************************************************/ - static --int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) --{ -- int i; -- int ew; -- int eh; -- -- /* suggested scale factors */ -- int hs = scale_vars->HScale; -- int hr = scale_vars->HRatio; -- int vs = scale_vars->VScale; -- int vr = scale_vars->VRatio; -- -- int ratio_scalable = 1; -- -- int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; -- int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; -- int dw = scale_vars->expanded_frame_width; -- int dh = scale_vars->expanded_frame_height; -- YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; -- YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; -- -- if (hr == 3) -- ew = (sw + 2) / 3 * 3 * hs / hr; -- else -- ew = (sw + 7) / 8 * 8 * hs / hr; -- -- if (vr == 3) -- eh = (sh + 2) / 3 * 3 * vs / vr; -- else -- eh = (sh + 7) / 8 * 8 * vs / vr; -- -- ratio_scalable = any_ratio_2d_scale(scale_vars, -- (const unsigned char *)src_yuv_config->y_buffer, -- src_yuv_config->y_stride, sw, sh, -- (unsigned char *) dst_yuv_config->y_buffer + YOffset, -- dst_yuv_config->y_stride, dw, dh); -- -- for (i = 0; i < eh; i++) -- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); -- -- for (i = dh; i < eh; i++) -- duck_memset(dst_yuv_config->y_buffer + YOffset + i * 
dst_yuv_config->y_stride, 0, ew); -- -- if (ratio_scalable == 0) -- return ratio_scalable; -+int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) { -+ int i; -+ int ew; -+ int eh; -+ -+ /* suggested scale factors */ -+ int hs = scale_vars->HScale; -+ int hr = scale_vars->HRatio; -+ int vs = scale_vars->VScale; -+ int vr = scale_vars->VRatio; -+ -+ int ratio_scalable = 1; -+ -+ int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; -+ int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; -+ int dw = scale_vars->expanded_frame_width; -+ int dh = scale_vars->expanded_frame_height; -+ YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; -+ YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; -+ -+ if (hr == 3) -+ ew = (sw + 2) / 3 * 3 * hs / hr; -+ else -+ ew = (sw + 7) / 8 * 8 * hs / hr; -+ -+ if (vr == 3) -+ eh = (sh + 2) / 3 * 3 * vs / vr; -+ else -+ eh = (sh + 7) / 8 * 8 * vs / vr; -+ -+ ratio_scalable = any_ratio_2d_scale(scale_vars, -+ (const unsigned char *)src_yuv_config->y_buffer, -+ src_yuv_config->y_stride, sw, sh, -+ (unsigned char *) dst_yuv_config->y_buffer + YOffset, -+ dst_yuv_config->y_stride, dw, dh); -+ -+ for (i = 0; i < eh; i++) -+ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); -+ -+ for (i = dh; i < eh; i++) -+ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); -+ -+ if (ratio_scalable == 0) -+ return ratio_scalable; - -- sw = (sw + 1) >> 1; -- sh = (sh + 1) >> 1; -- dw = (dw + 1) >> 1; -- dh = (dh + 1) >> 1; -+ sw = (sw + 1) >> 1; -+ sh = (sh + 1) >> 1; -+ dw = (dw + 1) >> 1; -+ dh = (dh + 1) >> 1; - -- any_ratio_2d_scale(scale_vars, -- (const unsigned char *)src_yuv_config->u_buffer, -- src_yuv_config->y_stride / 2, sw, sh, -- (unsigned char *)dst_yuv_config->u_buffer + UVOffset, -- dst_yuv_config->uv_stride, dw, dh); -+ any_ratio_2d_scale(scale_vars, -+ (const unsigned char *)src_yuv_config->u_buffer, -+ src_yuv_config->y_stride / 2, sw, sh, -+ (unsigned char *)dst_yuv_config->u_buffer + UVOffset, -+ dst_yuv_config->uv_stride, dw, dh); - -- any_ratio_2d_scale(scale_vars, -- (const unsigned char *)src_yuv_config->v_buffer, -- src_yuv_config->y_stride / 2, sw, sh, -- (unsigned char *)dst_yuv_config->v_buffer + UVOffset, -- dst_yuv_config->uv_stride, dw, dh); -+ any_ratio_2d_scale(scale_vars, -+ (const unsigned char *)src_yuv_config->v_buffer, -+ src_yuv_config->y_stride / 2, sw, sh, -+ (unsigned char *)dst_yuv_config->v_buffer + UVOffset, -+ dst_yuv_config->uv_stride, dw, dh); - -- return ratio_scalable; -+ return ratio_scalable; - } - - /**************************************************************************** -@@ -931,52 +889,48 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) - * - ****************************************************************************/ - static void --center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) --{ -- int i; -- int row_offset, col_offset; -- unsigned char *src_data_pointer; -- unsigned char *dst_data_pointer; -- -- /* center values */ -- row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; -- col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; -- -- /* Y's */ -- src_data_pointer = src_yuv_config->y_buffer; -- dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; -- -- for (i = 0; i < src_yuv_config->y_height; i++) -- { -- 
duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); -- dst_data_pointer += dst_yuv_config->y_stride; -- src_data_pointer += src_yuv_config->y_stride; -- } -- -- row_offset /= 2; -- col_offset /= 2; -- -- /* U's */ -- src_data_pointer = src_yuv_config->u_buffer; -- dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -- -- for (i = 0; i < src_yuv_config->uv_height; i++) -- { -- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -- dst_data_pointer += dst_yuv_config->uv_stride; -- src_data_pointer += src_yuv_config->uv_stride; -- } -- -- /* V's */ -- src_data_pointer = src_yuv_config->v_buffer; -- dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -- -- for (i = 0; i < src_yuv_config->uv_height; i++) -- { -- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -- dst_data_pointer += dst_yuv_config->uv_stride; -- src_data_pointer += src_yuv_config->uv_stride; -- } -+center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) { -+ int i; -+ int row_offset, col_offset; -+ unsigned char *src_data_pointer; -+ unsigned char *dst_data_pointer; -+ -+ /* center values */ -+ row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; -+ col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; -+ -+ /* Y's */ -+ src_data_pointer = src_yuv_config->y_buffer; -+ dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; -+ -+ for (i = 0; i < src_yuv_config->y_height; i++) { -+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); -+ dst_data_pointer += dst_yuv_config->y_stride; -+ src_data_pointer += src_yuv_config->y_stride; -+ } -+ -+ row_offset /= 2; -+ col_offset /= 2; -+ -+ /* U's */ -+ src_data_pointer = src_yuv_config->u_buffer; -+ dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -+ -+ for (i = 0; i < src_yuv_config->uv_height; i++) { -+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -+ dst_data_pointer += dst_yuv_config->uv_stride; -+ src_data_pointer += src_yuv_config->uv_stride; -+ } -+ -+ /* V's */ -+ src_data_pointer = src_yuv_config->v_buffer; -+ dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -+ -+ for (i = 0; i < src_yuv_config->uv_height; i++) { -+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -+ dst_data_pointer += dst_yuv_config->uv_stride; -+ src_data_pointer += src_yuv_config->uv_stride; -+ } - } - - /**************************************************************************** -@@ -999,61 +953,58 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con - void - vp8_yv12_scale_or_center - ( -- YV12_BUFFER_CONFIG *src_yuv_config, -- YV12_BUFFER_CONFIG *dst_yuv_config, -- int expanded_frame_width, -- int expanded_frame_height, -- int scaling_mode, -- int HScale, -- int HRatio, -- int VScale, -- int VRatio --) --{ -- /*if ( ppi->post_processing_level ) -- update_umvborder ( ppi, frame_buffer );*/ -- -- -- switch (scaling_mode) -- { -+ YV12_BUFFER_CONFIG *src_yuv_config, -+ YV12_BUFFER_CONFIG *dst_yuv_config, -+ int expanded_frame_width, -+ int expanded_frame_height, -+ int scaling_mode, -+ int HScale, -+ int HRatio, -+ int VScale, -+ int VRatio -+) 
{ -+ /*if ( ppi->post_processing_level ) -+ update_umvborder ( ppi, frame_buffer );*/ -+ -+ -+ switch (scaling_mode) { - case SCALE_TO_FIT: -- case MAINTAIN_ASPECT_RATIO: -- { -- SCALE_VARS scale_vars; -- /* center values */ -+ case MAINTAIN_ASPECT_RATIO: { -+ SCALE_VARS scale_vars; -+ /* center values */ - #if 1 -- int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; -- int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; -- /*int YOffset = row * dst_yuv_config->y_width + col; -- int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ -- int YOffset = row * dst_yuv_config->y_stride + col; -- int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); -+ int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; -+ int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; -+ /*int YOffset = row * dst_yuv_config->y_width + col; -+ int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ -+ int YOffset = row * dst_yuv_config->y_stride + col; -+ int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); - #else -- int row = (src_yuv_config->y_height - expanded_frame_height) / 2; -- int col = (src_yuv_config->y_width - expanded_frame_width) / 2; -- int YOffset = row * src_yuv_config->y_width + col; -- int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); -+ int row = (src_yuv_config->y_height - expanded_frame_height) / 2; -+ int col = (src_yuv_config->y_width - expanded_frame_width) / 2; -+ int YOffset = row * src_yuv_config->y_width + col; -+ int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); - #endif - -- scale_vars.dst_yuv_config = dst_yuv_config; -- scale_vars.src_yuv_config = src_yuv_config; -- scale_vars.HScale = HScale; -- scale_vars.HRatio = HRatio; -- scale_vars.VScale = VScale; -- scale_vars.VRatio = VRatio; -- scale_vars.expanded_frame_width = expanded_frame_width; -- scale_vars.expanded_frame_height = expanded_frame_height; -+ scale_vars.dst_yuv_config = dst_yuv_config; -+ scale_vars.src_yuv_config = src_yuv_config; -+ scale_vars.HScale = HScale; -+ scale_vars.HRatio = HRatio; -+ scale_vars.VScale = VScale; -+ scale_vars.VRatio = VRatio; -+ scale_vars.expanded_frame_width = expanded_frame_width; -+ scale_vars.expanded_frame_height = expanded_frame_height; - -- /* perform center and scale */ -- any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); -+ /* perform center and scale */ -+ any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); - -- break; -+ break; - } - case CENTER: -- center_image(src_yuv_config, dst_yuv_config); -- break; -+ center_image(src_yuv_config, dst_yuv_config); -+ break; - - default: -- break; -- } -+ break; -+ } - } -diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c -index eff594e..4cb2a41 100644 ---- a/vpx_scale/generic/yv12config.c -+++ b/vpx_scale/generic/yv12config.c -@@ -20,81 +20,73 @@ - * - ****************************************************************************/ - int --vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) --{ -- if (ybf) -- { -- vpx_free(ybf->buffer_alloc); -- -- /* buffer_alloc isn't accessed by most functions. Rather y_buffer, -- u_buffer and v_buffer point to buffer_alloc and are used. 
Clear out -- all of this so that a freed pointer isn't inadvertently used */ -- vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG)); -- } -- else -- { -- return -1; -- } -- -- return 0; -+vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { -+ if (ybf) { -+ vpx_free(ybf->buffer_alloc); -+ -+ /* buffer_alloc isn't accessed by most functions. Rather y_buffer, -+ u_buffer and v_buffer point to buffer_alloc and are used. Clear out -+ all of this so that a freed pointer isn't inadvertently used */ -+ vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); -+ } else { -+ return -1; -+ } -+ -+ return 0; - } - - /**************************************************************************** - * - ****************************************************************************/ - int --vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) --{ --/*NOTE:*/ -- -- if (ybf) -- { -- int y_stride = ((width + 2 * border) + 31) & ~31; -- int yplane_size = (height + 2 * border) * y_stride; -- int uv_width = width >> 1; -- int uv_height = height >> 1; -- /** There is currently a bunch of code which assumes -- * uv_stride == y_stride/2, so enforce this here. */ -- int uv_stride = y_stride >> 1; -- int uvplane_size = (uv_height + border) * uv_stride; -- -- vp8_yv12_de_alloc_frame_buffer(ybf); -- -- /** Only support allocating buffers that have a height and width that -- * are multiples of 16, and a border that's a multiple of 32. -- * The border restriction is required to get 16-byte alignment of the -- * start of the chroma rows without intoducing an arbitrary gap -- * between planes, which would break the semantics of things like -- * vpx_img_set_rect(). */ -- if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) -- return -3; -- -- ybf->y_width = width; -- ybf->y_height = height; -- ybf->y_stride = y_stride; -- -- ybf->uv_width = uv_width; -- ybf->uv_height = uv_height; -- ybf->uv_stride = uv_stride; -- -- ybf->border = border; -- ybf->frame_size = yplane_size + 2 * uvplane_size; -- -- ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); -- -- if (ybf->buffer_alloc == NULL) -- return -1; -- -- ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; -- ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; -- ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; -- -- ybf->corrupted = 0; /* assume not currupted by errors */ -- } -- else -- { -- return -2; -- } -- -- return 0; -+vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { -+ /*NOTE:*/ -+ -+ if (ybf) { -+ int y_stride = ((width + 2 * border) + 31) & ~31; -+ int yplane_size = (height + 2 * border) * y_stride; -+ int uv_width = width >> 1; -+ int uv_height = height >> 1; -+ /** There is currently a bunch of code which assumes -+ * uv_stride == y_stride/2, so enforce this here. */ -+ int uv_stride = y_stride >> 1; -+ int uvplane_size = (uv_height + border) * uv_stride; -+ -+ vp8_yv12_de_alloc_frame_buffer(ybf); -+ -+ /** Only support allocating buffers that have a height and width that -+ * are multiples of 16, and a border that's a multiple of 32. -+ * The border restriction is required to get 16-byte alignment of the -+ * start of the chroma rows without intoducing an arbitrary gap -+ * between planes, which would break the semantics of things like -+ * vpx_img_set_rect(). 
*/ -+ if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) -+ return -3; -+ -+ ybf->y_width = width; -+ ybf->y_height = height; -+ ybf->y_stride = y_stride; -+ -+ ybf->uv_width = uv_width; -+ ybf->uv_height = uv_height; -+ ybf->uv_stride = uv_stride; -+ -+ ybf->border = border; -+ ybf->frame_size = yplane_size + 2 * uvplane_size; -+ -+ ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); -+ -+ if (ybf->buffer_alloc == NULL) -+ return -1; -+ -+ ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; -+ ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; -+ ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; -+ -+ ybf->corrupted = 0; /* assume not currupted by errors */ -+ } else { -+ return -2; -+ } -+ -+ return 0; - } -diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c -index 638633b..247078c 100644 ---- a/vpx_scale/generic/yv12extend.c -+++ b/vpx_scale/generic/yv12extend.c -@@ -21,184 +21,174 @@ - * - ****************************************************************************/ - void --vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) --{ -- int i; -- unsigned char *src_ptr1, *src_ptr2; -- unsigned char *dest_ptr1, *dest_ptr2; -- -- unsigned int Border; -- int plane_stride; -- int plane_height; -- int plane_width; -- -- /***********/ -- /* Y Plane */ -- /***********/ -- Border = ybf->border; -- plane_stride = ybf->y_stride; -- plane_height = ybf->y_height; -- plane_width = ybf->y_width; -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->y_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->y_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)Border; i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- -- /***********/ -- /* U Plane */ -- /***********/ -- plane_stride = ybf->uv_stride; -- plane_height = ybf->uv_height; -- plane_width = ybf->uv_width; -- Border /= 2; -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->u_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->u_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)(Border); i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- 
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /***********/ -- /* V Plane */ -- /***********/ -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->v_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->v_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)(Border); i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -+vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char *dest_ptr1, *dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ int plane_width; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = ybf->y_height; -+ plane_width = ybf->y_width; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->y_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->y_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)Border; i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ plane_height = ybf->uv_height; -+ plane_width = ybf->uv_width; -+ Border /= 2; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->u_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->u_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ 
dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->v_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->v_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } - } - - - static void --extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) --{ -- int i; -- unsigned char *src_ptr1, *src_ptr2; -- unsigned char *dest_ptr1, *dest_ptr2; -- -- unsigned int Border; -- int plane_stride; -- int plane_height; -- int plane_width; -- -- /***********/ -- /* Y Plane */ -- /***********/ -- Border = ybf->border; -- plane_stride = ybf->y_stride; -- plane_height = ybf->y_height; -- plane_width = ybf->y_width; -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->y_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->y_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)Border; i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- plane_stride /= 2; -- plane_height /= 2; -- plane_width /= 2; -- Border /= 2; -+extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) { -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char *dest_ptr1, *dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ int plane_width; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = ybf->y_height; -+ plane_width = ybf->y_width; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->y_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->y_buffer - Border; -+ src_ptr2 = 
src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)Border; i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ plane_stride /= 2; -+ plane_height /= 2; -+ plane_width /= 2; -+ Border /= 2; - - } - -@@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) - * - ****************************************************************************/ - void --vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) --{ -- int row; -- unsigned char *source, *dest; -- -- source = src_ybc->y_buffer; -- dest = dst_ybc->y_buffer; -- -- for (row = 0; row < src_ybc->y_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->y_width); -- source += src_ybc->y_stride; -- dest += dst_ybc->y_stride; -- } -- -- source = src_ybc->u_buffer; -- dest = dst_ybc->u_buffer; -- -- for (row = 0; row < src_ybc->uv_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->uv_width); -- source += src_ybc->uv_stride; -- dest += dst_ybc->uv_stride; -- } -- -- source = src_ybc->v_buffer; -- dest = dst_ybc->v_buffer; -- -- for (row = 0; row < src_ybc->uv_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->uv_width); -- source += src_ybc->uv_stride; -- dest += dst_ybc->uv_stride; -- } -- -- vp8_yv12_extend_frame_borders_c(dst_ybc); -+vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, -+ YV12_BUFFER_CONFIG *dst_ybc) { -+ int row; -+ unsigned char *source, *dest; -+ -+ source = src_ybc->y_buffer; -+ dest = dst_ybc->y_buffer; -+ -+ for (row = 0; row < src_ybc->y_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->y_width); -+ source += src_ybc->y_stride; -+ dest += dst_ybc->y_stride; -+ } -+ -+ source = src_ybc->u_buffer; -+ dest = dst_ybc->u_buffer; -+ -+ for (row = 0; row < src_ybc->uv_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->uv_width); -+ source += src_ybc->uv_stride; -+ dest += dst_ybc->uv_stride; -+ } -+ -+ source = src_ybc->v_buffer; -+ dest = dst_ybc->v_buffer; -+ -+ for (row = 0; row < src_ybc->uv_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->uv_width); -+ source += src_ybc->uv_stride; -+ dest += dst_ybc->uv_stride; -+ } -+ -+ vp8_yv12_extend_frame_borders_c(dst_ybc); - } - --void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) --{ -- int row; -- unsigned char *source, *dest; -+void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, -+ YV12_BUFFER_CONFIG *dst_ybc) { -+ int row; -+ unsigned char *source, *dest; - - -- source = src_ybc->y_buffer; -- dest = dst_ybc->y_buffer; -+ source = src_ybc->y_buffer; -+ dest = dst_ybc->y_buffer; - -- for (row = 0; row < src_ybc->y_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->y_width); -- source += src_ybc->y_stride; -- dest += dst_ybc->y_stride; -- } -+ for (row = 0; row < src_ybc->y_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->y_width); -+ source += src_ybc->y_stride; -+ dest += dst_ybc->y_stride; -+ } - } -diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h -index 39de181..c535252 100644 ---- a/vpx_scale/include/generic/vpxscale_arbitrary.h -+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h -@@ -14,33 +14,32 @@ - - #include "vpx_scale/yv12config.h" - --typedef struct --{ -- int in_width; -- int in_height; -- -- int out_width; -- int out_height; -- int max_usable_out_width; 
-- -- // numerator for the width and height -- int nw; -- int nh; -- int nh_uv; -- -- // output to input correspondance array -- short *l_w; -- short *l_h; -- short *l_h_uv; -- -- // polyphase coefficients -- short *c_w; -- short *c_h; -- short *c_h_uv; -- -- // buffer for horizontal filtering. -- unsigned char *hbuf; -- unsigned char *hbuf_uv; -+typedef struct { -+ int in_width; -+ int in_height; -+ -+ int out_width; -+ int out_height; -+ int max_usable_out_width; -+ -+ // numerator for the width and height -+ int nw; -+ int nh; -+ int nh_uv; -+ -+ // output to input correspondance array -+ short *l_w; -+ short *l_h; -+ short *l_h_uv; -+ -+ // polyphase coefficients -+ short *c_w; -+ short *c_h; -+ short *c_h_uv; -+ -+ // buffer for horizontal filtering. -+ unsigned char *hbuf; -+ unsigned char *hbuf_uv; - } BICUBIC_SCALER_STRUCT; - - int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); -diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h -index 1476e64..5581385 100644 ---- a/vpx_scale/scale_mode.h -+++ b/vpx_scale/scale_mode.h -@@ -17,12 +17,11 @@ - #ifndef SCALE_MODE_H - #define SCALE_MODE_H - --typedef enum --{ -- MAINTAIN_ASPECT_RATIO = 0x0, -- SCALE_TO_FIT = 0x1, -- CENTER = 0x2, -- OTHER = 0x3 -+typedef enum { -+ MAINTAIN_ASPECT_RATIO = 0x0, -+ SCALE_TO_FIT = 0x1, -+ CENTER = 0x2, -+ OTHER = 0x3 - } SCALE_MODE; - - -diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h -index 8919a24..3c2194d 100644 ---- a/vpx_scale/vpxscale.h -+++ b/vpx_scale/vpxscale.h -@@ -14,29 +14,24 @@ - - #include "vpx_scale/yv12config.h" - --extern void vp8_yv12_scale_or_center --( -- YV12_BUFFER_CONFIG *src_yuv_config, -- YV12_BUFFER_CONFIG *dst_yuv_config, -- int expanded_frame_width, -- int expanded_frame_height, -- int scaling_mode, -- int HScale, -- int HRatio, -- int VScale, -- int VRatio --); --extern void vp8_scale_frame --( -- YV12_BUFFER_CONFIG *src, -- YV12_BUFFER_CONFIG *dst, -- unsigned char *temp_area, -- unsigned char temp_height, -- unsigned int hscale, -- unsigned int hratio, -- unsigned int vscale, -- unsigned int vratio, -- unsigned int interlaced --); -+extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config, -+ YV12_BUFFER_CONFIG *dst_yuv_config, -+ int expanded_frame_width, -+ int expanded_frame_height, -+ int scaling_mode, -+ int HScale, -+ int HRatio, -+ int VScale, -+ int VRatio); -+ -+extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src, -+ YV12_BUFFER_CONFIG *dst, -+ unsigned char *temp_area, -+ unsigned char temp_height, -+ unsigned int hscale, -+ unsigned int hratio, -+ unsigned int vscale, -+ unsigned int vratio, -+ unsigned int interlaced); - - #endif -diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c -index 3711fe5..2d96cc7 100644 ---- a/vpx_scale/win32/scaleopt.c -+++ b/vpx_scale/win32/scaleopt.c -@@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, - static - void horizontal_line_3_5_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void) dest_width; - -- __asm -- { -+ __asm { - -- push ebx -+ push ebx - -- mov esi, source -- mov edi, dest -+ mov esi, source -+ mov edi, dest - -- mov ecx, source_width -- lea edx, [esi+ecx-3]; -+ mov ecx, source_width -+ lea edx, [esi+ecx-3]; - -- movq mm5, const35_1 // mm5 = 66 xx cd 
xx 33 xx 9a xx -- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx -+ movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx -+ movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - -- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -- pxor mm7, mm7 // clear mm7 -+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -+ pxor mm7, mm7 // clear mm7 - -- horiz_line_3_5_loop: -+ horiz_line_3_5_loop: - -- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -- mov ebx, eax -+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -+ mov ebx, eax - -- and ebx, 0xffff00 // ebx = xx 01 02 xx -- mov ecx, eax // ecx = 00 01 02 03 -+ and ebx, 0xffff00 // ebx = xx 01 02 xx -+ mov ecx, eax // ecx = 00 01 02 03 - -- and eax, 0xffff0000 // eax = xx xx 02 03 -- xor ecx, eax // ecx = 00 01 xx xx -+ and eax, 0xffff0000 // eax = xx xx 02 03 -+ xor ecx, eax // ecx = 00 01 xx xx - -- shr ebx, 8 // ebx = 01 02 xx xx -- or eax, ebx // eax = 01 02 02 03 -+ shr ebx, 8 // ebx = 01 02 xx xx -+ or eax, ebx // eax = 01 02 02 03 - -- shl ebx, 16 // ebx = xx xx 01 02 -- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx -+ shl ebx, 16 // ebx = xx xx 01 02 -+ movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - -- or ebx, ecx // ebx = 00 01 01 02 -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx -+ or ebx, ecx // ebx = 00 01 01 02 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - -- movd mm0, ebx // mm0 = 00 01 01 02 -- pmullw mm1, mm6 // -+ movd mm0, ebx // mm0 = 00 01 01 02 -+ pmullw mm1, mm6 // - -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx -- pmullw mm0, mm5 // -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx -+ pmullw mm0, mm5 // - -- mov [edi], ebx // writeoutput 00 xx xx xx -- add esi, 3 -+ mov [edi], ebx // writeoutput 00 xx xx xx -+ add esi, 3 - -- add edi, 5 -- paddw mm0, mm1 -+ add edi, 5 -+ paddw mm0, mm1 - -- paddw mm0, mm4 -- psrlw mm0, 8 -+ paddw mm0, mm4 -+ psrlw mm0, 8 - -- cmp esi, edx -- packuswb mm0, mm7 -+ cmp esi, edx -+ packuswb mm0, mm7 - -- movd DWORD Ptr [edi-4], mm0 -- jl horiz_line_3_5_loop -+ movd DWORD Ptr [edi-4], mm0 -+ jl horiz_line_3_5_loop - --//Exit: -- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -- mov ebx, eax -+// Exit: -+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -+ mov ebx, eax - -- and ebx, 0xffff00 // ebx = xx 01 02 xx -- mov ecx, eax // ecx = 00 01 02 03 -+ and ebx, 0xffff00 // ebx = xx 01 02 xx -+ mov ecx, eax // ecx = 00 01 02 03 - -- and eax, 0xffff0000 // eax = xx xx 02 03 -- xor ecx, eax // ecx = 00 01 xx xx -+ and eax, 0xffff0000 // eax = xx xx 02 03 -+ xor ecx, eax // ecx = 00 01 xx xx - -- shr ebx, 8 // ebx = 01 02 xx xx -- or eax, ebx // eax = 01 02 02 03 -+ shr ebx, 8 // ebx = 01 02 xx xx -+ or eax, ebx // eax = 01 02 02 03 - -- shl eax, 8 // eax = xx 01 02 02 -- and eax, 0xffff0000 // eax = xx xx 02 02 -+ shl eax, 8 // eax = xx 01 02 02 -+ and eax, 0xffff0000 // eax = xx xx 02 02 - -- or eax, ebx // eax = 01 02 02 02 -+ or eax, ebx // eax = 01 02 02 02 - -- shl ebx, 16 // ebx = xx xx 01 02 -- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx -+ shl ebx, 16 // ebx = xx xx 01 02 -+ movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - -- or ebx, ecx // ebx = 00 01 01 02 -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx -+ or ebx, ecx // ebx = 00 01 01 02 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - -- movd mm0, ebx // mm0 = 00 01 01 02 -- pmullw mm1, mm6 // -+ movd mm0, ebx // mm0 = 00 01 01 02 -+ pmullw mm1, mm6 // - -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx -- pmullw mm0, mm5 // -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 
01 xx 02 xx -+ pmullw mm0, mm5 // - -- mov [edi], ebx // writeoutput 00 xx xx xx -- paddw mm0, mm1 -+ mov [edi], ebx // writeoutput 00 xx xx xx -+ paddw mm0, mm1 - -- paddw mm0, mm4 -- psrlw mm0, 8 -+ paddw mm0, mm4 -+ psrlw mm0, 8 - -- packuswb mm0, mm7 -- movd DWORD Ptr [edi+1], mm0 -+ packuswb mm0, mm7 -+ movd DWORD Ptr [edi+1], mm0 - -- pop ebx -+ pop ebx - -- } -+ } - - } - -@@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx - static - void horizontal_line_4_5_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void)dest_width; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void)dest_width; - -- __asm -- { -+ __asm { - -- mov esi, source -- mov edi, dest -+ mov esi, source -+ mov edi, dest - -- mov ecx, source_width -- lea edx, [esi+ecx-8]; -+ mov ecx, source_width -+ lea edx, [esi+ecx-8]; - -- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx -- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx -+ movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx -+ movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - -- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -- pxor mm7, mm7 // clear mm7 -+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -+ pxor mm7, mm7 // clear mm7 - -- horiz_line_4_5_loop: -+ horiz_line_4_5_loop: - -- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 -- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 -+ movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 -+ movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - -- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 -+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -+ movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - -- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx -+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - -- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx -+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - -- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 -+ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - -- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 -+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -+ pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - -- paddw mm0, mm1 // added round values -- paddw mm0, mm4 -+ paddw mm0, mm1 // added round values -+ paddw mm0, mm4 - -- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -- packuswb mm0, mm7 -+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -+ packuswb mm0, mm7 - -- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -- add edi, 10 -+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -+ add edi, 10 - -- add esi, 8 -- paddw mm2, mm3 // -+ add esi, 8 -+ paddw mm2, mm3 // - -- paddw mm2, mm4 // added round values -- cmp esi, edx -+ paddw mm2, mm4 // added round values 
-+ cmp esi, edx - -- psrlw mm2, 8 -- packuswb mm2, mm7 -+ psrlw mm2, 8 -+ packuswb mm2, mm7 - -- movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 -- jl horiz_line_4_5_loop -+ movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 -+ jl horiz_line_4_5_loop - --//Exit: -- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 -- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 -+// Exit: -+ movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 -+ movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - -- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 -+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -+ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - -- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 -- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 -+ movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 -+ pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - -- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 -- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 -+ psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 -+ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - -- movq mm3, mm1 -+ movq mm3, mm1 - -- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx -+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - -- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx -+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - -- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 -+ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - -- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 -+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -+ pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - -- paddw mm0, mm1 // added round values -- paddw mm0, mm4 -+ paddw mm0, mm1 // added round values -+ paddw mm0, mm4 - -- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx -+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -+ packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - -- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -- paddw mm2, mm3 // -+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -+ paddw mm2, mm3 // - -- paddw mm2, mm4 // added round values -- psrlw mm2, 8 -+ paddw mm2, mm4 // added round values -+ psrlw mm2, 8 - -- packuswb mm2, mm7 -- movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 -+ packuswb mm2, mm7 -+ movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - - -- } -+ } - } - - /**************************************************************************** -@@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx - static - void vertical_band_4_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { - -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, 
dest_pitch // Get the pitch size - -- lea edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- vs_4_5_loop: -+ vs_4_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, one_fifth -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, one_fifth -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 1/5 -+ pmullw mm0, mm5 // a * 1/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 1/5 -- movq mm6, four_fifths // constan -+ pmullw mm2, mm5 // a * 1/5 -+ movq mm6, four_fifths // constan - -- movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 4/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 4/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b - -- pmullw mm5, mm6 // b * 4/5 -- paddw mm0, mm4 // a * 1/5 + b * 4/5 -+ pmullw mm5, mm6 // b * 4/5 -+ paddw mm0, mm4 // a * 1/5 + b * 4/5 - -- paddw mm2, mm5 // a * 1/5 + b * 4/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 1/5 + b * 4/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm5, two_fifths -- movq mm2, mm0 // make a copy -+ movq mm5, two_fifths -+ movq mm2, mm0 // make a copy - -- pmullw mm1, mm5 // b * 2/5 -- movq mm6, three_fifths -+ pmullw mm1, mm5 // b * 2/5 -+ movq mm6, three_fifths - - -- punpcklbw mm0, mm7 // unpack low to word -- pmullw mm3, mm5 // b * 2/5 -+ punpcklbw mm0, mm7 // unpack low to word -+ pmullw mm3, mm5 // b * 2/5 - -- movq mm4, mm0 // make copy of c -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm4, mm0 // make copy of c -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm4, mm6 // c * 3/5 -- movq mm5, mm2 -+ pmullw mm4, mm6 // c * 3/5 -+ movq mm5, mm2 - -- pmullw mm5, mm6 // c * 3/5 -- paddw mm1, mm4 // b * 2/5 + c * 3/5 -+ pmullw mm5, mm6 // c * 3/5 -+ paddw mm1, mm4 // b * 2/5 + c * 3/5 - -- paddw mm3, mm5 // b * 2/5 + c * 3/5 -- paddw mm1, round_values // + 128 -+ paddw mm3, mm5 // b * 2/5 + c * 3/5 -+ paddw mm1, round_values // + 128 - -- paddw mm3, round_values // + 128 -- psrlw mm1, 8 -+ paddw mm3, round_values // + 128 -+ psrlw mm1, 8 - -- psrlw mm3, 8 -- packuswb mm1, mm3 // des[2] -+ psrlw mm3, 8 -+ packuswb mm1, mm3 // des[2] - -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -- movq mm1, [edi] // mm1=Src[3]; -+ movq QWORD ptr [esi+ecx*2], 
mm1 // write des[2] -+ movq mm1, [edi] // mm1=Src[3]; - -- // mm0, mm2 --- Src[2] -- // mm1 --- Src[3] -- // mm6 --- 3/5 -- // mm7 for unpacking -+ // mm0, mm2 --- Src[2] -+ // mm1 --- Src[3] -+ // mm6 --- 3/5 -+ // mm7 for unpacking - -- pmullw mm0, mm6 // c * 3/5 -- movq mm5, two_fifths // mm5 = 2/5 -+ pmullw mm0, mm6 // c * 3/5 -+ movq mm5, two_fifths // mm5 = 2/5 - -- movq mm3, mm1 // make a copy -- pmullw mm2, mm6 // c * 3/5 -+ movq mm3, mm1 // make a copy -+ pmullw mm2, mm6 // c * 3/5 - -- punpcklbw mm1, mm7 // unpack low -- movq mm4, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low -+ movq mm4, mm1 // make a copy - -- punpckhbw mm3, mm7 // unpack high -- pmullw mm4, mm5 // d * 2/5 -+ punpckhbw mm3, mm7 // unpack high -+ pmullw mm4, mm5 // d * 2/5 - -- movq mm6, mm3 // make a copy -- pmullw mm6, mm5 // d * 2/5 -+ movq mm6, mm3 // make a copy -+ pmullw mm6, mm5 // d * 2/5 - -- paddw mm0, mm4 // c * 3/5 + d * 2/5 -- paddw mm2, mm6 // c * 3/5 + d * 2/5 -+ paddw mm0, mm4 // c * 3/5 + d * 2/5 -+ paddw mm2, mm6 // c * 3/5 + d * 2/5 - -- paddw mm0, round_values // + 128 -- paddw mm2, round_values // + 128 -+ paddw mm0, round_values // + 128 -+ paddw mm2, round_values // + 128 - -- psrlw mm0, 8 -- psrlw mm2, 8 -+ psrlw mm0, 8 -+ psrlw mm2, 8 - -- packuswb mm0, mm2 // des[3] -- movq QWORD ptr [edi], mm0 // write des[3] -+ packuswb mm0, mm2 // des[3] -+ movq QWORD ptr [edi], mm0 // write des[3] - -- // mm1, mm3 --- Src[3] -- // mm7 -- cleared for unpacking -+ // mm1, mm3 --- Src[3] -+ // mm7 -- cleared for unpacking - -- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group -+ movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group - -- movq mm5, four_fifths // mm5 = 4/5 -- pmullw mm1, mm5 // d * 4/5 -+ movq mm5, four_fifths // mm5 = 4/5 -+ pmullw mm1, mm5 // d * 4/5 - -- movq mm6, one_fifth // mm6 = 1/5 -- movq mm2, mm0 // make a copy -+ movq mm6, one_fifth // mm6 = 1/5 -+ movq mm2, mm0 // make a copy - -- pmullw mm3, mm5 // d * 4/5 -- punpcklbw mm0, mm7 // unpack low -+ pmullw mm3, mm5 // d * 4/5 -+ punpcklbw mm0, mm7 // unpack low - -- pmullw mm0, mm6 // an * 1/5 -- punpckhbw mm2, mm7 // unpack high -+ pmullw mm0, mm6 // an * 1/5 -+ punpckhbw mm2, mm7 // unpack high - -- paddw mm1, mm0 // d * 4/5 + an * 1/5 -- pmullw mm2, mm6 // an * 1/5 -+ paddw mm1, mm0 // d * 4/5 + an * 1/5 -+ pmullw mm2, mm6 // an * 1/5 - -- paddw mm3, mm2 // d * 4/5 + an * 1/5 -- paddw mm1, round_values // + 128 -+ paddw mm3, mm2 // d * 4/5 + an * 1/5 -+ paddw mm1, round_values // + 128 - -- paddw mm3, round_values // + 128 -- psrlw mm1, 8 -+ paddw mm3, round_values // + 128 -+ psrlw mm1, 8 - -- psrlw mm3, 8 -- packuswb mm1, mm3 // des[4] -+ psrlw mm3, 8 -+ packuswb mm1, mm3 // des[4] - -- movq QWORD ptr [edi+ecx], mm1 // write des[4] -+ movq QWORD ptr [edi+ecx], mm1 // write des[4] - -- add edi, 8 -- add esi, 8 -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg vs_4_5_loop -- } -+ sub edx, 8 -+ jg vs_4_5_loop -+ } - } - - /**************************************************************************** -@@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx - static - void last_vertical_band_4_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- lea 
edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- last_vs_4_5_loop: -+ last_vs_4_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, one_fifth -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, one_fifth -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 1/5 -+ pmullw mm0, mm5 // a * 1/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 1/5 -- movq mm6, four_fifths // constan -+ pmullw mm2, mm5 // a * 1/5 -+ movq mm6, four_fifths // constan - -- movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 4/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 4/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b - -- pmullw mm5, mm6 // b * 4/5 -- paddw mm0, mm4 // a * 1/5 + b * 4/5 -+ pmullw mm5, mm6 // b * 4/5 -+ paddw mm0, mm4 // a * 1/5 + b * 4/5 - -- paddw mm2, mm5 // a * 1/5 + b * 4/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 1/5 + b * 4/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm5, two_fifths -- movq mm2, mm0 // make a copy -+ movq mm5, two_fifths -+ movq mm2, mm0 // make a copy - -- pmullw mm1, mm5 // b * 2/5 -- movq mm6, three_fifths -+ pmullw mm1, mm5 // b * 2/5 -+ movq mm6, three_fifths - - -- punpcklbw mm0, mm7 // unpack low to word -- pmullw mm3, mm5 // b * 2/5 -+ punpcklbw mm0, mm7 // unpack low to word -+ pmullw mm3, mm5 // b * 2/5 - -- movq mm4, mm0 // make copy of c -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm4, mm0 // make copy of c -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm4, mm6 // c * 3/5 -- movq mm5, mm2 -+ pmullw mm4, mm6 // c * 3/5 -+ movq mm5, mm2 - -- pmullw mm5, mm6 // c * 3/5 -- paddw mm1, mm4 // b * 2/5 + c * 3/5 -+ pmullw mm5, mm6 // c * 3/5 -+ paddw mm1, mm4 // b * 2/5 + c * 3/5 - -- paddw mm3, mm5 // b * 2/5 + c * 3/5 -- paddw mm1, round_values // + 128 -+ paddw mm3, mm5 // b * 2/5 + c * 3/5 -+ paddw mm1, round_values // + 128 - -- paddw mm3, round_values // + 128 -- psrlw mm1, 8 -+ paddw mm3, round_values // + 128 -+ psrlw mm1, 8 - -- psrlw mm3, 8 -- packuswb mm1, mm3 // des[2] -+ psrlw mm3, 8 -+ packuswb mm1, mm3 // des[2] - -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -- movq mm1, [edi] // mm1=Src[3]; -+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -+ movq mm1, 
[edi] // mm1=Src[3]; - -- movq QWORD ptr [edi+ecx], mm1 // write des[4]; -+ movq QWORD ptr [edi+ecx], mm1 // write des[4]; - -- // mm0, mm2 --- Src[2] -- // mm1 --- Src[3] -- // mm6 --- 3/5 -- // mm7 for unpacking -+ // mm0, mm2 --- Src[2] -+ // mm1 --- Src[3] -+ // mm6 --- 3/5 -+ // mm7 for unpacking - -- pmullw mm0, mm6 // c * 3/5 -- movq mm5, two_fifths // mm5 = 2/5 -+ pmullw mm0, mm6 // c * 3/5 -+ movq mm5, two_fifths // mm5 = 2/5 - -- movq mm3, mm1 // make a copy -- pmullw mm2, mm6 // c * 3/5 -+ movq mm3, mm1 // make a copy -+ pmullw mm2, mm6 // c * 3/5 - -- punpcklbw mm1, mm7 // unpack low -- movq mm4, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low -+ movq mm4, mm1 // make a copy - -- punpckhbw mm3, mm7 // unpack high -- pmullw mm4, mm5 // d * 2/5 -+ punpckhbw mm3, mm7 // unpack high -+ pmullw mm4, mm5 // d * 2/5 - -- movq mm6, mm3 // make a copy -- pmullw mm6, mm5 // d * 2/5 -+ movq mm6, mm3 // make a copy -+ pmullw mm6, mm5 // d * 2/5 - -- paddw mm0, mm4 // c * 3/5 + d * 2/5 -- paddw mm2, mm6 // c * 3/5 + d * 2/5 -+ paddw mm0, mm4 // c * 3/5 + d * 2/5 -+ paddw mm2, mm6 // c * 3/5 + d * 2/5 - -- paddw mm0, round_values // + 128 -- paddw mm2, round_values // + 128 -+ paddw mm0, round_values // + 128 -+ paddw mm2, round_values // + 128 - -- psrlw mm0, 8 -- psrlw mm2, 8 -+ psrlw mm0, 8 -+ psrlw mm2, 8 - -- packuswb mm0, mm2 // des[3] -- movq QWORD ptr [edi], mm0 // write des[3] -+ packuswb mm0, mm2 // des[3] -+ movq QWORD ptr [edi], mm0 // write des[3] - -- // mm1, mm3 --- Src[3] -- // mm7 -- cleared for unpacking -- add edi, 8 -- add esi, 8 -+ // mm1, mm3 --- Src[3] -+ // mm7 -- cleared for unpacking -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg last_vs_4_5_loop -- } -+ sub edx, 8 -+ jg last_vs_4_5_loop -+ } - } - - /**************************************************************************** -@@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx - static - void vertical_band_3_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- lea edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- vs_3_5_loop: -+ vs_3_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, two_fifths // mm5 = 2/5 -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, two_fifths // mm5 = 2/5 -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 2/5 -+ pmullw mm0, mm5 // a * 2/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 2/5 -- movq mm6, three_fifths // mm6 = 3/5 -+ pmullw mm2, mm5 // a * 2/5 -+ movq mm6, three_fifths // mm6 = 3/5 - -- 
movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 3/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 3/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b - -- pmullw mm5, mm6 // b * 3/5 -- paddw mm0, mm4 // a * 2/5 + b * 3/5 -+ pmullw mm5, mm6 // b * 3/5 -+ paddw mm0, mm4 // a * 2/5 + b * 3/5 - -- paddw mm2, mm5 // a * 2/5 + b * 3/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 2/5 + b * 3/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm4, mm1 // b low -- pmullw mm1, four_fifths // b * 4/5 low -+ movq mm4, mm1 // b low -+ pmullw mm1, four_fifths // b * 4/5 low - -- movq mm5, mm3 // b high -- pmullw mm3, four_fifths // b * 4/5 high -+ movq mm5, mm3 // b high -+ pmullw mm3, four_fifths // b * 4/5 high - -- movq mm2, mm0 // c -- pmullw mm4, one_fifth // b * 1/5 -+ movq mm2, mm0 // c -+ pmullw mm4, one_fifth // b * 1/5 - -- punpcklbw mm0, mm7 // c low -- pmullw mm5, one_fifth // b * 1/5 -+ punpcklbw mm0, mm7 // c low -+ pmullw mm5, one_fifth // b * 1/5 - -- movq mm6, mm0 // make copy of c low -- punpckhbw mm2, mm7 // c high -+ movq mm6, mm0 // make copy of c low -+ punpckhbw mm2, mm7 // c high - -- pmullw mm6, one_fifth // c * 1/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, one_fifth // c * 1/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, one_fifth // c * 1/5 high -- paddw mm1, mm6 // b * 4/5 + c * 1/5 low -+ pmullw mm7, one_fifth // c * 1/5 high -+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low - -- paddw mm3, mm7 // b * 4/5 + c * 1/5 high -- movq mm6, mm0 // make copy of c low -+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high -+ movq mm6, mm0 // make copy of c low - -- pmullw mm6, four_fifths // c * 4/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, four_fifths // c * 4/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, four_fifths // c * 4/5 high -+ pmullw mm7, four_fifths // c * 4/5 high - -- paddw mm4, mm6 // b * 1/5 + c * 4/5 low -- paddw mm5, mm7 // b * 1/5 + c * 4/5 high -+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low -+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high - -- paddw mm1, round_values // + 128 -- paddw mm3, round_values // + 128 -+ paddw mm1, round_values // + 128 -+ paddw mm3, round_values // + 128 - -- psrlw mm1, 8 -- psrlw mm3, 8 -+ psrlw mm1, 8 -+ psrlw mm3, 8 - -- packuswb mm1, mm3 // des[2] -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -+ packuswb mm1, mm3 // des[2] -+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - -- paddw mm4, round_values // + 128 -- paddw mm5, round_values // + 128 -+ paddw mm4, round_values // + 128 -+ paddw mm5, round_values // + 128 - -- psrlw mm4, 8 -- psrlw mm5, 8 -+ psrlw mm4, 8 -+ psrlw mm5, 8 - -- packuswb mm4, mm5 // des[3] -- movq QWORD ptr [edi], mm4 // write des[3] -+ packuswb mm4, mm5 // des[3] -+ movq QWORD ptr [edi], mm4 // write des[3] - -- // mm0, mm2 --- Src[3] -+ // mm0, mm2 --- Src[3] - -- pxor mm7, mm7 // clear mm7 for 
unpacking -- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group -+ pxor mm7, mm7 // clear mm7 for unpacking -+ movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - -- movq mm5, three_fifths // mm5 = 3/5 -- pmullw mm0, mm5 // d * 3/5 -+ movq mm5, three_fifths // mm5 = 3/5 -+ pmullw mm0, mm5 // d * 3/5 - -- movq mm6, two_fifths // mm6 = 2/5 -- movq mm3, mm1 // make a copy -+ movq mm6, two_fifths // mm6 = 2/5 -+ movq mm3, mm1 // make a copy - -- pmullw mm2, mm5 // d * 3/5 -- punpcklbw mm1, mm7 // unpack low -+ pmullw mm2, mm5 // d * 3/5 -+ punpcklbw mm1, mm7 // unpack low - -- pmullw mm1, mm6 // an * 2/5 -- punpckhbw mm3, mm7 // unpack high -+ pmullw mm1, mm6 // an * 2/5 -+ punpckhbw mm3, mm7 // unpack high - -- paddw mm0, mm1 // d * 3/5 + an * 2/5 -- pmullw mm3, mm6 // an * 2/5 -+ paddw mm0, mm1 // d * 3/5 + an * 2/5 -+ pmullw mm3, mm6 // an * 2/5 - -- paddw mm2, mm3 // d * 3/5 + an * 2/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm3 // d * 3/5 + an * 2/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des[4] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des[4] - -- movq QWORD ptr [edi+ecx], mm0 // write des[4] -+ movq QWORD ptr [edi+ecx], mm0 // write des[4] - -- add edi, 8 -- add esi, 8 -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg vs_3_5_loop -- } -+ sub edx, 8 -+ jg vs_3_5_loop -+ } - } - - /**************************************************************************** -@@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx - static - void last_vertical_band_3_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- lea edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - - -- last_vs_3_5_loop: -+ last_vs_3_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, two_fifths // mm5 = 2/5 -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, two_fifths // mm5 = 2/5 -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 2/5 -+ pmullw mm0, mm5 // a * 2/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 2/5 -- movq mm6, three_fifths // mm6 = 3/5 -+ pmullw mm2, mm5 // a * 2/5 -+ movq mm6, three_fifths // mm6 = 3/5 - -- movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 3/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 3/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b 
- -- pmullw mm5, mm6 // b * 3/5 -- paddw mm0, mm4 // a * 2/5 + b * 3/5 -+ pmullw mm5, mm6 // b * 3/5 -+ paddw mm0, mm4 // a * 2/5 + b * 3/5 - -- paddw mm2, mm5 // a * 2/5 + b * 3/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 2/5 + b * 3/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - - - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm4, mm1 // b low -- pmullw mm1, four_fifths // b * 4/5 low -+ movq mm4, mm1 // b low -+ pmullw mm1, four_fifths // b * 4/5 low - -- movq QWORD ptr [edi+ecx], mm0 // write des[4] -+ movq QWORD ptr [edi+ecx], mm0 // write des[4] - -- movq mm5, mm3 // b high -- pmullw mm3, four_fifths // b * 4/5 high -+ movq mm5, mm3 // b high -+ pmullw mm3, four_fifths // b * 4/5 high - -- movq mm2, mm0 // c -- pmullw mm4, one_fifth // b * 1/5 -+ movq mm2, mm0 // c -+ pmullw mm4, one_fifth // b * 1/5 - -- punpcklbw mm0, mm7 // c low -- pmullw mm5, one_fifth // b * 1/5 -+ punpcklbw mm0, mm7 // c low -+ pmullw mm5, one_fifth // b * 1/5 - -- movq mm6, mm0 // make copy of c low -- punpckhbw mm2, mm7 // c high -+ movq mm6, mm0 // make copy of c low -+ punpckhbw mm2, mm7 // c high - -- pmullw mm6, one_fifth // c * 1/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, one_fifth // c * 1/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, one_fifth // c * 1/5 high -- paddw mm1, mm6 // b * 4/5 + c * 1/5 low -+ pmullw mm7, one_fifth // c * 1/5 high -+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low - -- paddw mm3, mm7 // b * 4/5 + c * 1/5 high -- movq mm6, mm0 // make copy of c low -+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high -+ movq mm6, mm0 // make copy of c low - -- pmullw mm6, four_fifths // c * 4/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, four_fifths // c * 4/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, four_fifths // c * 4/5 high -+ pmullw mm7, four_fifths // c * 4/5 high - -- paddw mm4, mm6 // b * 1/5 + c * 4/5 low -- paddw mm5, mm7 // b * 1/5 + c * 4/5 high -+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low -+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high - -- paddw mm1, round_values // + 128 -- paddw mm3, round_values // + 128 -+ paddw mm1, round_values // + 128 -+ paddw mm3, round_values // + 128 - -- psrlw mm1, 8 -- psrlw mm3, 8 -+ psrlw mm1, 8 -+ psrlw mm3, 8 - -- packuswb mm1, mm3 // des[2] -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -+ packuswb mm1, mm3 // des[2] -+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - -- paddw mm4, round_values // + 128 -- paddw mm5, round_values // + 128 -+ paddw mm4, round_values // + 128 -+ paddw mm5, round_values // + 128 - -- psrlw mm4, 8 -- psrlw mm5, 8 -+ psrlw mm4, 8 -+ psrlw mm5, 8 - -- packuswb mm4, mm5 // des[3] -- movq QWORD ptr [edi], mm4 // write des[3] -+ packuswb mm4, mm5 // des[3] -+ movq QWORD ptr [edi], mm4 // write des[3] - -- // mm0, mm2 --- Src[3] -+ // mm0, mm2 --- Src[3] - -- add edi, 8 -- add esi, 8 -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg last_vs_3_5_loop -- } -+ sub edx, 8 -+ jg last_vs_3_5_loop -+ } - } - - 
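The vertical-band kernels above (the 4-to-5 and 3-to-5 cases) build each new row as a weighted blend of two adjacent source rows, using fixed-point weights that sum to 256 and rounding with +128 followed by a right shift of 8, as the pmullw/paddw/psrlw comments indicate. As a rough scalar model of that per-pixel arithmetic — illustrative only, not part of the patch, with hypothetical names and example weights — the blend reduces to:

    /* dest[i] = (a[i]*wa + b[i]*wb + 128) >> 8, with wa + wb == 256,
     * e.g. 51/205 for a 1/5 + 4/5 tap or 102/154 for 2/5 + 3/5. */
    static void blend_rows_c(const unsigned char *a, const unsigned char *b,
                             unsigned char *dest, unsigned int width,
                             unsigned int wa, unsigned int wb) {
      unsigned int i;
      for (i = 0; i < width; i++)
        dest[i] = (unsigned char)((a[i] * wa + b[i] * wb + 128) >> 8);
    }

Each MMX iteration performs the same computation on eight pixels at once by unpacking bytes to words with punpcklbw/punpckhbw, multiplying with pmullw, and repacking with packuswb.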
/**************************************************************************** -@@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx - static - void vertical_band_1_2_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { - -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- vs_1_2_loop: -+ vs_1_2_loop: - -- movq mm0, [esi] // get Src[0] -- movq mm1, [esi + ecx * 2] // get Src[1] -+ movq mm0, [esi] // get Src[0] -+ movq mm1, [esi + ecx * 2] // get Src[1] - -- movq mm2, mm0 // make copy before unpack -- movq mm3, mm1 // make copy before unpack -+ movq mm2, mm0 // make copy before unpack -+ movq mm3, mm1 // make copy before unpack - -- punpcklbw mm0, mm7 // low Src[0] -- movq mm6, four_ones // mm6= 1, 1, 1, 1 -+ punpcklbw mm0, mm7 // low Src[0] -+ movq mm6, four_ones // mm6= 1, 1, 1, 1 - -- punpcklbw mm1, mm7 // low Src[1] -- paddw mm0, mm1 // low (a + b) -+ punpcklbw mm1, mm7 // low Src[1] -+ paddw mm0, mm1 // low (a + b) - -- punpckhbw mm2, mm7 // high Src[0] -- paddw mm0, mm6 // low (a + b + 1) -+ punpckhbw mm2, mm7 // high Src[0] -+ paddw mm0, mm6 // low (a + b + 1) - -- punpckhbw mm3, mm7 -- paddw mm2, mm3 // high (a + b ) -+ punpckhbw mm3, mm7 -+ paddw mm2, mm3 // high (a + b ) - -- psraw mm0, 1 // low (a + b +1 )/2 -- paddw mm2, mm6 // high (a + b + 1) -+ psraw mm0, 1 // low (a + b +1 )/2 -+ paddw mm2, mm6 // high (a + b + 1) - -- psraw mm2, 1 // high (a + b + 1)/2 -- packuswb mm0, mm2 // pack results -+ psraw mm2, 1 // high (a + b + 1)/2 -+ packuswb mm0, mm2 // pack results - -- movq [esi+ecx], mm0 // write out eight bytes -- add esi, 8 -+ movq [esi+ecx], mm0 // write out eight bytes -+ add esi, 8 - -- sub edx, 8 -- jg vs_1_2_loop -- } -+ sub edx, 8 -+ jg vs_1_2_loop -+ } - - } - -@@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx - static - void last_vertical_band_1_2_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- mov edx, dest_width // Loop counter -+ mov edx, dest_width // Loop counter - -- last_vs_1_2_loop: -+ last_vs_1_2_loop: - -- movq mm0, [esi] // get Src[0] -- movq [esi+ecx], mm0 // write out eight bytes -+ movq mm0, [esi] // get Src[0] -+ movq [esi+ecx], mm0 // write out eight bytes - -- add esi, 8 -- sub edx, 8 -+ add esi, 8 -+ sub edx, 8 - -- jg last_vs_1_2_loop -- } -+ jg last_vs_1_2_loop -+ } - } - - /**************************************************************************** -@@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx - static - void horizontal_line_1_2_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int 
dest_width -+) { -+ (void) dest_width; - -- __asm -- { -- mov esi, source -- mov edi, dest -+ __asm { -+ mov esi, source -+ mov edi, dest - -- pxor mm7, mm7 -- movq mm6, four_ones -+ pxor mm7, mm7 -+ movq mm6, four_ones - -- mov ecx, source_width -+ mov ecx, source_width - -- hs_1_2_loop: -+ hs_1_2_loop: - -- movq mm0, [esi] -- movq mm1, [esi+1] -+ movq mm0, [esi] -+ movq mm1, [esi+1] - -- movq mm2, mm0 -- movq mm3, mm1 -+ movq mm2, mm0 -+ movq mm3, mm1 - -- movq mm4, mm0 -- punpcklbw mm0, mm7 -+ movq mm4, mm0 -+ punpcklbw mm0, mm7 - -- punpcklbw mm1, mm7 -- paddw mm0, mm1 -+ punpcklbw mm1, mm7 -+ paddw mm0, mm1 - -- paddw mm0, mm6 -- punpckhbw mm2, mm7 -+ paddw mm0, mm6 -+ punpckhbw mm2, mm7 - -- punpckhbw mm3, mm7 -- paddw mm2, mm3 -+ punpckhbw mm3, mm7 -+ paddw mm2, mm3 - -- paddw mm2, mm6 -- psraw mm0, 1 -+ paddw mm2, mm6 -+ psraw mm0, 1 - -- psraw mm2, 1 -- packuswb mm0, mm2 -+ psraw mm2, 1 -+ packuswb mm0, mm2 - -- movq mm2, mm4 -- punpcklbw mm2, mm0 -+ movq mm2, mm4 -+ punpcklbw mm2, mm0 - -- movq [edi], mm2 -- punpckhbw mm4, mm0 -+ movq [edi], mm2 -+ punpckhbw mm4, mm0 - -- movq [edi+8], mm4 -- add esi, 8 -+ movq [edi+8], mm4 -+ add esi, 8 - -- add edi, 16 -- sub ecx, 8 -+ add edi, 16 -+ sub ecx, 8 - -- cmp ecx, 8 -- jg hs_1_2_loop -+ cmp ecx, 8 -+ jg hs_1_2_loop - - // last eight pixel - -- movq mm0, [esi] -- movq mm1, mm0 -+ movq mm0, [esi] -+ movq mm1, mm0 - -- movq mm2, mm0 -- movq mm3, mm1 -+ movq mm2, mm0 -+ movq mm3, mm1 - -- psrlq mm1, 8 -- psrlq mm3, 56 -+ psrlq mm1, 8 -+ psrlq mm3, 56 - -- psllq mm3, 56 -- por mm1, mm3 -+ psllq mm3, 56 -+ por mm1, mm3 - -- movq mm3, mm1 -- movq mm4, mm0 -+ movq mm3, mm1 -+ movq mm4, mm0 - -- punpcklbw mm0, mm7 -- punpcklbw mm1, mm7 -+ punpcklbw mm0, mm7 -+ punpcklbw mm1, mm7 - -- paddw mm0, mm1 -- paddw mm0, mm6 -+ paddw mm0, mm1 -+ paddw mm0, mm6 - -- punpckhbw mm2, mm7 -- punpckhbw mm3, mm7 -+ punpckhbw mm2, mm7 -+ punpckhbw mm3, mm7 - -- paddw mm2, mm3 -- paddw mm2, mm6 -+ paddw mm2, mm3 -+ paddw mm2, mm6 - -- psraw mm0, 1 -- psraw mm2, 1 -+ psraw mm0, 1 -+ psraw mm2, 1 - -- packuswb mm0, mm2 -- movq mm2, mm4 -+ packuswb mm0, mm2 -+ movq mm2, mm4 - -- punpcklbw mm2, mm0 -- movq [edi], mm2 -+ punpcklbw mm2, mm0 -+ movq [edi], mm2 - -- punpckhbw mm4, mm0 -- movq [edi+8], mm4 -- } -+ punpckhbw mm4, mm0 -+ movq [edi+8], mm4 -+ } - } - - -@@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, - static - void horizontal_line_5_4_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- /* -- unsigned i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- const unsigned char *src = source; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ /* -+ unsigned i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ const unsigned char *src = source; - -- (void) dest_width; -+ (void) dest_width; - -- for ( i=0; i>8); -- des[2] = ((c*128 + d*128 + 128)>>8); -- des[3] = ((d* 64 + e*192 + 128)>>8); -+ des[0] = a; -+ des[1] = ((b*192 + c* 64 + 128)>>8); -+ des[2] = ((c*128 + d*128 + 128)>>8); -+ des[3] = ((d* 64 + e*192 + 128)>>8); - -- src += 5; -- des += 4; -- } -- */ -- (void) dest_width; -+ src += 5; -+ des += 4; -+ } -+ */ -+ (void) dest_width; - -- __asm -- { -+ __asm { - -- mov esi, source ; -- mov edi, dest ; -+ mov esi, source; -+ mov edi, dest; - -- mov ecx, source_width ; -- movq mm5, const54_1 ; -+ mov ecx, source_width; 
-+ movq mm5, const54_1; - -- pxor mm7, mm7 ; -- movq mm6, const54_2 ; -+ pxor mm7, mm7; -+ movq mm6, const54_2; - -- movq mm4, round_values ; -- lea edx, [esi+ecx] ; -- horizontal_line_5_4_loop: -+ movq mm4, round_values; -+ lea edx, [esi+ecx]; -+ horizontal_line_5_4_loop: - -- movq mm0, QWORD PTR [esi] ; -- 00 01 02 03 04 05 06 07 -- movq mm1, mm0 ; -- 00 01 02 03 04 05 06 07 -+ movq mm0, QWORD PTR [esi]; -+ 00 01 02 03 04 05 06 07 -+ movq mm1, mm0; -+ 00 01 02 03 04 05 06 07 - -- psrlq mm0, 8 ; -- 01 02 03 04 05 06 07 xx -- punpcklbw mm1, mm7 ; -- xx 00 xx 01 xx 02 xx 03 -+ psrlq mm0, 8; -+ 01 02 03 04 05 06 07 xx -+ punpcklbw mm1, mm7; -+ xx 00 xx 01 xx 02 xx 03 - -- punpcklbw mm0, mm7 ; -- xx 01 xx 02 xx 03 xx 04 -- pmullw mm1, mm5 -+ punpcklbw mm0, mm7; -+ xx 01 xx 02 xx 03 xx 04 -+ pmullw mm1, mm5 - -- pmullw mm0, mm6 -- add esi, 5 -+ pmullw mm0, mm6 -+ add esi, 5 - -- add edi, 4 -- paddw mm1, mm0 -+ add edi, 4 -+ paddw mm1, mm0 - -- paddw mm1, mm4 -- psrlw mm1, 8 -+ paddw mm1, mm4 -+ psrlw mm1, 8 - -- cmp esi, edx -- packuswb mm1, mm7 -+ cmp esi, edx -+ packuswb mm1, mm7 - -- movd DWORD PTR [edi-4], mm1 -+ movd DWORD PTR [edi-4], mm1 - -- jl horizontal_line_5_4_loop -+ jl horizontal_line_5_4_loop - -- } -+ } - - } - __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; -@@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, - __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; - - static --void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -+void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - -- __asm -- { -- push ebx -+ __asm { -+ push ebx - -- mov esi, source // Get the source and destination pointer -- mov ecx, src_pitch // Get the pitch size -+ mov esi, source // Get the source and destination pointer -+ mov ecx, src_pitch // Get the pitch size - -- mov edi, dest // tow lines below -- pxor mm7, mm7 // clear out mm7 -+ mov edi, dest // tow lines below -+ pxor mm7, mm7 // clear out mm7 - -- mov edx, dest_pitch // Loop counter -- mov ebx, dest_width -+ mov edx, dest_pitch // Loop counter -+ mov ebx, dest_width - -- vs_5_4_loop: -+ vs_5_4_loop: - -- movd mm0, DWORD ptr [esi] // src[0]; -- movd mm1, DWORD ptr [esi+ecx] // src[1]; -+ movd mm0, DWORD ptr [esi] // src[0]; -+ movd mm1, DWORD ptr [esi+ecx] // src[1]; - -- movd mm2, DWORD ptr [esi+ecx*2] -- lea eax, [esi+ecx*2] // -+ movd mm2, DWORD ptr [esi+ecx*2] -+ lea eax, [esi+ecx*2] // - -- punpcklbw mm1, mm7 -- punpcklbw mm2, mm7 -+ punpcklbw mm1, mm7 -+ punpcklbw mm2, mm7 - -- movq mm3, mm2 -- pmullw mm1, three_fourths -+ movq mm3, mm2 -+ pmullw mm1, three_fourths - -- pmullw mm2, one_fourths -- movd mm4, [eax+ecx] -+ pmullw mm2, one_fourths -+ movd mm4, [eax+ecx] - -- pmullw mm3, two_fourths -- punpcklbw mm4, mm7 -+ pmullw mm3, two_fourths -+ punpcklbw mm4, mm7 - -- movq mm5, mm4 -- pmullw mm4, two_fourths -+ movq mm5, mm4 -+ pmullw mm4, two_fourths - -- paddw mm1, mm2 -- movd mm6, [eax+ecx*2] -+ paddw mm1, mm2 -+ movd mm6, [eax+ecx*2] - -- pmullw mm5, one_fourths -- paddw mm1, round_values; -+ pmullw mm5, one_fourths -+ paddw mm1, round_values; - -- paddw mm3, mm4 -- psrlw mm1, 8 -+ paddw mm3, mm4 -+ psrlw mm1, 8 - -- punpcklbw mm6, mm7 -- paddw mm3, round_values -+ punpcklbw mm6, mm7 -+ paddw mm3, 
round_values - -- pmullw mm6, three_fourths -- psrlw mm3, 8 -+ pmullw mm6, three_fourths -+ psrlw mm3, 8 - -- packuswb mm1, mm7 -- packuswb mm3, mm7 -+ packuswb mm1, mm7 -+ packuswb mm3, mm7 - -- movd DWORD PTR [edi], mm0 -- movd DWORD PTR [edi+edx], mm1 -+ movd DWORD PTR [edi], mm0 -+ movd DWORD PTR [edi+edx], mm1 - - -- paddw mm5, mm6 -- movd DWORD PTR [edi+edx*2], mm3 -+ paddw mm5, mm6 -+ movd DWORD PTR [edi+edx*2], mm3 - -- lea eax, [edi+edx*2] -- paddw mm5, round_values -+ lea eax, [edi+edx*2] -+ paddw mm5, round_values - -- psrlw mm5, 8 -- add edi, 4 -+ psrlw mm5, 8 -+ add edi, 4 - -- packuswb mm5, mm7 -- movd DWORD PTR [eax+edx], mm5 -+ packuswb mm5, mm7 -+ movd DWORD PTR [eax+edx], mm5 - -- add esi, 4 -- sub ebx, 4 -+ add esi, 4 -+ sub ebx, 4 - -- jg vs_5_4_loop -+ jg vs_5_4_loop - -- pop ebx -- } -+ pop ebx -+ } - } - - -@@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, - static - void horizontal_line_5_3_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { - -- (void) dest_width; -- __asm -- { -+ (void) dest_width; -+ __asm { - -- mov esi, source ; -- mov edi, dest ; -+ mov esi, source; -+ mov edi, dest; - -- mov ecx, source_width ; -- movq mm5, const53_1 ; -+ mov ecx, source_width; -+ movq mm5, const53_1; - -- pxor mm7, mm7 ; -- movq mm6, const53_2 ; -+ pxor mm7, mm7; -+ movq mm6, const53_2; - -- movq mm4, round_values ; -- lea edx, [esi+ecx-5] ; -- horizontal_line_5_3_loop: -+ movq mm4, round_values; -+ lea edx, [esi+ecx-5]; -+ horizontal_line_5_3_loop: - -- movq mm0, QWORD PTR [esi] ; -- 00 01 02 03 04 05 06 07 -- movq mm1, mm0 ; -- 00 01 02 03 04 05 06 07 -+ movq mm0, QWORD PTR [esi]; -+ 00 01 02 03 04 05 06 07 -+ movq mm1, mm0; -+ 00 01 02 03 04 05 06 07 - -- psllw mm0, 8 ; -- xx 00 xx 02 xx 04 xx 06 -- psrlw mm1, 8 ; -- 01 xx 03 xx 05 xx 07 xx -+ psllw mm0, 8; -+ xx 00 xx 02 xx 04 xx 06 -+ psrlw mm1, 8; -+ 01 xx 03 xx 05 xx 07 xx - -- psrlw mm0, 8 ; -- 00 xx 02 xx 04 xx 06 xx -- psllq mm1, 16 ; -- xx xx 01 xx 03 xx 05 xx -+ psrlw mm0, 8; -+ 00 xx 02 xx 04 xx 06 xx -+ psllq mm1, 16; -+ xx xx 01 xx 03 xx 05 xx - -- pmullw mm0, mm6 -+ pmullw mm0, mm6 - -- pmullw mm1, mm5 -- add esi, 5 -+ pmullw mm1, mm5 -+ add esi, 5 - -- add edi, 3 -- paddw mm1, mm0 -+ add edi, 3 -+ paddw mm1, mm0 - -- paddw mm1, mm4 -- psrlw mm1, 8 -+ paddw mm1, mm4 -+ psrlw mm1, 8 - -- cmp esi, edx -- packuswb mm1, mm7 -+ cmp esi, edx -+ packuswb mm1, mm7 - -- movd DWORD PTR [edi-3], mm1 -- jl horizontal_line_5_3_loop -+ movd DWORD PTR [edi-3], mm1 -+ jl horizontal_line_5_3_loop - --//exit condition -- movq mm0, QWORD PTR [esi] ; -- 00 01 02 03 04 05 06 07 -- movq mm1, mm0 ; -- 00 01 02 03 04 05 06 07 -+// exit condition -+ movq mm0, QWORD PTR [esi]; -+ 00 01 02 03 04 05 06 07 -+ movq mm1, mm0; -+ 00 01 02 03 04 05 06 07 - -- psllw mm0, 8 ; -- xx 00 xx 02 xx 04 xx 06 -- psrlw mm1, 8 ; -- 01 xx 03 xx 05 xx 07 xx -+ psllw mm0, 8; -+ xx 00 xx 02 xx 04 xx 06 -+ psrlw mm1, 8; -+ 01 xx 03 xx 05 xx 07 xx - -- psrlw mm0, 8 ; -- 00 xx 02 xx 04 xx 06 xx -- psllq mm1, 16 ; -- xx xx 01 xx 03 xx 05 xx -+ psrlw mm0, 8; -+ 00 xx 02 xx 04 xx 06 xx -+ psllq mm1, 16; -+ xx xx 01 xx 03 xx 05 xx - -- pmullw mm0, mm6 -+ pmullw mm0, mm6 - -- pmullw mm1, mm5 -- paddw mm1, mm0 -+ pmullw mm1, mm5 -+ paddw mm1, mm0 - -- paddw mm1, mm4 -- psrlw mm1, 8 -+ paddw mm1, mm4 -+ psrlw mm1, 8 
- -- packuswb mm1, mm7 -- movd eax, mm1 -+ packuswb mm1, mm7 -+ movd eax, mm1 - -- mov edx, eax -- shr edx, 16 -+ mov edx, eax -+ shr edx, 16 - -- mov WORD PTR[edi], ax -- mov BYTE PTR[edi+2], dl -+ mov WORD PTR[edi], ax -+ mov BYTE PTR[edi+2], dl - -- } -+ } - - } - -@@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85 - __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; - - static --void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -+void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - -- __asm -- { -- push ebx -+ __asm { -+ push ebx - -- mov esi, source // Get the source and destination pointer -- mov ecx, src_pitch // Get the pitch size -+ mov esi, source // Get the source and destination pointer -+ mov ecx, src_pitch // Get the pitch size - -- mov edi, dest // tow lines below -- pxor mm7, mm7 // clear out mm7 -+ mov edi, dest // tow lines below -+ pxor mm7, mm7 // clear out mm7 - -- mov edx, dest_pitch // Loop counter -- movq mm5, one_thirds -+ mov edx, dest_pitch // Loop counter -+ movq mm5, one_thirds - -- movq mm6, two_thirds -- mov ebx, dest_width; -+ movq mm6, two_thirds -+ mov ebx, dest_width; - -- vs_5_3_loop: -+ vs_5_3_loop: - -- movd mm0, DWORD ptr [esi] // src[0]; -- movd mm1, DWORD ptr [esi+ecx] // src[1]; -+ movd mm0, DWORD ptr [esi] // src[0]; -+ movd mm1, DWORD ptr [esi+ecx] // src[1]; - -- movd mm2, DWORD ptr [esi+ecx*2] -- lea eax, [esi+ecx*2] // -+ movd mm2, DWORD ptr [esi+ecx*2] -+ lea eax, [esi+ecx*2] // - -- punpcklbw mm1, mm7 -- punpcklbw mm2, mm7 -+ punpcklbw mm1, mm7 -+ punpcklbw mm2, mm7 - -- pmullw mm1, mm5 -- pmullw mm2, mm6 -+ pmullw mm1, mm5 -+ pmullw mm2, mm6 - -- movd mm3, DWORD ptr [eax+ecx] -- movd mm4, DWORD ptr [eax+ecx*2] -+ movd mm3, DWORD ptr [eax+ecx] -+ movd mm4, DWORD ptr [eax+ecx*2] - -- punpcklbw mm3, mm7 -- punpcklbw mm4, mm7 -+ punpcklbw mm3, mm7 -+ punpcklbw mm4, mm7 - -- pmullw mm3, mm6 -- pmullw mm4, mm5 -+ pmullw mm3, mm6 -+ pmullw mm4, mm5 - - -- movd DWORD PTR [edi], mm0 -- paddw mm1, mm2 -+ movd DWORD PTR [edi], mm0 -+ paddw mm1, mm2 - -- paddw mm1, round_values -- psrlw mm1, 8 -+ paddw mm1, round_values -+ psrlw mm1, 8 - -- packuswb mm1, mm7 -- paddw mm3, mm4 -+ packuswb mm1, mm7 -+ paddw mm3, mm4 - -- paddw mm3, round_values -- movd DWORD PTR [edi+edx], mm1 -+ paddw mm3, round_values -+ movd DWORD PTR [edi+edx], mm1 - -- psrlw mm3, 8 -- packuswb mm3, mm7 -+ psrlw mm3, 8 -+ packuswb mm3, mm7 - -- movd DWORD PTR [edi+edx*2], mm3 -+ movd DWORD PTR [edi+edx*2], mm3 - - -- add edi, 4 -- add esi, 4 -+ add edi, 4 -+ add esi, 4 - -- sub ebx, 4 -- jg vs_5_3_loop -+ sub ebx, 4 -+ jg vs_5_3_loop - -- pop ebx -- } -+ pop ebx -+ } - } - - -@@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, - static - void horizontal_line_2_1_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -- (void) source_width; -- __asm -- { -- mov esi, source -- mov edi, dest -- -- pxor mm7, mm7 -- mov ecx, dest_width -- -- xor edx, edx -- hs_2_1_loop: -- -- movq mm0, [esi+edx*2] -- psllw mm0, 8 -- -- psrlw mm0, 8 -- packuswb mm0, mm7 -- -- movd DWORD Ptr [edi+edx], mm0; -- add edx, 4 -- -- cmp edx, ecx -- jl hs_2_1_loop -- -- } 
-+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void) dest_width; -+ (void) source_width; -+ __asm { -+ mov esi, source -+ mov edi, dest -+ -+ pxor mm7, mm7 -+ mov ecx, dest_width -+ -+ xor edx, edx -+ hs_2_1_loop: -+ -+ movq mm0, [esi+edx*2] -+ psllw mm0, 8 -+ -+ psrlw mm0, 8 -+ packuswb mm0, mm7 -+ -+ movd DWORD Ptr [edi+edx], mm0; -+ add edx, 4 -+ -+ cmp edx, ecx -+ jl hs_2_1_loop -+ -+ } - } - - - - static --void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- (void) dest_pitch; -- (void) src_pitch; -- vpx_memcpy(dest, source, dest_width); -+void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { -+ (void) dest_pitch; -+ (void) src_pitch; -+ vpx_memcpy(dest, source, dest_width); - } - - -@@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4 - __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; - - static --void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -+void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - -- (void) dest_pitch; -- __asm -- { -- mov esi, source -- mov edi, dest -+ (void) dest_pitch; -+ __asm { -+ mov esi, source -+ mov edi, dest - -- mov eax, src_pitch -- mov edx, dest_width -+ mov eax, src_pitch -+ mov edx, dest_width - -- pxor mm7, mm7 -- sub esi, eax //back one line -+ pxor mm7, mm7 -+ sub esi, eax // back one line - - -- lea ecx, [esi+edx]; -- movq mm6, round_values; -+ lea ecx, [esi+edx]; -+ movq mm6, round_values; - -- movq mm5, three_sixteenths; -- movq mm4, ten_sixteenths; -+ movq mm5, three_sixteenths; -+ movq mm4, ten_sixteenths; - -- vs_2_1_i_loop: -- movd mm0, [esi] // -- movd mm1, [esi+eax] // -+ vs_2_1_i_loop: -+ movd mm0, [esi] // -+ movd mm1, [esi+eax] // - -- movd mm2, [esi+eax*2] // -- punpcklbw mm0, mm7 -+ movd mm2, [esi+eax*2] // -+ punpcklbw mm0, mm7 - -- pmullw mm0, mm5 -- punpcklbw mm1, mm7 -+ pmullw mm0, mm5 -+ punpcklbw mm1, mm7 - -- pmullw mm1, mm4 -- punpcklbw mm2, mm7 -+ pmullw mm1, mm4 -+ punpcklbw mm2, mm7 - -- pmullw mm2, mm5 -- paddw mm0, round_values -+ pmullw mm2, mm5 -+ paddw mm0, round_values - -- paddw mm1, mm2 -- paddw mm0, mm1 -+ paddw mm1, mm2 -+ paddw mm0, mm1 - -- psrlw mm0, 8 -- packuswb mm0, mm7 -+ psrlw mm0, 8 -+ packuswb mm0, mm7 - -- movd DWORD PTR [edi], mm0 -- add esi, 4 -+ movd DWORD PTR [edi], mm0 -+ add esi, 4 - -- add edi, 4; -- cmp esi, ecx -- jl vs_2_1_i_loop -+ add edi, 4; -+ cmp esi, ecx -+ jl vs_2_1_i_loop - -- } -+ } - } - - - - void --register_mmxscalers(void) --{ -- vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; -- vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; -- vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; -- vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; -- vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; -- vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; -- vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; -- vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; -- vp8_last_vertical_band_4_5_scale = 
last_vertical_band_4_5_scale_mmx; -- -- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -- -- -- -- vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; -- vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; -- vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; -- vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; -- vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; -- vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; -- vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; -+register_mmxscalers(void) { -+ vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; -+ vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; -+ vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; -+ vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; -+ vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; -+ vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; -+ vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; -+ vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; -+ vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; -+ -+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -+ -+ -+ -+ vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; -+ vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; -+ vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; -+ vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; -+ vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; -+ vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; -+ vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; - - - -diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c -index 19e61c3..98913d1 100644 ---- a/vpx_scale/win32/scalesystemdependent.c -+++ b/vpx_scale/win32/scalesystemdependent.c -@@ -46,46 +46,42 @@ extern void register_mmxscalers(void); - * - ****************************************************************************/ - void --vp8_scale_machine_specific_config(void) --{ -- // If MMX supported then set to use MMX versions of functions else -- // use original 'C' versions. -- int mmx_enabled; -- int xmm_enabled; -- int wmt_enabled; -+vp8_scale_machine_specific_config(void) { -+ // If MMX supported then set to use MMX versions of functions else -+ // use original 'C' versions. 
-+ int mmx_enabled; -+ int xmm_enabled; -+ int wmt_enabled; - -- vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); -+ vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - -- if (mmx_enabled || xmm_enabled || wmt_enabled) -- { -- register_mmxscalers(); -- } -- else -- { -- vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; -- vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; -- vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; -- vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; -- vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; -- vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; -- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -- vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; -- vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; -- vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; -+ if (mmx_enabled || xmm_enabled || wmt_enabled) { -+ register_mmxscalers(); -+ } else { -+ vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; -+ vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; -+ vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; -+ vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; -+ vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; -+ vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; -+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -+ vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; -+ vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; -+ vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - - -- vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; -- vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; -- vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; -- vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; -- vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; -- vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; -- vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; -+ vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; -+ vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; -+ vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; -+ vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; -+ vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; -+ vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; -+ vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - -- } -+ } - } -diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h 
-index 800f700..6a8a1fc 100644 ---- a/vpx_scale/yv12config.h -+++ b/vpx_scale/yv12config.h -@@ -16,54 +16,54 @@ extern "C" - { - #endif - --#define VP7BORDERINPIXELS 48 - #define VP8BORDERINPIXELS 32 -+#define VP9BORDERINPIXELS 64 -+#define VP9_INTERP_EXTEND 4 - -- /************************************* -- For INT_YUV: -+ /************************************* -+ For INT_YUV: - -- Y = (R+G*2+B)/4; -- U = (R-B)/2; -- V = (G*2 - R - B)/4; -- And -- R = Y+U-V; -- G = Y+V; -- B = Y-U-V; -- ************************************/ -- typedef enum -- { -- REG_YUV = 0, /* Regular yuv */ -- INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ -- } -- YUV_TYPE; -+ Y = (R+G*2+B)/4; -+ U = (R-B)/2; -+ V = (G*2 - R - B)/4; -+ And -+ R = Y+U-V; -+ G = Y+V; -+ B = Y-U-V; -+ ************************************/ -+ typedef enum -+ { -+ REG_YUV = 0, /* Regular yuv */ -+ INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ -+ } -+ YUV_TYPE; - -- typedef struct yv12_buffer_config -- { -- int y_width; -- int y_height; -- int y_stride; --/* int yinternal_width; */ -+ typedef struct yv12_buffer_config { -+ int y_width; -+ int y_height; -+ int y_stride; -+ /* int yinternal_width; */ - -- int uv_width; -- int uv_height; -- int uv_stride; --/* int uvinternal_width; */ -+ int uv_width; -+ int uv_height; -+ int uv_stride; -+ /* int uvinternal_width; */ - -- unsigned char *y_buffer; -- unsigned char *u_buffer; -- unsigned char *v_buffer; -+ unsigned char *y_buffer; -+ unsigned char *u_buffer; -+ unsigned char *v_buffer; - -- unsigned char *buffer_alloc; -- int border; -- int frame_size; -- YUV_TYPE clrtype; -+ unsigned char *buffer_alloc; -+ int border; -+ int frame_size; -+ YUV_TYPE clrtype; - -- int corrupted; -- int flags; -- } YV12_BUFFER_CONFIG; -+ int corrupted; -+ int flags; -+ } YV12_BUFFER_CONFIG; - -- int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); -- int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); -+ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); -+ int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); - - #ifdef __cplusplus - } -diff --git a/vpxdec.c b/vpxdec.c -index 4482f3d..9b728bf 100644 ---- a/vpxdec.c -+++ b/vpxdec.c -@@ -52,7 +52,7 @@ static const char *exec_name; - static const struct - { - char const *name; -- const vpx_codec_iface_t *iface; -+ vpx_codec_iface_t *iface; - unsigned int fourcc; - unsigned int fourcc_mask; - } ifaces[] = -@@ -152,7 +152,8 @@ static void usage_exit() - "write to. If the\n argument does not include any escape " - "characters, the output will be\n written to a single file. " - "Otherwise, the filename will be calculated by\n expanding " -- "the following escape characters:\n" -+ "the following escape characters:\n"); -+ fprintf(stderr, - "\n\t%%w - Frame width" - "\n\t%%h - Frame height" - "\n\t%% - Frame number, zero padded to places (1..9)" -@@ -356,7 +357,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) - } - else - { -- if(fwrite(buf, 1, len, out)); -+ (void) fwrite(buf, 1, len, out); - } - } - -@@ -502,7 +503,7 @@ nestegg_seek_cb(int64_t offset, int whence, void * userdata) - case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break; - case NESTEGG_SEEK_END: whence = SEEK_END; break; - }; -- return fseek(userdata, offset, whence)? -1 : 0; -+ return fseek(userdata, (long)offset, whence)? 
-1 : 0; - } - - -@@ -559,7 +560,7 @@ webm_guess_framerate(struct input_ctx *input, - goto fail; - - *fps_num = (i - 1) * 1000000; -- *fps_den = tstamp / 1000; -+ *fps_den = (unsigned int)(tstamp / 1000); - return 0; - fail: - nestegg_destroy(input->nestegg_ctx); -@@ -580,10 +581,10 @@ file_is_webm(struct input_ctx *input, - unsigned int i, n; - int track_type = -1; - -- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, -- input->infile}; -+ nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0}; - nestegg_video_params params; - -+ io.userdata = input->infile; - if(nestegg_init(&input->nestegg_ctx, io, NULL)) - goto fail; - -@@ -647,7 +648,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, - { - size_t pat_len; - -- // parse the pattern -+ /* parse the pattern */ - q[q_len - 1] = '\0'; - switch(p[1]) - { -@@ -677,7 +678,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, - { - size_t copy_len; - -- // copy the next segment -+ /* copy the next segment */ - if(!next_pat) - copy_len = strlen(p); - else -@@ -922,7 +923,7 @@ int main(int argc, const char **argv_) - p = strchr(p, '%'); - if(p && p[1] >= '1' && p[1] <= '9') - { -- // pattern contains sequence number, so it's not unique. -+ /* pattern contains sequence number, so it's not unique. */ - single_file = 0; - break; - } -@@ -962,7 +963,8 @@ int main(int argc, const char **argv_) - That will have to wait until these tools support WebM natively.*/ - sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", - "420jpeg", width, height, fps_num, fps_den, 'p'); -- out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5); -+ out_put(out, (unsigned char *)buffer, -+ (unsigned int)strlen(buffer), do_md5); - } - - /* Try to determine the codec from the fourcc. 
*/ -@@ -1040,7 +1042,7 @@ int main(int argc, const char **argv_) - - vpx_usec_timer_start(&timer); - -- if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0)) -+ if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) - { - const char *detail = vpx_codec_error_detail(&decoder); - fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); -@@ -1052,7 +1054,7 @@ int main(int argc, const char **argv_) - } - - vpx_usec_timer_mark(&timer); -- dx_time += vpx_usec_timer_elapsed(&timer); -+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); - - ++frame_in; - -@@ -1064,9 +1066,14 @@ int main(int argc, const char **argv_) - } - frames_corrupted += corrupted; - -+ vpx_usec_timer_start(&timer); -+ - if ((img = vpx_codec_get_frame(&decoder, &iter))) - ++frame_out; - -+ vpx_usec_timer_mark(&timer); -+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); -+ - if (progress) - show_progress(frame_in, frame_out, dx_time); - -diff --git a/vpxenc.c b/vpxenc.c -index d32b21b..c9547ea 100644 ---- a/vpxenc.c -+++ b/vpxenc.c -@@ -54,11 +54,7 @@ typedef __int64 off_t; - #define off_t off64_t - #endif - --#if defined(_MSC_VER) --#define LITERALU64(n) n --#else --#define LITERALU64(n) n##LLU --#endif -+#define LITERALU64(hi,lo) ((((uint64_t)hi)<<32)|lo) - - /* We should use 32-bit file operations in WebM file format - * when building ARM executable file (.axf) with RVCT */ -@@ -68,12 +64,28 @@ typedef long off_t; - #define ftello ftell - #endif - -+/* Swallow warnings about unused results of fread/fwrite */ -+static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, -+ FILE *stream) -+{ -+ return fread(ptr, size, nmemb, stream); -+} -+#define fread wrap_fread -+ -+static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, -+ FILE *stream) -+{ -+ return fwrite(ptr, size, nmemb, stream); -+} -+#define fwrite wrap_fwrite -+ -+ - static const char *exec_name; - - static const struct codec_item - { - char const *name; -- const vpx_codec_iface_t *iface; -+ vpx_codec_iface_t *iface; - unsigned int fourcc; - } codecs[] = - { -@@ -245,7 +257,7 @@ void stats_write(stats_io_t *stats, const void *pkt, size_t len) - { - if (stats->file) - { -- if(fwrite(pkt, 1, len, stats->file)); -+ (void) fwrite(pkt, 1, len, stats->file); - } - else - { -@@ -338,7 +350,7 @@ static int read_frame(struct input_state *input, vpx_image_t *img) - * write_ivf_frame_header() for documentation on the frame header - * layout. 
- */ -- if(fread(junk, 1, IVF_FRAME_HDR_SZ, f)); -+ (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f); - } - - for (plane = 0; plane < 3; plane++) -@@ -468,7 +480,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header + 24, frame_cnt); /* length */ - mem_put_le32(header + 28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - -@@ -482,18 +494,18 @@ static void write_ivf_frame_header(FILE *outfile, - return; - - pts = pkt->data.frame.pts; -- mem_put_le32(header, pkt->data.frame.sz); -+ mem_put_le32(header, (int)pkt->data.frame.sz); - mem_put_le32(header + 4, pts & 0xFFFFFFFF); - mem_put_le32(header + 8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - - static void write_ivf_frame_size(FILE *outfile, size_t size) - { - char header[4]; -- mem_put_le32(header, size); -- fwrite(header, 1, 4, outfile); -+ mem_put_le32(header, (int)size); -+ (void) fwrite(header, 1, 4, outfile); - } - - -@@ -541,13 +553,13 @@ struct EbmlGlobal - - void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) - { -- if(fwrite(buffer_in, 1, len, glob->stream)); -+ (void) fwrite(buffer_in, 1, len, glob->stream); - } - - #define WRITE_BUFFER(s) \ - for(i = len-1; i>=0; i--)\ - { \ -- x = *(const s *)buffer_in >> (i * CHAR_BIT); \ -+ x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \ - Ebml_Write(glob, &x, 1); \ - } - void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) -@@ -597,9 +609,9 @@ static void - Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, - unsigned long class_id) - { -- //todo this is always taking 8 bytes, this may need later optimization -- //this is a key that says length unknown -- uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF); -+ /* todo this is always taking 8 bytes, this may need later optimization */ -+ /* this is a key that says length unknown */ -+ uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF); - - Ebml_WriteID(glob, class_id); - *ebmlLoc = ftello(glob->stream); -@@ -617,7 +629,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) - - /* Calculate the size of this element */ - size = pos - *ebmlLoc - 8; -- size |= LITERALU64(0x0100000000000000); -+ size |= LITERALU64(0x01000000,0x00000000); - - /* Seek back to the beginning of the element and write the new size */ - fseeko(glob->stream, *ebmlLoc, SEEK_SET); -@@ -664,7 +676,7 @@ write_webm_seek_info(EbmlGlobal *ebml) - Ebml_EndSubElement(ebml, &start); - } - { -- //segment info -+ /* segment info */ - EbmlLoc startInfo; - uint64_t frame_time; - char version_string[64]; -@@ -686,7 +698,7 @@ write_webm_seek_info(EbmlGlobal *ebml) - Ebml_StartSubElement(ebml, &startInfo, Info); - Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); - Ebml_SerializeFloat(ebml, Segment_Duration, -- ebml->last_pts_ms + frame_time); -+ (double)(ebml->last_pts_ms + frame_time)); - Ebml_SerializeString(ebml, 0x4D80, version_string); - Ebml_SerializeString(ebml, 0x5741, version_string); - Ebml_EndSubElement(ebml, &startInfo); -@@ -704,16 +716,16 @@ write_webm_file_header(EbmlGlobal *glob, - EbmlLoc start; - Ebml_StartSubElement(glob, &start, EBML); - Ebml_SerializeUnsigned(glob, EBMLVersion, 1); -- Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version -- Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length -- Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length -- Ebml_SerializeString(glob, DocType, 
"webm"); //Doc Type -- Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version -- Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version -+ Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); -+ Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); -+ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); -+ Ebml_SerializeString(glob, DocType, "webm"); -+ Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); -+ Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); - Ebml_EndSubElement(glob, &start); - } - { -- Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment -+ Ebml_StartSubElement(glob, &glob->startSegment, Segment); - glob->position_reference = ftello(glob->stream); - glob->framerate = *fps; - write_webm_seek_info(glob); -@@ -731,7 +743,7 @@ write_webm_file_header(EbmlGlobal *glob, - Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); - glob->track_id_pos = ftello(glob->stream); - Ebml_SerializeUnsigned32(glob, TrackUID, trackID); -- Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1 -+ Ebml_SerializeUnsigned(glob, TrackType, 1); - Ebml_SerializeString(glob, CodecID, "V_VP8"); - { - unsigned int pixelWidth = cfg->g_w; -@@ -744,13 +756,13 @@ write_webm_file_header(EbmlGlobal *glob, - Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); - Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); - Ebml_SerializeFloat(glob, FrameRate, frameRate); -- Ebml_EndSubElement(glob, &videoStart); //Video -+ Ebml_EndSubElement(glob, &videoStart); - } -- Ebml_EndSubElement(glob, &start); //Track Entry -+ Ebml_EndSubElement(glob, &start); /* Track Entry */ - } - Ebml_EndSubElement(glob, &trackStart); - } -- // segment element is open -+ /* segment element is open */ - } - } - -@@ -778,7 +790,7 @@ write_webm_block(EbmlGlobal *glob, - if(pts_ms - glob->cluster_timecode > SHRT_MAX) - start_cluster = 1; - else -- block_timecode = pts_ms - glob->cluster_timecode; -+ block_timecode = (unsigned short)pts_ms - glob->cluster_timecode; - - is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); - if(start_cluster || is_keyframe) -@@ -789,9 +801,9 @@ write_webm_block(EbmlGlobal *glob, - /* Open the new cluster */ - block_timecode = 0; - glob->cluster_open = 1; -- glob->cluster_timecode = pts_ms; -+ glob->cluster_timecode = (uint32_t)pts_ms; - glob->cluster_pos = ftello(glob->stream); -- Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster -+ Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */ - Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode); - - /* Save a cue point if this is a keyframe. 
*/ -@@ -816,7 +828,7 @@ write_webm_block(EbmlGlobal *glob, - /* Write the Simple Block */ - Ebml_WriteID(glob, SimpleBlock); - -- block_length = pkt->data.frame.sz + 4; -+ block_length = (unsigned long)pkt->data.frame.sz + 4; - block_length |= 0x10000000; - Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); - -@@ -833,7 +845,7 @@ write_webm_block(EbmlGlobal *glob, - flags |= 0x08; - Ebml_Write(glob, &flags, 1); - -- Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz); -+ Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz); - } - - -@@ -865,7 +877,6 @@ write_webm_file_footer(EbmlGlobal *glob, long hash) - Ebml_SerializeUnsigned(glob, CueTrack, 1); - Ebml_SerializeUnsigned64(glob, CueClusterPosition, - cue->loc - glob->position_reference); -- //Ebml_SerializeUnsigned(glob, CueBlockNumber, cue->blockNumber); - Ebml_EndSubElement(glob, &start); - } - Ebml_EndSubElement(glob, &start); -@@ -942,7 +953,7 @@ static double vp8_mse2psnr(double Samples, double Peak, double Mse) - if ((double)Mse > 0.0) - psnr = 10.0 * log10(Peak * Peak * Samples / Mse); - else -- psnr = 60; // Limit to prevent / 0 -+ psnr = 60; /* Limit to prevent / 0 */ - - if (psnr > 60) - psnr = 60; -@@ -978,6 +989,8 @@ static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, - "Use Good Quality Deadline"); - static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0, - "Use Realtime Quality Deadline"); -+static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, -+ "Do not print encode progress"); - static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, - "Show encoder parameters"); - static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0, -@@ -997,7 +1010,7 @@ static const arg_def_t *main_args[] = - &debugmode, - &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, - &best_dl, &good_dl, &rt_dl, -- &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, -+ &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, - NULL - }; - -@@ -1225,7 +1238,7 @@ static int merge_hist_buckets(struct hist_bucket *bucket, - { - int last_bucket = buckets - 1; - -- // merge the small bucket with an adjacent one. -+ /* merge the small bucket with an adjacent one. 
*/ - if(small_bucket == 0) - merge_bucket = 1; - else if(small_bucket == last_bucket) -@@ -1325,7 +1338,7 @@ static void show_histogram(const struct hist_bucket *bucket, - int j; - float pct; - -- pct = 100.0 * (float)bucket[i].count / (float)total; -+ pct = (float)(100.0 * bucket[i].count / total); - len = HIST_BAR_MAX * bucket[i].count / scale; - if(len < 1) - len = 1; -@@ -1393,7 +1406,7 @@ static void init_rate_histogram(struct rate_hist *hist, - */ - hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000; - -- // prevent division by zero -+ /* prevent division by zero */ - if (hist->samples == 0) - hist->samples=1; - -@@ -1427,7 +1440,7 @@ static void update_rate_histogram(struct rate_hist *hist, - - idx = hist->frames++ % hist->samples; - hist->pts[idx] = now; -- hist->sz[idx] = pkt->data.frame.sz; -+ hist->sz[idx] = (int)pkt->data.frame.sz; - - if(now < cfg->rc_buf_initial_sz) - return; -@@ -1449,15 +1462,15 @@ static void update_rate_histogram(struct rate_hist *hist, - return; - - avg_bitrate = sum_sz * 8 * 1000 / (now - then); -- idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000); -+ idx = (int)(avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000)); - if(idx < 0) - idx = 0; - if(idx > RATE_BINS-1) - idx = RATE_BINS-1; - if(hist->bucket[idx].low > avg_bitrate) -- hist->bucket[idx].low = avg_bitrate; -+ hist->bucket[idx].low = (int)avg_bitrate; - if(hist->bucket[idx].high < avg_bitrate) -- hist->bucket[idx].high = avg_bitrate; -+ hist->bucket[idx].high = (int)avg_bitrate; - hist->bucket[idx].count++; - hist->total++; - } -@@ -1495,6 +1508,7 @@ struct global_config - int usage; - int deadline; - int use_i420; -+ int quiet; - int verbose; - int limit; - int show_psnr; -@@ -1619,6 +1633,8 @@ static void parse_global_config(struct global_config *global, char **argv) - global->use_i420 = 0; - else if (arg_match(&arg, &use_i420, argi)) - global->use_i420 = 1; -+ else if (arg_match(&arg, &quietarg, argi)) -+ global->quiet = 1; - else if (arg_match(&arg, &verbosearg, argi)) - global->verbose = 1; - else if (arg_match(&arg, &limit, argi)) -@@ -2000,7 +2016,7 @@ static void set_default_kf_interval(struct stream_state *stream, - { - double framerate = (double)global->framerate.num/global->framerate.den; - if (framerate > 0.0) -- stream->config.cfg.kf_max_dist = 5.0*framerate; -+ stream->config.cfg.kf_max_dist = (unsigned int)(5.0*framerate); - } - } - -@@ -2180,7 +2196,7 @@ static void encode_frame(struct stream_state *stream, - / cfg->g_timebase.num / global->framerate.num; - vpx_usec_timer_start(&timer); - vpx_codec_encode(&stream->encoder, img, frame_start, -- next_frame_start - frame_start, -+ (unsigned long)(next_frame_start - frame_start), - 0, global->deadline); - vpx_usec_timer_mark(&timer); - stream->cx_time += vpx_usec_timer_elapsed(&timer); -@@ -2224,8 +2240,9 @@ static void get_cx_data(struct stream_state *stream, - { - stream->frames_out++; - } -- fprintf(stderr, " %6luF", -- (unsigned long)pkt->data.frame.sz); -+ if (!global->quiet) -+ fprintf(stderr, " %6luF", -+ (unsigned long)pkt->data.frame.sz); - - update_rate_histogram(&stream->rate_hist, cfg, pkt); - if(stream->config.write_webm) -@@ -2233,7 +2250,8 @@ static void get_cx_data(struct stream_state *stream, - /* Update the hash */ - if(!stream->ebml.debug) - stream->hash = murmur(pkt->data.frame.buf, -- pkt->data.frame.sz, stream->hash); -+ (int)pkt->data.frame.sz, -+ stream->hash); - - write_webm_block(&stream->ebml, cfg, pkt); - } -@@ -2259,15 +2277,16 @@ static void get_cx_data(struct 
stream_state *stream, - } - } - -- fwrite(pkt->data.frame.buf, 1, -- pkt->data.frame.sz, stream->file); -+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -+ stream->file); - } - stream->nbytes += pkt->data.raw.sz; - break; - case VPX_CODEC_STATS_PKT: - stream->frames_out++; -- fprintf(stderr, " %6luS", -- (unsigned long)pkt->data.twopass_stats.sz); -+ if (!global->quiet) -+ fprintf(stderr, " %6luS", -+ (unsigned long)pkt->data.twopass_stats.sz); - stats_write(&stream->stats, - pkt->data.twopass_stats.buf, - pkt->data.twopass_stats.sz); -@@ -2283,7 +2302,8 @@ static void get_cx_data(struct stream_state *stream, - stream->psnr_samples_total += pkt->data.psnr.samples[0]; - for (i = 0; i < 4; i++) - { -- fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]); -+ if (!global->quiet) -+ fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]); - stream->psnr_totals[i] += pkt->data.psnr.psnr[i]; - } - stream->psnr_count++; -@@ -2306,13 +2326,13 @@ static void show_psnr(struct stream_state *stream) - return; - - fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index); -- ovpsnr = vp8_mse2psnr(stream->psnr_samples_total, 255.0, -- stream->psnr_sse_total); -- fprintf(stderr, " %.3lf", ovpsnr); -+ ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0, -+ (double)stream->psnr_sse_total); -+ fprintf(stderr, " %.3f", ovpsnr); - - for (i = 0; i < 4; i++) - { -- fprintf(stderr, " %.3lf", stream->psnr_totals[i]/stream->psnr_count); -+ fprintf(stderr, " %.3f", stream->psnr_totals[i]/stream->psnr_count); - } - fprintf(stderr, "\n"); - } -@@ -2320,7 +2340,7 @@ static void show_psnr(struct stream_state *stream) - - float usec_to_fps(uint64_t usec, unsigned int frames) - { -- return usec > 0 ? (float)frames * 1000000.0 / (float)usec : 0; -+ return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0); - } - - -@@ -2437,7 +2457,7 @@ int main(int argc, const char **argv_) - vpx_img_alloc(&raw, - input.use_i420 ? VPX_IMG_FMT_I420 - : VPX_IMG_FMT_YV12, -- input.w, input.h, 1); -+ input.w, input.h, 32); - - FOREACH_STREAM(init_rate_histogram(&stream->rate_hist, - &stream->config.cfg, -@@ -2462,18 +2482,21 @@ int main(int argc, const char **argv_) - if (frame_avail) - frames_in++; - -- if(stream_cnt == 1) -- fprintf(stderr, -- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", -- pass + 1, global.passes, frames_in, -- streams->frames_out, (int64_t)streams->nbytes); -- else -- fprintf(stderr, -- "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", -- pass + 1, global.passes, frames_in, -- cx_time > 9999999 ? cx_time / 1000 : cx_time, -- cx_time > 9999999 ? "ms" : "us", -- usec_to_fps(cx_time, frames_in)); -+ if (!global.quiet) -+ { -+ if(stream_cnt == 1) -+ fprintf(stderr, -+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", -+ pass + 1, global.passes, frames_in, -+ streams->frames_out, (int64_t)streams->nbytes); -+ else -+ fprintf(stderr, -+ "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", -+ pass + 1, global.passes, frames_in, -+ cx_time > 9999999 ? cx_time / 1000 : cx_time, -+ cx_time > 9999999 ? "ms" : "us", -+ usec_to_fps(cx_time, frames_in)); -+ } - - } - else -@@ -2484,7 +2507,7 @@ int main(int argc, const char **argv_) - frame_avail ? 
&raw : NULL, - frames_in)); - vpx_usec_timer_mark(&timer); -- cx_time += vpx_usec_timer_elapsed(&timer); -+ cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer); - - FOREACH_STREAM(update_quantizer_histogram(stream)); - -@@ -2497,20 +2520,21 @@ int main(int argc, const char **argv_) - if(stream_cnt > 1) - fprintf(stderr, "\n"); - -- FOREACH_STREAM(fprintf( -- stderr, -- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" -- " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, -- global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, -- frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, -- frames_in ? (int64_t)stream->nbytes * 8 -- * (int64_t)global.framerate.num / global.framerate.den -- / frames_in -- : 0, -- stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, -- stream->cx_time > 9999999 ? "ms" : "us", -- usec_to_fps(stream->cx_time, frames_in)); -- ); -+ if (!global.quiet) -+ FOREACH_STREAM(fprintf( -+ stderr, -+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" -+ " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, -+ global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, -+ frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, -+ frames_in ? (int64_t)stream->nbytes * 8 -+ * (int64_t)global.framerate.num / global.framerate.den -+ / frames_in -+ : 0, -+ stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, -+ stream->cx_time > 9999999 ? "ms" : "us", -+ usec_to_fps(stream->cx_time, frames_in)); -+ ); - - if (global.show_psnr) - FOREACH_STREAM(show_psnr(stream)); -diff --git a/y4minput.c b/y4minput.c -index dd51421..ff9ffbc 100644 ---- a/y4minput.c -+++ b/y4minput.c -@@ -662,7 +662,7 @@ int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){ - _nskip--; - } - else{ -- ret=fread(buffer+i,1,1,_fin); -+ ret=(int)fread(buffer+i,1,1,_fin); - if(ret<1)return -1; - } - if(buffer[i]=='\n')break; -@@ -818,7 +818,7 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){ - int c_sz; - int ret; - /*Read and skip the frame header.*/ -- ret=fread(frame,1,6,_fin); -+ ret=(int)fread(frame,1,6,_fin); - if(ret<6)return 0; - if(memcmp(frame,"FRAME",5)){ - fprintf(stderr,"Loss of framing in Y4M input data\n");