From fcaded0055fe60509cfa7a00f1847bdd20b47b37a057e58ed3b57fd4a95a3760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Mon, 28 Jan 2013 17:08:03 +0000 Subject: [PATCH] update OBS-URL: https://build.opensuse.org/package/show/multimedia:libs/libvpx?expand=0&rev=44 --- libvpx-armv7-use-hard-float.patch | 28 +- libvpx-configure-add-s390.patch | 12 +- libvpx-disable-cross-for-arm.patch | 8 +- libvpx.changes | 22 + libvpx.spec | 21 +- version_1.1.0_to_1.2.0.diff | 45439 +++++++++++++++++++++++++++ 6 files changed, 45501 insertions(+), 29 deletions(-) create mode 100644 version_1.1.0_to_1.2.0.diff diff --git a/libvpx-armv7-use-hard-float.patch b/libvpx-armv7-use-hard-float.patch index 76b9862..8fb72a5 100644 --- a/libvpx-armv7-use-hard-float.patch +++ b/libvpx-armv7-use-hard-float.patch @@ -1,13 +1,15 @@ ---- build/make/configure.sh.orig 2012-12-10 12:55:30.276337981 +0100 -+++ build/make/configure.sh 2012-12-10 13:38:41.656642859 +0100 -@@ -738,8 +738,8 @@ process_common_toolchain() { - check_add_cflags -mfpu=neon #-ftree-vectorize - check_add_asflags -mfpu=neon - fi -- check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -- check_add_asflags -mcpu=cortex-a8 -mfloat-abi=softfp #-march=armv7-a -+ check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=hard -+ check_add_asflags -mcpu=cortex-a8 -mfloat-abi=hard #-march=armv7-a - else - check_add_cflags -march=${tgt_isa} - check_add_asflags -march=${tgt_isa} +Index: build/make/configure.sh +=================================================================== +--- build/make/configure.sh.orig ++++ build/make/configure.sh +@@ -789,8 +789,8 @@ process_common_toolchain() { + check_add_asflags --defsym ARCHITECTURE=${arch_int} + tune_cflags="-mtune=" + if [ ${tgt_isa} == "armv7" ]; then +- check_add_cflags -march=armv7-a -mfloat-abi=softfp +- check_add_asflags -march=armv7-a -mfloat-abi=softfp ++ check_add_cflags -march=armv7-a -mfloat-abi=hard ++ check_add_asflags -march=armv7-a -mfloat-abi=hard + + if enabled neon + then diff --git a/libvpx-configure-add-s390.patch b/libvpx-configure-add-s390.patch index 4d8ccdc..ca07651 100644 --- a/libvpx-configure-add-s390.patch +++ b/libvpx-configure-add-s390.patch @@ -1,6 +1,8 @@ ---- build/make/configure.sh +Index: build/make/configure.sh +=================================================================== +--- build/make/configure.sh.orig +++ build/make/configure.sh -@@ -539,6 +539,12 @@ +@@ -614,6 +614,12 @@ process_common_toolchain() { *powerpc*) tgt_isa=ppc32 ;; @@ -13,9 +15,11 @@ *sparc*) tgt_isa=sparc ;; ---- configure +Index: configure +=================================================================== +--- configure.orig +++ configure -@@ -102,6 +102,8 @@ +@@ -104,6 +104,8 @@ all_platforms="${all_platforms} ppc32-li all_platforms="${all_platforms} ppc64-darwin8-gcc" all_platforms="${all_platforms} ppc64-darwin9-gcc" all_platforms="${all_platforms} ppc64-linux-gcc" diff --git a/libvpx-disable-cross-for-arm.patch b/libvpx-disable-cross-for-arm.patch index b3e759a..f157b22 100644 --- a/libvpx-disable-cross-for-arm.patch +++ b/libvpx-disable-cross-for-arm.patch @@ -1,6 +1,8 @@ ---- build/make/configure.sh.orig 2012-12-10 12:55:30.276337981 +0100 -+++ build/make/configure.sh 2012-12-10 12:58:16.721521547 +0100 -@@ -725,7 +725,7 @@ process_common_toolchain() { +Index: build/make/configure.sh +=================================================================== +--- build/make/configure.sh.orig ++++ build/make/configure.sh +@@ -781,7 +781,7 @@ process_common_toolchain() { 
case ${tgt_cc} in gcc)
diff --git a/libvpx.changes b/libvpx.changes
index 4f0fd9f..51d3aee 100644
--- a/libvpx.changes
+++ b/libvpx.changes
@@ -1,3 +1,25 @@
+-------------------------------------------------------------------
+Mon Jan 28 17:07:51 UTC 2013 - adrian@suse.de
+
+- update to version 1.2.0
+  - done via patch, since upstream provided no release tarball
+
+- From release notes:
+  - fully API and ABI compatible with the 1.x release line
+  - Enhancements:
+      VP8 optimizations for MIPS dspr2
+      vpxenc: add -quiet option
+  - Speed:
+      Encoder and decoder speed is consistent with the Eider release.
+  - Quality:
+      In general, quality is consistent with the Eider release.
+      Minor tweaks to ARNR filtering
+      Minor improvements to real time encoding with multiple temporal layers
+  - Bug Fixes:
+      Fixes multithreaded encoder race condition in loopfilter
+      Fixes multi-resolution threaded encoding
+      Fix potential encoder dead-lock after picture resize
+
-------------------------------------------------------------------
Mon Dec 10 12:48:46 UTC 2012 - guillaume@opensuse.org

diff --git a/libvpx.spec b/libvpx.spec
index 4512f66..3f3c10a 100644
--- a/libvpx.spec
+++ b/libvpx.spec
@@ -1,7 +1,7 @@
#
# spec file for package libvpx
#
-# Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -17,18 +17,20 @@

Name: libvpx
-Version: 1.1.0
+Version: 1.2.0
Release: 0
Summary: VP8 codec library
License: BSD-3-Clause and GPL-2.0+
Group: Productivity/Multimedia/Other
Url: http://www.webmproject.org/
-Source0: http://webm.googlecode.com/files/%{name}-v%{version}.tar.bz2
+Source0: http://webm.googlecode.com/files/%{name}-v1.1.0.tar.bz2
+# The upstream project did not release a tarball, just a git tag of version 1.2.0 :/
+Patch0: version_1.1.0_to_1.2.0.diff
# PATCH-FIX-UPSTREAM libvpx-define-config_pic.patch dimstar@opensuse.org -- For older compilers, CONFIG_PIC need to be defined.
-Patch0: libvpx-define-config_pic.patch
-Patch1: libvpx-configure-add-s390.patch
-Patch2: libvpx-disable-cross-for-arm.patch
-Patch3: libvpx-armv7-use-hard-float.patch
+Patch1: libvpx-define-config_pic.patch
+Patch2: libvpx-configure-add-s390.patch
+Patch3: libvpx-disable-cross-for-arm.patch
+Patch4: libvpx-armv7-use-hard-float.patch
# Needed to be able to create pkgconfig() provides.
BuildRequires: pkg-config
BuildRequires: yasm
@@ -88,11 +90,12 @@
and audio streams compressed with the Vorbis audio codec. The WebM file
structure is based on the Matroska container.

%prep -%setup -q -n %name-v%version +%setup -q -n %name-v1.1.0 %patch0 -p1 -%patch1 +%patch1 -p1 %patch2 %patch3 +%patch4 %build cd build diff --git a/version_1.1.0_to_1.2.0.diff b/version_1.1.0_to_1.2.0.diff new file mode 100644 index 0000000..1e40126 --- /dev/null +++ b/version_1.1.0_to_1.2.0.diff @@ -0,0 +1,45439 @@ +diff --git a/.gitignore b/.gitignore +index 110146d..4074b0b 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -32,6 +32,8 @@ + /ivfdec.dox + /ivfenc + /ivfenc.dox ++/libvpx.so* ++/libvpx.ver + /obj_int_extract + /postproc + /postproc.c +@@ -43,6 +45,7 @@ + /simple_encoder + /simple_encoder.c + /simple_encoder.dox ++/test_libvpx + /twopass_encoder + /twopass_encoder.c + /twopass_encoder.dox +@@ -55,7 +58,14 @@ + /vp8cx_set_ref + /vp8cx_set_ref.c + /vp8cx_set_ref.dox ++/vpx.pc + /vpx_config.c + /vpx_config.h ++/vpx_rtcd.h + /vpx_version.h ++/vpxdec ++/vpxenc + TAGS ++.cproject ++.project ++.settings +diff --git a/CHANGELOG b/CHANGELOG +index dcb9f73..ef64a96 100644 +--- a/CHANGELOG ++++ b/CHANGELOG +@@ -1,3 +1,32 @@ ++2012-12-21 v1.2.0 ++ This release acts as a checkpoint for a large amount of internal refactoring ++ and testing. It also contains a number of small bugfixes, so all users are ++ encouraged to upgrade. ++ ++ - Upgrading: ++ This release is ABI and API compatible with Duclair (v1.0.0). Users ++ of older releases should refer to the Upgrading notes in this ++ document for that release. ++ ++ - Enhancements: ++ VP8 optimizations for MIPS dspr2 ++ vpxenc: add -quiet option ++ ++ - Speed: ++ Encoder and decoder speed is consistent with the Eider release. ++ ++ - Quality: ++ In general, quality is consistent with the Eider release. ++ ++ Minor tweaks to ARNR filtering ++ Minor improvements to real time encoding with multiple temporal layers ++ ++ - Bug Fixes: ++ Fixes multithreaded encoder race condition in loopfilter ++ Fixes multi-resolution threaded encoding ++ Fix potential encoder dead-lock after picture resize ++ ++ + 2012-05-09 v1.1.0 "Eider" + This introduces a number of enhancements, mostly focused on real-time + encoding. In addition, it fixes a decoder bug (first introduced in +diff --git a/README b/README +index 0dfb0fe..0475dad 100644 +--- a/README ++++ b/README +@@ -1,5 +1,5 @@ + vpx Multi-Format Codec SDK +-README - 19 May 2010 ++README - 21 June 2012 + + Welcome to the WebM VP8 Codec SDK! + +@@ -15,11 +15,19 @@ COMPILING THE APPLICATIONS/LIBRARIES: + * Building the documentation requires PHP[3] and Doxygen[4]. If you do not + have these packages, you must pass --disable-install-docs to the + configure script. ++ * Downloading the data for the unit tests requires curl[5] and sha1sum. ++ sha1sum is provided via the GNU coreutils, installed by default on ++ many *nix platforms, as well as MinGW and Cygwin. If coreutils is not ++ available, a compatible version of sha1sum can be built from ++ source[6]. These requirements are optional if not running the unit ++ tests. + + [1]: http://www.tortall.net/projects/yasm + [2]: http://www.cygwin.com + [3]: http://php.net + [4]: http://www.doxygen.org ++ [5]: http://curl.haxx.se ++ [6]: http://www.microbrew.org/tools/md5sha1sum/ + + 2. Out-of-tree builds + Out of tree builds are a supported method of building the application. For +@@ -94,5 +102,5 @@ COMPILING THE APPLICATIONS/LIBRARIES: + + SUPPORT + This library is an open source project supported by its community. Please +- please email webm-users@webmproject.org for help. ++ please email webm-discuss@webmproject.org for help. 
+ +diff --git a/build/make/Android.mk b/build/make/Android.mk +index 6fcd4ae..c6b9cf9 100644 +--- a/build/make/Android.mk ++++ b/build/make/Android.mk +@@ -27,15 +27,22 @@ + # Android.mk file in the libvpx directory: + # LOCAL_PATH := $(call my-dir) + # include $(CLEAR_VARS) +-# include libvpx/build/make/Android.mk ++# include jni/libvpx/build/make/Android.mk + # + # There are currently two TARGET_ARCH_ABI targets for ARM. + # armeabi and armeabi-v7a. armeabi-v7a is selected by creating an + # Application.mk in the jni directory that contains: + # APP_ABI := armeabi-v7a + # ++# By default libvpx will detect at runtime the existance of NEON extension. ++# For this we import the 'cpufeatures' module from the NDK sources. ++# libvpx can also be configured without this runtime detection method. ++# Configuring with --disable-runtime-cpu-detect will assume presence of NEON. ++# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any ++# NEON dependency. ++ + # To change to building armeabi, run ./libvpx/configure again, but with +-# --target=arm5te-android-gcc and and modify the Application.mk file to ++# --target=arm5te-android-gcc and modify the Application.mk file to + # set APP_ABI := armeabi + # + # Running ndk-build will build libvpx and include it in your project. +@@ -166,7 +173,9 @@ LOCAL_MODULE := libvpx + + LOCAL_LDLIBS := -llog + +-LOCAL_STATIC_LIBRARIES := cpufeatures ++ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) ++ LOCAL_STATIC_LIBRARIES := cpufeatures ++endif + + $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h + +@@ -196,4 +205,7 @@ ifeq ($(CONFIG_VP8_ENCODER), yes) + $(LIBVPX_PATH)/vp8/encoder/asm_enc_offsets.c)) + endif + ++ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) + $(call import-module,cpufeatures) ++endif ++ +diff --git a/build/make/Makefile b/build/make/Makefile +index b6cf320..1088c84 100644 +--- a/build/make/Makefile ++++ b/build/make/Makefile +@@ -21,6 +21,7 @@ all: .DEFAULT + clean:: .DEFAULT + install:: .DEFAULT + test:: .DEFAULT ++testdata:: .DEFAULT + + + # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be +@@ -66,6 +67,7 @@ endif + BUILD_ROOT?=. 
+ VPATH=$(SRC_PATH_BARE) + CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) ++CXXFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) + ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/ + DIST_DIR?=dist + HOSTCC?=gcc +@@ -98,6 +100,8 @@ dist: + install:: + .PHONY: test + test:: ++.PHONY: testdata ++testdata:: + + $(BUILD_PFX)%.c.d: %.c + $(if $(quiet),@echo " [DEP] $@") +@@ -111,11 +115,11 @@ $(BUILD_PFX)%.c.o: %.c + $(BUILD_PFX)%.cc.d: %.cc + $(if $(quiet),@echo " [DEP] $@") + $(qexec)mkdir -p $(dir $@) +- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -M $< | $(fmt_deps) > $@ ++ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@ + + $(BUILD_PFX)%.cc.o: %.cc + $(if $(quiet),@echo " [CXX] $@") +- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $< ++ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $< + + $(BUILD_PFX)%.asm.d: %.asm + $(if $(quiet),@echo " [DEP] $@") +@@ -213,7 +217,7 @@ define linkerxx_template + $(1): $(filter-out -%,$(2)) + $(1): + $(if $(quiet),@echo " [LD] $$@") +- $(qexec)g++ $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) ++ $(qexec)$$(CXX) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) + endef + # make-3.80 has a bug with expanding large input strings to the eval function, + # which was triggered in some cases by the following component of +diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl +index c55ed0f..95be467 100755 +--- a/build/make/ads2gas.pl ++++ b/build/make/ads2gas.pl +@@ -26,12 +26,22 @@ print "\t.equ DO1STROUNDING, 0\n"; + + while () + { ++ undef $comment; ++ undef $line; ++ $comment_char = ";"; ++ $comment_sub = "@"; ++ ++ # Handle comments. ++ if (/$comment_char/) ++ { ++ $comment = ""; ++ ($line, $comment) = /(.*?)$comment_char(.*)/; ++ $_ = $line; ++ } ++ + # Load and store alignment + s/@/,:/g; + +- # Comment character +- s/;/@/g; +- + # Hexadecimal constants prefaced by 0x + s/#&/#0x/g; + +@@ -51,16 +61,27 @@ while () + s/:SHR:/ >> /g; + + # Convert ELSE to .else +- s/ELSE/.else/g; ++ s/\bELSE\b/.else/g; + + # Convert ENDIF to .endif +- s/ENDIF/.endif/g; ++ s/\bENDIF\b/.endif/g; + + # Convert ELSEIF to .elseif +- s/ELSEIF/.elseif/g; ++ s/\bELSEIF\b/.elseif/g; + + # Convert LTORG to .ltorg +- s/LTORG/.ltorg/g; ++ s/\bLTORG\b/.ltorg/g; ++ ++ # Convert endfunc to nothing. ++ s/\bendfunc\b//ig; ++ ++ # Convert FUNCTION to nothing. ++ s/\bFUNCTION\b//g; ++ s/\bfunction\b//g; ++ ++ s/\bENTRY\b//g; ++ s/\bMSARMASM\b/0/g; ++ s/^\s+end\s+$//g; + + # Convert IF :DEF:to .if + # gcc doesn't have the ability to do a conditional +@@ -106,6 +127,7 @@ while () + if (s/RN\s+([Rr]\d+|lr)/.req $1/) + { + print; ++ print "$comment_sub$comment\n" if defined $comment; + next; + } + +@@ -114,6 +136,9 @@ while () + s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/; + s/IMPORT\s+\|([\$\w]*)\|/.global $1/; + ++ s/EXPORT\s+([\$\w]*)/.global $1/; ++ s/export\s+([\$\w]*)/.global $1/; ++ + # No vertical bars required; make additional symbol with prepended + # underscore + s/^\|(\$?\w+)\|/_$1\n\t$1:/g; +@@ -124,11 +149,19 @@ while () + s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/; + + # ALIGN directive +- s/ALIGN/.balign/g; ++ s/\bALIGN\b/.balign/g; + + # ARM code + s/\sARM/.arm/g; + ++ # push/pop ++ s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g; ++ s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g; ++ ++ # NEON code ++ s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g; ++ s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g; ++ + # eabi_attributes numerical equivalents can be found in the + # "ARM IHI 0045C" document. 
+ +@@ -157,10 +190,10 @@ while () + } + + # EQU directive +- s/(.*)EQU(.*)/.equ $1, $2/; ++ s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/; + + # Begin macro definition +- if (/MACRO/) { ++ if (/\bMACRO\b/) { + $_ = ; + s/^/.macro/; + s/\$//g; # remove formal param reference +@@ -169,9 +202,10 @@ while () + + # For macros, use \ to reference formal params + s/\$/\\/g; # End macro definition +- s/MEND/.endm/; # No need to tell it where to stop assembling ++ s/\bMEND\b/.endm/; # No need to tell it where to stop assembling + next if /^\s*END\s*$/; + print; ++ print "$comment_sub$comment\n" if defined $comment; + } + + # Mark that this object doesn't need an executable stack. +diff --git a/build/make/configure.sh b/build/make/configure.sh +index 3c772e5..c99a01c 100755 +--- a/build/make/configure.sh ++++ b/build/make/configure.sh +@@ -166,6 +166,17 @@ is_in(){ + + add_cflags() { + CFLAGS="${CFLAGS} $@" ++ CXXFLAGS="${CXXFLAGS} $@" ++} ++ ++ ++add_cflags_only() { ++ CFLAGS="${CFLAGS} $@" ++} ++ ++ ++add_cxxflags_only() { ++ CXXFLAGS="${CXXFLAGS} $@" + } + + +@@ -277,6 +288,13 @@ check_cc() { + check_cmd ${CC} ${CFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} + } + ++check_cxx() { ++ log check_cxx "$@" ++ cat >${TMP_C} ++ log_file ${TMP_C} ++ check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} ++} ++ + check_cpp() { + log check_cpp "$@" + cat > ${TMP_C} +@@ -310,8 +328,25 @@ int x; + EOF + } + ++check_cxxflags() { ++ log check_cxxflags "$@" ++ ++ # Catch CFLAGS that trigger CXX warnings ++ case "$CXX" in ++ *g++*) check_cxx -Werror "$@" <> $1 << EOF +@@ -379,6 +416,7 @@ TOOLCHAIN=${toolchain} + ASM_CONVERSION=${asm_conversion_cmd:-${source_path}/build/make/ads2gas.pl} + + CC=${CC} ++CXX=${CXX} + AR=${AR} + LD=${LD} + AS=${AS} +@@ -386,6 +424,7 @@ STRIP=${STRIP} + NM=${NM} + + CFLAGS = ${CFLAGS} ++CXXFLAGS = ${CXXFLAGS} + ARFLAGS = -rus\$(if \$(quiet),c,v) + LDFLAGS = ${LDFLAGS} + ASFLAGS = ${ASFLAGS} +@@ -538,6 +577,7 @@ post_process_cmdline() { + + setup_gnu_toolchain() { + CC=${CC:-${CROSS}gcc} ++ CXX=${CXX:-${CROSS}g++} + AR=${AR:-${CROSS}ar} + LD=${LD:-${CROSS}${link_with_cc:-ld}} + AS=${AS:-${CROSS}as} +@@ -549,10 +589,19 @@ setup_gnu_toolchain() { + + process_common_toolchain() { + if [ -z "$toolchain" ]; then +- gcctarget="$(gcc -dumpmachine 2> /dev/null)" ++ gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" + + # detect tgt_isa + case "$gcctarget" in ++ armv6*) ++ tgt_isa=armv6 ++ ;; ++ armv7*) ++ tgt_isa=armv7 ++ ;; ++ armv5te*) ++ tgt_isa=armv5te ++ ;; + *x86_64*|*amd64*) + tgt_isa=x86_64 + ;; +@@ -718,6 +767,7 @@ process_common_toolchain() { + ;; + armv5te) + soft_enable edsp ++ disable fast_unaligned + ;; + esac + +@@ -733,17 +783,23 @@ process_common_toolchain() { + check_add_asflags --defsym ARCHITECTURE=${arch_int} + tune_cflags="-mtune=" + if [ ${tgt_isa} == "armv7" ]; then ++ check_add_cflags -march=armv7-a -mfloat-abi=softfp ++ check_add_asflags -march=armv7-a -mfloat-abi=softfp ++ + if enabled neon + then + check_add_cflags -mfpu=neon #-ftree-vectorize + check_add_asflags -mfpu=neon + fi +- check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp +- check_add_asflags -mcpu=cortex-a8 -mfloat-abi=softfp #-march=armv7-a ++ ++ if [ -z "${tune_cpu}" ]; then ++ tune_cpu=cortex-a8 ++ fi + else + check_add_cflags -march=${tgt_isa} + check_add_asflags -march=${tgt_isa} + fi ++ + enabled debug && add_asflags -g + asm_conversion_cmd="${source_path}/build/make/ads2gas.pl" + ;; +@@ -792,6 +848,7 @@ process_common_toolchain() { + -name "arm-linux-androideabi-gcc*" -print -quit` + 
TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi- + CC=${TOOLCHAIN_PATH}gcc ++ CXX=${TOOLCHAIN_PATH}g++ + AR=${TOOLCHAIN_PATH}ar + LD=${TOOLCHAIN_PATH}gcc + AS=${TOOLCHAIN_PATH}as +@@ -810,12 +867,17 @@ process_common_toolchain() { + add_cflags "--sysroot=${alt_libc}" + add_ldflags "--sysroot=${alt_libc}" + +- add_cflags "-I${SDK_PATH}/sources/android/cpufeatures/" ++ # linker flag that routes around a CPU bug in some ++ # Cortex-A8 implementations (NDK Dev Guide) ++ add_ldflags "-Wl,--fix-cortex-a8" + + enable pic + soft_enable realtime_only + if [ ${tgt_isa} == "armv7" ]; then +- enable runtime_cpu_detect ++ soft_enable runtime_cpu_detect ++ fi ++ if enabled runtime_cpu_detect; then ++ add_cflags "-I${SDK_PATH}/sources/android/cpufeatures" + fi + ;; + +@@ -827,6 +889,7 @@ process_common_toolchain() { + SDK_PATH=${sdk_path} + fi + TOOLCHAIN_PATH=${SDK_PATH}/usr/bin ++ CXX=${TOOLCHAIN_PATH}/g++ + CC=${TOOLCHAIN_PATH}/gcc + AR=${TOOLCHAIN_PATH}/ar + LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2 +@@ -890,13 +953,16 @@ process_common_toolchain() { + esac + ;; + mips*) +- CROSS=${CROSS:-mipsel-linux-uclibc-} + link_with_cc=gcc + setup_gnu_toolchain + tune_cflags="-mtune=" ++ if enabled dspr2; then ++ check_add_cflags -mips32r2 -mdspr2 ++ disable fast_unaligned ++ fi + check_add_cflags -march=${tgt_isa} +- check_add_asflags -march=${tgt_isa} +- check_add_asflags -KPIC ++ check_add_asflags -march=${tgt_isa} ++ check_add_asflags -KPIC + ;; + ppc*) + enable ppc +@@ -924,6 +990,11 @@ process_common_toolchain() { + x86*) + bits=32 + enabled x86_64 && bits=64 ++ check_cpp </dev/null 2>&1 && AS=yasm + [ "${AS}" = auto -o -z "${AS}" ] \ + && die "Neither yasm nor nasm have been found" +- ;; ++ ;; + esac + log_echo " using $AS" + [ "${AS##*/}" = nasm ] && add_asflags -Ox +@@ -1065,7 +1143,7 @@ process_common_toolchain() { + + # Work around longjmp interception on glibc >= 2.11, to improve binary + # compatibility. See http://code.google.com/p/webm/issues/detail?id=166 +- enabled linux && check_add_cflags -D_FORTIFY_SOURCE=0 ++ enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 + + # Check for strip utility variant + ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip +@@ -1080,12 +1158,24 @@ EOF + # Almost every platform uses pthreads. + if enabled multithread; then + case ${toolchain} in +- *-win*);; ++ *-win*-vs*);; + *-android-gcc);; + *) check_header pthread.h && add_extralibs -lpthread + esac + fi + ++ # only for MIPS platforms ++ case ${toolchain} in ++ mips*) ++ if enabled dspr2; then ++ if enabled big_endian; then ++ echo "dspr2 optimizations are available only for little endian platforms" ++ disable dspr2 ++ fi ++ fi ++ ;; ++ esac ++ + # for sysconf(3) and friends. 
+ check_header unistd.h + +diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh +index 717f870..0b4e3aa 100755 +--- a/build/make/gen_asm_deps.sh ++++ b/build/make/gen_asm_deps.sh +@@ -42,7 +42,7 @@ done + + [ -n "$srcfile" ] || show_help + sfx=${sfx:-asm} +-includes=$(LC_ALL=C egrep -i "include +\"?+[a-z0-9_/]+\.${sfx}" $srcfile | ++includes=$(LC_ALL=C egrep -i "include +\"?[a-z0-9_/]+\.${sfx}" $srcfile | + perl -p -e "s;.*?([a-z0-9_/]+.${sfx}).*;\1;") + #" restore editor state + for inc in ${includes}; do +diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c +index 04e14a6..bf317bd 100644 +--- a/build/make/obj_int_extract.c ++++ b/build/make/obj_int_extract.c +@@ -680,7 +680,7 @@ int parse_coff(uint8_t *buf, size_t sz) + uint32_t symoffset; + + char **sectionlist; //this array holds all section names in their correct order. +- //it is used to check if the symbol is in .bss or .data section. ++ //it is used to check if the symbol is in .bss or .rdata section. + + nsections = get_le16(buf + 2); + symtab_ptr = get_le32(buf + 8); +@@ -725,15 +725,15 @@ int parse_coff(uint8_t *buf, size_t sz) + } + strcpy(sectionlist[i], sectionname); + +- if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20); ++ if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20); + + ptr += 40; + } + + //log_msg("COFF: Symbol table at offset %u\n", symtab_ptr); +- //log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr); ++ //log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr); + +- /* The compiler puts the data with non-zero offset in .data section, but puts the data with ++ /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with + zero offset in .bss section. So, if the data in in .bss section, set offset=0. + Note from Wiki: In an object module compiled from C, the bss section contains + the local variables (but not functions) that were declared with the static keyword, +diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh +index 1dffde5..ddf9e09 100755 +--- a/build/make/rtcd.sh ++++ b/build/make/rtcd.sh +@@ -211,6 +211,8 @@ common_top() { + $(process_forward_decls) + + $(declare_function_pointers c $ALL_ARCHS) ++ ++void ${symbol:-rtcd}(void); + EOF + } + +@@ -231,11 +233,10 @@ x86() { + + cat <planes[plane]; + + for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) { +- if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w), +- outfile)); ++ (void) fwrite(buf, 1, (plane ? 
(img->d_w + 1) >> 1 : img->d_w), ++ outfile); + buf += img->stride[plane]; + } + } +diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c +index cc70b00..e2b65ec 100644 +--- a/examples/encoder_tmpl.c ++++ b/examples/encoder_tmpl.c +@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile, + mem_put_le32(header+24, frame_cnt); /* length */ + mem_put_le32(header+28, 0); /* unused */ + +- if(fwrite(header, 1, 32, outfile)); ++ (void) fwrite(header, 1, 32, outfile); + } + + +@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile, + mem_put_le32(header+4, pts&0xFFFFFFFF); + mem_put_le32(header+8, pts >> 32); + +- if(fwrite(header, 1, 12, outfile)); ++ (void) fwrite(header, 1, 12, outfile); + } + + int main(int argc, char **argv) { +diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt +index 0042071..1afbd8b 100644 +--- a/examples/encoder_tmpl.txt ++++ b/examples/encoder_tmpl.txt +@@ -61,13 +61,14 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME + case VPX_CODEC_CX_FRAME_PKT: + write_ivf_frame_header(outfile, pkt); +- if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, +- outfile)); ++ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, ++ outfile); + break; + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY ++vpx_img_free(&raw); + if(vpx_codec_destroy(&codec)) + die_codec(&codec, "Failed to destroy codec"); + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY +diff --git a/examples/twopass_encoder.txt b/examples/twopass_encoder.txt +index 4683bc7..2f81a90 100644 +--- a/examples/twopass_encoder.txt ++++ b/examples/twopass_encoder.txt +@@ -71,5 +71,17 @@ Pass Progress Reporting + It's sometimes helpful to see when each pass completes. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END + printf("Pass %d complete.\n", pass+1); ++ if(vpx_codec_destroy(&codec)) ++ die_codec(&codec, "Failed to destroy codec"); + } + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END ++ ++ ++Clean-up ++----------------------------- ++Destruction of the encoder instance must be done on each pass. The ++raw image should be destroyed at the end as usual. ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY ++vpx_img_free(&raw); ++free(stats.buf); ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY +diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h +index 3418e36..e3ce585 100644 +--- a/libmkv/EbmlIDs.h ++++ b/libmkv/EbmlIDs.h +@@ -1,16 +1,16 @@ +-// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +-// +-// Use of this source code is governed by a BSD-style license +-// that can be found in the LICENSE file in the root of the source +-// tree. An additional intellectual property rights grant can be found +-// in the file PATENTS. All contributing project authors may +-// be found in the AUTHORS file in the root of the source tree. +- +- ++/* ++ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */ + #ifndef MKV_DEFS_HPP + #define MKV_DEFS_HPP 1 + +-//Commenting out values not available in webm, but available in matroska ++/* Commenting out values not available in webm, but available in matroska */ + + enum mkv + { +@@ -22,7 +22,7 @@ enum mkv + DocType = 0x4282, + DocTypeVersion = 0x4287, + DocTypeReadVersion = 0x4285, +-// CRC_32 = 0xBF, ++/* CRC_32 = 0xBF, */ + Void = 0xEC, + SignatureSlot = 0x1B538667, + SignatureAlgo = 0x7E8A, +@@ -32,61 +32,61 @@ enum mkv + SignatureElements = 0x7E5B, + SignatureElementList = 0x7E7B, + SignedElement = 0x6532, +- //segment ++ /* segment */ + Segment = 0x18538067, +- //Meta Seek Information ++ /* Meta Seek Information */ + SeekHead = 0x114D9B74, + Seek = 0x4DBB, + SeekID = 0x53AB, + SeekPosition = 0x53AC, +- //Segment Information ++ /* Segment Information */ + Info = 0x1549A966, +-// SegmentUID = 0x73A4, +-// SegmentFilename = 0x7384, +-// PrevUID = 0x3CB923, +-// PrevFilename = 0x3C83AB, +-// NextUID = 0x3EB923, +-// NextFilename = 0x3E83BB, +-// SegmentFamily = 0x4444, +-// ChapterTranslate = 0x6924, +-// ChapterTranslateEditionUID = 0x69FC, +-// ChapterTranslateCodec = 0x69BF, +-// ChapterTranslateID = 0x69A5, ++/* SegmentUID = 0x73A4, */ ++/* SegmentFilename = 0x7384, */ ++/* PrevUID = 0x3CB923, */ ++/* PrevFilename = 0x3C83AB, */ ++/* NextUID = 0x3EB923, */ ++/* NextFilename = 0x3E83BB, */ ++/* SegmentFamily = 0x4444, */ ++/* ChapterTranslate = 0x6924, */ ++/* ChapterTranslateEditionUID = 0x69FC, */ ++/* ChapterTranslateCodec = 0x69BF, */ ++/* ChapterTranslateID = 0x69A5, */ + TimecodeScale = 0x2AD7B1, + Segment_Duration = 0x4489, + DateUTC = 0x4461, +-// Title = 0x7BA9, ++/* Title = 0x7BA9, */ + MuxingApp = 0x4D80, + WritingApp = 0x5741, +- //Cluster ++ /* Cluster */ + Cluster = 0x1F43B675, + Timecode = 0xE7, +-// SilentTracks = 0x5854, +-// SilentTrackNumber = 0x58D7, +-// Position = 0xA7, ++/* SilentTracks = 0x5854, */ ++/* SilentTrackNumber = 0x58D7, */ ++/* Position = 0xA7, */ + PrevSize = 0xAB, + BlockGroup = 0xA0, + Block = 0xA1, +-// BlockVirtual = 0xA2, +-// BlockAdditions = 0x75A1, +-// BlockMore = 0xA6, +-// BlockAddID = 0xEE, +-// BlockAdditional = 0xA5, ++/* BlockVirtual = 0xA2, */ ++/* BlockAdditions = 0x75A1, */ ++/* BlockMore = 0xA6, */ ++/* BlockAddID = 0xEE, */ ++/* BlockAdditional = 0xA5, */ + BlockDuration = 0x9B, +-// ReferencePriority = 0xFA, ++/* ReferencePriority = 0xFA, */ + ReferenceBlock = 0xFB, +-// ReferenceVirtual = 0xFD, +-// CodecState = 0xA4, +-// Slices = 0x8E, +-// TimeSlice = 0xE8, ++/* ReferenceVirtual = 0xFD, */ ++/* CodecState = 0xA4, */ ++/* Slices = 0x8E, */ ++/* TimeSlice = 0xE8, */ + LaceNumber = 0xCC, +-// FrameNumber = 0xCD, +-// BlockAdditionID = 0xCB, +-// MkvDelay = 0xCE, +-// Cluster_Duration = 0xCF, ++/* FrameNumber = 0xCD, */ ++/* BlockAdditionID = 0xCB, */ ++/* MkvDelay = 0xCE, */ ++/* Cluster_Duration = 0xCF, */ + SimpleBlock = 0xA3, +-// EncryptedBlock = 0xAF, +- //Track ++/* EncryptedBlock = 0xAF, */ ++ /* Track */ + Tracks = 0x1654AE6B, + TrackEntry = 0xAE, + TrackNumber = 0xD7, +@@ -96,28 +96,28 @@ enum mkv + FlagDefault = 0x88, + FlagForced = 0x55AA, + FlagLacing = 0x9C, +-// MinCache = 0x6DE7, +-// MaxCache = 0x6DF8, ++/* MinCache = 0x6DE7, */ ++/* MaxCache = 0x6DF8, */ + DefaultDuration = 0x23E383, +-// TrackTimecodeScale = 0x23314F, +-// TrackOffset = 0x537F, +-// MaxBlockAdditionID = 0x55EE, ++/* TrackTimecodeScale = 0x23314F, */ ++/* TrackOffset = 0x537F, */ ++/* MaxBlockAdditionID = 0x55EE, */ + Name = 0x536E, + Language = 0x22B59C, + CodecID = 0x86, + CodecPrivate = 
0x63A2, + CodecName = 0x258688, +-// AttachmentLink = 0x7446, +-// CodecSettings = 0x3A9697, +-// CodecInfoURL = 0x3B4040, +-// CodecDownloadURL = 0x26B240, +-// CodecDecodeAll = 0xAA, +-// TrackOverlay = 0x6FAB, +-// TrackTranslate = 0x6624, +-// TrackTranslateEditionUID = 0x66FC, +-// TrackTranslateCodec = 0x66BF, +-// TrackTranslateTrackID = 0x66A5, +- //video ++/* AttachmentLink = 0x7446, */ ++/* CodecSettings = 0x3A9697, */ ++/* CodecInfoURL = 0x3B4040, */ ++/* CodecDownloadURL = 0x26B240, */ ++/* CodecDecodeAll = 0xAA, */ ++/* TrackOverlay = 0x6FAB, */ ++/* TrackTranslate = 0x6624, */ ++/* TrackTranslateEditionUID = 0x66FC, */ ++/* TrackTranslateCodec = 0x66BF, */ ++/* TrackTranslateTrackID = 0x66A5, */ ++ /* video */ + Video = 0xE0, + FlagInterlaced = 0x9A, + StereoMode = 0x53B8, +@@ -131,101 +131,101 @@ enum mkv + DisplayHeight = 0x54BA, + DisplayUnit = 0x54B2, + AspectRatioType = 0x54B3, +-// ColourSpace = 0x2EB524, +-// GammaValue = 0x2FB523, ++/* ColourSpace = 0x2EB524, */ ++/* GammaValue = 0x2FB523, */ + FrameRate = 0x2383E3, +- //end video +- //audio ++ /* end video */ ++ /* audio */ + Audio = 0xE1, + SamplingFrequency = 0xB5, + OutputSamplingFrequency = 0x78B5, + Channels = 0x9F, +-// ChannelPositions = 0x7D7B, ++/* ChannelPositions = 0x7D7B, */ + BitDepth = 0x6264, +- //end audio +- //content encoding +-// ContentEncodings = 0x6d80, +-// ContentEncoding = 0x6240, +-// ContentEncodingOrder = 0x5031, +-// ContentEncodingScope = 0x5032, +-// ContentEncodingType = 0x5033, +-// ContentCompression = 0x5034, +-// ContentCompAlgo = 0x4254, +-// ContentCompSettings = 0x4255, +-// ContentEncryption = 0x5035, +-// ContentEncAlgo = 0x47e1, +-// ContentEncKeyID = 0x47e2, +-// ContentSignature = 0x47e3, +-// ContentSigKeyID = 0x47e4, +-// ContentSigAlgo = 0x47e5, +-// ContentSigHashAlgo = 0x47e6, +- //end content encoding +- //Cueing Data ++ /* end audio */ ++ /* content encoding */ ++/* ContentEncodings = 0x6d80, */ ++/* ContentEncoding = 0x6240, */ ++/* ContentEncodingOrder = 0x5031, */ ++/* ContentEncodingScope = 0x5032, */ ++/* ContentEncodingType = 0x5033, */ ++/* ContentCompression = 0x5034, */ ++/* ContentCompAlgo = 0x4254, */ ++/* ContentCompSettings = 0x4255, */ ++/* ContentEncryption = 0x5035, */ ++/* ContentEncAlgo = 0x47e1, */ ++/* ContentEncKeyID = 0x47e2, */ ++/* ContentSignature = 0x47e3, */ ++/* ContentSigKeyID = 0x47e4, */ ++/* ContentSigAlgo = 0x47e5, */ ++/* ContentSigHashAlgo = 0x47e6, */ ++ /* end content encoding */ ++ /* Cueing Data */ + Cues = 0x1C53BB6B, + CuePoint = 0xBB, + CueTime = 0xB3, + CueTrackPositions = 0xB7, + CueTrack = 0xF7, + CueClusterPosition = 0xF1, +- CueBlockNumber = 0x5378, +-// CueCodecState = 0xEA, +-// CueReference = 0xDB, +-// CueRefTime = 0x96, +-// CueRefCluster = 0x97, +-// CueRefNumber = 0x535F, +-// CueRefCodecState = 0xEB, +- //Attachment +-// Attachments = 0x1941A469, +-// AttachedFile = 0x61A7, +-// FileDescription = 0x467E, +-// FileName = 0x466E, +-// FileMimeType = 0x4660, +-// FileData = 0x465C, +-// FileUID = 0x46AE, +-// FileReferral = 0x4675, +- //Chapters +-// Chapters = 0x1043A770, +-// EditionEntry = 0x45B9, +-// EditionUID = 0x45BC, +-// EditionFlagHidden = 0x45BD, +-// EditionFlagDefault = 0x45DB, +-// EditionFlagOrdered = 0x45DD, +-// ChapterAtom = 0xB6, +-// ChapterUID = 0x73C4, +-// ChapterTimeStart = 0x91, +-// ChapterTimeEnd = 0x92, +-// ChapterFlagHidden = 0x98, +-// ChapterFlagEnabled = 0x4598, +-// ChapterSegmentUID = 0x6E67, +-// ChapterSegmentEditionUID = 0x6EBC, +-// ChapterPhysicalEquiv = 0x63C3, +-// 
ChapterTrack = 0x8F, +-// ChapterTrackNumber = 0x89, +-// ChapterDisplay = 0x80, +-// ChapString = 0x85, +-// ChapLanguage = 0x437C, +-// ChapCountry = 0x437E, +-// ChapProcess = 0x6944, +-// ChapProcessCodecID = 0x6955, +-// ChapProcessPrivate = 0x450D, +-// ChapProcessCommand = 0x6911, +-// ChapProcessTime = 0x6922, +-// ChapProcessData = 0x6933, +- //Tagging +-// Tags = 0x1254C367, +-// Tag = 0x7373, +-// Targets = 0x63C0, +-// TargetTypeValue = 0x68CA, +-// TargetType = 0x63CA, +-// Tagging_TrackUID = 0x63C5, +-// Tagging_EditionUID = 0x63C9, +-// Tagging_ChapterUID = 0x63C4, +-// AttachmentUID = 0x63C6, +-// SimpleTag = 0x67C8, +-// TagName = 0x45A3, +-// TagLanguage = 0x447A, +-// TagDefault = 0x4484, +-// TagString = 0x4487, +-// TagBinary = 0x4485, ++ CueBlockNumber = 0x5378 ++/* CueCodecState = 0xEA, */ ++/* CueReference = 0xDB, */ ++/* CueRefTime = 0x96, */ ++/* CueRefCluster = 0x97, */ ++/* CueRefNumber = 0x535F, */ ++/* CueRefCodecState = 0xEB, */ ++ /* Attachment */ ++/* Attachments = 0x1941A469, */ ++/* AttachedFile = 0x61A7, */ ++/* FileDescription = 0x467E, */ ++/* FileName = 0x466E, */ ++/* FileMimeType = 0x4660, */ ++/* FileData = 0x465C, */ ++/* FileUID = 0x46AE, */ ++/* FileReferral = 0x4675, */ ++ /* Chapters */ ++/* Chapters = 0x1043A770, */ ++/* EditionEntry = 0x45B9, */ ++/* EditionUID = 0x45BC, */ ++/* EditionFlagHidden = 0x45BD, */ ++/* EditionFlagDefault = 0x45DB, */ ++/* EditionFlagOrdered = 0x45DD, */ ++/* ChapterAtom = 0xB6, */ ++/* ChapterUID = 0x73C4, */ ++/* ChapterTimeStart = 0x91, */ ++/* ChapterTimeEnd = 0x92, */ ++/* ChapterFlagHidden = 0x98, */ ++/* ChapterFlagEnabled = 0x4598, */ ++/* ChapterSegmentUID = 0x6E67, */ ++/* ChapterSegmentEditionUID = 0x6EBC, */ ++/* ChapterPhysicalEquiv = 0x63C3, */ ++/* ChapterTrack = 0x8F, */ ++/* ChapterTrackNumber = 0x89, */ ++/* ChapterDisplay = 0x80, */ ++/* ChapString = 0x85, */ ++/* ChapLanguage = 0x437C, */ ++/* ChapCountry = 0x437E, */ ++/* ChapProcess = 0x6944, */ ++/* ChapProcessCodecID = 0x6955, */ ++/* ChapProcessPrivate = 0x450D, */ ++/* ChapProcessCommand = 0x6911, */ ++/* ChapProcessTime = 0x6922, */ ++/* ChapProcessData = 0x6933, */ ++ /* Tagging */ ++/* Tags = 0x1254C367, */ ++/* Tag = 0x7373, */ ++/* Targets = 0x63C0, */ ++/* TargetTypeValue = 0x68CA, */ ++/* TargetType = 0x63CA, */ ++/* Tagging_TrackUID = 0x63C5, */ ++/* Tagging_EditionUID = 0x63C9, */ ++/* Tagging_ChapterUID = 0x63C4, */ ++/* AttachmentUID = 0x63C6, */ ++/* SimpleTag = 0x67C8, */ ++/* TagName = 0x45A3, */ ++/* TagLanguage = 0x447A, */ ++/* TagDefault = 0x4484, */ ++/* TagString = 0x4487, */ ++/* TagBinary = 0x4485, */ + }; + #endif +diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c +index fbf2c66..d70f06e 100644 +--- a/libmkv/EbmlWriter.c ++++ b/libmkv/EbmlWriter.c +@@ -1,12 +1,12 @@ +-// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +-// +-// Use of this source code is governed by a BSD-style license +-// that can be found in the LICENSE file in the root of the source +-// tree. An additional intellectual property rights grant can be found +-// in the file PATENTS. All contributing project authors may +-// be found in the AUTHORS file in the root of the source tree. +- +- ++/* ++ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. 
All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ + #include "EbmlWriter.h" + #include + #include +@@ -18,11 +18,13 @@ + #define LITERALU64(n) n##LLU + #endif + +-void Ebml_WriteLen(EbmlGlobal *glob, long long val) ++void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) + { +- //TODO check and make sure we are not > than 0x0100000000000000LLU +- unsigned char size = 8; //size in bytes to output +- unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size ++ /* TODO check and make sure we are not > than 0x0100000000000000LLU */ ++ unsigned char size = 8; /* size in bytes to output */ ++ ++ /* mask to compare for byte size */ ++ int64_t minVal = 0xff; + + for (size = 1; size < 8; size ++) + { +@@ -32,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) + minVal = (minVal << 7); + } + +- val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); ++ val |= (((uint64_t)0x80) << ((size - 1) * 7)); + + Ebml_Serialize(glob, (void *) &val, sizeof(val), size); + } +@@ -40,23 +42,25 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) + void Ebml_WriteString(EbmlGlobal *glob, const char *str) + { + const size_t size_ = strlen(str); +- const unsigned long long size = size_; ++ const uint64_t size = size_; + Ebml_WriteLen(glob, size); +- //TODO: it's not clear from the spec whether the nul terminator +- //should be serialized too. For now we omit the null terminator. +- Ebml_Write(glob, str, size); ++ /* TODO: it's not clear from the spec whether the nul terminator ++ * should be serialized too. For now we omit the null terminator. ++ */ ++ Ebml_Write(glob, str, (unsigned long)size); + } + + void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) + { + const size_t strlen = wcslen(wstr); + +- //TODO: it's not clear from the spec whether the nul terminator +- //should be serialized too. For now we include it. +- const unsigned long long size = strlen; ++ /* TODO: it's not clear from the spec whether the nul terminator ++ * should be serialized too. For now we include it. 
++ */ ++ const uint64_t size = strlen; + + Ebml_WriteLen(glob, size); +- Ebml_Write(glob, wstr, size); ++ Ebml_Write(glob, wstr, (unsigned long)size); + } + + void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) +@@ -85,12 +89,12 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t + + void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) + { +- unsigned char size = 8; //size in bytes to output ++ unsigned char size = 8; /* size in bytes to output */ + unsigned char sizeSerialized = 0; + unsigned long minVal; + + Ebml_WriteID(glob, class_id); +- minVal = 0x7fLU; //mask to compare for byte size ++ minVal = 0x7fLU; /* mask to compare for byte size */ + + for (size = 1; size < 4; size ++) + { +@@ -106,7 +110,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), size); + } +-//TODO: perhaps this is a poor name for this id serializer helper function ++/* TODO: perhaps this is a poor name for this id serializer helper function */ + void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) + { + int size; +@@ -168,4 +172,4 @@ void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) + } + } + +-//TODO Serialize Date ++/* TODO Serialize Date */ +diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h +index 324c9bc..b94f757 100644 +--- a/libmkv/EbmlWriter.h ++++ b/libmkv/EbmlWriter.h +@@ -1,26 +1,30 @@ ++/* ++ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ + #ifndef EBMLWRITER_HPP + #define EBMLWRITER_HPP +- +-// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +-// +-// Use of this source code is governed by a BSD-style license +-// that can be found in the LICENSE file in the root of the source +-// tree. An additional intellectual property rights grant can be found +-// in the file PATENTS. All contributing project authors may +-// be found in the AUTHORS file in the root of the source tree. 
+- +-//note: you must define write and serialize functions as well as your own EBML_GLOBAL +-//These functions MUST be implemented + #include + #include "vpx/vpx_integer.h" + ++/* note: you must define write and serialize functions as well as your own ++ * EBML_GLOBAL ++ * ++ * These functions MUST be implemented ++ */ ++ + typedef struct EbmlGlobal EbmlGlobal; + void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); + void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); +-///// + ++/*****/ + +-void Ebml_WriteLen(EbmlGlobal *glob, long long val); ++void Ebml_WriteLen(EbmlGlobal *glob, int64_t val); + void Ebml_WriteString(EbmlGlobal *glob, const char *str); + void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); + void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); +@@ -28,11 +32,11 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t + void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); + void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); + void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); +-//TODO make this more generic to signed ++/* TODO make this more generic to signed */ + void Ebml_WriteSigned16(EbmlGlobal *glob, short val); + void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); + void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); + void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); + void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); +-//TODO need date function ++/* TODO need date function */ + #endif +diff --git a/libs.mk b/libs.mk +index e2ba737..4115dd8 100644 +--- a/libs.mk ++++ b/libs.mk +@@ -20,8 +20,16 @@ endif + CODEC_SRCS-yes += CHANGELOG + CODEC_SRCS-yes += libs.mk + ++# If this is a universal (fat) binary, then all the subarchitectures have ++# already been built and our job is to stitch them together. The ++# BUILD_LIBVPX variable indicates whether we should be building ++# (compiling, linking) the library. The LIPO_LIBVPX variable indicates ++# that we're stitching. 
++$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) ++ + include $(SRC_PATH_BARE)/vpx/vpx_codec.mk + CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) ++CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS)) + + include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk + CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) +@@ -29,17 +37,17 @@ CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) + include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk + CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS)) + ++include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk ++CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS)) ++ + + ifeq ($(CONFIG_VP8_ENCODER),yes) + VP8_PREFIX=vp8/ + include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk + CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) + CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) +- CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h +- CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk + INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h + INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% +- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h + CODEC_DOC_SECTIONS += vp8 vp8_encoder + endif + +@@ -48,10 +56,8 @@ ifeq ($(CONFIG_VP8_DECODER),yes) + include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk + CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS)) + CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS)) +- CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h + INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h + INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% +- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h + CODEC_DOC_SECTIONS += vp8 vp8_decoder + endif + +@@ -66,6 +72,7 @@ endif + + ifeq ($(CONFIG_MSVS),yes) + CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) ++GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) + # This variable uses deferred expansion intentionally, since the results of + # $(wildcard) may change during the course of the Make. + VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) +@@ -82,29 +89,10 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%) + INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) + endif + +-# If this is a universal (fat) binary, then all the subarchitectures have +-# already been built and our job is to stitch them together. The +-# BUILD_LIBVPX variable indicates whether we should be building +-# (compiling, linking) the library. The LIPO_LIBVPX variable indicates +-# that we're stitching. 
+-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) +- + CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh + CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h + CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c + INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c +-ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm +-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c +-endif +-CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c +-CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h + CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com + CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc + CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec +@@ -146,7 +134,7 @@ ifeq ($(CONFIG_MSVS),yes) + obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c + @cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat . + @echo " [CREATE] $@" +- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ ++ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + --exe \ + --target=$(TOOLCHAIN) \ + --name=obj_int_extract \ +@@ -162,14 +150,14 @@ PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat + + vpx.def: $(call enabled,CODEC_EXPORTS) + @echo " [CREATE] $@" +- $(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ ++ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ + --name=vpx\ + --out=$@ $^ + CLEAN-OBJS += vpx.def + + vpx.vcproj: $(CODEC_SRCS) vpx.def + @echo " [CREATE] $@" +- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ ++ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + --lib \ + --target=$(TOOLCHAIN) \ + $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ +@@ -242,6 +230,7 @@ vpx.pc: config.mk libs.mk + $(qexec)echo 'Requires:' >> $@ + $(qexec)echo 'Conflicts:' >> $@ + $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@ ++ $(qexec)echo 'Libs.private: -lm -lpthread' >> $@ + $(qexec)echo 'Cflags: -I$${includedir}' >> $@ + INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc + INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc +@@ -284,38 +273,44 @@ OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' + + ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) + $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S +- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ ++ @echo " [CREATE] $@" ++ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ + $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c + CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S + + $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S +- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ ++ @echo " [CREATE] $@" ++ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ + $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c + CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S + + $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S +- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ ++ @echo " [CREATE] $@" ++ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d 
'$$\#' $(ADS2GAS) > $@ + $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c + CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S + else + ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) + asm_com_offsets.asm: obj_int_extract + asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o +- ./obj_int_extract rvds $< $(ADS2GAS) > $@ ++ @echo " [CREATE] $@" ++ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ + OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o + CLEAN-OBJS += asm_com_offsets.asm + $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm + + asm_enc_offsets.asm: obj_int_extract + asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o +- ./obj_int_extract rvds $< $(ADS2GAS) > $@ ++ @echo " [CREATE] $@" ++ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ + OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o + CLEAN-OBJS += asm_enc_offsets.asm + $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm + + asm_dec_offsets.asm: obj_int_extract + asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o +- ./obj_int_extract rvds $< $(ADS2GAS) > $@ ++ @echo " [CREATE] $@" ++ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ + OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o + CLEAN-OBJS += asm_dec_offsets.asm + $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm +@@ -328,7 +323,6 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_version.h + # + # Rule to generate runtime cpu detection files + # +-$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h + $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS))) + @echo " [CREATE] $@" + $(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \ +@@ -337,25 +331,43 @@ $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_S + $(RTCD_OPTIONS) $^ > $@ + CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h + +-CODEC_DOC_SRCS += vpx/vpx_codec.h \ +- vpx/vpx_decoder.h \ +- vpx/vpx_encoder.h \ +- vpx/vpx_image.h +- + ## + ## libvpx test directives + ## +- + ifeq ($(CONFIG_UNIT_TESTS),yes) ++LIBVPX_TEST_DATA_PATH ?= . 
++ ++include $(SRC_PATH_BARE)/test/test.mk ++LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS)) ++LIBVPX_TEST_BINS=./test_libvpx ++LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\ ++ $(call enabled,LIBVPX_TEST_DATA)) ++libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1) ++ ++$(LIBVPX_TEST_DATA): ++ @echo " [DOWNLOAD] $@" ++ $(qexec)trap 'rm -f $@' INT TERM &&\ ++ curl -L -o $@ $(call libvpx_test_data_url,$(@F)) ++ ++testdata:: $(LIBVPX_TEST_DATA) ++ $(qexec)if [ -x "$$(which sha1sum)" ]; then\ ++ echo "Checking test data:";\ ++ if [ -n "$(LIBVPX_TEST_DATA)" ]; then\ ++ for f in $(call enabled,LIBVPX_TEST_DATA); do\ ++ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ ++ (cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c);\ ++ done; \ ++ fi; \ ++ else\ ++ echo "Skipping test data integrity check, sha1sum not found.";\ ++ fi ++ + ifeq ($(CONFIG_EXTERNAL_BUILD),yes) + ifeq ($(CONFIG_MSVS),yes) + +-LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) +-LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_SRCS:.cc.o=)) +- + gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc + @echo " [CREATE] $@" +- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ ++ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + --lib \ + --target=$(TOOLCHAIN) \ + $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ +@@ -368,27 +380,22 @@ gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc + + PROJECTS-$(CONFIG_MSVS) += gtest.vcproj + +-define unit_test_vcproj_template +-$(notdir $(1:.cc=.vcproj)): $(SRC_PATH_BARE)/$(1) +- @echo " [vcproj] $$@" +- $$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\ +- --exe\ +- --target=$$(TOOLCHAIN)\ +- --name=$(notdir $(1:.cc=))\ +- --ver=$$(CONFIG_VS_VERSION)\ +- $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \ +- --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \ ++test_libvpx.vcproj: $(LIBVPX_TEST_SRCS) ++ @echo " [CREATE] $@" ++ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ ++ --exe \ ++ --target=$(TOOLCHAIN) \ ++ --name=test_libvpx \ ++ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ ++ --ver=$(CONFIG_VS_VERSION) \ ++ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ ++ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ + -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ +- -L. -lvpxmt -lwinmm -lgtestmt $$^ +-endef ++ -L. 
-l$(CODEC_LIB) -lwinmm -l$(GTEST_LIB) $^ + +-$(foreach proj,$(LIBVPX_TEST_BINS),\ +- $(eval $(call unit_test_vcproj_template,$(proj)))) ++PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj + +-PROJECTS-$(CONFIG_MSVS) += $(foreach proj,$(LIBVPX_TEST_BINS),\ +- $(notdir $(proj:.cc=.vcproj))) +- +-test:: ++test:: testdata + @set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done + endif + else +@@ -396,28 +403,35 @@ else + include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk + GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS)) + GTEST_OBJS=$(call objs,$(GTEST_SRCS)) +-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src +-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include ++$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src ++$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include + OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS) + LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a + $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS) + +-LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) +-LIBVPX_TEST_OBJS=$(call objs,$(LIBVPX_TEST_SRCS)) +-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src +-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include +-LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_OBJS:.cc.o=)) ++LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS))) ++$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src ++$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include + OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS) ++BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS) ++ ++# Install test sources only if codec source is included ++INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\ ++ $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f)) ++INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS) + ++CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) ++CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a) + $(foreach bin,$(LIBVPX_TEST_BINS),\ +- $(if $(BUILD_LIBVPX),$(eval $(bin): libvpx.a libgtest.a ))\ ++ $(if $(BUILD_LIBVPX),$(eval $(bin): \ ++ lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\ + $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\ +- $(bin).cc.o \ ++ $(LIBVPX_TEST_OBJS) \ + -L. -lvpx -lgtest -lpthread -lm)\ + )))\ + $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\ + +-test:: $(LIBVPX_TEST_BINS) ++test:: $(LIBVPX_TEST_BINS) testdata + @set -e; for t in $(LIBVPX_TEST_BINS); do $$t; done + + endif +@@ -435,3 +449,6 @@ libs.doxy: $(CODEC_DOC_SRCS) + @echo "PREDEFINED = VPX_CODEC_DISABLE_COMPAT" >> $@ + @echo "INCLUDE_PATH += ." 
>> $@; + @echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@ ++ ++## Generate vpx_rtcd.h for all objects ++$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h +diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c +index 63a0e83..cc87788 100644 +--- a/nestegg/src/nestegg.c ++++ b/nestegg/src/nestegg.c +@@ -1272,7 +1272,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac + if (total > block_size) + return -1; + +- entry = ne_find_track_entry(ctx, track - 1); ++ entry = ne_find_track_entry(ctx, (unsigned int)(track - 1)); + if (!entry) + return -1; + +@@ -1291,7 +1291,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac + + pkt = ne_alloc(sizeof(*pkt)); + pkt->track = track - 1; +- pkt->timecode = abs_timecode * tc_scale * track_scale; ++ pkt->timecode = (uint64_t)(abs_timecode * tc_scale * track_scale); + + ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu", + block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames); +@@ -1774,35 +1774,35 @@ nestegg_track_video_params(nestegg * ctx, unsigned int track, + + if (ne_get_uint(entry->video.pixel_width, &value) != 0) + return -1; +- params->width = value; ++ params->width = (unsigned int)value; + + if (ne_get_uint(entry->video.pixel_height, &value) != 0) + return -1; +- params->height = value; ++ params->height = (unsigned int)value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_bottom, &value); +- params->crop_bottom = value; ++ params->crop_bottom = (unsigned int)value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_top, &value); +- params->crop_top = value; ++ params->crop_top = (unsigned int)value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_left, &value); +- params->crop_left = value; ++ params->crop_left = (unsigned int)value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_right, &value); +- params->crop_right = value; ++ params->crop_right = (unsigned int)value; + + value = params->width; + ne_get_uint(entry->video.display_width, &value); +- params->display_width = value; ++ params->display_width = (unsigned int)value; + + value = params->height; + ne_get_uint(entry->video.display_height, &value); +- params->display_height = value; ++ params->display_height = (unsigned int)value; + + return 0; + } +@@ -1828,11 +1828,11 @@ nestegg_track_audio_params(nestegg * ctx, unsigned int track, + + value = 1; + ne_get_uint(entry->audio.channels, &value); +- params->channels = value; ++ params->channels = (unsigned int)value; + + value = 16; + ne_get_uint(entry->audio.bit_depth, &value); +- params->depth = value; ++ params->depth = (unsigned int)value; + + return 0; + } +@@ -1888,7 +1888,7 @@ nestegg_free_packet(nestegg_packet * pkt) + int + nestegg_packet_track(nestegg_packet * pkt, unsigned int * track) + { +- *track = pkt->track; ++ *track = (unsigned int)pkt->track; + return 0; + } + +diff --git a/solution.mk b/solution.mk +index 2de1d8d..948305f 100644 +--- a/solution.mk ++++ b/solution.mk +@@ -8,18 +8,19 @@ + ## be found in the AUTHORS file in the root of the source tree. 
+ ## + ++# libvpx reverse dependencies (targets that depend on libvpx) ++VPX_NONDEPS=$(addsuffix .vcproj,vpx gtest obj_int_extract) ++VPX_RDEPS=$(foreach vcp,\ ++ $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.vcproj=):vpx) + + vpx.sln: $(wildcard *.vcproj) + @echo " [CREATE] $@" + $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ +- $(if $(filter %vpx.vcproj,$^),\ +- $(foreach vcp,$(filter-out %vpx.vcproj %gtest.vcproj %obj_int_extract.vcproj,$^),\ +- --dep=$(vcp:.vcproj=):vpx) \ +- $(foreach vcp,$(filter %_test.vcproj,$^),\ +- --dep=$(vcp:.vcproj=):gtest)) \ +- --dep=vpx:obj_int_extract \ +- --ver=$(CONFIG_VS_VERSION)\ +- --out=$@ $^ ++ $(if $(filter vpx.vcproj,$^),$(VPX_RDEPS)) \ ++ --dep=vpx:obj_int_extract \ ++ --dep=test_libvpx:gtest \ ++ --ver=$(CONFIG_VS_VERSION)\ ++ --out=$@ $^ + vpx.sln.mk: vpx.sln + @true + +diff --git a/test/acm_random.h b/test/acm_random.h +new file mode 100644 +index 0000000..514894e +--- /dev/null ++++ b/test/acm_random.h +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++#ifndef LIBVPX_TEST_ACM_RANDOM_H_ ++#define LIBVPX_TEST_ACM_RANDOM_H_ ++ ++#include ++ ++#include "vpx/vpx_integer.h" ++ ++namespace libvpx_test { ++ ++class ACMRandom { ++ public: ++ ACMRandom() { ++ Reset(DeterministicSeed()); ++ } ++ ++ explicit ACMRandom(int seed) { ++ Reset(seed); ++ } ++ ++ void Reset(int seed) { ++ srand(seed); ++ } ++ ++ uint8_t Rand8(void) { ++ return (rand() >> 8) & 0xff; ++ } ++ ++ int PseudoUniform(int range) { ++ return (rand() >> 8) % range; ++ } ++ ++ int operator()(int n) { ++ return PseudoUniform(n); ++ } ++ ++ static int DeterministicSeed(void) { ++ return 0xbaba; ++ } ++}; ++ ++} // namespace libvpx_test ++ ++#endif // LIBVPX_TEST_ACM_RANDOM_H_ +diff --git a/test/altref_test.cc b/test/altref_test.cc +new file mode 100644 +index 0000000..ca05577 +--- /dev/null ++++ b/test/altref_test.cc +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "test/encode_test_driver.h" ++#include "test/i420_video_source.h" ++ ++namespace { ++ ++// lookahead range: [kLookAheadMin, kLookAheadMax). 
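++//
++// Illustrative note (not in the upstream file): gtest's Range() is
++// half-open, so the INSTANTIATE_TEST_CASE_P call at the end of this
++// file instantiates the test once per lag value 5, 6, ..., 25. With a
++// nonzero g_lag_in_frames the encoder may emit invisible alt-ref
++// frames; FramePktHook() below counts them by checking the
++// VPX_FRAME_IS_INVISIBLE flag, roughly:
++//
++//   if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
++//     ++altref_count_;  // at least one expected per run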
++const int kLookAheadMin = 5;
++const int kLookAheadMax = 26;
++
++class AltRefTest : public libvpx_test::EncoderTest,
++ public ::testing::TestWithParam<int> {
++ protected:
++ AltRefTest() : altref_count_(0) {}
++ virtual ~AltRefTest() {}
++
++ virtual void SetUp() {
++ InitializeConfig();
++ SetMode(libvpx_test::kTwoPassGood);
++ }
++
++ virtual void BeginPassHook(unsigned int pass) {
++ altref_count_ = 0;
++ }
++
++ virtual bool Continue() const {
++ return !HasFatalFailure() && !abort_;
++ }
++
++ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
++ libvpx_test::Encoder *encoder) {
++ if (video->frame() == 1) {
++ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
++ encoder->Control(VP8E_SET_CPUUSED, 3);
++ }
++ }
++
++ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
++ if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_;
++ }
++
++ int altref_count() const { return altref_count_; }
++
++ private:
++ int altref_count_;
++};
++
++TEST_P(AltRefTest, MonotonicTimestamps) {
++ const vpx_rational timebase = { 33333333, 1000000000 };
++ cfg_.g_timebase = timebase;
++ cfg_.rc_target_bitrate = 1000;
++ cfg_.g_lag_in_frames = GetParam();
++
++ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
++ timebase.den, timebase.num, 0, 30);
++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++ EXPECT_GE(altref_count(), 1);
++}
++
++INSTANTIATE_TEST_CASE_P(NonZeroLag, AltRefTest,
++ ::testing::Range(kLookAheadMin, kLookAheadMax));
++} // namespace
+diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
+new file mode 100644
+index 0000000..4e21be8
+--- /dev/null
++++ b/test/boolcoder_test.cc
+@@ -0,0 +1,90 @@
++/*
++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ * Use of this source code is governed by a BSD-style license
++ * that can be found in the LICENSE file in the root of the source
++ * tree. An additional intellectual property rights grant can be found
++ * in the file PATENTS. All contributing project authors may
++ * be found in the AUTHORS file in the root of the source tree.
++ */
++
++extern "C" {
++#include "vp8/encoder/boolhuff.h"
++#include "vp8/decoder/dboolhuff.h"
++}
++
++#include <math.h>
++#include <stddef.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <sys/types.h>
++
++#include "test/acm_random.h"
++#include "third_party/googletest/src/include/gtest/gtest.h"
++#include "vpx/vpx_integer.h"
++
++namespace {
++const int num_tests = 10;
++} // namespace
++
++using libvpx_test::ACMRandom;
++
++TEST(VP8, TestBitIO) {
++ ACMRandom rnd(ACMRandom::DeterministicSeed());
++ for (int n = 0; n < num_tests; ++n) {
++ for (int method = 0; method <= 7; ++method) { // we generate various proba
++ const int bits_to_test = 1000;
++ uint8_t probas[bits_to_test];
++
++ for (int i = 0; i < bits_to_test; ++i) {
++ const int parity = i & 1;
++ probas[i] =
++ (method == 0) ? 0 : (method == 1) ? 255 :
++ (method == 2) ? 128 :
++ (method == 3) ? rnd.Rand8() :
++ (method == 4) ? (parity ? 0 : 255) :
++ // alternate between low and high proba:
++ (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
++ (method == 6) ?
++ (parity ? rnd(64) : 255 - rnd(64)) :
++ (parity ? rnd(32) : 255 - rnd(32));
++ }
++ for (int bit_method = 0; bit_method <= 3; ++bit_method) {
++ const int random_seed = 6432;
++ const int buffer_size = 10000;
++ ACMRandom bit_rnd(random_seed);
++ BOOL_CODER bw;
++ uint8_t bw_buffer[buffer_size];
++ vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
++
++ int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
++ for (int i = 0; i < bits_to_test; ++i) {
++ if (bit_method == 2) {
++ bit = (i & 1);
++ } else if (bit_method == 3) {
++ bit = bit_rnd(2);
++ }
++ vp8_encode_bool(&bw, bit, static_cast<int>(probas[i]));
++ }
++
++ vp8_stop_encode(&bw);
++
++ BOOL_DECODER br;
++ vp8dx_start_decode(&br, bw_buffer, buffer_size);
++ bit_rnd.Reset(random_seed);
++ for (int i = 0; i < bits_to_test; ++i) {
++ if (bit_method == 2) {
++ bit = (i & 1);
++ } else if (bit_method == 3) {
++ bit = bit_rnd(2);
++ }
++ GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
++ << "pos: "<< i << " / " << bits_to_test
++ << " bit_method: " << bit_method
++ << " method: " << method;
++ }
++ }
++ }
++ }
++}
+diff --git a/test/config_test.cc b/test/config_test.cc
+new file mode 100644
+index 0000000..c4da46e
+--- /dev/null
++++ b/test/config_test.cc
+@@ -0,0 +1,61 @@
++/*
++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ * Use of this source code is governed by a BSD-style license
++ * that can be found in the LICENSE file in the root of the source
++ * tree. An additional intellectual property rights grant can be found
++ * in the file PATENTS. All contributing project authors may
++ * be found in the AUTHORS file in the root of the source tree.
++ */
++#include "third_party/googletest/src/include/gtest/gtest.h"
++#include "test/encode_test_driver.h"
++#include "test/video_source.h"
++
++namespace {
++
++class ConfigTest : public ::libvpx_test::EncoderTest,
++ public ::testing::TestWithParam<libvpx_test::TestMode> {
++ public:
++ ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {}
++
++ protected:
++ virtual void SetUp() {
++ InitializeConfig();
++ SetMode(GetParam());
++ }
++
++ virtual void BeginPassHook(unsigned int /*pass*/) {
++ frame_count_in_ = 0;
++ frame_count_out_ = 0;
++ }
++
++ virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) {
++ ++frame_count_in_;
++ abort_ |= (frame_count_in_ >= frame_count_max_);
++ }
++
++ virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {
++ ++frame_count_out_;
++ }
++
++ virtual bool Continue() const {
++ return !HasFatalFailure() && !abort_;
++ }
++
++ unsigned int frame_count_in_;
++ unsigned int frame_count_out_;
++ unsigned int frame_count_max_;
++};
++
++TEST_P(ConfigTest, LagIsDisabled) {
++ frame_count_max_ = 2;
++ cfg_.g_lag_in_frames = 15;
++
++ libvpx_test::DummyVideoSource video;
++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++
++ EXPECT_EQ(frame_count_in_, frame_count_out_);
++}
++
++INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES);
++} // namespace
+diff --git a/test/cq_test.cc b/test/cq_test.cc
+new file mode 100644
+index 0000000..42ee2a2
+--- /dev/null
++++ b/test/cq_test.cc
+@@ -0,0 +1,106 @@
++/*
++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ * Use of this source code is governed by a BSD-style license
++ * that can be found in the LICENSE file in the root of the source
++ * tree. An additional intellectual property rights grant can be found
++ * in the file PATENTS. All contributing project authors may
++ * be found in the AUTHORS file in the root of the source tree.
++ */
++#include <cmath>
++#include "third_party/googletest/src/include/gtest/gtest.h"
++#include "test/encode_test_driver.h"
++#include "test/i420_video_source.h"
++
++// CQ level range: [kCQLevelMin, kCQLevelMax).
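++//
++// Illustrative note (not in the upstream file): with the constants
++// below and gtest's half-open Range(), the parameterized sweep covers
++// the CQ levels 4, 12, 20, 28, 36, 44, 52 and 60, i.e.:
++//
++//   for (int cq = kCQLevelMin; cq < kCQLevelMax; cq += kCQLevelStep)
++//     ;  // eight instances of CQTest in total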
++const int kCQLevelMin = 4; ++const int kCQLevelMax = 63; ++const int kCQLevelStep = 8; ++const int kCQTargetBitrate = 2000; ++ ++namespace { ++ ++class CQTest : public libvpx_test::EncoderTest, ++ public ::testing::TestWithParam { ++ protected: ++ CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; } ++ virtual ~CQTest() {} ++ ++ virtual void SetUp() { ++ InitializeConfig(); ++ SetMode(libvpx_test::kTwoPassGood); ++ } ++ ++ virtual void BeginPassHook(unsigned int /*pass*/) { ++ file_size_ = 0; ++ psnr_ = 0.0; ++ n_frames_ = 0; ++ } ++ ++ virtual bool Continue() const { ++ return !HasFatalFailure() && !abort_; ++ } ++ ++ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, ++ libvpx_test::Encoder *encoder) { ++ if (video->frame() == 1) { ++ if (cfg_.rc_end_usage == VPX_CQ) { ++ encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_); ++ } ++ encoder->Control(VP8E_SET_CPUUSED, 3); ++ } ++ } ++ ++ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { ++ psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0); ++ n_frames_++; ++ } ++ ++ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { ++ file_size_ += pkt->data.frame.sz; ++ } ++ ++ double GetLinearPSNROverBitrate() const { ++ double avg_psnr = log10(psnr_ / n_frames_) * 10.0; ++ return pow(10.0, avg_psnr / 10.0) / file_size_; ++ } ++ ++ int file_size() const { return file_size_; } ++ int n_frames() const { return n_frames_; } ++ ++ private: ++ int cq_level_; ++ int file_size_; ++ double psnr_; ++ int n_frames_; ++}; ++ ++int prev_actual_bitrate = kCQTargetBitrate; ++TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) { ++ const vpx_rational timebase = { 33333333, 1000000000 }; ++ cfg_.g_timebase = timebase; ++ cfg_.rc_target_bitrate = kCQTargetBitrate; ++ cfg_.g_lag_in_frames = 25; ++ ++ cfg_.rc_end_usage = VPX_CQ; ++ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, ++ timebase.den, timebase.num, 0, 30); ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ const double cq_psnr_lin = GetLinearPSNROverBitrate(); ++ const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000); ++ EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate); ++ EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate); ++ prev_actual_bitrate = cq_actual_bitrate; ++ ++ // try targeting the approximate same bitrate with VBR mode ++ cfg_.rc_end_usage = VPX_VBR; ++ cfg_.rc_target_bitrate = cq_actual_bitrate; ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ const double vbr_psnr_lin = GetLinearPSNROverBitrate(); ++ EXPECT_GE(cq_psnr_lin, vbr_psnr_lin); ++} ++ ++INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest, ++ ::testing::Range(kCQLevelMin, kCQLevelMax, ++ kCQLevelStep)); ++} // namespace +diff --git a/test/datarate_test.cc b/test/datarate_test.cc +new file mode 100644 +index 0000000..6fbcb64 +--- /dev/null ++++ b/test/datarate_test.cc +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */ ++#include "test/encode_test_driver.h" ++#include "test/i420_video_source.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++namespace { ++ ++class DatarateTest : public ::libvpx_test::EncoderTest, ++ public ::testing::TestWithParam { ++ protected: ++ virtual void SetUp() { ++ InitializeConfig(); ++ SetMode(GetParam()); ++ ResetModel(); ++ } ++ ++ virtual void ResetModel() { ++ last_pts_ = 0; ++ bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; ++ frame_number_ = 0; ++ first_drop_ = 0; ++ bits_total_ = 0; ++ duration_ = 0.0; ++ } ++ ++ virtual bool Continue() const { ++ return !HasFatalFailure() && !abort_; ++ } ++ ++ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ++ ::libvpx_test::Encoder *encoder) { ++ const vpx_rational_t tb = video->timebase(); ++ timebase_ = static_cast(tb.num) / tb.den; ++ duration_ = 0; ++ } ++ ++ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { ++ // Time since last timestamp = duration. ++ vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; ++ ++ // TODO(jimbankoski): Remove these lines when the issue: ++ // http://code.google.com/p/webm/issues/detail?id=496 is fixed. ++ // For now the codec assumes buffer starts at starting buffer rate ++ // plus one frame's time. ++ if (last_pts_ == 0) ++ duration = 1; ++ ++ // Add to the buffer the bits we'd expect from a constant bitrate server. ++ bits_in_buffer_model_ += duration * timebase_ * cfg_.rc_target_bitrate ++ * 1000; ++ ++ /* Test the buffer model here before subtracting the frame. Do so because ++ * the way the leaky bucket model works in libvpx is to allow the buffer to ++ * empty - and then stop showing frames until we've got enough bits to ++ * show one. As noted in comment below (issue 495), this does not currently ++ * apply to key frames. For now exclude key frames in condition below. */ ++ bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true: false; ++ if (!key_frame) { ++ ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " ++ << pkt->data.frame.pts; ++ } ++ ++ const int frame_size_in_bits = pkt->data.frame.sz * 8; ++ ++ // Subtract from the buffer the bits associated with a played back frame. ++ bits_in_buffer_model_ -= frame_size_in_bits; ++ ++ // Update the running total of bits for end of test datarate checks. ++ bits_total_ += frame_size_in_bits ; ++ ++ // If first drop not set and we have a drop set it to this time. ++ if (!first_drop_ && duration > 1) ++ first_drop_ = last_pts_ + 1; ++ ++ // Update the most recent pts. ++ last_pts_ = pkt->data.frame.pts; ++ ++ // We update this so that we can calculate the datarate minus the last ++ // frame encoded in the file. ++ bits_in_last_frame_ = frame_size_in_bits; ++ ++ ++frame_number_; ++ } ++ ++ virtual void EndPassHook(void) { ++ if (bits_total_) { ++ const double file_size_in_kb = bits_total_ / 1000; /* bits per kilobit */ ++ ++ duration_ = (last_pts_ + 1) * timebase_; ++ ++ // Effective file datarate includes the time spent prebuffering. 
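++ // Worked example with assumed numbers (not from a real run): for
++ // rc_buf_initial_sz = 500 ms, duration_ = 4.0 s, bits_total_ =
++ // 3208000 and bits_in_last_frame_ = 8000, the expression below is
++ // (3208000 - 8000) / 1000.0 / (0.5 + 4.0) ~= 711.1 kbit/s.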
++ effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 ++ / (cfg_.rc_buf_initial_sz / 1000.0 + duration_); ++ ++ file_datarate_ = file_size_in_kb / duration_; ++ } ++ } ++ ++ vpx_codec_pts_t last_pts_; ++ int bits_in_buffer_model_; ++ double timebase_; ++ int frame_number_; ++ vpx_codec_pts_t first_drop_; ++ int64_t bits_total_; ++ double duration_; ++ double file_datarate_; ++ double effective_datarate_; ++ int bits_in_last_frame_; ++}; ++ ++TEST_P(DatarateTest, BasicBufferModel) { ++ cfg_.rc_buf_initial_sz = 500; ++ cfg_.rc_dropframe_thresh = 1; ++ cfg_.rc_max_quantizer = 56; ++ cfg_.rc_end_usage = VPX_CBR; ++ // 2 pass cbr datarate control has a bug hidden by the small # of ++ // frames selected in this encode. The problem is that even if the buffer is ++ // negative we produce a keyframe on a cutscene. Ignoring datarate ++ // constraints ++ // TODO(jimbankoski): ( Fix when issue ++ // http://code.google.com/p/webm/issues/detail?id=495 is addressed. ) ++ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, ++ 30, 1, 0, 140); ++ ++ // There is an issue for low bitrates in real-time mode, where the ++ // effective_datarate slightly overshoots the target bitrate. ++ // This is same the issue as noted about (#495). ++ // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), ++ // when the issue is resolved. ++ for (int i = 100; i < 800; i += 200) { ++ cfg_.rc_target_bitrate = i; ++ ResetModel(); ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_) ++ << " The datarate for the file exceeds the target!"; ++ ++ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3) ++ << " The datarate for the file missed the target!"; ++ } ++} ++ ++TEST_P(DatarateTest, ChangingDropFrameThresh) { ++ cfg_.rc_buf_initial_sz = 500; ++ cfg_.rc_max_quantizer = 36; ++ cfg_.rc_end_usage = VPX_CBR; ++ cfg_.rc_target_bitrate = 200; ++ cfg_.kf_mode = VPX_KF_DISABLED; ++ ++ const int frame_count = 40; ++ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, ++ 30, 1, 0, frame_count); ++ ++ // Here we check that the first dropped frame gets earlier and earlier ++ // as the drop frame threshold is increased. ++ ++ const int kDropFrameThreshTestStep = 30; ++ vpx_codec_pts_t last_drop = frame_count; ++ for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { ++ cfg_.rc_dropframe_thresh = i; ++ ResetModel(); ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ ASSERT_LE(first_drop_, last_drop) ++ << " The first dropped frame for drop_thresh " << i ++ << " > first dropped frame for drop_thresh " ++ << i - kDropFrameThreshTestStep; ++ last_drop = first_drop_; ++ } ++} ++ ++INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES); ++} // namespace +diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc +new file mode 100644 +index 0000000..84afe7f +--- /dev/null ++++ b/test/decode_test_driver.cc +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */ ++#include "test/decode_test_driver.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "test/register_state_check.h" ++#include "test/video_source.h" ++ ++namespace libvpx_test { ++#if CONFIG_VP8_DECODER ++void Decoder::DecodeFrame(const uint8_t *cxdata, int size) { ++ if (!decoder_.priv) { ++ const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_, ++ &vpx_codec_vp8_dx_algo, ++ &cfg_, 0); ++ ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError(); ++ } ++ ++ vpx_codec_err_t res_dec; ++ REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_, ++ cxdata, size, NULL, 0)); ++ ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError(); ++} ++ ++void DecoderTest::RunLoop(CompressedVideoSource *video) { ++ vpx_codec_dec_cfg_t dec_cfg = {0}; ++ Decoder decoder(dec_cfg, 0); ++ ++ // Decode frames. ++ for (video->Begin(); video->cxdata(); video->Next()) { ++ decoder.DecodeFrame(video->cxdata(), video->frame_size()); ++ ++ DxDataIterator dec_iter = decoder.GetDxData(); ++ const vpx_image_t *img = NULL; ++ ++ // Get decompressed data ++ while ((img = dec_iter.Next())) ++ DecompressedFrameHook(*img, video->frame_number()); ++ } ++} ++#endif ++} // namespace libvpx_test +diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h +new file mode 100644 +index 0000000..6408bee +--- /dev/null ++++ b/test/decode_test_driver.h +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++#ifndef TEST_DECODE_TEST_DRIVER_H_ ++#define TEST_DECODE_TEST_DRIVER_H_ ++#include ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "vpx_config.h" ++#include "vpx/vpx_decoder.h" ++#include "vpx/vp8dx.h" ++ ++namespace libvpx_test { ++ ++class CompressedVideoSource; ++ ++// Provides an object to handle decoding output ++class DxDataIterator { ++ public: ++ explicit DxDataIterator(vpx_codec_ctx_t *decoder) ++ : decoder_(decoder), iter_(NULL) {} ++ ++ const vpx_image_t *Next() { ++ return vpx_codec_get_frame(decoder_, &iter_); ++ } ++ ++ private: ++ vpx_codec_ctx_t *decoder_; ++ vpx_codec_iter_t iter_; ++}; ++ ++// Provides a simplified interface to manage one video decoding. ++// ++// TODO: similar to Encoder class, the exact services should be ++// added as more tests are added. ++class Decoder { ++ public: ++ Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) ++ : cfg_(cfg), deadline_(deadline) { ++ memset(&decoder_, 0, sizeof(decoder_)); ++ } ++ ++ ~Decoder() { ++ vpx_codec_destroy(&decoder_); ++ } ++ ++ void DecodeFrame(const uint8_t *cxdata, int size); ++ ++ DxDataIterator GetDxData() { ++ return DxDataIterator(&decoder_); ++ } ++ ++ void set_deadline(unsigned long deadline) { ++ deadline_ = deadline; ++ } ++ ++ void Control(int ctrl_id, int arg) { ++ const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg); ++ ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); ++ } ++ ++ protected: ++ const char *DecodeError() { ++ const char *detail = vpx_codec_error_detail(&decoder_); ++ return detail ? 
detail : vpx_codec_error(&decoder_); ++ } ++ ++ vpx_codec_ctx_t decoder_; ++ vpx_codec_dec_cfg_t cfg_; ++ unsigned int deadline_; ++}; ++ ++// Common test functionality for all Decoder tests. ++class DecoderTest { ++ public: ++ // Main loop. ++ virtual void RunLoop(CompressedVideoSource *video); ++ ++ // Hook to be called on every decompressed frame. ++ virtual void DecompressedFrameHook(const vpx_image_t& img, ++ const unsigned int frame_number) {} ++ ++ protected: ++ DecoderTest() {} ++ ++ virtual ~DecoderTest() {} ++}; ++ ++} // namespace libvpx_test ++ ++#endif // TEST_DECODE_TEST_DRIVER_H_ +diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc +new file mode 100644 +index 0000000..56339ca +--- /dev/null ++++ b/test/encode_test_driver.cc +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++#include "vpx_config.h" ++#include "test/encode_test_driver.h" ++#if CONFIG_VP8_DECODER ++#include "test/decode_test_driver.h" ++#endif ++#include "test/register_state_check.h" ++#include "test/video_source.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++ ++namespace libvpx_test { ++void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) { ++ if (video->img()) ++ EncodeFrameInternal(*video, frame_flags); ++ else ++ Flush(); ++ ++ // Handle twopass stats ++ CxDataIterator iter = GetCxData(); ++ ++ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { ++ if (pkt->kind != VPX_CODEC_STATS_PKT) ++ continue; ++ ++ stats_->Append(*pkt); ++ } ++} ++ ++void Encoder::EncodeFrameInternal(const VideoSource &video, ++ const unsigned long frame_flags) { ++ vpx_codec_err_t res; ++ const vpx_image_t *img = video.img(); ++ ++ // Handle first frame initialization ++ if (!encoder_.priv) { ++ cfg_.g_w = img->d_w; ++ cfg_.g_h = img->d_h; ++ cfg_.g_timebase = video.timebase(); ++ cfg_.rc_twopass_stats_in = stats_->buf(); ++ res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_, ++ init_flags_); ++ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ++ } ++ ++ // Handle frame resizing ++ if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { ++ cfg_.g_w = img->d_w; ++ cfg_.g_h = img->d_h; ++ res = vpx_codec_enc_config_set(&encoder_, &cfg_); ++ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ++ } ++ ++ // Encode the frame ++ REGISTER_STATE_CHECK( ++ res = vpx_codec_encode(&encoder_, ++ video.img(), video.pts(), video.duration(), ++ frame_flags, deadline_)); ++ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ++} ++ ++void Encoder::Flush() { ++ const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0, ++ deadline_); ++ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ++} ++ ++void EncoderTest::SetMode(TestMode mode) { ++ switch (mode) { ++ case kRealTime: ++ deadline_ = VPX_DL_REALTIME; ++ break; ++ ++ case kOnePassGood: ++ case kTwoPassGood: ++ deadline_ = VPX_DL_GOOD_QUALITY; ++ break; ++ ++ case kOnePassBest: ++ case kTwoPassBest: ++ deadline_ = VPX_DL_BEST_QUALITY; ++ break; ++ ++ default: ++ ASSERT_TRUE(false) << "Unexpected mode " << mode; ++ } ++ ++ if (mode == kTwoPassGood || mode == kTwoPassBest) ++ passes_ = 2; ++ else ++ passes_ = 1; ++} ++// The function 
should return "true" most of the time, therefore no early ++// break-out is implemented within the match checking process. ++static bool compare_img(const vpx_image_t *img1, ++ const vpx_image_t *img2) { ++ bool match = (img1->fmt == img2->fmt) && ++ (img1->d_w == img2->d_w) && ++ (img1->d_h == img2->d_h); ++ ++ const unsigned int width_y = img1->d_w; ++ const unsigned int height_y = img1->d_h; ++ unsigned int i; ++ for (i = 0; i < height_y; ++i) ++ match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], ++ img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], ++ width_y) == 0) && match; ++ const unsigned int width_uv = (img1->d_w + 1) >> 1; ++ const unsigned int height_uv = (img1->d_h + 1) >> 1; ++ for (i = 0; i < height_uv; ++i) ++ match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], ++ img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], ++ width_uv) == 0) && match; ++ for (i = 0; i < height_uv; ++i) ++ match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], ++ img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], ++ width_uv) == 0) && match; ++ return match; ++} ++ ++void EncoderTest::RunLoop(VideoSource *video) { ++#if CONFIG_VP8_DECODER ++ vpx_codec_dec_cfg_t dec_cfg = {0}; ++#endif ++ ++ stats_.Reset(); ++ ++ for (unsigned int pass = 0; pass < passes_; pass++) { ++ last_pts_ = 0; ++ ++ if (passes_ == 1) ++ cfg_.g_pass = VPX_RC_ONE_PASS; ++ else if (pass == 0) ++ cfg_.g_pass = VPX_RC_FIRST_PASS; ++ else ++ cfg_.g_pass = VPX_RC_LAST_PASS; ++ ++ BeginPassHook(pass); ++ Encoder encoder(cfg_, deadline_, init_flags_, &stats_); ++#if CONFIG_VP8_DECODER ++ Decoder decoder(dec_cfg, 0); ++ bool has_cxdata = false; ++#endif ++ bool again; ++ for (again = true, video->Begin(); again; video->Next()) { ++ again = video->img() != NULL; ++ ++ PreEncodeFrameHook(video); ++ PreEncodeFrameHook(video, &encoder); ++ encoder.EncodeFrame(video, frame_flags_); ++ ++ CxDataIterator iter = encoder.GetCxData(); ++ ++ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { ++ again = true; ++ ++ switch (pkt->kind) { ++ case VPX_CODEC_CX_FRAME_PKT: ++#if CONFIG_VP8_DECODER ++ has_cxdata = true; ++ decoder.DecodeFrame((const uint8_t*)pkt->data.frame.buf, ++ pkt->data.frame.sz); ++#endif ++ ASSERT_GE(pkt->data.frame.pts, last_pts_); ++ last_pts_ = pkt->data.frame.pts; ++ FramePktHook(pkt); ++ break; ++ ++ case VPX_CODEC_PSNR_PKT: ++ PSNRPktHook(pkt); ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++#if CONFIG_VP8_DECODER ++ if (has_cxdata) { ++ const vpx_image_t *img_enc = encoder.GetPreviewFrame(); ++ DxDataIterator dec_iter = decoder.GetDxData(); ++ const vpx_image_t *img_dec = dec_iter.Next(); ++ if(img_enc && img_dec) { ++ const bool res = compare_img(img_enc, img_dec); ++ ASSERT_TRUE(res)<< "Encoder/Decoder mismatch found."; ++ } ++ } ++#endif ++ if (!Continue()) ++ break; ++ } ++ ++ EndPassHook(); ++ ++ if (!Continue()) ++ break; ++ } ++} ++} // namespace libvpx_test +diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h +new file mode 100644 +index 0000000..0141fa9 +--- /dev/null ++++ b/test/encode_test_driver.h +@@ -0,0 +1,197 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. 
All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++#ifndef TEST_ENCODE_TEST_DRIVER_H_ ++#define TEST_ENCODE_TEST_DRIVER_H_ ++#include ++#include ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "vpx/vpx_encoder.h" ++#include "vpx/vp8cx.h" ++ ++namespace libvpx_test { ++ ++class VideoSource; ++ ++enum TestMode { ++ kRealTime, ++ kOnePassGood, ++ kOnePassBest, ++ kTwoPassGood, ++ kTwoPassBest ++}; ++#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ ++ ::libvpx_test::kOnePassGood, \ ++ ::libvpx_test::kOnePassBest, \ ++ ::libvpx_test::kTwoPassGood, \ ++ ::libvpx_test::kTwoPassBest) ++ ++#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ ++ ::libvpx_test::kOnePassGood, \ ++ ::libvpx_test::kOnePassBest) ++ ++ ++// Provides an object to handle the libvpx get_cx_data() iteration pattern ++class CxDataIterator { ++ public: ++ explicit CxDataIterator(vpx_codec_ctx_t *encoder) ++ : encoder_(encoder), iter_(NULL) {} ++ ++ const vpx_codec_cx_pkt_t *Next() { ++ return vpx_codec_get_cx_data(encoder_, &iter_); ++ } ++ ++ private: ++ vpx_codec_ctx_t *encoder_; ++ vpx_codec_iter_t iter_; ++}; ++ ++// Implements an in-memory store for libvpx twopass statistics ++class TwopassStatsStore { ++ public: ++ void Append(const vpx_codec_cx_pkt_t &pkt) { ++ buffer_.append(reinterpret_cast(pkt.data.twopass_stats.buf), ++ pkt.data.twopass_stats.sz); ++ } ++ ++ vpx_fixed_buf_t buf() { ++ const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; ++ return buf; ++ } ++ ++ void Reset() { ++ buffer_.clear(); ++ } ++ ++ protected: ++ std::string buffer_; ++}; ++ ++ ++// Provides a simplified interface to manage one video encoding pass, given ++// a configuration and video source. ++// ++// TODO(jkoleszar): The exact services it provides and the appropriate ++// level of abstraction will be fleshed out as more tests are written. ++class Encoder { ++ public: ++ Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, ++ const unsigned long init_flags, TwopassStatsStore *stats) ++ : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) { ++ memset(&encoder_, 0, sizeof(encoder_)); ++ } ++ ++ ~Encoder() { ++ vpx_codec_destroy(&encoder_); ++ } ++ ++ CxDataIterator GetCxData() { ++ return CxDataIterator(&encoder_); ++ } ++ ++ const vpx_image_t *GetPreviewFrame() { ++ return vpx_codec_get_preview_frame(&encoder_); ++ } ++ // This is a thin wrapper around vpx_codec_encode(), so refer to ++ // vpx_encoder.h for its semantics. ++ void EncodeFrame(VideoSource *video, const unsigned long frame_flags); ++ ++ // Convenience wrapper for EncodeFrame() ++ void EncodeFrame(VideoSource *video) { ++ EncodeFrame(video, 0); ++ } ++ ++ void Control(int ctrl_id, int arg) { ++ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ++ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ++ } ++ ++ void set_deadline(unsigned long deadline) { ++ deadline_ = deadline; ++ } ++ ++ protected: ++ const char *EncoderError() { ++ const char *detail = vpx_codec_error_detail(&encoder_); ++ return detail ? 
detail : vpx_codec_error(&encoder_); ++ } ++ ++ // Encode an image ++ void EncodeFrameInternal(const VideoSource &video, ++ const unsigned long frame_flags); ++ ++ // Flush the encoder on EOS ++ void Flush(); ++ ++ vpx_codec_ctx_t encoder_; ++ vpx_codec_enc_cfg_t cfg_; ++ unsigned long deadline_; ++ unsigned long init_flags_; ++ TwopassStatsStore *stats_; ++}; ++ ++// Common test functionality for all Encoder tests. ++// ++// This class is a mixin which provides the main loop common to all ++// encoder tests. It provides hooks which can be overridden by subclasses ++// to implement each test's specific behavior, while centralizing the bulk ++// of the boilerplate. Note that it doesn't inherit the gtest testing ++// classes directly, so that tests can be parameterized differently. ++class EncoderTest { ++ protected: ++ EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0), ++ last_pts_(0) {} ++ ++ virtual ~EncoderTest() {} ++ ++ // Initialize the cfg_ member with the default configuration. ++ void InitializeConfig() { ++ const vpx_codec_err_t res = vpx_codec_enc_config_default( ++ &vpx_codec_vp8_cx_algo, &cfg_, 0); ++ ASSERT_EQ(VPX_CODEC_OK, res); ++ } ++ ++ // Map the TestMode enum to the deadline_ and passes_ variables. ++ void SetMode(TestMode mode); ++ ++ // Main loop. ++ virtual void RunLoop(VideoSource *video); ++ ++ // Hook to be called at the beginning of a pass. ++ virtual void BeginPassHook(unsigned int pass) {} ++ ++ // Hook to be called at the end of a pass. ++ virtual void EndPassHook() {} ++ ++ // Hook to be called before encoding a frame. ++ virtual void PreEncodeFrameHook(VideoSource *video) {} ++ virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {} ++ ++ // Hook to be called on every compressed data packet. ++ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {} ++ ++ // Hook to be called on every PSNR packet. ++ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {} ++ ++ // Hook to determine whether the encode loop should continue. ++ virtual bool Continue() const { return !abort_; } ++ ++ bool abort_; ++ vpx_codec_enc_cfg_t cfg_; ++ unsigned int passes_; ++ unsigned long deadline_; ++ TwopassStatsStore stats_; ++ unsigned long init_flags_; ++ unsigned long frame_flags_; ++ vpx_codec_pts_t last_pts_; ++}; ++ ++} // namespace libvpx_test ++ ++#endif // TEST_ENCODE_TEST_DRIVER_H_ +diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc +new file mode 100644 +index 0000000..25c6731 +--- /dev/null ++++ b/test/error_resilience_test.cc +@@ -0,0 +1,90 @@ ++/* ++ Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ ++ Use of this source code is governed by a BSD-style license ++ that can be found in the LICENSE file in the root of the source ++ tree. An additional intellectual property rights grant can be found ++ in the file PATENTS. All contributing project authors may ++ be found in the AUTHORS file in the root of the source tree. 
++*/ ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "test/encode_test_driver.h" ++#include "test/i420_video_source.h" ++ ++namespace { ++ ++class ErrorResilienceTest : public libvpx_test::EncoderTest, ++ public ::testing::TestWithParam { ++ protected: ++ ErrorResilienceTest() { ++ psnr_ = 0.0; ++ nframes_ = 0; ++ encoding_mode_ = static_cast(GetParam()); ++ } ++ virtual ~ErrorResilienceTest() {} ++ ++ virtual void SetUp() { ++ InitializeConfig(); ++ SetMode(encoding_mode_); ++ } ++ ++ virtual void BeginPassHook(unsigned int /*pass*/) { ++ psnr_ = 0.0; ++ nframes_ = 0; ++ } ++ ++ virtual bool Continue() const { ++ return !HasFatalFailure() && !abort_; ++ } ++ ++ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { ++ psnr_ += pkt->data.psnr.psnr[0]; ++ nframes_++; ++ } ++ ++ double GetAveragePsnr() const { ++ if (nframes_) ++ return psnr_ / nframes_; ++ return 0.0; ++ } ++ ++ private: ++ double psnr_; ++ unsigned int nframes_; ++ libvpx_test::TestMode encoding_mode_; ++}; ++ ++TEST_P(ErrorResilienceTest, OnVersusOff) { ++ const vpx_rational timebase = { 33333333, 1000000000 }; ++ cfg_.g_timebase = timebase; ++ cfg_.rc_target_bitrate = 2000; ++ cfg_.g_lag_in_frames = 25; ++ ++ init_flags_ = VPX_CODEC_USE_PSNR; ++ ++ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, ++ timebase.den, timebase.num, 0, 30); ++ ++ // Error resilient mode OFF. ++ cfg_.g_error_resilient = 0; ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ const double psnr_resilience_off = GetAveragePsnr(); ++ EXPECT_GT(psnr_resilience_off, 25.0); ++ ++ // Error resilient mode ON. ++ cfg_.g_error_resilient = 1; ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ const double psnr_resilience_on = GetAveragePsnr(); ++ EXPECT_GT(psnr_resilience_on, 25.0); ++ ++ // Test that turning on error resilient mode hurts by 10% at most. ++ if (psnr_resilience_off > 0.0) { ++ const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; ++ EXPECT_GE(psnr_ratio, 0.9); ++ EXPECT_LE(psnr_ratio, 1.1); ++ } ++} ++ ++INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest, ++ ONE_PASS_TEST_MODES); ++} // namespace +diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc +new file mode 100644 +index 0000000..619b23d +--- /dev/null ++++ b/test/fdct4x4_test.cc +@@ -0,0 +1,169 @@ ++/* ++* Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++* ++* Use of this source code is governed by a BSD-style license ++* that can be found in the LICENSE file in the root of the source ++* tree. An additional intellectual property rights grant can be found ++* in the file PATENTS. All contributing project authors may ++* be found in the AUTHORS file in the root of the source tree. 
++*/
++
++
++#include <math.h>
++#include <stddef.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <sys/types.h>
++
++
++extern "C" {
++#include "vpx_rtcd.h"
++}
++
++#include "test/acm_random.h"
++#include "third_party/googletest/src/include/gtest/gtest.h"
++#include "vpx/vpx_integer.h"
++
++
++namespace {
++
++const int cospi8sqrt2minus1 = 20091;
++const int sinpi8sqrt2 = 35468;
++
++void reference_idct4x4(const int16_t *input, int16_t *output) {
++ const int16_t *ip = input;
++ int16_t *op = output;
++
++ for (int i = 0; i < 4; ++i) {
++ const int a1 = ip[0] + ip[8];
++ const int b1 = ip[0] - ip[8];
++ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16;
++ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
++ const int c1 = temp1 - temp2;
++ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
++ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16;
++ const int d1 = temp3 + temp4;
++ op[0] = a1 + d1;
++ op[12] = a1 - d1;
++ op[4] = b1 + c1;
++ op[8] = b1 - c1;
++ ++ip;
++ ++op;
++ }
++ ip = output;
++ op = output;
++ for (int i = 0; i < 4; ++i) {
++ const int a1 = ip[0] + ip[2];
++ const int b1 = ip[0] - ip[2];
++ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16;
++ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
++ const int c1 = temp1 - temp2;
++ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
++ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16;
++ const int d1 = temp3 + temp4;
++ op[0] = (a1 + d1 + 4) >> 3;
++ op[3] = (a1 - d1 + 4) >> 3;
++ op[1] = (b1 + c1 + 4) >> 3;
++ op[2] = (b1 - c1 + 4) >> 3;
++ ip += 4;
++ op += 4;
++ }
++}
++
++using libvpx_test::ACMRandom;
++
++TEST(Vp8FdctTest, SignBiasCheck) {
++ ACMRandom rnd(ACMRandom::DeterministicSeed());
++ int16_t test_input_block[16];
++ int16_t test_output_block[16];
++ const int pitch = 8;
++ int count_sign_block[16][2];
++ const int count_test_block = 1000000;
++
++ memset(count_sign_block, 0, sizeof(count_sign_block));
++
++ for (int i = 0; i < count_test_block; ++i) {
++ // Initialize a test block with input range [-255, 255].
++ for (int j = 0; j < 16; ++j)
++ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
++
++ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
++
++ for (int j = 0; j < 16; ++j) {
++ if (test_output_block[j] < 0)
++ ++count_sign_block[j][0];
++ else if (test_output_block[j] > 0)
++ ++count_sign_block[j][1];
++ }
++ }
++
++ bool bias_acceptable = true;
++ for (int j = 0; j < 16; ++j)
++ bias_acceptable = bias_acceptable &&
++ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000);
++
++ EXPECT_EQ(true, bias_acceptable)
++ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]";
++
++ memset(count_sign_block, 0, sizeof(count_sign_block));
++
++ for (int i = 0; i < count_test_block; ++i) {
++ // Initialize a test block with input range [-15, 15].
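++ // (Range check, for clarity: Rand8() is in [0, 255], so
++ // Rand8() >> 4 is in [0, 15] and the difference of two such
++ // draws is in [-15, 15].)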
++ for (int j = 0; j < 16; ++j) ++ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4); ++ ++ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); ++ ++ for (int j = 0; j < 16; ++j) { ++ if (test_output_block[j] < 0) ++ ++count_sign_block[j][0]; ++ else if (test_output_block[j] > 0) ++ ++count_sign_block[j][1]; ++ } ++ } ++ ++ bias_acceptable = true; ++ for (int j = 0; j < 16; ++j) ++ bias_acceptable = bias_acceptable && ++ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000); ++ ++ EXPECT_EQ(true, bias_acceptable) ++ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; ++}; ++ ++TEST(Vp8FdctTest, RoundTripErrorCheck) { ++ ACMRandom rnd(ACMRandom::DeterministicSeed()); ++ int max_error = 0; ++ double total_error = 0; ++ const int count_test_block = 1000000; ++ for (int i = 0; i < count_test_block; ++i) { ++ int16_t test_input_block[16]; ++ int16_t test_temp_block[16]; ++ int16_t test_output_block[16]; ++ ++ // Initialize a test block with input range [-255, 255]. ++ for (int j = 0; j < 16; ++j) ++ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); ++ ++ const int pitch = 8; ++ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch); ++ reference_idct4x4(test_temp_block, test_output_block); ++ ++ for (int j = 0; j < 16; ++j) { ++ const int diff = test_input_block[j] - test_output_block[j]; ++ const int error = diff * diff; ++ if (max_error < error) ++ max_error = error; ++ total_error += error; ++ } ++ } ++ ++ EXPECT_GE(1, max_error ) ++ << "Error: FDCT/IDCT has an individual roundtrip error > 1"; ++ ++ EXPECT_GE(count_test_block, total_error) ++ << "Error: FDCT/IDCT has average roundtrip error > 1 per block"; ++}; ++ ++} // namespace +diff --git a/test/i420_video_source.h b/test/i420_video_source.h +new file mode 100644 +index 0000000..219bd33 +--- /dev/null ++++ b/test/i420_video_source.h +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++#ifndef TEST_I420_VIDEO_SOURCE_H_ ++#define TEST_I420_VIDEO_SOURCE_H_ ++#include ++#include ++ ++#include "test/video_source.h" ++ ++namespace libvpx_test { ++ ++// This class extends VideoSource to allow parsing of raw yv12 ++// so that we can do actual file encodes. ++class I420VideoSource : public VideoSource { ++ public: ++ I420VideoSource(const std::string &file_name, ++ unsigned int width, unsigned int height, ++ int rate_numerator, int rate_denominator, ++ unsigned int start, int limit) ++ : file_name_(file_name), ++ input_file_(NULL), ++ img_(NULL), ++ start_(start), ++ limit_(limit), ++ frame_(0), ++ width_(0), ++ height_(0), ++ framerate_numerator_(rate_numerator), ++ framerate_denominator_(rate_denominator) { ++ ++ // This initializes raw_sz_, width_, height_ and allocates an img. ++ SetSize(width, height); ++ } ++ ++ virtual ~I420VideoSource() { ++ vpx_img_free(img_); ++ if (input_file_) ++ fclose(input_file_); ++ } ++ ++ virtual void Begin() { ++ if (input_file_) ++ fclose(input_file_); ++ input_file_ = OpenTestDataFile(file_name_); ++ ASSERT_TRUE(input_file_) << "Input file open failed. 
Filename: " ++ << file_name_; ++ if (start_) { ++ fseek(input_file_, raw_sz_ * start_, SEEK_SET); ++ } ++ ++ frame_ = start_; ++ FillFrame(); ++ } ++ ++ virtual void Next() { ++ ++frame_; ++ FillFrame(); ++ } ++ ++ virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } ++ ++ // Models a stream where Timebase = 1/FPS, so pts == frame. ++ virtual vpx_codec_pts_t pts() const { return frame_; } ++ ++ virtual unsigned long duration() const { return 1; } ++ ++ virtual vpx_rational_t timebase() const { ++ const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; ++ return t; ++ } ++ ++ virtual unsigned int frame() const { return frame_; } ++ ++ virtual unsigned int limit() const { return limit_; } ++ ++ void SetSize(unsigned int width, unsigned int height) { ++ if (width != width_ || height != height_) { ++ vpx_img_free(img_); ++ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1); ++ ASSERT_TRUE(img_ != NULL); ++ width_ = width; ++ height_ = height; ++ raw_sz_ = width * height * 3 / 2; ++ } ++ } ++ ++ virtual void FillFrame() { ++ // Read a frame from input_file. ++ if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { ++ limit_ = frame_; ++ } ++ } ++ ++ protected: ++ std::string file_name_; ++ FILE *input_file_; ++ vpx_image_t *img_; ++ size_t raw_sz_; ++ unsigned int start_; ++ unsigned int limit_; ++ unsigned int frame_; ++ unsigned int width_; ++ unsigned int height_; ++ unsigned int framerate_numerator_; ++ unsigned int framerate_denominator_; ++}; ++ ++} // namespace libvpx_test ++ ++#endif // TEST_I420_VIDEO_SOURCE_H_ +diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc +new file mode 100644 +index 0000000..1be5fa0 +--- /dev/null ++++ b/test/idctllm_test.cc +@@ -0,0 +1,126 @@ ++/* ++ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */ ++ ++ ++extern "C" { ++#include "vpx_config.h" ++#include "vpx_rtcd.h" ++} ++#include "test/register_state_check.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++ ++typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, ++ int pred_stride, unsigned char *dst_ptr, ++ int dst_stride); ++namespace { ++class IDCTTest : public ::testing::TestWithParam ++{ ++ protected: ++ virtual void SetUp() ++ { ++ int i; ++ ++ UUT = GetParam(); ++ memset(input, 0, sizeof(input)); ++ /* Set up guard blocks */ ++ for(i=0; i<256; i++) ++ output[i] = ((i&0xF)<4&&(i<64))?0:-1; ++ } ++ ++ idct_fn_t UUT; ++ short input[16]; ++ unsigned char output[256]; ++ unsigned char predict[256]; ++}; ++ ++TEST_P(IDCTTest, TestGuardBlocks) ++{ ++ int i; ++ ++ for(i=0; i<256; i++) ++ if((i&0xF) < 4 && i<64) ++ EXPECT_EQ(0, output[i]) << i; ++ else ++ EXPECT_EQ(255, output[i]); ++} ++ ++TEST_P(IDCTTest, TestAllZeros) ++{ ++ int i; ++ ++ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); ++ ++ for(i=0; i<256; i++) ++ if((i&0xF) < 4 && i<64) ++ EXPECT_EQ(0, output[i]) << "i==" << i; ++ else ++ EXPECT_EQ(255, output[i]) << "i==" << i; ++} ++ ++TEST_P(IDCTTest, TestAllOnes) ++{ ++ int i; ++ ++ input[0] = 4; ++ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); ++ ++ for(i=0; i<256; i++) ++ if((i&0xF) < 4 && i<64) ++ EXPECT_EQ(1, output[i]) << "i==" << i; ++ else ++ EXPECT_EQ(255, output[i]) << "i==" << i; ++} ++ ++TEST_P(IDCTTest, TestAddOne) ++{ ++ int i; ++ ++ for(i=0; i<256; i++) ++ predict[i] = i; ++ ++ input[0] = 4; ++ REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16)); ++ ++ for(i=0; i<256; i++) ++ if((i&0xF) < 4 && i<64) ++ EXPECT_EQ(i+1, output[i]) << "i==" << i; ++ else ++ EXPECT_EQ(255, output[i]) << "i==" << i; ++} ++ ++TEST_P(IDCTTest, TestWithData) ++{ ++ int i; ++ ++ for(i=0; i<16; i++) ++ input[i] = i; ++ ++ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); ++ ++ for(i=0; i<256; i++) ++ if((i&0xF) > 3 || i>63) ++ EXPECT_EQ(255, output[i]) << "i==" << i; ++ else if(i == 0) ++ EXPECT_EQ(11, output[i]) << "i==" << i; ++ else if(i == 34) ++ EXPECT_EQ(1, output[i]) << "i==" << i; ++ else if(i == 2 || i == 17 || i == 32) ++ EXPECT_EQ(3, output[i]) << "i==" << i; ++ else ++ EXPECT_EQ(0, output[i]) << "i==" << i; ++} ++ ++INSTANTIATE_TEST_CASE_P(C, IDCTTest, ++ ::testing::Values(vp8_short_idct4x4llm_c)); ++#if HAVE_MMX ++INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, ++ ::testing::Values(vp8_short_idct4x4llm_mmx)); ++#endif ++} +diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc +new file mode 100644 +index 0000000..4c16c3f +--- /dev/null ++++ b/test/intrapred_test.cc +@@ -0,0 +1,357 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */ ++ ++ ++#include ++#include "test/acm_random.h" ++#include "test/register_state_check.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++extern "C" { ++#include "vpx_config.h" ++#include "vpx_rtcd.h" ++#include "vp8/common/blockd.h" ++#include "vpx_mem/vpx_mem.h" ++} ++ ++namespace { ++ ++using libvpx_test::ACMRandom; ++ ++class IntraPredBase { ++ protected: ++ void SetupMacroblock(uint8_t *data, int block_size, int stride, ++ int num_planes) { ++ memset(&mb_, 0, sizeof(mb_)); ++ memset(&mi_, 0, sizeof(mi_)); ++ mb_.up_available = 1; ++ mb_.left_available = 1; ++ mb_.mode_info_context = &mi_; ++ stride_ = stride; ++ block_size_ = block_size; ++ num_planes_ = num_planes; ++ for (int p = 0; p < num_planes; p++) ++ data_ptr_[p] = data + stride * (block_size + 1) * p + ++ stride + block_size; ++ } ++ ++ void FillRandom() { ++ // Fill edges with random data ++ ACMRandom rnd(ACMRandom::DeterministicSeed()); ++ for (int p = 0; p < num_planes_; p++) { ++ for (int x = -1 ; x <= block_size_; x++) ++ data_ptr_[p][x - stride_] = rnd.Rand8(); ++ for (int y = 0; y < block_size_; y++) ++ data_ptr_[p][y * stride_ - 1] = rnd.Rand8(); ++ } ++ } ++ ++ virtual void Predict(MB_PREDICTION_MODE mode) = 0; ++ ++ void SetLeftUnavailable() { ++ mb_.left_available = 0; ++ for (int p = 0; p < num_planes_; p++) ++ for (int i = -1; i < block_size_; ++i) ++ data_ptr_[p][stride_ * i - 1] = 129; ++ } ++ ++ void SetTopUnavailable() { ++ mb_.up_available = 0; ++ for (int p = 0; p < num_planes_; p++) ++ memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2); ++ } ++ ++ void SetTopLeftUnavailable() { ++ SetLeftUnavailable(); ++ SetTopUnavailable(); ++ } ++ ++ int BlockSizeLog2Min1() const { ++ switch (block_size_) { ++ case 16: ++ return 3; ++ case 8: ++ return 2; ++ default: ++ return 0; ++ } ++ } ++ ++ // check DC prediction output against a reference ++ void CheckDCPrediction() const { ++ for (int p = 0; p < num_planes_; p++) { ++ // calculate expected DC ++ int expected; ++ if (mb_.up_available || mb_.left_available) { ++ int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available + ++ mb_.left_available; ++ if (mb_.up_available) ++ for (int x = 0; x < block_size_; x++) ++ sum += data_ptr_[p][x - stride_]; ++ if (mb_.left_available) ++ for (int y = 0; y < block_size_; y++) ++ sum += data_ptr_[p][y * stride_ - 1]; ++ expected = (sum + (1 << (shift - 1))) >> shift; ++ } else ++ expected = 0x80; ++ ++ // check that all subsequent lines are equal to the first ++ for (int y = 1; y < block_size_; ++y) ++ ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_], ++ block_size_)); ++ // within the first line, ensure that each pixel has the same value ++ for (int x = 1; x < block_size_; ++x) ++ ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]); ++ // now ensure that that pixel has the expected (DC) value ++ ASSERT_EQ(expected, data_ptr_[p][0]); ++ } ++ } ++ ++ // check V prediction output against a reference ++ void CheckVPrediction() const { ++ // check that all lines equal the top border ++ for (int p = 0; p < num_planes_; p++) ++ for (int y = 0; y < block_size_; y++) ++ ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_], ++ &data_ptr_[p][y * stride_], block_size_)); ++ } ++ ++ // check H prediction output against a reference ++ void CheckHPrediction() const { ++ // for each line, ensure that each pixel is equal to the left border ++ for (int p = 0; p < num_planes_; p++) ++ for (int y = 0; y < block_size_; y++) ++ for (int x = 0; x < block_size_; x++) ++ ASSERT_EQ(data_ptr_[p][-1 + y * stride_], ++ data_ptr_[p][x 
+ y * stride_]); ++ } ++ ++ static int ClipByte(int value) { ++ if (value > 255) ++ return 255; ++ else if (value < 0) ++ return 0; ++ return value; ++ } ++ ++ // check TM prediction output against a reference ++ void CheckTMPrediction() const { ++ for (int p = 0; p < num_planes_; p++) ++ for (int y = 0; y < block_size_; y++) ++ for (int x = 0; x < block_size_; x++) { ++ const int expected = ClipByte(data_ptr_[p][x - stride_] ++ + data_ptr_[p][stride_ * y - 1] ++ - data_ptr_[p][-1 - stride_]); ++ ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]); ++ } ++ } ++ ++ // Actual test ++ void RunTest() { ++ { ++ SCOPED_TRACE("DC_PRED"); ++ FillRandom(); ++ Predict(DC_PRED); ++ CheckDCPrediction(); ++ } ++ { ++ SCOPED_TRACE("DC_PRED LEFT"); ++ FillRandom(); ++ SetLeftUnavailable(); ++ Predict(DC_PRED); ++ CheckDCPrediction(); ++ } ++ { ++ SCOPED_TRACE("DC_PRED TOP"); ++ FillRandom(); ++ SetTopUnavailable(); ++ Predict(DC_PRED); ++ CheckDCPrediction(); ++ } ++ { ++ SCOPED_TRACE("DC_PRED TOP_LEFT"); ++ FillRandom(); ++ SetTopLeftUnavailable(); ++ Predict(DC_PRED); ++ CheckDCPrediction(); ++ } ++ { ++ SCOPED_TRACE("H_PRED"); ++ FillRandom(); ++ Predict(H_PRED); ++ CheckHPrediction(); ++ } ++ { ++ SCOPED_TRACE("V_PRED"); ++ FillRandom(); ++ Predict(V_PRED); ++ CheckVPrediction(); ++ } ++ { ++ SCOPED_TRACE("TM_PRED"); ++ FillRandom(); ++ Predict(TM_PRED); ++ CheckTMPrediction(); ++ } ++ } ++ ++ MACROBLOCKD mb_; ++ MODE_INFO mi_; ++ uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used ++ int stride_; ++ int block_size_; ++ int num_planes_; ++}; ++ ++typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x, ++ uint8_t *yabove_row, ++ uint8_t *yleft, ++ int left_stride, ++ uint8_t *ypred_ptr, ++ int y_stride); ++ ++class IntraPredYTest : public ::testing::TestWithParam, ++ protected IntraPredBase { ++ public: ++ static void SetUpTestCase() { ++ data_array_ = reinterpret_cast( ++ vpx_memalign(kDataAlignment, kDataBufferSize)); ++ } ++ ++ static void TearDownTestCase() { ++ vpx_free(data_array_); ++ data_array_ = NULL; ++ } ++ ++ protected: ++ static const int kBlockSize = 16; ++ static const int kDataAlignment = 16; ++ static const int kStride = kBlockSize * 3; ++ // We use 48 so that the data pointer of the first pixel in each row of ++ // each macroblock is 16-byte aligned, and this gives us access to the ++ // top-left and top-right corner pixels belonging to the top-left/right ++ // macroblocks. ++ // We use 17 lines so we have one line above us for top-prediction. 
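++  // That is, kDataBufferSize below is kStride * (kBlockSize + 1) = 48 * 17
++  // bytes: one 48-byte row for each of the 16 block rows plus the one
++  // prediction row above.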
++  static const int kDataBufferSize = kStride * (kBlockSize + 1);
++
++  virtual void SetUp() {
++    pred_fn_ = GetParam();
++    SetupMacroblock(data_array_, kBlockSize, kStride, 1);
++  }
++
++  virtual void Predict(MB_PREDICTION_MODE mode) {
++    mb_.mode_info_context->mbmi.mode = mode;
++    REGISTER_STATE_CHECK(pred_fn_(&mb_,
++                                  data_ptr_[0] - kStride,
++                                  data_ptr_[0] - 1, kStride,
++                                  data_ptr_[0], kStride));
++  }
++
++  intra_pred_y_fn_t pred_fn_;
++  static uint8_t* data_array_;
++};
++
++uint8_t* IntraPredYTest::data_array_ = NULL;
++
++TEST_P(IntraPredYTest, IntraPredTests) {
++  RunTest();
++}
++
++INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
++                        ::testing::Values(
++                            vp8_build_intra_predictors_mby_s_c));
++#if HAVE_SSE2
++INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
++                        ::testing::Values(
++                            vp8_build_intra_predictors_mby_s_sse2));
++#endif
++#if HAVE_SSSE3
++INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
++                        ::testing::Values(
++                            vp8_build_intra_predictors_mby_s_ssse3));
++#endif
++
++typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x,
++                                   uint8_t *uabove_row,
++                                   uint8_t *vabove_row,
++                                   uint8_t *uleft,
++                                   uint8_t *vleft,
++                                   int left_stride,
++                                   uint8_t *upred_ptr,
++                                   uint8_t *vpred_ptr,
++                                   int pred_stride);
++
++class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
++                        protected IntraPredBase {
++ public:
++  static void SetUpTestCase() {
++    data_array_ = reinterpret_cast<uint8_t*>(
++        vpx_memalign(kDataAlignment, kDataBufferSize));
++  }
++
++  static void TearDownTestCase() {
++    vpx_free(data_array_);
++    data_array_ = NULL;
++  }
++
++ protected:
++  static const int kBlockSize = 8;
++  static const int kDataAlignment = 8;
++  static const int kStride = kBlockSize * 3;
++  // We use 24 so that the data pointer of the first pixel in each row of
++  // each macroblock is 8-byte aligned, and this gives us access to the
++  // top-left and top-right corner pixels belonging to the top-left/right
++  // macroblocks.
++  // We use 9 lines so we have one line above us for top-prediction.
++  // [0] = U, [1] = V
++  static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
++
++  virtual void SetUp() {
++    pred_fn_ = GetParam();
++    SetupMacroblock(data_array_, kBlockSize, kStride, 2);
++  }
++
++  virtual void Predict(MB_PREDICTION_MODE mode) {
++    mb_.mode_info_context->mbmi.uv_mode = mode;
++    pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
++             data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
++             data_ptr_[0], data_ptr_[1], kStride);
++  }
++
++  intra_pred_uv_fn_t pred_fn_;
++  static uint8_t* data_array_;
++};
++
++uint8_t* IntraPredUVTest::data_array_ = NULL;
++
++TEST_P(IntraPredUVTest, IntraPredTests) {
++  RunTest();
++}
++
++INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
++                        ::testing::Values(
++                            vp8_build_intra_predictors_mbuv_s_c));
++#if HAVE_SSE2
++INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
++                        ::testing::Values(
++                            vp8_build_intra_predictors_mbuv_s_sse2));
++#endif
++#if HAVE_SSSE3
++INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
++                        ::testing::Values(
++                            vp8_build_intra_predictors_mbuv_s_ssse3));
++#endif
++
++}  // namespace
+diff --git a/test/ivf_video_source.h b/test/ivf_video_source.h
+new file mode 100644
+index 0000000..48c3a7d
+--- /dev/null
++++ b/test/ivf_video_source.h
+@@ -0,0 +1,109 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++#ifndef TEST_IVF_VIDEO_SOURCE_H_
++#define TEST_IVF_VIDEO_SOURCE_H_
++#include <cstdio>
++#include <cstdlib>
++#include <new>
++#include <string>
++#include "test/video_source.h"
++
++namespace libvpx_test {
++const unsigned int kCodeBufferSize = 256 * 1024;
++const unsigned int kIvfFileHdrSize = 32;
++const unsigned int kIvfFrameHdrSize = 12;
++
++static unsigned int MemGetLe32(const uint8_t *mem) {
++  return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]);
++}
++
++// This class extends VideoSource to allow parsing of ivf files,
++// so that we can do actual file decodes.
++class IVFVideoSource : public CompressedVideoSource {
++ public:
++  IVFVideoSource(const std::string &file_name)
++      : file_name_(file_name),
++        input_file_(NULL),
++        compressed_frame_buf_(NULL),
++        frame_sz_(0),
++        frame_(0),
++        end_of_file_(false) {
++  }
++
++  virtual ~IVFVideoSource() {
++    delete[] compressed_frame_buf_;
++
++    if (input_file_)
++      fclose(input_file_);
++  }
++
++  virtual void Init() {
++    // Allocate a buffer to read the compressed video frame into.
++    compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
++    ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed";
++  }
++
++  virtual void Begin() {
++    input_file_ = OpenTestDataFile(file_name_);
++    ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
++        << file_name_;
++
++    // Read file header
++    uint8_t file_hdr[kIvfFileHdrSize];
++    ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
++        << "File header read failed.";
++    // Check file header
++    ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I'
++                && file_hdr[3] == 'F') << "Input is not an IVF file.";
++
++    FillFrame();
++  }
++
++  virtual void Next() {
++    ++frame_;
++    FillFrame();
++  }
++
++  void FillFrame() {
++    uint8_t frame_hdr[kIvfFrameHdrSize];
++    // Check frame header and read a frame from input_file.
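++    // An IVF frame header is 12 bytes: a 4-byte little-endian frame size
++    // followed by an 8-byte presentation timestamp; only the size field is
++    // used here.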
++    if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)
++        != kIvfFrameHdrSize) {
++      end_of_file_ = true;
++    } else {
++      end_of_file_ = false;
++
++      frame_sz_ = MemGetLe32(frame_hdr);
++      ASSERT_LE(frame_sz_, kCodeBufferSize)
++          << "Frame is too big for allocated code buffer";
++      ASSERT_EQ(frame_sz_,
++                fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
++          << "Failed to read complete frame";
++    }
++  }
++
++  virtual const uint8_t *cxdata() const {
++    return end_of_file_ ? NULL : compressed_frame_buf_;
++  }
++  virtual const unsigned int frame_size() const { return frame_sz_; }
++  virtual const unsigned int frame_number() const { return frame_; }
++
++ protected:
++  std::string file_name_;
++  FILE *input_file_;
++  uint8_t *compressed_frame_buf_;
++  unsigned int frame_sz_;
++  unsigned int frame_;
++  bool end_of_file_;
++};
++
++}  // namespace libvpx_test
++
++#endif  // TEST_IVF_VIDEO_SOURCE_H_
+diff --git a/test/keyframe_test.cc b/test/keyframe_test.cc
+new file mode 100644
+index 0000000..d0c81df
+--- /dev/null
++++ b/test/keyframe_test.cc
+@@ -0,0 +1,145 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++#include <climits>
++#include <vector>
++#include "test/encode_test_driver.h"
++#include "test/i420_video_source.h"
++#include "third_party/googletest/src/include/gtest/gtest.h"
++
++namespace {
++
++class KeyframeTest : public ::libvpx_test::EncoderTest,
++    public ::testing::TestWithParam<libvpx_test::TestMode> {
++ protected:
++  virtual void SetUp() {
++    InitializeConfig();
++    SetMode(GetParam());
++    kf_count_ = 0;
++    kf_count_max_ = INT_MAX;
++    kf_do_force_kf_ = false;
++    set_cpu_used_ = 0;
++  }
++
++  virtual bool Continue() const {
++    return !HasFatalFailure() && !abort_;
++  }
++
++  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
++                                  ::libvpx_test::Encoder *encoder) {
++    if (kf_do_force_kf_)
++      frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF;
++    if (set_cpu_used_ && video->frame() == 1)
++      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
++  }
++
++  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
++    if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
++      kf_pts_list_.push_back(pkt->data.frame.pts);
++      kf_count_++;
++      abort_ |= kf_count_ > kf_count_max_;
++    }
++  }
++
++  bool kf_do_force_kf_;
++  int kf_count_;
++  int kf_count_max_;
++  std::vector<vpx_codec_pts_t> kf_pts_list_;
++  int set_cpu_used_;
++};
++
++TEST_P(KeyframeTest, TestRandomVideoSource) {
++  // Validate that encoding the RandomVideoSource produces multiple keyframes.
++  // This validates the results of the TestDisableKeyframes test.
++  kf_count_max_ = 2;  // early exit successful tests.
++
++  ::libvpx_test::RandomVideoSource video;
++  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++
++  // In realtime mode auto-placed keyframes are exceedingly rare, so don't
++  // bother with this check there.
++  if (GetParam() > 0)
++    EXPECT_GT(kf_count_, 1);
++}
++
++TEST_P(KeyframeTest, TestDisableKeyframes) {
++  cfg_.kf_mode = VPX_KF_DISABLED;
++  kf_count_max_ = 1;  // early exit failed tests.
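++
++  // Frame 0 is always coded as a keyframe, so even with keyframe placement
++  // disabled exactly one keyframe should be recorded.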
++  ::libvpx_test::RandomVideoSource video;
++  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++
++  EXPECT_EQ(1, kf_count_);
++}
++
++TEST_P(KeyframeTest, TestForceKeyframe) {
++  cfg_.kf_mode = VPX_KF_DISABLED;
++  kf_do_force_kf_ = true;
++
++  ::libvpx_test::DummyVideoSource video;
++  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++
++  // verify that every third frame is a keyframe.
++  for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
++       iter != kf_pts_list_.end(); ++iter) {
++    ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter;
++  }
++}
++
++TEST_P(KeyframeTest, TestKeyframeMaxDistance) {
++  cfg_.kf_max_dist = 25;
++
++  ::libvpx_test::DummyVideoSource video;
++  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++
++  // verify that keyframe interval matches kf_max_dist
++  for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
++       iter != kf_pts_list_.end(); ++iter) {
++    ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter;
++  }
++}
++
++TEST_P(KeyframeTest, TestAutoKeyframe) {
++  cfg_.kf_mode = VPX_KF_AUTO;
++  kf_do_force_kf_ = false;
++
++  // Force a deterministic speed step in Real Time mode, as the faster modes
++  // may not produce a keyframe like we expect. This is necessary when running
++  // on very slow environments (like Valgrind). The step -11 was determined
++  // experimentally as the fastest mode that still throws the keyframe.
++  if (deadline_ == VPX_DL_REALTIME)
++    set_cpu_used_ = -11;
++
++  // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120.
++  // I check only the first 40 frames to make sure there's a keyframe at frame
++  // 0 and 30.
++  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
++                                       30, 1, 0, 40);
++
++  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++
++  // In realtime mode auto-placed keyframes are exceedingly rare, so don't
++  // bother with this check there.
++  if (GetParam() > 0)
++    EXPECT_EQ(2u, kf_pts_list_.size()) << "Not the right number of keyframes";
++
++  // Verify that keyframe placement matches the clip's cut scenes.
++  for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
++       iter != kf_pts_list_.end(); ++iter) {
++    if (deadline_ == VPX_DL_REALTIME && *iter > 0)
++      EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
++                                     << *iter;
++    else
++      EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
++  }
++}
++
++INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES);
++}  // namespace
+diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
+new file mode 100644
+index 0000000..9227449
+--- /dev/null
++++ b/test/pp_filter_test.cc
+@@ -0,0 +1,107 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++#include "test/register_state_check.h"
++#include "third_party/googletest/src/include/gtest/gtest.h"
++extern "C" {
++#include "vpx_config.h"
++#include "vpx_rtcd.h"
++#include "vpx/vpx_integer.h"
++#include "vpx_mem/vpx_mem.h"
++}
++
++typedef void (*post_proc_func_t)(unsigned char *src_ptr,
++                                 unsigned char *dst_ptr,
++                                 int src_pixels_per_line,
++                                 int dst_pixels_per_line,
++                                 int cols,
++                                 unsigned char *flimit,
++                                 int size);
++
++namespace {
++
++class Vp8PostProcessingFilterTest
++    : public ::testing::TestWithParam<post_proc_func_t> {};
++
++// Test routine for the VP8 post-processing function
++// vp8_post_proc_down_and_across_mb_row_c.
++
++TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
++  // Size of the underlying data block that will be filtered.
++  const int block_width = 16;
++  const int block_height = 16;
++
++  // 5-tap filter needs 2 padding rows above and below the block in the input.
++  const int input_width = block_width;
++  const int input_height = block_height + 4;
++  const int input_stride = input_width;
++  const int input_size = input_width * input_height;
++
++  // Filter extends output block by 8 samples at left and right edges.
++  const int output_width = block_width + 16;
++  const int output_height = block_height;
++  const int output_stride = output_width;
++  const int output_size = output_width * output_height;
++
++  uint8_t *const src_image =
++      reinterpret_cast<uint8_t*>(vpx_calloc(input_size, 1));
++  uint8_t *const dst_image =
++      reinterpret_cast<uint8_t*>(vpx_calloc(output_size, 1));
++
++  // Pointers to top-left pixel of block in the input and output images.
++  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
++  uint8_t *const dst_image_ptr = dst_image + 8;
++  uint8_t *const flimits =
++      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
++  (void)vpx_memset(flimits, 255, block_width);
++
++  // Initialize pixels in the input:
++  //   block pixels to value 1,
++  //   border pixels to value 10.
++  (void)vpx_memset(src_image, 10, input_size);
++  uint8_t *pixel_ptr = src_image_ptr;
++  for (int i = 0; i < block_height; ++i) {
++    for (int j = 0; j < block_width; ++j) {
++      pixel_ptr[j] = 1;
++    }
++    pixel_ptr += input_stride;
++  }
++
++  // Initialize pixels in the output to 99.
++  (void)vpx_memset(dst_image, 99, output_size);
++
++  REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride,
++                                  output_stride, block_width, flimits, 16));
++
++  static const uint8_t expected_data[block_height] = {
++    4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
++  };
++
++  pixel_ptr = dst_image_ptr;
++  for (int i = 0; i < block_height; ++i) {
++    for (int j = 0; j < block_width; ++j) {
++      EXPECT_EQ(expected_data[i], pixel_ptr[j])
++          << "Vp8PostProcessingFilterTest failed with invalid filter output";
++    }
++    pixel_ptr += output_stride;
++  }
++
++  vpx_free(src_image);
++  vpx_free(dst_image);
++  vpx_free(flimits);
++}
++
++INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest,
++                        ::testing::Values(vp8_post_proc_down_and_across_mb_row_c));
++
++#if HAVE_SSE2
++INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest,
++                        ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
++#endif
++
++}  // namespace
+diff --git a/test/register_state_check.h b/test/register_state_check.h
+new file mode 100644
+index 0000000..fb3f53b
+--- /dev/null
++++ b/test/register_state_check.h
+@@ -0,0 +1,95 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_ ++#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_ ++ ++#ifdef _WIN64 ++ ++#define _WIN32_LEAN_AND_MEAN ++#include ++#include ++ ++#include "third_party/googletest/src/include/gtest/gtest.h" ++ ++namespace testing { ++namespace internal { ++ ++inline bool operator==(const M128A& lhs, const M128A& rhs) { ++ return (lhs.Low == rhs.Low && lhs.High == rhs.High); ++} ++ ++} // namespace internal ++} // namespace testing ++ ++namespace libvpx_test { ++ ++// Compares the state of xmm[6-15] at construction with their state at ++// destruction. These registers should be preserved by the callee on ++// Windows x64. ++// Usage: ++// { ++// RegisterStateCheck reg_check; ++// FunctionToVerify(); ++// } ++class RegisterStateCheck { ++ public: ++ RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); } ++ ~RegisterStateCheck() { EXPECT_TRUE(Check()); } ++ ++ private: ++ static bool StoreRegisters(CONTEXT* const context) { ++ const HANDLE this_thread = GetCurrentThread(); ++ EXPECT_TRUE(this_thread != NULL); ++ context->ContextFlags = CONTEXT_FLOATING_POINT; ++ const bool context_saved = GetThreadContext(this_thread, context) == TRUE; ++ EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError(); ++ return context_saved; ++ } ++ ++ // Compares the register state. Returns true if the states match. ++ bool Check() const { ++ if (!initialized_) return false; ++ CONTEXT post_context; ++ if (!StoreRegisters(&post_context)) return false; ++ ++ const M128A* xmm_pre = &pre_context_.Xmm6; ++ const M128A* xmm_post = &post_context.Xmm6; ++ for (int i = 6; i <= 15; ++i) { ++ EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; ++ ++xmm_pre; ++ ++xmm_post; ++ } ++ return !testing::Test::HasNonfatalFailure(); ++ } ++ ++ bool initialized_; ++ CONTEXT pre_context_; ++}; ++ ++#define REGISTER_STATE_CHECK(statement) do { \ ++ libvpx_test::RegisterStateCheck reg_check; \ ++ statement; \ ++} while (false) ++ ++} // namespace libvpx_test ++ ++#else // !_WIN64 ++ ++namespace libvpx_test { ++ ++class RegisterStateCheck {}; ++#define REGISTER_STATE_CHECK(statement) statement ++ ++} // namespace libvpx_test ++ ++#endif // _WIN64 ++ ++#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_ +diff --git a/test/resize_test.cc b/test/resize_test.cc +new file mode 100644 +index 0000000..c846157 +--- /dev/null ++++ b/test/resize_test.cc +@@ -0,0 +1,104 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */ ++#include ++#include ++#include "test/encode_test_driver.h" ++#include "test/video_source.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++ ++namespace { ++ ++const unsigned int kInitialWidth = 320; ++const unsigned int kInitialHeight = 240; ++ ++unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { ++ if (frame < 10) ++ return val; ++ if (frame < 20) ++ return val / 2; ++ if (frame < 30) ++ return val * 2 / 3; ++ if (frame < 40) ++ return val / 4; ++ if (frame < 50) ++ return val * 7 / 8; ++ return val; ++} ++ ++class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { ++ public: ++ ResizingVideoSource() { ++ SetSize(kInitialWidth, kInitialHeight); ++ limit_ = 60; ++ } ++ ++ protected: ++ virtual void Next() { ++ ++frame_; ++ SetSize(ScaleForFrameNumber(frame_, kInitialWidth), ++ ScaleForFrameNumber(frame_, kInitialHeight)); ++ FillFrame(); ++ } ++}; ++ ++class ResizeTest : public ::libvpx_test::EncoderTest, ++ public ::testing::TestWithParam { ++ protected: ++ struct FrameInfo { ++ FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h) ++ : pts(_pts), w(_w), h(_h) {} ++ ++ vpx_codec_pts_t pts; ++ unsigned int w; ++ unsigned int h; ++ }; ++ ++ virtual void SetUp() { ++ InitializeConfig(); ++ SetMode(GetParam()); ++ } ++ ++ virtual bool Continue() const { ++ return !HasFatalFailure() && !abort_; ++ } ++ ++ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { ++ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { ++ const unsigned char *buf = ++ reinterpret_cast(pkt->data.frame.buf); ++ const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff; ++ const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff; ++ ++ frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h)); ++ } ++ } ++ ++ std::vector< FrameInfo > frame_info_list_; ++}; ++ ++TEST_P(ResizeTest, TestExternalResizeWorks) { ++ ResizingVideoSource video; ++ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ++ ++ for (std::vector::iterator info = frame_info_list_.begin(); ++ info != frame_info_list_.end(); ++info) { ++ const vpx_codec_pts_t pts = info->pts; ++ const unsigned int expected_w = ScaleForFrameNumber(pts, kInitialWidth); ++ const unsigned int expected_h = ScaleForFrameNumber(pts, kInitialHeight); ++ ++ EXPECT_EQ(expected_w, info->w) ++ << "Frame " << pts << "had unexpected width"; ++ EXPECT_EQ(expected_h, info->h) ++ << "Frame " << pts << "had unexpected height"; ++ } ++} ++ ++INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES); ++} // namespace +diff --git a/test/sad_test.cc b/test/sad_test.cc +new file mode 100644 +index 0000000..5a0653b +--- /dev/null ++++ b/test/sad_test.cc +@@ -0,0 +1,253 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */
++
++
++#include <limits.h>
++#include <stdio.h>
++#include <stdlib.h>
++
++extern "C" {
++#include "./vpx_config.h"
++#include "./vpx_rtcd.h"
++#include "vp8/common/blockd.h"
++#include "vpx_mem/vpx_mem.h"
++}
++
++#include "test/acm_random.h"
++#include "test/register_state_check.h"
++#include "test/util.h"
++#include "third_party/googletest/src/include/gtest/gtest.h"
++
++
++typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr,
++                                        int source_stride,
++                                        const unsigned char *reference_ptr,
++                                        int reference_stride,
++                                        unsigned int max_sad);
++
++using libvpx_test::ACMRandom;
++
++namespace {
++class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) {
++ public:
++  static void SetUpTestCase() {
++    source_data_ = reinterpret_cast<uint8_t*>(
++        vpx_memalign(kDataAlignment, kDataBufferSize));
++    reference_data_ = reinterpret_cast<uint8_t*>(
++        vpx_memalign(kDataAlignment, kDataBufferSize));
++  }
++
++  static void TearDownTestCase() {
++    vpx_free(source_data_);
++    source_data_ = NULL;
++    vpx_free(reference_data_);
++    reference_data_ = NULL;
++  }
++
++ protected:
++  static const int kDataAlignment = 16;
++  static const int kDataBufferSize = 16 * 32;
++
++  virtual void SetUp() {
++    sad_fn_ = GET_PARAM(2);
++    height_ = GET_PARAM(1);
++    width_ = GET_PARAM(0);
++    source_stride_ = width_ * 2;
++    reference_stride_ = width_ * 2;
++    rnd_.Reset(ACMRandom::DeterministicSeed());
++  }
++
++  sad_m_by_n_fn_t sad_fn_;
++  virtual unsigned int SAD(unsigned int max_sad) {
++    unsigned int ret;
++    REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_,
++                                       reference_data_, reference_stride_,
++                                       max_sad));
++    return ret;
++  }
++
++  // Sum of Absolute Differences. Given two blocks, calculate the absolute
++  // difference between two pixels in the same relative location; accumulate.
++  unsigned int ReferenceSAD(unsigned int max_sad) {
++    unsigned int sad = 0;
++
++    for (int h = 0; h < height_; ++h) {
++      for (int w = 0; w < width_; ++w) {
++        sad += abs(source_data_[h * source_stride_ + w]
++               - reference_data_[h * reference_stride_ + w]);
++      }
++      if (sad > max_sad) {
++        break;
++      }
++    }
++    return sad;
++  }
++
++  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
++    for (int h = 0; h < height_; ++h) {
++      for (int w = 0; w < width_; ++w) {
++        data[h * stride + w] = fill_constant;
++      }
++    }
++  }
++
++  void FillRandom(uint8_t *data, int stride) {
++    for (int h = 0; h < height_; ++h) {
++      for (int w = 0; w < width_; ++w) {
++        data[h * stride + w] = rnd_.Rand8();
++      }
++    }
++  }
++
++  void CheckSad(unsigned int max_sad) {
++    unsigned int reference_sad, exp_sad;
++
++    reference_sad = ReferenceSAD(max_sad);
++    exp_sad = SAD(max_sad);
++
++    if (reference_sad <= max_sad) {
++      ASSERT_EQ(exp_sad, reference_sad);
++    } else {
++      // Alternative implementations are not required to check max_sad
++      ASSERT_GE(exp_sad, reference_sad);
++    }
++  }
++
++  // Handle blocks up to 16x16 with stride up to 32
++  int height_, width_;
++  static uint8_t* source_data_;
++  int source_stride_;
++  static uint8_t* reference_data_;
++  int reference_stride_;
++
++  ACMRandom rnd_;
++};
++
++uint8_t* SADTest::source_data_ = NULL;
++uint8_t* SADTest::reference_data_ = NULL;
++
++TEST_P(SADTest, MaxRef) {
++  FillConstant(source_data_, source_stride_, 0);
++  FillConstant(reference_data_, reference_stride_, 255);
++  CheckSad(UINT_MAX);
++}
++
++TEST_P(SADTest, MaxSrc) {
++  FillConstant(source_data_, source_stride_, 255);
++  FillConstant(reference_data_, reference_stride_, 0);
++  CheckSad(UINT_MAX);
++}
++
++TEST_P(SADTest, ShortRef) {
++  int tmp_stride =
reference_stride_; ++ reference_stride_ >>= 1; ++ FillRandom(source_data_, source_stride_); ++ FillRandom(reference_data_, reference_stride_); ++ CheckSad(UINT_MAX); ++ reference_stride_ = tmp_stride; ++} ++ ++TEST_P(SADTest, UnalignedRef) { ++ // The reference frame, but not the source frame, may be unaligned for ++ // certain types of searches. ++ int tmp_stride = reference_stride_; ++ reference_stride_ -= 1; ++ FillRandom(source_data_, source_stride_); ++ FillRandom(reference_data_, reference_stride_); ++ CheckSad(UINT_MAX); ++ reference_stride_ = tmp_stride; ++} ++ ++TEST_P(SADTest, ShortSrc) { ++ int tmp_stride = source_stride_; ++ source_stride_ >>= 1; ++ FillRandom(source_data_, source_stride_); ++ FillRandom(reference_data_, reference_stride_); ++ CheckSad(UINT_MAX); ++ source_stride_ = tmp_stride; ++} ++ ++TEST_P(SADTest, MaxSAD) { ++ // Verify that, when max_sad is set, the implementation does not return a ++ // value lower than the reference. ++ FillConstant(source_data_, source_stride_, 255); ++ FillConstant(reference_data_, reference_stride_, 0); ++ CheckSad(128); ++} ++ ++using std::tr1::make_tuple; ++ ++const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c; ++const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c; ++const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c; ++const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c; ++const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c; ++INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values( ++ make_tuple(16, 16, sad_16x16_c), ++ make_tuple(8, 16, sad_8x16_c), ++ make_tuple(16, 8, sad_16x8_c), ++ make_tuple(8, 8, sad_8x8_c), ++ make_tuple(4, 4, sad_4x4_c))); ++ ++// ARM tests ++#if HAVE_MEDIA ++const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6; ++INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values( ++ make_tuple(16, 16, sad_16x16_armv6))); ++ ++#endif ++#if HAVE_NEON ++const sad_m_by_n_fn_t sad_16x16_neon = vp8_sad16x16_neon; ++const sad_m_by_n_fn_t sad_8x16_neon = vp8_sad8x16_neon; ++const sad_m_by_n_fn_t sad_16x8_neon = vp8_sad16x8_neon; ++const sad_m_by_n_fn_t sad_8x8_neon = vp8_sad8x8_neon; ++const sad_m_by_n_fn_t sad_4x4_neon = vp8_sad4x4_neon; ++INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values( ++ make_tuple(16, 16, sad_16x16_neon), ++ make_tuple(8, 16, sad_8x16_neon), ++ make_tuple(16, 8, sad_16x8_neon), ++ make_tuple(8, 8, sad_8x8_neon), ++ make_tuple(4, 4, sad_4x4_neon))); ++#endif ++ ++// X86 tests ++#if HAVE_MMX ++const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx; ++const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx; ++const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx; ++const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx; ++const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx; ++INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values( ++ make_tuple(16, 16, sad_16x16_mmx), ++ make_tuple(8, 16, sad_8x16_mmx), ++ make_tuple(16, 8, sad_16x8_mmx), ++ make_tuple(8, 8, sad_8x8_mmx), ++ make_tuple(4, 4, sad_4x4_mmx))); ++#endif ++#if HAVE_SSE2 ++const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt; ++const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt; ++const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt; ++const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt; ++const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt; ++INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values( ++ make_tuple(16, 16, sad_16x16_wmt), ++ make_tuple(8, 16, sad_8x16_wmt), ++ make_tuple(16, 8, sad_16x8_wmt), ++ make_tuple(8, 8, sad_8x8_wmt), ++ make_tuple(4, 4, sad_4x4_wmt))); ++#endif ++#if HAVE_SSSE3 ++const sad_m_by_n_fn_t sad_16x16_sse3 = 
vp8_sad16x16_sse3; ++INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( ++ make_tuple(16, 16, sad_16x16_sse3))); ++#endif ++ ++} // namespace +diff --git a/test/set_roi.cc b/test/set_roi.cc +new file mode 100644 +index 0000000..3b6112e +--- /dev/null ++++ b/test/set_roi.cc +@@ -0,0 +1,182 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "vpx/vpx_integer.h" ++#include "vpx_mem/vpx_mem.h" ++extern "C" { ++#include "vp8/encoder/onyx_int.h" ++} ++ ++namespace { ++ ++TEST(Vp8RoiMapTest, ParameterCheck) { ++ int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; ++ int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; ++ unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 }; ++ ++ const int internalq_trans[] = { ++ 0, 1, 2, 3, 4, 5, 7, 8, ++ 9, 10, 12, 13, 15, 17, 18, 19, ++ 20, 21, 23, 24, 25, 26, 27, 28, ++ 29, 30, 31, 33, 35, 37, 39, 41, ++ 43, 45, 47, 49, 51, 53, 55, 57, ++ 59, 61, 64, 67, 70, 73, 76, 79, ++ 82, 85, 88, 91, 94, 97, 100, 103, ++ 106, 109, 112, 115, 118, 121, 124, 127, ++ }; ++ ++ // Initialize elements of cpi with valid defaults. ++ VP8_COMP cpi; ++ cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA; ++ cpi.cyclic_refresh_mode_enabled = 0; ++ cpi.mb.e_mbd.segmentation_enabled = 0; ++ cpi.mb.e_mbd.update_mb_segmentation_map = 0; ++ cpi.mb.e_mbd.update_mb_segmentation_data = 0; ++ cpi.common.mb_rows = 240 >> 4; ++ cpi.common.mb_cols = 320 >> 4; ++ const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols); ++ vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data)); ++ ++ // Segment map ++ cpi.segmentation_map = reinterpret_cast(vpx_calloc(mbs, 1)); ++ ++ // Allocate memory for the source memory map. ++ unsigned char *roi_map = ++ reinterpret_cast(vpx_calloc(mbs, 1)); ++ vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2)); ++ vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2)); ++ vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2)); ++ ++ // Do a test call with valid parameters. ++ int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, ++ cpi.common.mb_cols, delta_q, delta_lf, ++ threshold); ++ EXPECT_EQ(0, roi_retval) ++ << "vp8_set_roimap roi failed with default test parameters"; ++ ++ // Check that the values in the cpi structure get set as expected. ++ if (roi_retval == 0) { ++ // Check that the segment map got set. ++ const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs); ++ EXPECT_EQ(0, mapcompare) << "segment map error"; ++ ++ // Check the q deltas (note the need to translate into ++ // the interanl range of 0-127. 
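++    // For example, the requested delta of -25 in delta_q above translates
++    // to internalq_trans[25] == 30 before the comparison below.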
++ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { ++ const int transq = internalq_trans[abs(delta_q[i])]; ++ if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) { ++ EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i]) ++ << "segment delta_q error"; ++ break; ++ } ++ } ++ ++ // Check the loop filter deltas ++ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { ++ if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) { ++ EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i]) ++ << "segment delta_lf error"; ++ break; ++ } ++ } ++ ++ // Check the breakout thresholds ++ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { ++ unsigned int breakout = ++ static_cast(cpi.segment_encode_breakout[i]); ++ ++ if (threshold[i] != breakout) { ++ EXPECT_EQ(threshold[i], breakout) ++ << "breakout threshold error"; ++ break; ++ } ++ } ++ ++ // Segmentation, and segmentation update flages should be set. ++ EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled) ++ << "segmentation_enabled error"; ++ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map) ++ << "update_mb_segmentation_map error"; ++ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data) ++ << "update_mb_segmentation_data error"; ++ ++ ++ // Try a range of delta q and lf parameters (some legal, some not) ++ for (int i = 0; i < 1000; ++i) { ++ int rand_deltas[4]; ++ int deltas_valid; ++ rand_deltas[0] = (rand() % 160) - 80; ++ rand_deltas[1] = (rand() % 160) - 80; ++ rand_deltas[2] = (rand() % 160) - 80; ++ rand_deltas[3] = (rand() % 160) - 80; ++ ++ deltas_valid = ((abs(rand_deltas[0]) <= 63) && ++ (abs(rand_deltas[1]) <= 63) && ++ (abs(rand_deltas[2]) <= 63) && ++ (abs(rand_deltas[3]) <= 63)) ? 0 : -1; ++ ++ // Test with random delta q values. ++ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, ++ cpi.common.mb_cols, rand_deltas, ++ delta_lf, threshold); ++ EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error"; ++ ++ // One delta_q error shown at a time ++ if (deltas_valid != roi_retval) ++ break; ++ ++ // Test with random loop filter values. ++ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, ++ cpi.common.mb_cols, delta_q, ++ rand_deltas, threshold); ++ EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error"; ++ ++ // One delta loop filter error shown at a time ++ if (deltas_valid != roi_retval) ++ break; ++ } ++ ++ // Test that we report and error if cyclic refresh is enabled. ++ cpi.cyclic_refresh_mode_enabled = 1; ++ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, ++ cpi.common.mb_cols, delta_q, ++ delta_lf, threshold); ++ EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error"; ++ cpi.cyclic_refresh_mode_enabled = 0; ++ ++ // Test invalid number of rows or colums. ++ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1, ++ cpi.common.mb_cols, delta_q, ++ delta_lf, threshold); ++ EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error"; ++ ++ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, ++ cpi.common.mb_cols - 1, delta_q, ++ delta_lf, threshold); ++ EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error"; ++ } ++ ++ // Free allocated memory ++ if (cpi.segmentation_map) ++ vpx_free(cpi.segmentation_map); ++ if (roi_map) ++ vpx_free(roi_map); ++}; ++ ++} // namespace +diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc +new file mode 100644 +index 0000000..c9dcceb +--- /dev/null ++++ b/test/sixtap_predict_test.cc +@@ -0,0 +1,224 @@ ++/* ++* Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
++* ++* Use of this source code is governed by a BSD-style license ++* that can be found in the LICENSE file in the root of the source ++* tree. An additional intellectual property rights grant can be found ++* in the file PATENTS. All contributing project authors may ++* be found in the AUTHORS file in the root of the source tree. ++*/ ++ ++#include ++#include ++#include ++#include "test/acm_random.h" ++#include "test/register_state_check.h" ++#include "test/util.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++extern "C" { ++#include "./vpx_config.h" ++#include "./vpx_rtcd.h" ++#include "vpx/vpx_integer.h" ++#include "vpx_mem/vpx_mem.h" ++} ++ ++namespace { ++ ++typedef void (*sixtap_predict_fn_t)(uint8_t *src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ uint8_t *dst_ptr, ++ int dst_pitch); ++ ++class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) { ++ public: ++ static void SetUpTestCase() { ++ src_ = reinterpret_cast(vpx_memalign(kDataAlignment, kSrcSize)); ++ dst_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); ++ dst_c_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); ++ } ++ ++ static void TearDownTestCase() { ++ vpx_free(src_); ++ src_ = NULL; ++ vpx_free(dst_); ++ dst_ = NULL; ++ vpx_free(dst_c_); ++ dst_c_ = NULL; ++ } ++ ++ protected: ++ // Make test arrays big enough for 16x16 functions. Six-tap filters ++ // need 5 extra pixels outside of the macroblock. ++ static const int kSrcStride = 21; ++ static const int kDstStride = 16; ++ static const int kDataAlignment = 16; ++ static const int kSrcSize = kSrcStride * kSrcStride + 1; ++ static const int kDstSize = kDstStride * kDstStride; ++ ++ virtual void SetUp() { ++ width_ = GET_PARAM(0); ++ height_ = GET_PARAM(1); ++ sixtap_predict_ = GET_PARAM(2); ++ memset(src_, 0, sizeof(src_)); ++ memset(dst_, 0, sizeof(dst_)); ++ memset(dst_c_, 0, sizeof(dst_c_)); ++ } ++ ++ int width_; ++ int height_; ++ sixtap_predict_fn_t sixtap_predict_; ++ // The src stores the macroblock we will filter on, and makes it 1 byte larger ++ // in order to test unaligned access. The result is stored in dst and dst_c(c ++ // reference code result). 
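++  // kSrcStride is 21 because the six-tap filter reads up to two pixels
++  // before and three pixels after each output pixel, so a 16-wide block
++  // needs a 21-wide source window.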
++ static uint8_t* src_; ++ static uint8_t* dst_; ++ static uint8_t* dst_c_; ++}; ++ ++uint8_t* SixtapPredictTest::src_ = NULL; ++uint8_t* SixtapPredictTest::dst_ = NULL; ++uint8_t* SixtapPredictTest::dst_c_ = NULL; ++ ++TEST_P(SixtapPredictTest, TestWithPresetData) { ++ // Test input ++ static const uint8_t test_data[kSrcSize] = { ++ 216, 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177, ++ 79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120, ++ 48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32, ++ 182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124, ++ 148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10, ++ 166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153, ++ 167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184, ++ 114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148, ++ 182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128, ++ 224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76, ++ 148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69, ++ 176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 238, 249, 176, 58, 142, ++ 64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181, ++ 235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42, ++ 148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184, ++ 35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77, ++ 209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221, ++ 223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170, ++ 24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13, ++ 93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69, ++ 49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215, ++ 135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36, ++ 119, 206, 231, 231, 11, 32, 209, 83, 27, 229, 204, 149, 155, 83, 109, 35, ++ 93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52, ++ 53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83, ++ 198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52, ++ 134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202, ++ 75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16 ++ }; ++ ++ // Expected result ++ static const uint8_t expected_dst[kDstSize] = { ++ 117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38, ++ 105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79, ++ 208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102, ++ 159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231, ++ 208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184, ++ 100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133, ++ 113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240, ++ 255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185, ++ 200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189, ++ 76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191, ++ 185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193, ++ 161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95, ++ 15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196, ++ 84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 
12, 131, 166, ++ 189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180, ++ 224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75, ++ 136, 85, 189, 46, 71, 200, 90 ++ }; ++ ++ uint8_t *src = const_cast(test_data); ++ ++ REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride, ++ 2, 2, dst_, kDstStride)); ++ ++ for (int i = 0; i < height_; ++i) ++ for (int j = 0; j < width_; ++j) ++ ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j]) ++ << "i==" << (i * width_ + j); ++} ++ ++using libvpx_test::ACMRandom; ++ ++TEST_P(SixtapPredictTest, TestWithRandomData) { ++ ACMRandom rnd(ACMRandom::DeterministicSeed()); ++ for (int i = 0; i < kSrcSize; ++i) ++ src_[i] = rnd.Rand8(); ++ ++ // Run tests for all possible offsets. ++ for (int xoffset = 0; xoffset < 8; ++xoffset) { ++ for (int yoffset = 0; yoffset < 8; ++yoffset) { ++ // Call c reference function. ++ // Move start point to next pixel to test if the function reads ++ // unaligned data correctly. ++ vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, ++ xoffset, yoffset, dst_c_, kDstStride); ++ ++ // Run test. ++ REGISTER_STATE_CHECK( ++ sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, ++ xoffset, yoffset, dst_, kDstStride)); ++ ++ for (int i = 0; i < height_; ++i) ++ for (int j = 0; j < width_; ++j) ++ ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j]) ++ << "i==" << (i * width_ + j); ++ } ++ } ++} ++ ++using std::tr1::make_tuple; ++ ++const sixtap_predict_fn_t sixtap_16x16_c = vp8_sixtap_predict16x16_c; ++const sixtap_predict_fn_t sixtap_8x8_c = vp8_sixtap_predict8x8_c; ++const sixtap_predict_fn_t sixtap_8x4_c = vp8_sixtap_predict8x4_c; ++const sixtap_predict_fn_t sixtap_4x4_c = vp8_sixtap_predict4x4_c; ++INSTANTIATE_TEST_CASE_P( ++ C, SixtapPredictTest, ::testing::Values( ++ make_tuple(16, 16, sixtap_16x16_c), ++ make_tuple(8, 8, sixtap_8x8_c), ++ make_tuple(8, 4, sixtap_8x4_c), ++ make_tuple(4, 4, sixtap_4x4_c))); ++#if HAVE_MMX ++const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx; ++const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx; ++const sixtap_predict_fn_t sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx; ++const sixtap_predict_fn_t sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx; ++INSTANTIATE_TEST_CASE_P( ++ MMX, SixtapPredictTest, ::testing::Values( ++ make_tuple(16, 16, sixtap_16x16_mmx), ++ make_tuple(8, 8, sixtap_8x8_mmx), ++ make_tuple(8, 4, sixtap_8x4_mmx), ++ make_tuple(4, 4, sixtap_4x4_mmx))); ++#endif ++#if HAVE_SSE2 ++const sixtap_predict_fn_t sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2; ++const sixtap_predict_fn_t sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2; ++const sixtap_predict_fn_t sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2; ++INSTANTIATE_TEST_CASE_P( ++ SSE2, SixtapPredictTest, ::testing::Values( ++ make_tuple(16, 16, sixtap_16x16_sse2), ++ make_tuple(8, 8, sixtap_8x8_sse2), ++ make_tuple(8, 4, sixtap_8x4_sse2))); ++#endif ++#if HAVE_SSSE3 ++const sixtap_predict_fn_t sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3; ++const sixtap_predict_fn_t sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3; ++const sixtap_predict_fn_t sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3; ++const sixtap_predict_fn_t sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3; ++INSTANTIATE_TEST_CASE_P( ++ SSSE3, SixtapPredictTest, ::testing::Values( ++ make_tuple(16, 16, sixtap_16x16_ssse3), ++ make_tuple(8, 8, sixtap_8x8_ssse3), ++ make_tuple(8, 4, sixtap_8x4_ssse3), ++ make_tuple(4, 4, sixtap_4x4_ssse3))); 
++#endif ++} // namespace +diff --git a/test/subtract_test.cc b/test/subtract_test.cc +new file mode 100644 +index 0000000..60acf81 +--- /dev/null ++++ b/test/subtract_test.cc +@@ -0,0 +1,114 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "test/acm_random.h" ++#include "test/register_state_check.h" ++extern "C" { ++#include "vpx_config.h" ++#include "vpx_rtcd.h" ++#include "vp8/common/blockd.h" ++#include "vp8/encoder/block.h" ++#include "vpx_mem/vpx_mem.h" ++} ++ ++typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch); ++ ++namespace { ++ ++class SubtractBlockTest : public ::testing::TestWithParam {}; ++ ++using libvpx_test::ACMRandom; ++ ++TEST_P(SubtractBlockTest, SimpleSubtract) { ++ ACMRandom rnd(ACMRandom::DeterministicSeed()); ++ BLOCK be; ++ BLOCKD bd; ++ // in libvpx, this stride is always 16 ++ const int kDiffPredStride = 16; ++ const int kSrcStride[] = {32, 16, 8, 4, 0}; ++ const int kBlockWidth = 4; ++ const int kBlockHeight = 4; ++ ++ // Allocate... align to 16 for mmx/sse tests ++ uint8_t *source = reinterpret_cast( ++ vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source))); ++ be.src_diff = reinterpret_cast( ++ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff))); ++ bd.predictor = reinterpret_cast( ++ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor))); ++ ++ for(int i = 0; kSrcStride[i] > 0; ++i) { ++ // start at block0 ++ be.src = 0; ++ be.base_src = &source; ++ be.src_stride = kSrcStride[i]; ++ ++ // set difference ++ int16_t *src_diff = be.src_diff; ++ for (int r = 0; r < kBlockHeight; ++r) { ++ for (int c = 0; c < kBlockWidth; ++c) { ++ src_diff[c] = 0xa5a5; ++ } ++ src_diff += kDiffPredStride; ++ } ++ ++ // set destination ++ uint8_t *base_src = *be.base_src; ++ for (int r = 0; r < kBlockHeight; ++r) { ++ for (int c = 0; c < kBlockWidth; ++c) { ++ base_src[c] = rnd.Rand8(); ++ } ++ base_src += be.src_stride; ++ } ++ ++ // set predictor ++ uint8_t *predictor = bd.predictor; ++ for (int r = 0; r < kBlockHeight; ++r) { ++ for (int c = 0; c < kBlockWidth; ++c) { ++ predictor[c] = rnd.Rand8(); ++ } ++ predictor += kDiffPredStride; ++ } ++ ++ REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride)); ++ ++ base_src = *be.base_src; ++ src_diff = be.src_diff; ++ predictor = bd.predictor; ++ for (int r = 0; r < kBlockHeight; ++r) { ++ for (int c = 0; c < kBlockWidth; ++c) { ++ EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r ++ << ", c = " << c; ++ } ++ src_diff += kDiffPredStride; ++ predictor += kDiffPredStride; ++ base_src += be.src_stride; ++ } ++ } ++ vpx_free(be.src_diff); ++ vpx_free(source); ++ vpx_free(bd.predictor); ++} ++ ++INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest, ++ ::testing::Values(vp8_subtract_b_c)); ++ ++#if HAVE_MMX ++INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest, ++ ::testing::Values(vp8_subtract_b_mmx)); ++#endif ++ ++#if HAVE_SSE2 ++INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest, ++ ::testing::Values(vp8_subtract_b_sse2)); ++#endif ++ ++} // namespace +diff --git a/test/test-data.sha1 b/test/test-data.sha1 +new file mode 
100644 +index 0000000..c1b6a83 +--- /dev/null ++++ b/test/test-data.sha1 +@@ -0,0 +1,123 @@ ++d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv ++5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf ++65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf ++906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf ++ec144b1af53af895db78355785650b96dd3f0ade vp80-00-comprehensive-004.ivf ++afc7091785c62f1c121c4554a2830c30704587d9 vp80-00-comprehensive-005.ivf ++42ea9d55c818145d06a9b633b8e85c6a6164fd3e vp80-00-comprehensive-006.ivf ++e5b3a73ab79fe024c14309d653d6bed92902ee3b vp80-00-comprehensive-007.ivf ++f3c50a58875930adfb84525c0ef59d7e4c08540c vp80-00-comprehensive-008.ivf ++4b2841fdb83db51ae322096ae468bbb9dc2c8362 vp80-00-comprehensive-009.ivf ++efbff736e3a91ab6a98c5bc2dce65d645944c7b1 vp80-00-comprehensive-010.ivf ++6b315102cae008d22a3d2c231be92cb704a222f8 vp80-00-comprehensive-011.ivf ++f3214a4fea14c2d5ec689936c1613f274c859ee8 vp80-00-comprehensive-012.ivf ++e4094e96d308c8a35b74c480a43d853c5294cd34 vp80-00-comprehensive-013.ivf ++5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a vp80-00-comprehensive-014.ivf ++e8467688ddf26b5000664f904faf0d70506aa653 vp80-00-comprehensive-015.ivf ++aab55582337dfd2a39ff54fb2576a91910d49337 vp80-00-comprehensive-016.ivf ++1ba24724f80203c9bae4f1d0f99d534721980016 vp80-00-comprehensive-017.ivf ++143a15512b46f436280ddb4d0e6411eb4af434f2 vp80-00-comprehensive-018.ivf ++c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b vp80-01-intra-1400.ivf ++f383955229afe3408453e316d11553d923ca60d5 vp80-01-intra-1411.ivf ++84e1f4343f174c9f3c83f834bac3196fb325bf2c vp80-01-intra-1416.ivf ++fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 vp80-01-intra-1417.ivf ++71ea772d3e9d315b8cbecf41207b8a237c34853b vp80-02-inter-1402.ivf ++d85dbc4271525dcd128c503f936fe69091d1f8d0 vp80-02-inter-1412.ivf ++d4e5d3ad56511867d025f93724d090f92ba6ec3d vp80-02-inter-1418.ivf ++91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa vp80-02-inter-1424.ivf ++17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 vp80-03-segmentation-01.ivf ++3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb vp80-03-segmentation-02.ivf ++c156778d5340967d4b369c490848076e92f1f875 vp80-03-segmentation-03.ivf ++d25dcff6c60e87a1af70945b8911b6b4998533b0 vp80-03-segmentation-04.ivf ++362baba2ce454c9db21218f35e81c27a5ed0b730 vp80-03-segmentation-1401.ivf ++d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 vp80-03-segmentation-1403.ivf ++033adf7f3a13836a3f1cffcb87c1972900f2b5c6 vp80-03-segmentation-1407.ivf ++4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f vp80-03-segmentation-1408.ivf ++f37a62b197c2600d75e0ccfbb31b60efdedac251 vp80-03-segmentation-1409.ivf ++eb25bd7bfba5b2f6935018a930f42d123b1e7fcd vp80-03-segmentation-1410.ivf ++b9d5c436663a30c27cfff84b53a002e501258843 vp80-03-segmentation-1413.ivf ++6da92b9d1a180cc3a8afe348ab12258f5a37be1a vp80-03-segmentation-1414.ivf ++a4f5842602886bd669f115f93d8a35c035cb0948 vp80-03-segmentation-1415.ivf ++f295dceb8ef278b77251b3f9df8aee22e161d547 vp80-03-segmentation-1425.ivf ++198dbf9f36f733200e432664cc8c5752d59779de vp80-03-segmentation-1426.ivf ++7704804e32f5de976803929934a7fafe101ac7b0 vp80-03-segmentation-1427.ivf ++831ccd862ea95ca025d2f3bd8b88678752f5416d vp80-03-segmentation-1432.ivf ++b3c11978529289f9109f2766fcaba3ebc40e11ef vp80-03-segmentation-1435.ivf ++a835a731f5520ebfc1002c40121264d0020559ac vp80-03-segmentation-1436.ivf ++1d1732942f773bb2a5775fcb9689b1579ce28eab vp80-03-segmentation-1437.ivf ++db04799adfe089dfdf74dbd43cc05ede7161f99e vp80-03-segmentation-1441.ivf 
++7caf39b3f20cfd52b998210878062e52a5edf1e6 vp80-03-segmentation-1442.ivf ++3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 vp80-04-partitions-1404.ivf ++93cc323b6b6867f1b12dd48773424549c6960a6b vp80-04-partitions-1405.ivf ++047eedb14b865bdac8a3538e63801054e0295e9c vp80-04-partitions-1406.ivf ++0f1233bd2bc33f56ce5e495dbd455d122339f384 vp80-05-sharpness-1428.ivf ++51767fc136488a9535c2a4c38067c542ee2048df vp80-05-sharpness-1429.ivf ++9805aa107672de25d6fb8c35e20d06deca5efe18 vp80-05-sharpness-1430.ivf ++61db6b965f9c27aebe71b85bf2d5877e58e4bbdf vp80-05-sharpness-1431.ivf ++10420d266290d2923555f84af38eeb96edbd3ae8 vp80-05-sharpness-1433.ivf ++3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 vp80-05-sharpness-1434.ivf ++c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 vp80-05-sharpness-1438.ivf ++aff51d865c2621b60510459244ea83e958e4baed vp80-05-sharpness-1439.ivf ++da386e72b19b5485a6af199c5eb60ef25e510dd1 vp80-05-sharpness-1440.ivf ++6759a095203d96ccd267ce09b1b050b8cc4c2f1f vp80-05-sharpness-1443.ivf ++db55ec7fd02c864ba996ff060b25b1e08611330b vp80-00-comprehensive-001.ivf.md5 ++29db0ad011cba1e45f856d5623cd38dac3e3bf19 vp80-00-comprehensive-002.ivf.md5 ++e84f258f69e173e7d68f8f8c037a0a3766902182 vp80-00-comprehensive-003.ivf.md5 ++eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 vp80-00-comprehensive-004.ivf.md5 ++4206f71c94894bd5b5b376f6c09b3817dbc65206 vp80-00-comprehensive-005.ivf.md5 ++4f89b356f6f2fecb928f330a10f804f00f5325f5 vp80-00-comprehensive-006.ivf.md5 ++2813236a32964dd8007e17648bcf035a20fcda6c vp80-00-comprehensive-007.ivf.md5 ++10746c72098f872803c900e17c5680e451f5f498 vp80-00-comprehensive-008.ivf.md5 ++39a23d0692ce64421a7bb7cdf6ccec5928d37fff vp80-00-comprehensive-009.ivf.md5 ++f6e3de8931a0cc659bda8fbc14050346955e72d4 vp80-00-comprehensive-010.ivf.md5 ++101683ec195b6e944f7cd1e468fc8921439363e6 vp80-00-comprehensive-011.ivf.md5 ++1f592751ce46d8688998fa0fa4fbdcda0fd4058c vp80-00-comprehensive-012.ivf.md5 ++6066176f90ca790251e795fca1a5797d59999841 vp80-00-comprehensive-013.ivf.md5 ++2656da94ba93691f23edc4d60b3a09e2be46c217 vp80-00-comprehensive-014.ivf.md5 ++c6e0d5f5d61460c8ac8edfa4e701f10312c03133 vp80-00-comprehensive-015.ivf.md5 ++ee60fee501d8493e34e8d6a1fe315b51ed09b24a vp80-00-comprehensive-016.ivf.md5 ++9f1914ceffcad4546c0a29de3ef591d8bea304dc vp80-00-comprehensive-017.ivf.md5 ++e0305178fe288a9fd8082b39e2d03181edb19054 vp80-00-comprehensive-018.ivf.md5 ++612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 vp80-01-intra-1400.ivf.md5 ++48ea06097ac8269c5e8c2131d3d0639f431fcf0e vp80-01-intra-1411.ivf.md5 ++6e2ab4e7677ad0ba868083ca6bc387ee922b400c vp80-01-intra-1416.ivf.md5 ++eca0a90348959ce3854142f8d8641b13050e8349 vp80-01-intra-1417.ivf.md5 ++920feea203145d5c2258a91c4e6991934a79a99e vp80-02-inter-1402.ivf.md5 ++f71d97909fe2b3dd65be7e1f56c72237f0cef200 vp80-02-inter-1412.ivf.md5 ++e911254569a30bbb2a237ff8b79f69ed9da0672d vp80-02-inter-1418.ivf.md5 ++58c789c50c9bb9cc90580bed291164a0939d28ba vp80-02-inter-1424.ivf.md5 ++ff3e2f441327b9c20a0b37c524e0f5a48a36de7b vp80-03-segmentation-01.ivf.md5 ++0791f417f076a542ae66fbc3426ab4d94cbd6c75 vp80-03-segmentation-02.ivf.md5 ++722e50f1a6a91c34302d68681faffc1c26d1cc57 vp80-03-segmentation-03.ivf.md5 ++c701f1885bcfb27fb8e70cc65606b289172ef889 vp80-03-segmentation-04.ivf.md5 ++f79bc9ec189a2b4807632a3d0c5bf04a178b5300 vp80-03-segmentation-1401.ivf.md5 ++b9aa4c74c0219b639811c44760d0b24cd8bb436a vp80-03-segmentation-1403.ivf.md5 ++70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 vp80-03-segmentation-1407.ivf.md5 ++265f962ee781531f9a93b9309461316fd32b2a1d vp80-03-segmentation-1408.ivf.md5 
++0c4ecbbd6dc042d30e626d951b65f460dd6cd563 vp80-03-segmentation-1409.ivf.md5 ++cf779af36a937f06570a0fca9db64ba133451dee vp80-03-segmentation-1410.ivf.md5 ++0e6c5036d51ab078842f133934926c598a9cff02 vp80-03-segmentation-1413.ivf.md5 ++eb3930aaf229116c80d507516c34759c3f6cdf69 vp80-03-segmentation-1414.ivf.md5 ++123d6c0f72ee87911c4ae7538e87b7d163b22d6c vp80-03-segmentation-1415.ivf.md5 ++e70551d1a38920e097a5d8782390b79ecaeb7505 vp80-03-segmentation-1425.ivf.md5 ++44e8f4117e46dbb302b2cfd81171cc1a1846e431 vp80-03-segmentation-1426.ivf.md5 ++52636e54aee5f95bbace37021bd67de5db767e9a vp80-03-segmentation-1427.ivf.md5 ++b1ad3eff20215c28e295b15ef3636ed926d59cba vp80-03-segmentation-1432.ivf.md5 ++24c22a552fa28a90e5978f67f57181cc2d7546d7 vp80-03-segmentation-1435.ivf.md5 ++96c49c390abfced18a7a8c9b9ea10af778e10edb vp80-03-segmentation-1436.ivf.md5 ++f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5 ++1c0700ca27c9b0090a7747a4b0b4dc21d1843181 vp80-03-segmentation-1441.ivf.md5 ++81d4f23ca32667ee958bae579c8f5e97ba72eb97 vp80-03-segmentation-1442.ivf.md5 ++272efcef07a3a30fbca51bfd566063d8258ec0be vp80-04-partitions-1404.ivf.md5 ++66ed219ab812ac801b256d35cf495d193d4cf478 vp80-04-partitions-1405.ivf.md5 ++36083f37f56f502bd60ec5e07502ee9e6b8699b0 vp80-04-partitions-1406.ivf.md5 ++6ca909bf168a64c09415626294665dc1be3d1973 vp80-05-sharpness-1428.ivf.md5 ++1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a vp80-05-sharpness-1429.ivf.md5 ++71bcbe5357d36a19df5b07fbe3e27bffa8893f0a vp80-05-sharpness-1430.ivf.md5 ++89a09b1dffce2d55770a89e58d9925c70ef79bf8 vp80-05-sharpness-1431.ivf.md5 ++08444a18b4e6ba3450c0796dd728d48c399a2dc9 vp80-05-sharpness-1433.ivf.md5 ++6d6223719a90c13e848aa2a8a6642098cdb5977a vp80-05-sharpness-1434.ivf.md5 ++41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5 ++086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5 ++d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5 ++8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5 +\ No newline at end of file +diff --git a/test/test.mk b/test/test.mk +new file mode 100644 +index 0000000..982be5b +--- /dev/null ++++ b/test/test.mk +@@ -0,0 +1,179 @@ ++LIBVPX_TEST_SRCS-yes += acm_random.h ++LIBVPX_TEST_SRCS-yes += register_state_check.h ++LIBVPX_TEST_SRCS-yes += test.mk ++LIBVPX_TEST_SRCS-yes += test_libvpx.cc ++LIBVPX_TEST_SRCS-yes += util.h ++LIBVPX_TEST_SRCS-yes += video_source.h ++ ++## ++## BLACK BOX TESTS ++## ++## Black box tests only use the public API. 
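++## (A reading aid, assuming the usual libvpx make conventions rather
++## than anything stated in this file: sources accumulate into
++## LIBVPX_TEST_SRCS-yes or LIBVPX_TEST_SRCS-no depending on how each
++## CONFIG_* flag expands, and only the -yes list is built. With
++## CONFIG_VP8_ENCODER=yes, for example,
++##   LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
++## appends altref_test.cc to LIBVPX_TEST_SRCS-yes.)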
++## ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc ++ ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc ++## ++## WHITE BOX TESTS ++## ++## Whitebox tests invoke functions not exposed via the public API. Certain ++## shared library builds don't make these functions accessible. ++## ++ifeq ($(CONFIG_SHARED),) ++ ++# These tests require both the encoder and decoder to be built. ++ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) ++LIBVPX_TEST_SRCS-yes += boolcoder_test.cc ++endif ++ ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc ++LIBVPX_TEST_SRCS-yes += idctllm_test.cc ++LIBVPX_TEST_SRCS-yes += intrapred_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc ++LIBVPX_TEST_SRCS-yes += sad_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc ++LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc ++ ++endif ++ ++ ++## ++## TEST DATA ++## ++LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf 
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5 ++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5 
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5
++LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5
+diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
+new file mode 100644
+index 0000000..cfd5d28
+--- /dev/null
++++ b/test/test_libvpx.cc
+@@ -0,0 +1,45 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++#include <string>
++#include "vpx_config.h"
++#if ARCH_X86 || ARCH_X86_64
++extern "C" {
++#include "vpx_ports/x86.h"
++}
++#endif
++#include "third_party/googletest/src/include/gtest/gtest.h"
++
++static void append_gtest_filter(const char *str) {
++  std::string filter = ::testing::FLAGS_gtest_filter;
++  filter += str;
++  ::testing::FLAGS_gtest_filter = filter;
++}
++
++int main(int argc, char **argv) {
++  ::testing::InitGoogleTest(&argc, argv);
++
++#if ARCH_X86 || ARCH_X86_64
++  const int simd_caps = x86_simd_caps();
++  if(!(simd_caps & HAS_MMX))
++    append_gtest_filter(":-MMX/*");
++  if(!(simd_caps & HAS_SSE))
++    append_gtest_filter(":-SSE/*");
++  if(!(simd_caps & HAS_SSE2))
++    append_gtest_filter(":-SSE2/*");
++  if(!(simd_caps & HAS_SSE3))
++    append_gtest_filter(":-SSE3/*");
++  if(!(simd_caps & HAS_SSSE3))
++    append_gtest_filter(":-SSSE3/*");
++  if(!(simd_caps & HAS_SSE4_1))
++    append_gtest_filter(":-SSE4_1/*");
++#endif
++
++  return RUN_ALL_TESTS();
++}
+diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
+new file mode 100644
+index 0000000..938457b
+--- /dev/null
++++ b/test/test_vector_test.cc
+@@ -0,0 +1,144 @@
++/*
++ Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++
++ Use of this source code is governed by a BSD-style license
++ that can be found in the LICENSE file in the root of the source
++ tree. An additional intellectual property rights grant can be found
++ in the file PATENTS. All contributing project authors may
++ be found in the AUTHORS file in the root of the source tree.
++ */
++
++#include <cstdio>
++#include <cstdlib>
++#include <string>
++#include "third_party/googletest/src/include/gtest/gtest.h"
++#include "test/decode_test_driver.h"
++#include "test/ivf_video_source.h"
++extern "C" {
++#include "./md5_utils.h"
++#include "vpx_mem/vpx_mem.h"
++}
++
++#if defined(_MSC_VER)
++#define snprintf sprintf_s
++#endif
++
++namespace {
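++// A sketch of the flow exercised below, assuming only the interfaces
++// used in this file: each .ivf vector is decoded frame by frame, an
++// MD5 is computed over the decompressed Y, U and V planes, and the
++// digest is compared against the matching line of the .ivf.md5 file:
++//
++//   libvpx_test::IVFVideoSource video("vp80-00-comprehensive-001.ivf");
++//   video.Init();
++//   // RunLoop() then calls DecompressedFrameHook() once per frame.
++//
++// There are 61 test vectors in total.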
++const char *kTestVectors[] = {
++  "vp80-00-comprehensive-001.ivf",
++  "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf",
++  "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf",
++  "vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf",
++  "vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf",
++  "vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf",
++  "vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf",
++  "vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf",
++  "vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf",
++  "vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf",
++  "vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf",
++  "vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf",
++  "vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf",
++  "vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf",
++  "vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf",
++  "vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf",
++  "vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf",
++  "vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf",
++  "vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf",
++  "vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf",
++  "vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf",
++  "vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf",
++  "vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf",
++  "vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf",
++  "vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf",
++  "vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf",
++  "vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf",
++  "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf",
++  "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf",
++  "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf",
++  "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf"
++};
++
++class TestVectorTest : public libvpx_test::DecoderTest,
++    public ::testing::TestWithParam<const char*> {
++ protected:
++  TestVectorTest() : md5_file_(NULL) {}
++
++  virtual ~TestVectorTest() {
++    if (md5_file_)
++      fclose(md5_file_);
++  }
++
++  void OpenMD5File(const std::string& md5_file_name_) {
++    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
++    ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: "
++        << md5_file_name_;
++  }
++
++  virtual void DecompressedFrameHook(const vpx_image_t& img,
++                                     const unsigned int frame_number) {
++    char expected_md5[33];
++    char junk[128];
++
++    // Read correct md5 checksums.
++    const int res = fscanf(md5_file_, "%s %s", expected_md5, junk);
++    ASSERT_NE(res, EOF) << "Read md5 data failed";
++    expected_md5[32] = '\0';
++
++    MD5Context md5;
++    MD5Init(&md5);
++
++    // Compute and update md5 for each row in the decompressed data.
++    for (int plane = 0; plane < 3; ++plane) {
++      uint8_t *buf = img.planes[plane];
++
++      for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h);
++           ++y) {
++        MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w));
++        buf += img.stride[plane];
++      }
++    }
++
++    uint8_t md5_sum[16];
++    MD5Final(md5_sum, &md5);
++
++    char actual_md5[33];
++    // Convert to get the actual md5.
++    for (int i = 0; i < 16; i++) {
++      snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x",
++               md5_sum[i]);
++    }
++    actual_md5[32] = '\0';
++
++    // Check md5 match.
++    ASSERT_STREQ(expected_md5, actual_md5)
++        << "Md5 checksums don't match: frame number = " << frame_number;
++  }
++
++ private:
++  FILE *md5_file_;
++};
++
++// This test runs through the whole set of test vectors, and decodes them.
++// The md5 checksums are computed for each frame in the video file. If md5
++// checksums match the correct md5 data, then the test is passed. Otherwise,
++// the test failed.
++TEST_P(TestVectorTest, MD5Match) {
++  const std::string filename = GetParam();
++  // Open compressed video file.
++  libvpx_test::IVFVideoSource video(filename);
++
++  video.Init();
++
++  // Construct md5 file name.
++  const std::string md5_filename = filename + ".md5";
++  OpenMD5File(md5_filename);
++
++  // Decode frame, and check the md5 matching.
++  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
++}
++
++INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest,
++                        ::testing::ValuesIn(kTestVectors));
++
++}  // namespace
+diff --git a/test/util.h b/test/util.h
+new file mode 100644
+index 0000000..06a70cc
+--- /dev/null
++++ b/test/util.h
+@@ -0,0 +1,18 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++
++#ifndef TEST_UTIL_H_
++#define TEST_UTIL_H_
++
++// Macros
++#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
++#define GET_PARAM(k) std::tr1::get< k >(GetParam())
++
++#endif  // TEST_UTIL_H_
+diff --git a/test/video_source.h b/test/video_source.h
+new file mode 100644
+index 0000000..9772657
+--- /dev/null
++++ b/test/video_source.h
+@@ -0,0 +1,175 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++#ifndef TEST_VIDEO_SOURCE_H_
++#define TEST_VIDEO_SOURCE_H_
++
++#include <cstdio>
++#include <cstdlib>
++#include <string>
++#include "test/acm_random.h"
++#include "vpx/vpx_encoder.h"
++
++namespace libvpx_test {
++
++static FILE *OpenTestDataFile(const std::string& file_name) {
++  std::string path_to_source = file_name;
++  const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH");
++
++  if (kDataPath) {
++    path_to_source = kDataPath;
++    path_to_source += "/";
++    path_to_source += file_name;
++  }
++
++  return fopen(path_to_source.c_str(), "rb");
++}
++
++// Abstract base class for test video sources, which provide a stream of
++// vpx_image_t images with associated timestamps and duration.
++class VideoSource {
++ public:
++  virtual ~VideoSource() {}
++
++  // Prepare the stream for reading, rewind/open as necessary.
++  virtual void Begin() = 0;
++
++  // Advance the cursor to the next frame
++  virtual void Next() = 0;
++
++  // Get the current video frame, or NULL on End-Of-Stream.
++  virtual vpx_image_t *img() const = 0;
++
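++  // A usage sketch (encode() is a placeholder, not part of this
++  // interface): a consumer typically drives a source as
++  //
++  //   for (video.Begin(); video.img(); video.Next())
++  //     encode(video.img(), video.pts(), video.duration());
++
++  // Get the presentation timestamp of the current frame.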
++ virtual vpx_codec_pts_t pts() const = 0; ++ ++ // Get the current frame's duration ++ virtual unsigned long duration() const = 0; ++ ++ // Get the timebase for the stream ++ virtual vpx_rational_t timebase() const = 0; ++ ++ // Get the current frame counter, starting at 0. ++ virtual unsigned int frame() const = 0; ++ ++ // Get the current file limit. ++ virtual unsigned int limit() const = 0; ++}; ++ ++ ++class DummyVideoSource : public VideoSource { ++ public: ++ DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) { ++ SetSize(80, 64); ++ } ++ ++ virtual ~DummyVideoSource() { vpx_img_free(img_); } ++ ++ virtual void Begin() { ++ frame_ = 0; ++ FillFrame(); ++ } ++ ++ virtual void Next() { ++ ++frame_; ++ FillFrame(); ++ } ++ ++ virtual vpx_image_t *img() const { ++ return (frame_ < limit_) ? img_ : NULL; ++ } ++ ++ // Models a stream where Timebase = 1/FPS, so pts == frame. ++ virtual vpx_codec_pts_t pts() const { return frame_; } ++ ++ virtual unsigned long duration() const { return 1; } ++ ++ virtual vpx_rational_t timebase() const { ++ const vpx_rational_t t = {1, 30}; ++ return t; ++ } ++ ++ virtual unsigned int frame() const { return frame_; } ++ ++ virtual unsigned int limit() const { return limit_; } ++ ++ void SetSize(unsigned int width, unsigned int height) { ++ if (width != width_ || height != height_) { ++ vpx_img_free(img_); ++ raw_sz_ = ((width + 31)&~31) * height * 3 / 2; ++ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32); ++ width_ = width; ++ height_ = height; ++ } ++ } ++ ++ protected: ++ virtual void FillFrame() { memset(img_->img_data, 0, raw_sz_); } ++ ++ vpx_image_t *img_; ++ size_t raw_sz_; ++ unsigned int limit_; ++ unsigned int frame_; ++ unsigned int width_; ++ unsigned int height_; ++}; ++ ++ ++class RandomVideoSource : public DummyVideoSource { ++ public: ++ RandomVideoSource(int seed = ACMRandom::DeterministicSeed()) ++ : rnd_(seed), ++ seed_(seed) { } ++ ++ protected: ++ // Reset the RNG to get a matching stream for the second pass ++ virtual void Begin() { ++ frame_ = 0; ++ rnd_.Reset(seed_); ++ FillFrame(); ++ } ++ ++ // 15 frames of noise, followed by 15 static frames. Reset to 0 rather ++ // than holding previous frames to encourage keyframes to be thrown. ++ virtual void FillFrame() { ++ if (frame_ % 30 < 15) ++ for (size_t i = 0; i < raw_sz_; ++i) ++ img_->img_data[i] = rnd_.Rand8(); ++ else ++ memset(img_->img_data, 0, raw_sz_); ++ } ++ ++ ACMRandom rnd_; ++ int seed_; ++}; ++ ++// Abstract base class for test video sources, which provide a stream of ++// decompressed images to the decoder. ++class CompressedVideoSource { ++ public: ++ virtual ~CompressedVideoSource() {} ++ ++ virtual void Init() = 0; ++ ++ // Prepare the stream for reading, rewind/open as necessary. 
++ virtual void Begin() = 0; ++ ++ // Advance the cursor to the next frame ++ virtual void Next() = 0; ++ ++ virtual const uint8_t *cxdata() const = 0; ++ ++ virtual const unsigned int frame_size() const = 0; ++ ++ virtual const unsigned int frame_number() const = 0; ++}; ++ ++} // namespace libvpx_test ++ ++#endif // TEST_VIDEO_SOURCE_H_ +diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c +index 930a7ae..c142a17 100644 +--- a/third_party/libyuv/source/scale.c ++++ b/third_party/libyuv/source/scale.c +@@ -60,7 +60,7 @@ void SetUseReferenceImpl(int use) { + + #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) + #define HAS_SCALEROWDOWN2_NEON +-void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, ++void ScaleRowDown2_NEON(const uint8* src_ptr, int src_stride, + uint8* dst, int dst_width) { + asm volatile ( + "1: \n" +@@ -102,7 +102,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, + } + + #define HAS_SCALEROWDOWN4_NEON +-static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, ++static void ScaleRowDown4_NEON(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + asm volatile ( + "1: \n" +@@ -160,7 +160,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, + // Down scale from 4 to 3 pixels. Use the neon multilane read/write + // to load up the every 4th pixel into a 4 different registers. + // Point samples 32 pixels to 24 pixels. +-static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, ++static void ScaleRowDown34_NEON(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + asm volatile ( + "1: \n" +@@ -284,7 +284,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = + 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; + + // 32 -> 12 +-static void ScaleRowDown38_NEON(const uint8* src_ptr, int, ++static void ScaleRowDown38_NEON(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + asm volatile ( + "vld1.u8 {q3}, [%3] \n" +diff --git a/tools/ftfy.sh b/tools/ftfy.sh +index 95fd397..c5cfdea 100755 +--- a/tools/ftfy.sh ++++ b/tools/ftfy.sh +@@ -34,7 +34,7 @@ vpx_style() { + --align-pointer=name \ + --indent-preprocessor --convert-tabs --indent-labels \ + --suffix=none --quiet "$@" +- sed -i 's/[[:space:]]\{1,\},/,/g' "$@" ++ sed -i "" 's/[[:space:]]\{1,\},/,/g' "$@" + } + + +diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c +index d58e49c..8af9e90 100644 +--- a/vp8/common/alloccommon.c ++++ b/vp8/common/alloccommon.c +@@ -17,23 +17,6 @@ + #include "entropymode.h" + #include "systemdependent.h" + +- +-extern void vp8_init_scan_order_mask(); +- +-static void update_mode_info_border(MODE_INFO *mi, int rows, int cols) +-{ +- int i; +- vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1)); +- +- for (i = 0; i < rows; i++) +- { +- /* TODO(holmer): Bug? This updates the last element of each row +- * rather than the border element! 
+- */ +- vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO)); +- } +-} +- + void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) + { + int i; +@@ -45,16 +28,20 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) + vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); + if (oci->post_proc_buffer_int_used) + vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); ++ ++ vpx_free(oci->pp_limits_buffer); ++ oci->pp_limits_buffer = NULL; + #endif + + vpx_free(oci->above_context); + vpx_free(oci->mip); ++#if CONFIG_ERROR_CONCEALMENT + vpx_free(oci->prev_mip); ++ oci->prev_mip = NULL; ++#endif + +- oci->above_context = 0; +- oci->mip = 0; +- oci->prev_mip = 0; +- ++ oci->above_context = NULL; ++ oci->mip = NULL; + } + + int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) +@@ -76,10 +63,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) + oci->fb_idx_ref_cnt[i] = 0; + oci->yv12_fb[i].flags = 0; + if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) +- { +- vp8_de_alloc_frame_buffers(oci); +- return 1; +- } ++ goto allocation_fail; + } + + oci->new_fb_idx = 0; +@@ -93,22 +77,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) + oci->fb_idx_ref_cnt[3] = 1; + + if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) +- { +- vp8_de_alloc_frame_buffers(oci); +- return 1; +- } +- +-#if CONFIG_POSTPROC +- if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) +- { +- vp8_de_alloc_frame_buffers(oci); +- return 1; +- } +- +- oci->post_proc_buffer_int_used = 0; +- vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); +- vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); +-#endif ++ goto allocation_fail; + + oci->mb_rows = height >> 4; + oci->mb_cols = width >> 4; +@@ -117,44 +86,43 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) + oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); + + if (!oci->mip) +- { +- vp8_de_alloc_frame_buffers(oci); +- return 1; +- } ++ goto allocation_fail; + + oci->mi = oci->mip + oci->mode_info_stride + 1; + +- /* allocate memory for last frame MODE_INFO array */ +-#if CONFIG_ERROR_CONCEALMENT +- oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); +- +- if (!oci->prev_mip) +- { +- vp8_de_alloc_frame_buffers(oci); +- return 1; +- } +- +- oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1; +-#else +- oci->prev_mip = NULL; +- oci->prev_mi = NULL; +-#endif ++ /* Allocation of previous mode info will be done in vp8_decode_frame() ++ * as it is a decoder only data */ + + oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); + + if (!oci->above_context) +- { +- vp8_de_alloc_frame_buffers(oci); +- return 1; +- } ++ goto allocation_fail; + +- update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols); +-#if CONFIG_ERROR_CONCEALMENT +- update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols); ++#if CONFIG_POSTPROC ++ if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) ++ goto allocation_fail; ++ ++ oci->post_proc_buffer_int_used = 0; ++ vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); ++ vpx_memset(oci->post_proc_buffer.buffer_alloc, 128, ++ oci->post_proc_buffer.frame_size); ++ ++ /* Allocate buffer to store post-processing filter coefficients. 
++ * ++ * Note: Round up mb_cols to support SIMD reads ++ */ ++ oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); ++ if (!oci->pp_limits_buffer) ++ goto allocation_fail; + #endif + + return 0; ++ ++allocation_fail: ++ vp8_de_alloc_frame_buffers(oci); ++ return 1; + } ++ + void vp8_setup_version(VP8_COMMON *cm) + { + switch (cm->version) +diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm +index a974cd1..c5ec824 100644 +--- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm ++++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm +@@ -18,15 +18,23 @@ + AREA ||.text||, CODE, READONLY, ALIGN=2 + + +-;void vp8_intra4x4_predict(unsigned char *src, int src_stride, int b_mode, +-; unsigned char *dst, int dst_stride) +- ++;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, ++; B_PREDICTION_MODE left_stride, int b_mode, ++; unsigned char *dst, int dst_stride, ++; unsigned char top_left) ++ ++; r0: *Above ++; r1: *yleft ++; r2: left_stride ++; r3: b_mode ++; sp + #40: dst ++; sp + #44: dst_stride ++; sp + #48: top_left + |vp8_intra4x4_predict_armv6| PROC + push {r4-r12, lr} + +- +- cmp r2, #10 +- addlt pc, pc, r2, lsl #2 ; position independent switch ++ cmp r3, #10 ++ addlt pc, pc, r3, lsl #2 ; position independent switch + pop {r4-r12, pc} ; default + b b_dc_pred + b b_tm_pred +@@ -41,13 +49,13 @@ + + b_dc_pred + ; load values +- ldr r8, [r0, -r1] ; Above +- ldrb r4, [r0, #-1]! ; Left[0] ++ ldr r8, [r0] ; Above ++ ldrb r4, [r1], r2 ; Left[0] + mov r9, #0 +- ldrb r5, [r0, r1] ; Left[1] +- ldrb r6, [r0, r1, lsl #1]! ; Left[2] ++ ldrb r5, [r1], r2 ; Left[1] ++ ldrb r6, [r1], r2 ; Left[2] + usad8 r12, r8, r9 +- ldrb r7, [r0, r1] ; Left[3] ++ ldrb r7, [r1] ; Left[3] + + ; calculate dc + add r4, r4, r5 +@@ -55,31 +63,30 @@ b_dc_pred + add r4, r4, r7 + add r4, r4, r12 + add r4, r4, #4 +- ldr r0, [sp, #40] ; load stride ++ ldr r0, [sp, #44] ; dst_stride + mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 + + add r12, r12, r12, lsl #8 +- add r3, r3, r0 ++ ldr r3, [sp, #40] ; dst + add r12, r12, r12, lsl #16 + + ; store values +- str r12, [r3, -r0] ++ str r12, [r3], r0 ++ str r12, [r3], r0 ++ str r12, [r3], r0 + str r12, [r3] +- str r12, [r3, r0] +- str r12, [r3, r0, lsl #1] + + pop {r4-r12, pc} + + b_tm_pred +- sub r10, r0, #1 ; Left +- ldr r8, [r0, -r1] ; Above +- ldrb r9, [r10, -r1] ; top_left +- ldrb r4, [r0, #-1]! ; Left[0] +- ldrb r5, [r10, r1]! ; Left[1] +- ldrb r6, [r0, r1, lsl #1] ; Left[2] +- ldrb r7, [r10, r1, lsl #1] ; Left[3] +- ldr r0, [sp, #40] ; load stride +- ++ ldr r8, [r0] ; Above ++ ldrb r9, [sp, #48] ; top_left ++ ldrb r4, [r1], r2 ; Left[0] ++ ldrb r5, [r1], r2 ; Left[1] ++ ldrb r6, [r1], r2 ; Left[2] ++ ldrb r7, [r1] ; Left[3] ++ ldr r0, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + add r9, r9, r9, lsl #16 ; [tl|tl] + uxtb16 r10, r8 ; a[2|0] +@@ -126,25 +133,26 @@ b_tm_pred + str r12, [r3], r0 + + add r12, r4, r5, lsl #8 ; [3|2|1|0] +- str r12, [r3], r0 ++ str r12, [r3] + + pop {r4-r12, pc} + + b_ve_pred +- ldr r8, [r0, -r1]! 
; a[3|2|1|0] ++ ldr r8, [r0] ; a[3|2|1|0] + ldr r11, c00FF00FF +- ldrb r9, [r0, #-1] ; top_left ++ ldrb r9, [sp, #48] ; top_left + ldrb r10, [r0, #4] ; a[4] + + ldr r0, c00020002 + + uxtb16 r4, r8 ; a[2|0] + uxtb16 r5, r8, ror #8 ; a[3|1] +- ldr r2, [sp, #40] ; stride ++ ldr r2, [sp, #44] ; dst_stride + pkhbt r9, r9, r5, lsl #16 ; a[1|-1] + + add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ] + uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ] ++ ldr r3, [sp, #40] ; dst + uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2] + + add r0, r0, r10, lsl #16 ;[a[4]+2 | 2] +@@ -154,25 +162,23 @@ b_ve_pred + + and r9, r11, r9, asr #2 + and r4, r11, r4, asr #2 +- add r3, r3, r2 ; dst + dst_stride + add r9, r9, r4, lsl #8 + + ; store values +- str r9, [r3, -r2] ++ str r9, [r3], r2 ++ str r9, [r3], r2 ++ str r9, [r3], r2 + str r9, [r3] +- str r9, [r3, r2] +- str r9, [r3, r2, lsl #1] + + pop {r4-r12, pc} + + + b_he_pred +- sub r10, r0, #1 ; Left +- ldrb r4, [r0, #-1]! ; Left[0] +- ldrb r8, [r10, -r1] ; top_left +- ldrb r5, [r10, r1]! ; Left[1] +- ldrb r6, [r0, r1, lsl #1] ; Left[2] +- ldrb r7, [r10, r1, lsl #1] ; Left[3] ++ ldrb r4, [r1], r2 ; Left[0] ++ ldrb r8, [sp, #48] ; top_left ++ ldrb r5, [r1], r2 ; Left[1] ++ ldrb r6, [r1], r2 ; Left[2] ++ ldrb r7, [r1] ; Left[3] + + add r8, r8, r4 ; tl + l[0] + add r9, r4, r5 ; l[0] + l[1] +@@ -197,7 +203,8 @@ b_he_pred + pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2] + pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3] + +- ldr r0, [sp, #40] ; stride ++ ldr r0, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + add r8, r8, r8, lsl #8 ; l[0|0|0|0] + add r9, r9, r9, lsl #8 ; l[1|1|1|1] +@@ -206,16 +213,16 @@ b_he_pred + + ; store values + str r8, [r3], r0 +- str r9, [r3] +- str r10, [r3, r0] +- str r11, [r3, r0, lsl #1] ++ str r9, [r3], r0 ++ str r10, [r3], r0 ++ str r11, [r3] + + pop {r4-r12, pc} + + b_ld_pred +- ldr r4, [r0, -r1]! ; Above ++ ldr r4, [r0] ; Above[0-3] + ldr r12, c00020002 +- ldr r5, [r0, #4] ++ ldr r5, [r0, #4] ; Above[4-7] + ldr lr, c00FF00FF + + uxtb16 r6, r4 ; a[2|0] +@@ -225,7 +232,6 @@ b_ld_pred + pkhtb r10, r6, r8 ; a[2|4] + pkhtb r11, r7, r9 ; a[3|5] + +- + add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1] + add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2] + uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2] +@@ -244,7 +250,8 @@ b_ld_pred + add r7, r7, r9, asr #16 ; [ a5+2*a6+a7] + uxtah r7, r7, r12 ; [ a5+2*a6+a7+2] + +- ldr r0, [sp, #40] ; stride ++ ldr r0, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + ; scale down + and r4, lr, r4, asr #2 +@@ -266,18 +273,17 @@ b_ld_pred + mov r6, r6, lsr #16 + mov r11, r10, lsr #8 + add r11, r11, r6, lsl #24 ; [6|5|4|3] +- str r11, [r3], r0 ++ str r11, [r3] + + pop {r4-r12, pc} + + b_rd_pred +- sub r12, r0, r1 ; Above = src - src_stride +- ldrb r7, [r0, #-1]! ; l[0] = pp[3] +- ldr lr, [r12] ; Above = pp[8|7|6|5] +- ldrb r8, [r12, #-1]! 
; tl = pp[4] +- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] +- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] +- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] ++ ldrb r7, [r1], r2 ; l[0] = pp[3] ++ ldr lr, [r0] ; Above = pp[8|7|6|5] ++ ldrb r8, [sp, #48] ; tl = pp[4] ++ ldrb r6, [r1], r2 ; l[1] = pp[2] ++ ldrb r5, [r1], r2 ; l[2] = pp[1] ++ ldrb r4, [r1], r2 ; l[3] = pp[0] + + + uxtb16 r9, lr ; p[7|5] +@@ -307,7 +313,8 @@ b_rd_pred + add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] + uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] + +- ldr r0, [sp, #40] ; stride ++ ldr r0, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + ; scale down + and r7, lr, r7, asr #2 +@@ -328,18 +335,17 @@ b_rd_pred + + mov r11, r10, lsl #8 ; [3|2|1|-] + uxtab r11, r11, r4 ; [3|2|1|0] +- str r11, [r3], r0 ++ str r11, [r3] + + pop {r4-r12, pc} + + b_vr_pred +- sub r12, r0, r1 ; Above = src - src_stride +- ldrb r7, [r0, #-1]! ; l[0] = pp[3] +- ldr lr, [r12] ; Above = pp[8|7|6|5] +- ldrb r8, [r12, #-1]! ; tl = pp[4] +- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] +- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] +- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] ++ ldrb r7, [r1], r2 ; l[0] = pp[3] ++ ldr lr, [r0] ; Above = pp[8|7|6|5] ++ ldrb r8, [sp, #48] ; tl = pp[4] ++ ldrb r6, [r1], r2 ; l[1] = pp[2] ++ ldrb r5, [r1], r2 ; l[2] = pp[1] ++ ldrb r4, [r1] ; l[3] = pp[0] + + add r5, r5, r7, lsl #16 ; p[3|1] + add r6, r6, r8, lsl #16 ; p[4|2] +@@ -376,7 +382,8 @@ b_vr_pred + add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] + uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] + +- ldr r0, [sp, #40] ; stride ++ ldr r0, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + ; scale down + and r5, lr, r5, asr #2 ; [B|A] +@@ -397,14 +404,14 @@ b_vr_pred + pkhtb r10, r7, r5, asr #16 ; [-|H|-|B] + str r2, [r3], r0 + add r12, r12, r10, lsl #8 ; [H|D|B|A] +- str r12, [r3], r0 ++ str r12, [r3] + + pop {r4-r12, pc} + + b_vl_pred +- ldr r4, [r0, -r1]! ; [3|2|1|0] ++ ldr r4, [r0] ; [3|2|1|0] = Above[0-3] + ldr r12, c00020002 +- ldr r5, [r0, #4] ; [7|6|5|4] ++ ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7] + ldr lr, c00FF00FF + ldr r2, c00010001 + +@@ -441,7 +448,8 @@ b_vl_pred + add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5] + uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] + +- ldr r0, [sp, #40] ; stride ++ ldr r0, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + ; scale down + and r5, lr, r5, asr #2 ; [D|C] +@@ -449,7 +457,6 @@ b_vl_pred + and r8, lr, r8, asr #2 ; [I|D] + and r9, lr, r9, asr #2 ; [J|H] + +- + add r10, r4, r6, lsl #8 ; [F|B|E|A] + str r10, [r3], r0 + +@@ -463,18 +470,17 @@ b_vl_pred + str r12, [r3], r0 + + add r10, r7, r10, lsl #8 ; [J|H|D|G] +- str r10, [r3], r0 ++ str r10, [r3] + + pop {r4-r12, pc} + + b_hd_pred +- sub r12, r0, r1 ; Above = src - src_stride +- ldrb r7, [r0, #-1]! ; l[0] = pp[3] +- ldr lr, [r12] ; Above = pp[8|7|6|5] +- ldrb r8, [r12, #-1]! 
; tl = pp[4] +- ldrb r6, [r0, r1] ; l[1] = pp[2] +- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] +- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] ++ ldrb r7, [r1], r2 ; l[0] = pp[3] ++ ldr lr, [r0] ; Above = pp[8|7|6|5] ++ ldrb r8, [sp, #48] ; tl = pp[4] ++ ldrb r6, [r1], r2 ; l[1] = pp[2] ++ ldrb r5, [r1], r2 ; l[2] = pp[1] ++ ldrb r4, [r1] ; l[3] = pp[0] + + uxtb16 r9, lr ; p[7|5] + uxtb16 r10, lr, ror #8 ; p[8|6] +@@ -492,7 +498,6 @@ b_hd_pred + pkhtb r1, r9, r10 ; p[7|6] + pkhbt r10, r8, r10, lsl #16 ; p[6|5] + +- + uadd16 r11, r4, r5 ; [p1+p2 | p0+p1] + uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1] + ; [B|A] +@@ -518,7 +523,8 @@ b_hd_pred + and r5, lr, r5, asr #2 ; [H|G] + and r6, lr, r6, asr #2 ; [J|I] + +- ldr lr, [sp, #40] ; stride ++ ldr lr, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + + pkhtb r2, r0, r6 ; [-|F|-|I] + pkhtb r12, r6, r5, asr #16 ; [-|J|-|H] +@@ -527,7 +533,6 @@ b_hd_pred + mov r12, r12, ror #24 ; [J|I|H|F] + str r12, [r3], lr + +- + mov r7, r11, asr #16 ; [-|-|-|B] + str r2, [r3], lr + add r7, r7, r0, lsl #16 ; [-|E|-|B] +@@ -536,21 +541,20 @@ b_hd_pred + str r7, [r3], lr + + add r5, r11, r4, lsl #8 ; [D|B|C|A] +- str r5, [r3], lr ++ str r5, [r3] + + pop {r4-r12, pc} + + + + b_hu_pred +- ldrb r4, [r0, #-1]! ; Left[0] ++ ldrb r4, [r1], r2 ; Left[0] + ldr r12, c00020002 +- ldrb r5, [r0, r1]! ; Left[1] ++ ldrb r5, [r1], r2 ; Left[1] + ldr lr, c00FF00FF +- ldrb r6, [r0, r1]! ; Left[2] ++ ldrb r6, [r1], r2 ; Left[2] + ldr r2, c00010001 +- ldrb r7, [r0, r1] ; Left[3] +- ++ ldrb r7, [r1] ; Left[3] + + add r4, r4, r5, lsl #16 ; [1|0] + add r5, r5, r6, lsl #16 ; [2|1] +@@ -563,7 +567,8 @@ b_hu_pred + add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1] + add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2] + uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2] +- ldr r2, [sp, #40] ; stride ++ ldr r2, [sp, #44] ; dst_stride ++ ldr r3, [sp, #40] ; dst + and r4, lr, r4, asr #2 ; [D|C] + + add r10, r6, r7 ; [p2+p3] +@@ -587,9 +592,9 @@ b_hu_pred + + add r10, r11, lsl #8 ; [-|-|F|E] + add r10, r10, r9, lsl #16 ; [G|G|F|E] +- str r10, [r3] ++ str r10, [r3], r2 + +- str r7, [r3, r2] ++ str r7, [r3] + + pop {r4-r12, pc} + +diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm +index 65a4680..79ff02c 100644 +--- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm ++++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm +@@ -46,7 +46,7 @@ + vst1.32 {d2[1]}, [r3], r12 + vst1.32 {d4[0]}, [r3], r12 + vst1.32 {d4[1]}, [r3] +- ++ + bx lr + + ENDP +diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h +index a4c1d92..f7ff577 100644 +--- a/vp8/common/blockd.h ++++ b/vp8/common/blockd.h +@@ -161,22 +161,32 @@ typedef struct + uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ + } MB_MODE_INFO; + +-typedef struct ++typedef struct modeinfo + { + MB_MODE_INFO mbmi; + union b_mode_info bmi[16]; + } MODE_INFO; + + #if CONFIG_MULTI_RES_ENCODING +-/* The information needed to be stored for higher-resolution encoder */ ++/* The mb-level information needed to be stored for higher-resolution encoder */ + typedef struct + { + MB_PREDICTION_MODE mode; + MV_REFERENCE_FRAME ref_frame; + int_mv mv; +- //union b_mode_info bmi[16]; +- int dissim; // dissimilarity level of the macroblock +-} LOWER_RES_INFO; ++ int dissim; /* dissimilarity level of the macroblock */ ++} LOWER_RES_MB_INFO; ++ ++/* The frame-level information needed to be stored for higher-resolution ++ * encoder */ ++typedef struct ++{ ++ FRAME_TYPE frame_type; ++ int 
is_frame_dropped; ++ /* The frame number of each reference frames */ ++ unsigned int low_res_ref_frames[MAX_REF_FRAMES]; ++ LOWER_RES_MB_INFO *mb_info; ++} LOWER_RES_FRAME_INFO; + #endif + + typedef struct blockd +@@ -216,12 +226,6 @@ typedef struct macroblockd + MODE_INFO *mode_info_context; + int mode_info_stride; + +-#if CONFIG_TEMPORAL_DENOISING +- MB_PREDICTION_MODE best_sse_inter_mode; +- int_mv best_sse_mv; +- unsigned char need_to_clamp_best_mvs; +-#endif +- + FRAME_TYPE frame_type; + + int up_available; +diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c +index a95a923..8c046a4 100644 +--- a/vp8/common/entropy.c ++++ b/vp8/common/entropy.c +@@ -101,7 +101,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ + /* vp8_coef_encodings generated with: + vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); + */ +-const vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = ++vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = + { + {2, 2}, + {6, 3}, +diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c +index de7e828..091e4c7 100644 +--- a/vp8/common/entropymode.c ++++ b/vp8/common/entropymode.c +@@ -160,9 +160,7 @@ const vp8_tree_index vp8_small_mvtree [14] = + void vp8_init_mbmode_probs(VP8_COMMON *x) + { + vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); +- vpx_memcpy(x->kf_ymode_prob, vp8_kf_ymode_prob, sizeof(vp8_kf_ymode_prob)); + vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); +- vpx_memcpy(x->kf_uv_mode_prob, vp8_kf_uv_mode_prob, sizeof(vp8_kf_uv_mode_prob)); + vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); + } + +@@ -171,7 +169,3 @@ void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) + vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); + } + +-void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]) +-{ +- vpx_memcpy(p, vp8_kf_bmode_prob, sizeof(vp8_kf_bmode_prob)); +-} +diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h +index 70200cb..1df0f64 100644 +--- a/vp8/common/entropymode.h ++++ b/vp8/common/entropymode.h +@@ -24,11 +24,11 @@ typedef enum + SUBMVREF_LEFT_ABOVE_ZED + } sumvfref_t; + +-typedef const int vp8_mbsplit[16]; ++typedef int vp8_mbsplit[16]; + + #define VP8_NUMMBSPLITS 4 + +-extern vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; ++extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; + + extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ + +@@ -67,9 +67,14 @@ extern const vp8_tree_index vp8_small_mvtree[]; + + extern const struct vp8_token_struct vp8_small_mvencodings[8]; + +-void vp8_init_mbmode_probs(VP8_COMMON *x); ++/* Key frame default mode probs */ ++extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] ++[VP8_BINTRAMODES-1]; ++extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; ++extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; + +-void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); ++void vp8_init_mbmode_probs(VP8_COMMON *x); ++void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); + void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); + + #endif +diff --git a/vp8/common/extend.c b/vp8/common/extend.c +index 9089e16..c9bdd21 100644 +--- a/vp8/common/extend.c ++++ b/vp8/common/extend.c +@@ -116,7 +116,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, + int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); 
+   int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
+ 
+-    // If the side is not touching the bounder then don't extend.
++    /* If the side is not touching the border then don't extend. */
+     if (srcy)
+       et = 0;
+     if (srcx)
+@@ -157,7 +157,10 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+ 
+ 
+ /* note the extension is only for the last row, for intra prediction purpose */
+-void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
++void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
++                       unsigned char *YPtr,
++                       unsigned char *UPtr,
++                       unsigned char *VPtr)
+ {
+     int i;
+ 
+diff --git a/vp8/common/filter.h b/vp8/common/filter.h
+index 0f225c2..b7591f2 100644
+--- a/vp8/common/filter.h
++++ b/vp8/common/filter.h
+@@ -19,4 +19,4 @@
+ extern const short vp8_bilinear_filters[8][2];
+ extern const short vp8_sub_pel_filters[8][6];
+ 
+-#endif //FILTER_H
++#endif
+diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
+index 2a30166..5a6ac7b 100644
+--- a/vp8/common/generic/systemdependent.c
++++ b/vp8/common/generic/systemdependent.c
+@@ -83,57 +83,6 @@ static int get_cpu_count()
+ #endif
+ 
+ 
+-#if HAVE_PTHREAD_H
+-#include <pthread.h>
+-static void once(void (*func)(void))
+-{
+-    static pthread_once_t lock = PTHREAD_ONCE_INIT;
+-    pthread_once(&lock, func);
+-}
+-
+-
+-#elif defined(_WIN32)
+-static void once(void (*func)(void))
+-{
+-    /* Using a static initializer here rather than InitializeCriticalSection()
+-     * since there's no race-free context in which to execute it. Protecting
+-     * it with an atomic op like InterlockedCompareExchangePointer introduces
+-     * an x86 dependency, and InitOnceExecuteOnce requires Vista.
+-     */
+-    static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0};
+-    static int done;
+-
+-    EnterCriticalSection(&lock);
+-
+-    if (!done)
+-    {
+-        func();
+-        done = 1;
+-    }
+-
+-    LeaveCriticalSection(&lock);
+-}
+-
+-
+-#else
+-/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
+- * so as long as your platform provides atomic loads/stores of pointers
+- * no synchronization is strictly necessary.
+- */
+-
+-static void once(void (*func)(void))
+-{
+-    static int done;
+-
+-    if(!done)
+-    {
+-        func();
+-        done = 1;
+-    }
+-}
+-#endif
+-
+-
+ void vp8_machine_specific_config(VP8_COMMON *ctx)
+ {
+ #if CONFIG_MULTITHREAD
+@@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
+ #elif ARCH_X86 || ARCH_X86_64
+     ctx->cpu_caps = x86_simd_caps();
+ #endif
+-
+-    once(vpx_rtcd);
+ }
+diff --git a/vp8/common/idctllm_test.cc b/vp8/common/idctllm_test.cc
+deleted file mode 100755
+index 0f6ebe7..0000000
+--- a/vp8/common/idctllm_test.cc
++++ /dev/null
+@@ -1,31 +0,0 @@
+-/*
+- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+- *
+- *  Use of this source code is governed by a BSD-style license
+- *  that can be found in the LICENSE file in the root of the source
+- *  tree. An additional intellectual property rights grant can be found
+- *  in the file PATENTS.  All contributing project authors may
+- *  be found in the AUTHORS file in the root of the source tree.
+- */
+-
+-
+- extern "C" {
+-    void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
+-                                int pred_stride, unsigned char *dst_ptr,
+-                                int dst_stride);
+-}
+-
+-#include "vpx_config.h"
+-#include "idctllm_test.h"
+-namespace
+-{
+-
+-INSTANTIATE_TEST_CASE_P(C, IDCTTest,
+-                        ::testing::Values(vp8_short_idct4x4llm_c));
+-
+-}  // namespace
+-
+-int main(int argc, char **argv) {
+-    ::testing::InitGoogleTest(&argc, argv);
+-    return RUN_ALL_TESTS();
+-}
+diff --git a/vp8/common/idctllm_test.h b/vp8/common/idctllm_test.h
+deleted file mode 100755
+index a6a694b..0000000
+--- a/vp8/common/idctllm_test.h
++++ /dev/null
+@@ -1,113 +0,0 @@
+-/*
+- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+- *
+- *  Use of this source code is governed by a BSD-style license
+- *  that can be found in the LICENSE file in the root of the source
+- *  tree. An additional intellectual property rights grant can be found
+- *  in the file PATENTS.  All contributing project authors may
+- *  be found in the AUTHORS file in the root of the source tree.
+- */
+-
+-
+- #include "third_party/googletest/src/include/gtest/gtest.h"
+-typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
+-                          int pred_stride, unsigned char *dst_ptr,
+-                          int dst_stride);
+-namespace {
+-class IDCTTest : public ::testing::TestWithParam<idct_fn_t>
+-{
+-  protected:
+-    virtual void SetUp()
+-    {
+-        int i;
+-
+-        UUT = GetParam();
+-        memset(input, 0, sizeof(input));
+-        /* Set up guard blocks */
+-        for(i=0; i<256; i++)
+-            output[i] = ((i&0xF)<4&&(i<64))?0:-1;
+-    }
+-
+-    idct_fn_t UUT;
+-    short input[16];
+-    unsigned char output[256];
+-    unsigned char predict[256];
+-};
+-
+-TEST_P(IDCTTest, TestGuardBlocks)
+-{
+-    int i;
+-
+-    for(i=0; i<256; i++)
+-        if((i&0xF) < 4 && i<64)
+-            EXPECT_EQ(0, output[i]) << i;
+-        else
+-            EXPECT_EQ(255, output[i]);
+-}
+-
+-TEST_P(IDCTTest, TestAllZeros)
+-{
+-    int i;
+-
+-    UUT(input, output, 16, output, 16);
+-
+-    for(i=0; i<256; i++)
+-        if((i&0xF) < 4 && i<64)
+-            EXPECT_EQ(0, output[i]) << "i==" << i;
+-        else
+-            EXPECT_EQ(255, output[i]) << "i==" << i;
+-}
+-
+-TEST_P(IDCTTest, TestAllOnes)
+-{
+-    int i;
+-
+-    input[0] = 4;
+-    UUT(input, output, 16, output, 16);
+-
+-    for(i=0; i<256; i++)
+-        if((i&0xF) < 4 && i<64)
+-            EXPECT_EQ(1, output[i]) << "i==" << i;
+-        else
+-            EXPECT_EQ(255, output[i]) << "i==" << i;
+-}
+-
+-TEST_P(IDCTTest, TestAddOne)
+-{
+-    int i;
+-
+-    for(i=0; i<256; i++)
+-        predict[i] = i;
+-
+-    input[0] = 4;
+-    UUT(input, predict, 16, output, 16);
+-
+-    for(i=0; i<256; i++)
+-        if((i&0xF) < 4 && i<64)
+-            EXPECT_EQ(i+1, output[i]) << "i==" << i;
+-        else
+-            EXPECT_EQ(255, output[i]) << "i==" << i;
+-}
+-
+-TEST_P(IDCTTest, TestWithData)
+-{
+-    int i;
+-
+-    for(i=0; i<16; i++)
+-        input[i] = i;
+-
+-    UUT(input, output, 16, output, 16);
+-
+-    for(i=0; i<256; i++)
+-        if((i&0xF) > 3 || i>63)
+-            EXPECT_EQ(255, output[i]) << "i==" << i;
+-        else if(i == 0)
+-            EXPECT_EQ(11, output[i]) << "i==" << i;
+-        else if(i == 34)
+-            EXPECT_EQ(1, output[i]) << "i==" << i;
+-        else if(i == 2 || i == 17 || i == 32)
+-            EXPECT_EQ(3, output[i]) << "i==" << i;
+-        else
+-            EXPECT_EQ(0, output[i]) << "i==" << i;
+-}
+-}
+diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
+index 3f05efe..41b4f12 100644
+--- a/vp8/common/loopfilter.c
++++ b/vp8/common/loopfilter.c
+@@ -196,18 +196,122 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
+     }
+ }
+ 
+-void vp8_loop_filter_frame
+-(
+-    VP8_COMMON *cm,
+-    MACROBLOCKD *mbd
+-)
++
++void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context,
++                                int 
mb_row, int post_ystride, int post_uvstride, ++ unsigned char *y_ptr, unsigned char *u_ptr, ++ unsigned char *v_ptr) + { +- YV12_BUFFER_CONFIG *post = cm->frame_to_show; ++ int mb_col; ++ int filter_level; + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; +- + FRAME_TYPE frame_type = cm->frame_type; + ++ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) ++ { ++ int skip_lf = (mode_info_context->mbmi.mode != B_PRED && ++ mode_info_context->mbmi.mode != SPLITMV && ++ mode_info_context->mbmi.mb_skip_coeff); ++ ++ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; ++ const int seg = mode_info_context->mbmi.segment_id; ++ const int ref_frame = mode_info_context->mbmi.ref_frame; ++ ++ filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; ++ ++ if (filter_level) ++ { ++ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; ++ lfi.mblim = lfi_n->mblim[filter_level]; ++ lfi.blim = lfi_n->blim[filter_level]; ++ lfi.lim = lfi_n->lim[filter_level]; ++ lfi.hev_thr = lfi_n->hev_thr[hev_index]; ++ ++ if (mb_col > 0) ++ vp8_loop_filter_mbv ++ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); ++ ++ if (!skip_lf) ++ vp8_loop_filter_bv ++ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); ++ ++ /* don't apply across umv border */ ++ if (mb_row > 0) ++ vp8_loop_filter_mbh ++ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); ++ ++ if (!skip_lf) ++ vp8_loop_filter_bh ++ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); ++ } ++ ++ y_ptr += 16; ++ u_ptr += 8; ++ v_ptr += 8; ++ ++ mode_info_context++; /* step to next MB */ ++ } ++ ++} ++ ++void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, ++ int mb_row, int post_ystride, int post_uvstride, ++ unsigned char *y_ptr, unsigned char *u_ptr, ++ unsigned char *v_ptr) ++{ ++ int mb_col; ++ int filter_level; ++ loop_filter_info_n *lfi_n = &cm->lf_info; ++ ++ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) ++ { ++ int skip_lf = (mode_info_context->mbmi.mode != B_PRED && ++ mode_info_context->mbmi.mode != SPLITMV && ++ mode_info_context->mbmi.mb_skip_coeff); ++ ++ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; ++ const int seg = mode_info_context->mbmi.segment_id; ++ const int ref_frame = mode_info_context->mbmi.ref_frame; ++ ++ filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; ++ ++ if (filter_level) ++ { ++ if (mb_col > 0) ++ vp8_loop_filter_simple_mbv ++ (y_ptr, post_ystride, lfi_n->mblim[filter_level]); ++ ++ if (!skip_lf) ++ vp8_loop_filter_simple_bv ++ (y_ptr, post_ystride, lfi_n->blim[filter_level]); ++ ++ /* don't apply across umv border */ ++ if (mb_row > 0) ++ vp8_loop_filter_simple_mbh ++ (y_ptr, post_ystride, lfi_n->mblim[filter_level]); ++ ++ if (!skip_lf) ++ vp8_loop_filter_simple_bh ++ (y_ptr, post_ystride, lfi_n->blim[filter_level]); ++ } ++ ++ y_ptr += 16; ++ u_ptr += 8; ++ v_ptr += 8; ++ ++ mode_info_context++; /* step to next MB */ ++ } ++ ++} ++void vp8_loop_filter_frame(VP8_COMMON *cm, ++ MACROBLOCKD *mbd, ++ int frame_type) ++{ ++ YV12_BUFFER_CONFIG *post = cm->frame_to_show; ++ loop_filter_info_n *lfi_n = &cm->lf_info; ++ loop_filter_info lfi; ++ + int mb_row; + int mb_col; + int mb_rows = cm->mb_rows; +diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h +index 0fa8375..b3af2d6 100644 +--- a/vp8/common/loopfilter.h ++++ b/vp8/common/loopfilter.h +@@ -69,6 +69,7 @@ typedef void loop_filter_uvfunction + /* assorted loopfilter functions which get used elsewhere */ + struct VP8Common; + struct 
macroblockd; ++struct modeinfo; + + void vp8_loop_filter_init(struct VP8Common *cm); + +@@ -76,7 +77,8 @@ void vp8_loop_filter_frame_init(struct VP8Common *cm, + struct macroblockd *mbd, + int default_filt_lvl); + +-void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd); ++void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, ++ int frame_type); + + void vp8_loop_filter_partial_frame(struct VP8Common *cm, + struct macroblockd *mbd, +@@ -89,4 +91,15 @@ void vp8_loop_filter_frame_yonly(struct VP8Common *cm, + void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, + int sharpness_lvl); + ++void vp8_loop_filter_row_normal(struct VP8Common *cm, ++ struct modeinfo *mode_info_context, ++ int mb_row, int post_ystride, int post_uvstride, ++ unsigned char *y_ptr, unsigned char *u_ptr, ++ unsigned char *v_ptr); ++ ++void vp8_loop_filter_row_simple(struct VP8Common *cm, ++ struct modeinfo *mode_info_context, ++ int mb_row, int post_ystride, int post_uvstride, ++ unsigned char *y_ptr, unsigned char *u_ptr, ++ unsigned char *v_ptr); + #endif +diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c +index ca67e91..3dff150 100644 +--- a/vp8/common/mfqe.c ++++ b/vp8/common/mfqe.c +@@ -160,9 +160,9 @@ static void multiframe_quality_enhance_block + vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse)); + vsad = (sse + 32)>>6; + #else +- sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8; +- usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, INT_MAX)+32)>>6; +- vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, INT_MAX)+32)>>6; ++ sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8; ++ usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6; ++ vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6; + #endif + } + else /* if (blksize == 8) */ +@@ -177,16 +177,16 @@ static void multiframe_quality_enhance_block + vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse)); + vsad = (sse + 8)>>4; + #else +- sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6; +- usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, INT_MAX)+8)>>4; +- vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, INT_MAX)+8)>>4; ++ sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6; ++ usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4; ++ vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4; + #endif + } + + actrisk = (actd > act * 5); + +- /* thr = qdiff/8 + log2(act) + log4(qprev) */ +- thr = (qdiff >> 3); ++ /* thr = qdiff/16 + log2(act) + log4(qprev) */ ++ thr = (qdiff >> 4); + while (actd >>= 1) thr++; + while (qprev >>= 2) thr++; + +diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c +new file mode 100644 +index 0000000..6823325 +--- /dev/null ++++ b/vp8/common/mips/dspr2/dequantize_dspr2.c +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. 
++ */
++
++
++#include "vpx_config.h"
++#include "vpx_rtcd.h"
++#include "vpx_mem/vpx_mem.h"
++
++#if HAVE_DSPR2
++void vp8_dequant_idct_add_dspr2(short *input, short *dq,
++                                unsigned char *dest, int stride)
++{
++    int i;
++
++    for (i = 0; i < 16; i++)
++    {
++        input[i] = dq[i] * input[i];
++    }
++
++    vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
++
++    vpx_memset(input, 0, 32);
++
++}
++
++#endif
+diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c
+new file mode 100644
+index 0000000..71fdcd7
+--- /dev/null
++++ b/vp8/common/mips/dspr2/filter_dspr2.c
+@@ -0,0 +1,2823 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++
++
++#include <stdlib.h>
++#include "vpx_rtcd.h"
++#include "vpx_ports/mem.h"
++
++#if HAVE_DSPR2
++#define CROP_WIDTH 256
++unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
++
++static const unsigned short sub_pel_filterss[8][3] =
++{
++    {      0,      0,      0},
++    {      0, 0x0601, 0x7b0c},
++    { 0x0201, 0x0b08, 0x6c24},
++    {      0, 0x0906, 0x5d32},
++    { 0x0303, 0x1010, 0x4d4d},
++    {      0, 0x0609, 0x325d},
++    { 0x0102, 0x080b, 0x246c},
++    {      0, 0x0106, 0x0c7b},
++};
++
++
++static const int sub_pel_filters_int[8][3] =
++{
++    {          0,          0,          0},
++    { 0x0000fffa, 0x007b000c, 0xffff0000},
++    { 0x0002fff5, 0x006c0024, 0xfff80001},
++    { 0x0000fff7, 0x005d0032, 0xfffa0000},
++    { 0x0003fff0, 0x004d004d, 0xfff00003},
++    { 0x0000fffa, 0x0032005d, 0xfff70000},
++    { 0x0001fff8, 0x0024006c, 0xfff50002},
++    { 0x0000ffff, 0x000c007b, 0xfffa0000},
++};
++
++
++static const int sub_pel_filters_inv[8][3] =
++{
++    {          0,          0,          0},
++    { 0xfffa0000, 0x000c007b, 0x0000ffff},
++    { 0xfff50002, 0x0024006c, 0x0001fff8},
++    { 0xfff70000, 0x0032005d, 0x0000fffa},
++    { 0xfff00003, 0x004d004d, 0x0003fff0},
++    { 0xfffa0000, 0x005d0032, 0x0000fff7},
++    { 0xfff80001, 0x006c0024, 0x0002fff5},
++    { 0xffff0000, 0x007b000c, 0x0000fffa},
++};
++
++
++static const int sub_pel_filters_int_tap_4[8][2] =
++{
++    {          0,          0},
++    { 0xfffa007b, 0x000cffff},
++    {          0,          0},
++    { 0xfff7005d, 0x0032fffa},
++    {          0,          0},
++    { 0xfffa0032, 0x005dfff7},
++    {          0,          0},
++    { 0xffff000c, 0x007bfffa},
++};
++
++
++static const int sub_pel_filters_inv_tap_4[8][2] =
++{
++    {          0,          0},
++    { 0x007bfffa, 0xffff000c},
++    {          0,          0},
++    { 0x005dfff7, 0xfffa0032},
++    {          0,          0},
++    { 0x0032fffa, 0xfff7005d},
++    {          0,          0},
++    { 0x000cffff, 0xfffa007b},
++};
++
++inline void prefetch_load(unsigned char *src)
++{
++    __asm__ __volatile__ (
++        "pref   0,  0(%[src])   \n\t"
++        :
++        : [src] "r" (src)
++    );
++}
++
++
++inline void prefetch_store(unsigned char *dst)
++{
++    __asm__ __volatile__ (
++        "pref   1,  0(%[dst])   \n\t"
++        :
++        : [dst] "r" (dst)
++    );
++}
++
++void dsputil_static_init(void)
++{
++    int i;
++
++    for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i;
++
++    for (i = 0; i < CROP_WIDTH; i++)
++    {
++        ff_cropTbl[i] = 0;
++        ff_cropTbl[i + CROP_WIDTH + 256] = 255;
++    }
++}
++
++void vp8_filter_block2d_first_pass_4
++(
++    unsigned char *RESTRICT src_ptr,
++    unsigned char *RESTRICT dst_ptr,
++    unsigned int src_pixels_per_line,
++    unsigned int output_height,
++    int xoffset,
++    int pitch
++)
++{
++    unsigned int i;
++    int Temp1, Temp2, Temp3, Temp4;
++
++    unsigned int vector4a = 64;
++    int
vector1b, vector2b, vector3b; ++ unsigned int tp1, tp2, tn1, tn2; ++ unsigned int p1, p2, p3; ++ unsigned int n1, n2, n3; ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ vector3b = sub_pel_filters_inv[xoffset][2]; ++ ++ /* if (xoffset == 0) we don't need any filtering */ ++ if (vector3b == 0) ++ { ++ for (i = 0; i < output_height; i++) ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + src_pixels_per_line); ++ dst_ptr[0] = src_ptr[0]; ++ dst_ptr[1] = src_ptr[1]; ++ dst_ptr[2] = src_ptr[2]; ++ dst_ptr[3] = src_ptr[3]; ++ ++ /* next row... */ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += 4; ++ } ++ } ++ else ++ { ++ if (vector3b > 65536) ++ { ++ /* 6 tap filter */ ++ ++ vector1b = sub_pel_filters_inv[xoffset][0]; ++ vector2b = sub_pel_filters_inv[xoffset][1]; ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + src_pixels_per_line); ++ ++ for (i = output_height; i--;) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp1], -2(%[src_ptr]) \n\t" ++ "ulw %[tp2], 2(%[src_ptr]) \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p1], %[tp2] \n\t" ++ "balign %[tp2], %[tp1], 3 \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" ++ ++ /* odd 1. pixel */ ++ "ulw %[tn2], 3(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "preceu.ph.qbr %[n3], %[tn2] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n1], %[tn2] \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ /* clamp */ ++ "lbux %[tp1], %[Temp1](%[cm]) \n\t" ++ "lbux %[tn1], %[Temp2](%[cm]) \n\t" ++ "lbux %[tp2], %[Temp3](%[cm]) \n\t" ++ "lbux %[n2], %[Temp4](%[cm]) \n\t" ++ ++ /* store bytes */ ++ "sb %[tp1], 0(%[dst_ptr]) \n\t" ++ "sb %[tn1], 1(%[dst_ptr]) \n\t" ++ "sb %[tp2], 2(%[dst_ptr]) \n\t" ++ "sb %[n2], 3(%[dst_ptr]) \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), ++ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), ++ [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2), ++ [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), ++ [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* Next row... 
*/ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += pitch; ++ } ++ } ++ else ++ { ++ /* 4 tap filter */ ++ ++ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; ++ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; ++ ++ for (i = output_height; i--;) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp1], -1(%[src_ptr]) \n\t" ++ "ulw %[tp2], 3(%[src_ptr]) \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 1. pixel */ ++ "srl %[tn1], %[tp2], 8 \n\t" ++ "balign %[tp2], %[tp1], 3 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "preceu.ph.qbr %[n3], %[tn1] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ ++ /* odd 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ /* clamp and store results */ ++ "lbux %[tp1], %[Temp1](%[cm]) \n\t" ++ "lbux %[tn1], %[Temp2](%[cm]) \n\t" ++ "lbux %[tp2], %[Temp3](%[cm]) \n\t" ++ "sb %[tp1], 0(%[dst_ptr]) \n\t" ++ "sb %[tn1], 1(%[dst_ptr]) \n\t" ++ "lbux %[n2], %[Temp4](%[cm]) \n\t" ++ "sb %[tp2], 2(%[dst_ptr]) \n\t" ++ "sb %[n2], 3(%[dst_ptr]) \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), ++ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), ++ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), ++ [src_ptr] "r" (src_ptr) ++ ); ++ /* Next row... */ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += pitch; ++ } ++ } ++ } ++} ++ ++void vp8_filter_block2d_first_pass_8_all ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT dst_ptr, ++ unsigned int src_pixels_per_line, ++ unsigned int output_height, ++ int xoffset, ++ int pitch ++) ++{ ++ unsigned int i; ++ int Temp1, Temp2, Temp3, Temp4; ++ ++ unsigned int vector4a = 64; ++ unsigned int vector1b, vector2b, vector3b; ++ unsigned int tp1, tp2, tn1, tn2; ++ unsigned int p1, p2, p3, p4; ++ unsigned int n1, n2, n3, n4; ++ ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ /* if (xoffset == 0) we don't need any filtering */ ++ if (xoffset == 0) ++ { ++ for (i = 0; i < output_height; i++) ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + src_pixels_per_line); ++ ++ dst_ptr[0] = src_ptr[0]; ++ dst_ptr[1] = src_ptr[1]; ++ dst_ptr[2] = src_ptr[2]; ++ dst_ptr[3] = src_ptr[3]; ++ dst_ptr[4] = src_ptr[4]; ++ dst_ptr[5] = src_ptr[5]; ++ dst_ptr[6] = src_ptr[6]; ++ dst_ptr[7] = src_ptr[7]; ++ ++ /* next row... 
*/ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += 8; ++ } ++ } ++ else ++ { ++ vector3b = sub_pel_filters_inv[xoffset][2]; ++ ++ if (vector3b > 65536) ++ { ++ /* 6 tap filter */ ++ ++ vector1b = sub_pel_filters_inv[xoffset][0]; ++ vector2b = sub_pel_filters_inv[xoffset][1]; ++ ++ for (i = output_height; i--;) ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + src_pixels_per_line); ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp1], -2(%[src_ptr]) \n\t" ++ "ulw %[tp2], 2(%[src_ptr]) \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p1], %[tp2] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" ++ ++ "balign %[tp2], %[tp1], 3 \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ "ulw %[tn2], 3(%[src_ptr]) \n\t" ++ ++ /* odd 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "preceu.ph.qbr %[n3], %[tn2] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" ++ ++ /* odd 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n1], %[tn2] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" ++ "ulw %[tp1], 6(%[src_ptr]) \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p2], %[tp1] \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), ++ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), ++ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[0] = cm[Temp1]; ++ dst_ptr[1] = cm[Temp2]; ++ dst_ptr[2] = cm[Temp3]; ++ dst_ptr[3] = cm[Temp4]; ++ ++ /* next 4 pixels */ ++ __asm__ __volatile__ ( ++ /* even 3. pixel */ ++ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" ++ ++ /* even 4. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p4], %[tp1] \n\t" ++ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" ++ ++ "ulw %[tn1], 7(%[src_ptr]) \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 3. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n2], %[tn1] \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ ++ /* odd 4. 
pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n4], %[tn1] \n\t" ++ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tn1] "=&r" (tn1), [n2] "=&r" (n2), ++ [p4] "=&r" (p4), [n4] "=&r" (n4), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2), ++ [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1), ++ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), ++ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[4] = cm[Temp1]; ++ dst_ptr[5] = cm[Temp2]; ++ dst_ptr[6] = cm[Temp3]; ++ dst_ptr[7] = cm[Temp4]; ++ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += pitch; ++ } ++ } ++ else ++ { ++ /* 4 tap filter */ ++ ++ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; ++ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; ++ ++ for (i = output_height; i--;) ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + src_pixels_per_line); ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp1], -1(%[src_ptr]) \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ ++ "ulw %[tp2], 3(%[src_ptr]) \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "preceu.ph.qbl %[p4], %[tp2] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ "balign %[tp2], %[tp1], 3 \n\t" ++ ++ /* odd 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ ++ "ulw %[tn2], 4(%[src_ptr]) \n\t" ++ ++ /* odd 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbr %[n3], %[tn2] \n\t" ++ "preceu.ph.qbl %[n4], %[tn2] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "ulw %[tp1], 7(%[src_ptr]) \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), ++ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), ++ [p3] "=&r" (p3), [p4] "=&r" (p4), [n1] "=&r" (n1), ++ [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[0] = cm[Temp1]; ++ dst_ptr[1] = cm[Temp2]; ++ dst_ptr[2] = cm[Temp3]; ++ dst_ptr[3] = cm[Temp4]; ++ ++ /* next 4 pixels */ ++ __asm__ __volatile__ ( ++ /* even 3. pixel */ ++ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" ++ ++ /* even 4. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbr %[p2], %[tp1] \n\t" ++ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 3. 
pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" ++ "ulw %[tn1], 8(%[src_ptr]) \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ ++ /* odd 4. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbr %[n2], %[tn1] \n\t" ++ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4), ++ [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr), ++ [n3] "r" (n3), [n4] "r" (n4) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[4] = cm[Temp1]; ++ dst_ptr[5] = cm[Temp2]; ++ dst_ptr[6] = cm[Temp3]; ++ dst_ptr[7] = cm[Temp4]; ++ ++ /* next row... */ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += pitch; ++ } ++ } ++ } ++} ++ ++ ++void vp8_filter_block2d_first_pass16_6tap ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT dst_ptr, ++ unsigned int src_pixels_per_line, ++ unsigned int output_height, ++ int xoffset, ++ int pitch ++) ++{ ++ unsigned int i; ++ int Temp1, Temp2, Temp3, Temp4; ++ ++ unsigned int vector4a; ++ unsigned int vector1b, vector2b, vector3b; ++ unsigned int tp1, tp2, tn1, tn2; ++ unsigned int p1, p2, p3, p4; ++ unsigned int n1, n2, n3, n4; ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ vector1b = sub_pel_filters_inv[xoffset][0]; ++ vector2b = sub_pel_filters_inv[xoffset][1]; ++ vector3b = sub_pel_filters_inv[xoffset][2]; ++ vector4a = 64; ++ ++ for (i = output_height; i--;) ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + src_pixels_per_line); ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp1], -2(%[src_ptr]) \n\t" ++ "ulw %[tp2], 2(%[src_ptr]) \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p1], %[tp2] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" ++ ++ "balign %[tp2], %[tp1], 3 \n\t" ++ "ulw %[tn2], 3(%[src_ptr]) \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "preceu.ph.qbr %[n3], %[tn2] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" ++ ++ /* odd 2. 
pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n1], %[tn2] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" ++ "ulw %[tp1], 6(%[src_ptr]) \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p2], %[tp1] \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), ++ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), ++ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[0] = cm[Temp1]; ++ dst_ptr[1] = cm[Temp2]; ++ dst_ptr[2] = cm[Temp3]; ++ dst_ptr[3] = cm[Temp4]; ++ ++ /* next 4 pixels */ ++ __asm__ __volatile__ ( ++ /* even 3. pixel */ ++ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" ++ ++ /* even 4. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p4], %[tp1] \n\t" ++ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" ++ "ulw %[tn1], 7(%[src_ptr]) \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 3. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n2], %[tn1] \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ ++ /* odd 4. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n4], %[tn1] \n\t" ++ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" ++ "ulw %[tp2], 10(%[src_ptr]) \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp2] \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2), ++ [p4] "=&r" (p4), [n4] "=&r" (n4), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1), ++ [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b), ++ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[4] = cm[Temp1]; ++ dst_ptr[5] = cm[Temp2]; ++ dst_ptr[6] = cm[Temp3]; ++ dst_ptr[7] = cm[Temp4]; ++ ++ /* next 4 pixels */ ++ __asm__ __volatile__ ( ++ /* even 5. pixel */ ++ "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" ++ ++ /* even 6. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t" ++ ++ "ulw %[tn1], 11(%[src_ptr]) \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 5. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tn1] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ ++ /* odd 6. 
pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n3], %[tn1] \n\t" ++ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t" ++ "ulw %[tp1], 14(%[src_ptr]) \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[p4], %[tp1] \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), ++ [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2), ++ [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr), ++ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b) ++ ); ++ ++ /* clamp and store results */ ++ dst_ptr[8] = cm[Temp1]; ++ dst_ptr[9] = cm[Temp2]; ++ dst_ptr[10] = cm[Temp3]; ++ dst_ptr[11] = cm[Temp4]; ++ ++ /* next 4 pixels */ ++ __asm__ __volatile__ ( ++ /* even 7. pixel */ ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t" ++ ++ /* even 8. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t" ++ "ulw %[tn1], 15(%[src_ptr]) \n\t" ++ "extp %[Temp1], $ac3, 9 \n\t" ++ ++ /* odd 7. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "preceu.ph.qbr %[n4], %[tn1] \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t" ++ "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t" ++ "extp %[Temp3], $ac2, 9 \n\t" ++ ++ /* odd 8. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "preceu.ph.qbl %[n2], %[tn1] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ "extp %[Temp4], $ac2, 9 \n\t" ++ ++ /* clamp and store results */ ++ "lbux %[tp1], %[Temp1](%[cm]) \n\t" ++ "lbux %[tn1], %[Temp2](%[cm]) \n\t" ++ "lbux %[p2], %[Temp3](%[cm]) \n\t" ++ "sb %[tp1], 12(%[dst_ptr]) \n\t" ++ "sb %[tn1], 13(%[dst_ptr]) \n\t" ++ "lbux %[n2], %[Temp4](%[cm]) \n\t" ++ "sb %[p2], 14(%[dst_ptr]) \n\t" ++ "sb %[n2], 15(%[dst_ptr]) \n\t" ++ ++ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1), ++ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3), ++ [n3] "r" (n3), [src_ptr] "r" (src_ptr), ++ [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) ++ ); ++ ++ src_ptr += src_pixels_per_line; ++ dst_ptr += pitch; ++ } ++} ++ ++ ++void vp8_filter_block2d_first_pass16_0 ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT output_ptr, ++ unsigned int src_pixels_per_line ++) ++{ ++ int Temp1, Temp2, Temp3, Temp4; ++ int i; ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_store(output_ptr + 32); ++ ++ /* copy memory from src buffer to dst buffer */ ++ for (i = 0; i < 7; i++) ++ { ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "ulw %[Temp3], 8(%[src_ptr]) \n\t" ++ "ulw %[Temp4], 12(%[src_ptr]) \n\t" ++ "sw %[Temp1], 0(%[output_ptr]) \n\t" ++ "sw %[Temp2], 4(%[output_ptr]) \n\t" ++ "sw 
%[Temp3], 8(%[output_ptr]) \n\t" ++ "sw %[Temp4], 12(%[output_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), ++ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) ++ : [src_pixels_per_line] "r" (src_pixels_per_line), ++ [output_ptr] "r" (output_ptr) ++ ); ++ ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "ulw %[Temp3], 8(%[src_ptr]) \n\t" ++ "ulw %[Temp4], 12(%[src_ptr]) \n\t" ++ "sw %[Temp1], 16(%[output_ptr]) \n\t" ++ "sw %[Temp2], 20(%[output_ptr]) \n\t" ++ "sw %[Temp3], 24(%[output_ptr]) \n\t" ++ "sw %[Temp4], 28(%[output_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), ++ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) ++ : [src_pixels_per_line] "r" (src_pixels_per_line), ++ [output_ptr] "r" (output_ptr) ++ ); ++ ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "ulw %[Temp3], 8(%[src_ptr]) \n\t" ++ "ulw %[Temp4], 12(%[src_ptr]) \n\t" ++ "sw %[Temp1], 32(%[output_ptr]) \n\t" ++ "sw %[Temp2], 36(%[output_ptr]) \n\t" ++ "sw %[Temp3], 40(%[output_ptr]) \n\t" ++ "sw %[Temp4], 44(%[output_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), ++ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) ++ : [src_pixels_per_line] "r" (src_pixels_per_line), ++ [output_ptr] "r" (output_ptr) ++ ); ++ ++ output_ptr += 48; ++ } ++} ++ ++ ++void vp8_filter_block2d_first_pass16_4tap ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT output_ptr, ++ unsigned int src_pixels_per_line, ++ unsigned int output_width, ++ unsigned int output_height, ++ int xoffset, ++ int yoffset, ++ unsigned char *RESTRICT dst_ptr, ++ int pitch ++) ++{ ++ unsigned int i, j; ++ int Temp1, Temp2, Temp3, Temp4; ++ ++ unsigned int vector4a; ++ int vector1b, vector2b; ++ unsigned int tp1, tp2, tp3, tn1; ++ unsigned int p1, p2, p3; ++ unsigned int n1, n2, n3; ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ vector4a = 64; ++ ++ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; ++ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; ++ ++ /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */ ++ if (yoffset == 0) ++ { ++ output_height -= 5; ++ src_ptr += (src_pixels_per_line + src_pixels_per_line); ++ ++ for (i = output_height; i--;) ++ { ++ __asm__ __volatile__ ( ++ "ulw %[tp3], -1(%[src_ptr]) \n\t" ++ : [tp3] "=&r" (tp3) ++ : [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* processing 4 adjacent pixels */ ++ for (j = 0; j < 16; j += 4) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp2], 3(%[src_ptr]) \n\t" ++ "move %[tp1], %[tp3] \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "mthi $0, $ac3 \n\t" ++ "move %[tp3], %[tp2] \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "mthi $0, $ac2 \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "extr.w %[Temp1], $ac3, 7 \n\t" ++ ++ /* odd 1. 
pixel */ ++ "ulw %[tn1], 4(%[src_ptr]) \n\t" ++ "balign %[tp2], %[tp1], 3 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "mthi $0, $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "preceu.ph.qbr %[n3], %[tn1] \n\t" ++ "extr.w %[Temp3], $ac2, 7 \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ ++ /* odd 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "mthi $0, $ac2 \n\t" ++ "extr.w %[Temp2], $ac3, 7 \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "extr.w %[Temp4], $ac2, 7 \n\t" ++ ++ /* clamp and store results */ ++ "lbux %[tp1], %[Temp1](%[cm]) \n\t" ++ "lbux %[tn1], %[Temp2](%[cm]) \n\t" ++ "lbux %[tp2], %[Temp3](%[cm]) \n\t" ++ "sb %[tp1], 0(%[dst_ptr]) \n\t" ++ "sb %[tn1], 1(%[dst_ptr]) \n\t" ++ "lbux %[n2], %[Temp4](%[cm]) \n\t" ++ "sb %[tp2], 2(%[dst_ptr]) \n\t" ++ "sb %[n2], 3(%[dst_ptr]) \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3), ++ [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2), ++ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ src_ptr += 4; ++ } ++ ++ /* Next row... */ ++ src_ptr += src_pixels_per_line - 16; ++ dst_ptr += pitch; ++ } ++ } ++ else ++ { ++ for (i = output_height; i--;) ++ { ++ /* processing 4 adjacent pixels */ ++ for (j = 0; j < 16; j += 4) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "ulw %[tp1], -1(%[src_ptr]) \n\t" ++ "ulw %[tp2], 3(%[src_ptr]) \n\t" ++ ++ /* even 1. pixel */ ++ "mtlo %[vector4a], $ac3 \n\t" ++ "mthi $0, $ac3 \n\t" ++ "preceu.ph.qbr %[p1], %[tp1] \n\t" ++ "preceu.ph.qbl %[p2], %[tp1] \n\t" ++ "preceu.ph.qbr %[p3], %[tp2] \n\t" ++ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" ++ ++ /* even 2. pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "mthi $0, $ac2 \n\t" ++ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" ++ "extr.w %[Temp1], $ac3, 7 \n\t" ++ ++ /* odd 1. pixel */ ++ "ulw %[tn1], 4(%[src_ptr]) \n\t" ++ "balign %[tp2], %[tp1], 3 \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "mthi $0, $ac3 \n\t" ++ "preceu.ph.qbr %[n1], %[tp2] \n\t" ++ "preceu.ph.qbl %[n2], %[tp2] \n\t" ++ "preceu.ph.qbr %[n3], %[tn1] \n\t" ++ "extr.w %[Temp3], $ac2, 7 \n\t" ++ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" ++ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" ++ ++ /* odd 2. 
pixel */ ++ "mtlo %[vector4a], $ac2 \n\t" ++ "mthi $0, $ac2 \n\t" ++ "extr.w %[Temp2], $ac3, 7 \n\t" ++ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" ++ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" ++ "extr.w %[Temp4], $ac2, 7 \n\t" ++ ++ /* clamp and store results */ ++ "lbux %[tp1], %[Temp1](%[cm]) \n\t" ++ "lbux %[tn1], %[Temp2](%[cm]) \n\t" ++ "lbux %[tp2], %[Temp3](%[cm]) \n\t" ++ "sb %[tp1], 0(%[output_ptr]) \n\t" ++ "sb %[tn1], 1(%[output_ptr]) \n\t" ++ "lbux %[n2], %[Temp4](%[cm]) \n\t" ++ "sb %[tp2], 2(%[output_ptr]) \n\t" ++ "sb %[n2], 3(%[output_ptr]) \n\t" ++ ++ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), ++ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), ++ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), ++ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector4a] "r" (vector4a), [cm] "r" (cm), ++ [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr) ++ ); ++ ++ src_ptr += 4; ++ } ++ ++ /* next row... */ ++ src_ptr += src_pixels_per_line; ++ output_ptr += output_width; ++ } ++ } ++} ++ ++ ++void vp8_filter_block2d_second_pass4 ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT output_ptr, ++ int output_pitch, ++ int yoffset ++) ++{ ++ unsigned int i; ++ ++ int Temp1, Temp2, Temp3, Temp4; ++ unsigned int vector1b, vector2b, vector3b, vector4a; ++ ++ unsigned char src_ptr_l2; ++ unsigned char src_ptr_l1; ++ unsigned char src_ptr_0; ++ unsigned char src_ptr_r1; ++ unsigned char src_ptr_r2; ++ unsigned char src_ptr_r3; ++ ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ vector4a = 64; ++ ++ /* load filter coefficients */ ++ vector1b = sub_pel_filterss[yoffset][0]; ++ vector2b = sub_pel_filterss[yoffset][2]; ++ vector3b = sub_pel_filterss[yoffset][1]; ++ ++ if (vector1b) ++ { ++ /* 6 tap filter */ ++ ++ for (i = 2; i--;) ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr); ++ ++ /* do not allow compiler to reorder instructions */ ++ __asm__ __volatile__ ( ++ ".set noreorder \n\t" ++ : ++ : ++ ); ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" ++ 
"lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp3], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac1, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), ++ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ ++ output_ptr += output_pitch; ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 
\n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp3], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac1, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), ++ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ ++ src_ptr += 8; ++ output_ptr += output_pitch; ++ } ++ } ++ else ++ { ++ /* 4 tap filter */ ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr); ++ ++ for (i = 2; i--;) ++ { ++ /* do not allow compiler to reorder instructions */ ++ __asm__ __volatile__ ( ++ ".set noreorder \n\t" ++ : ++ : ++ ); ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp3], $ac0, 9 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr 
$ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac1, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ ++ output_ptr += output_pitch; ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp3], $ac0, 9 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac1, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ ++ src_ptr += 8; ++ output_ptr += output_pitch; ++ } ++ } ++} ++ ++ ++void vp8_filter_block2d_second_pass_8 ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT output_ptr, ++ int output_pitch, ++ unsigned int output_height, ++ unsigned int output_width, ++ unsigned int yoffset ++) ++{ ++ unsigned int i; ++ ++ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; ++ unsigned int vector1b, vector2b, vector3b, vector4a; ++ ++ unsigned char src_ptr_l2; ++ unsigned char src_ptr_l1; ++ 
unsigned char src_ptr_0; ++ unsigned char src_ptr_r1; ++ unsigned char src_ptr_r2; ++ unsigned char src_ptr_r3; ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ vector4a = 64; ++ ++ vector1b = sub_pel_filterss[yoffset][0]; ++ vector2b = sub_pel_filterss[yoffset][2]; ++ vector3b = sub_pel_filterss[yoffset][1]; ++ ++ if (vector1b) ++ { ++ /* 6 tap filter */ ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr); ++ ++ for (i = output_height; i--;) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -13(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp3], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), ++ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* apply filter with vectors pairs */ ++ 
__asm__ __volatile__ ( ++ "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac1, 9 \n\t" ++ ++ "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp5], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp6], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp7], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp8], $ac1, 9 \n\t" ++ ++ : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5), ++ [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), ++ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ output_ptr[4] = cm[Temp5]; ++ output_ptr[5] = cm[Temp6]; ++ output_ptr[6] = cm[Temp7]; ++ output_ptr[7] = cm[Temp8]; ++ ++ src_ptr += 8; ++ output_ptr += output_pitch; ++ } ++ } ++ else ++ { ++ /* 4 tap filter */ ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr); ++ ++ 
for (i = output_height; i--;) ++ { ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) ++ ); ++ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ : [Temp1] "=r" (Temp1), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) ++ ); ++ ++ src_ptr_l1 = src_ptr[-6]; ++ src_ptr_0 = src_ptr[2]; ++ src_ptr_r1 = src_ptr[10]; ++ src_ptr_r2 = src_ptr[18]; ++ ++ __asm__ __volatile__ ( ++ "mtlo %[vector4a], $ac0 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ : [Temp2] "=r" (Temp2) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), ++ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), ++ [vector4a] "r" (vector4a) ++ ); ++ ++ src_ptr_l1 = src_ptr[-5]; ++ src_ptr_0 = src_ptr[3]; ++ src_ptr_r1 = src_ptr[11]; ++ src_ptr_r2 = src_ptr[19]; ++ ++ __asm__ __volatile__ ( ++ "mtlo %[vector4a], $ac1 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp3], $ac0, 9 \n\t" ++ ++ : [Temp3] "=r" (Temp3) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), ++ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), ++ [vector4a] "r" (vector4a) ++ ); ++ ++ src_ptr_l1 = src_ptr[-4]; ++ src_ptr_0 = src_ptr[4]; ++ src_ptr_r1 = src_ptr[12]; ++ src_ptr_r2 = src_ptr[20]; ++ ++ __asm__ __volatile__ ( ++ "mtlo %[vector4a], $ac2 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp4], $ac1, 9 \n\t" ++ ++ : [Temp4] "=r" (Temp4) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), ++ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), ++ [vector4a] "r" (vector4a) ++ ); ++ ++ src_ptr_l1 = src_ptr[-3]; ++ src_ptr_0 = src_ptr[5]; ++ 
src_ptr_r1 = src_ptr[13]; ++ src_ptr_r2 = src_ptr[21]; ++ ++ __asm__ __volatile__ ( ++ "mtlo %[vector4a], $ac3 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp5], $ac2, 9 \n\t" ++ ++ : [Temp5] "=&r" (Temp5) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), ++ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), ++ [vector4a] "r" (vector4a) ++ ); ++ ++ src_ptr_l1 = src_ptr[-2]; ++ src_ptr_0 = src_ptr[6]; ++ src_ptr_r1 = src_ptr[14]; ++ src_ptr_r2 = src_ptr[22]; ++ ++ __asm__ __volatile__ ( ++ "mtlo %[vector4a], $ac0 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp6], $ac3, 9 \n\t" ++ ++ : [Temp6] "=r" (Temp6) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), ++ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), ++ [vector4a] "r" (vector4a) ++ ); ++ ++ src_ptr_l1 = src_ptr[-1]; ++ src_ptr_0 = src_ptr[7]; ++ src_ptr_r1 = src_ptr[15]; ++ src_ptr_r2 = src_ptr[23]; ++ ++ __asm__ __volatile__ ( ++ "mtlo %[vector4a], $ac1 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp7], $ac0, 9 \n\t" ++ "extp %[Temp8], $ac1, 9 \n\t" ++ ++ : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), ++ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), ++ [vector4a] "r" (vector4a) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ output_ptr[4] = cm[Temp5]; ++ output_ptr[5] = cm[Temp6]; ++ output_ptr[6] = cm[Temp7]; ++ output_ptr[7] = cm[Temp8]; ++ ++ src_ptr += 8; ++ output_ptr += output_pitch; ++ } ++ } ++} ++ ++ ++void vp8_filter_block2d_second_pass161 ++( ++ unsigned char *RESTRICT src_ptr, ++ unsigned char *RESTRICT output_ptr, ++ int output_pitch, ++ const unsigned short *vp8_filter ++) ++{ ++ unsigned int i, j; ++ ++ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; ++ unsigned int vector4a; ++ unsigned int vector1b, vector2b, vector3b; ++ ++ unsigned char src_ptr_l2; ++ unsigned char src_ptr_l1; ++ unsigned char src_ptr_0; ++ unsigned char src_ptr_r1; ++ unsigned char src_ptr_r2; ++ unsigned char src_ptr_r3; ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ vector4a = 64; ++ ++ vector1b = vp8_filter[0]; ++ vector2b = vp8_filter[2]; ++ vector3b = vp8_filter[1]; ++ ++ if (vector1b == 0) ++ { ++ /* 4 tap filter */ ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + 16); ++ ++ for (i = 16; i--;) ++ { ++ /* unrolling for loop */ ++ for (j = 0; j < 16; j += 8) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "append %[src_ptr_0], 
%[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp2], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp3], $ac1, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "extp %[Temp4], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp5], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp6], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp7], $ac1, 9 \n\t" ++ ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp8], $ac3, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), ++ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), ++ [Temp7] "=&r" (Temp7), [Temp8]
"=r" (Temp8), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) ++ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), ++ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[j] = cm[Temp1]; ++ output_ptr[j + 1] = cm[Temp2]; ++ output_ptr[j + 2] = cm[Temp3]; ++ output_ptr[j + 3] = cm[Temp4]; ++ output_ptr[j + 4] = cm[Temp5]; ++ output_ptr[j + 5] = cm[Temp6]; ++ output_ptr[j + 6] = cm[Temp7]; ++ output_ptr[j + 7] = cm[Temp8]; ++ ++ src_ptr += 8; ++ } ++ ++ output_ptr += output_pitch; ++ } ++ } ++ else ++ { ++ /* 6 tap filter */ ++ ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + 16); ++ ++ /* unroll for loop */ ++ for (i = 16; i--;) ++ { ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp2], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp3], $ac1, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" ++
"lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "extp %[Temp4], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp5], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp6], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp7], $ac1, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp8], $ac3, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), ++ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), ++ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), ++ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ /* clamp and store results */ ++ output_ptr[0] = cm[Temp1]; ++ output_ptr[1] = cm[Temp2]; ++ output_ptr[2] = cm[Temp3]; ++ output_ptr[3] = cm[Temp4]; ++ output_ptr[4] = cm[Temp5]; ++ output_ptr[5] = cm[Temp6]; ++ output_ptr[6] = cm[Temp7]; ++ output_ptr[7] = cm[Temp8]; ++ ++ /* apply filter with vectors pairs */ ++ __asm__ __volatile__ ( ++ "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t" ++ "lbu 
%[src_ptr_r2], 40(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp1], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 42(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp2], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp3], $ac1, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac2 \n\t" ++ "extp %[Temp4], $ac3, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac0 \n\t" ++ "extp %[Temp5], $ac2, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac0, 
%[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 30(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac1 \n\t" ++ "extp %[Temp6], $ac0, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" ++ ++ "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t" ++ "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t" ++ "mtlo %[vector4a], $ac3 \n\t" ++ "extp %[Temp7], $ac1, 9 \n\t" ++ ++ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" ++ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" ++ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" ++ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" ++ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" ++ "extp %[Temp8], $ac3, 9 \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), ++ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), ++ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), ++ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), ++ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), ++ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), ++ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) ++ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), ++ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), ++ [src_ptr] "r" (src_ptr) ++ ); ++ ++ src_ptr += 16; ++ output_ptr[8] = cm[Temp1]; ++ output_ptr[9] = cm[Temp2]; ++ output_ptr[10] = cm[Temp3]; ++ output_ptr[11] = cm[Temp4]; ++ output_ptr[12] = cm[Temp5]; ++ output_ptr[13] = cm[Temp6]; ++ output_ptr[14] = cm[Temp7]; ++ output_ptr[15] = cm[Temp8]; ++ ++ output_ptr += output_pitch; ++ } ++ } ++} ++ ++ ++void vp8_sixtap_predict4x4_dspr2 ++( ++ unsigned char *RESTRICT src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char *RESTRICT dst_ptr, ++ int dst_pitch ++) ++{ ++ unsigned char FData[9 * 4]; /* Temp data buffer used in filtering */ ++ unsigned int pos = 16; ++ ++ /* bit position for extract from acc */ ++ __asm__ __volatile__ ( ++ "wrdsp %[pos], 1 \n\t" ++ : ++ : [pos] "r" (pos) ++ ); ++ ++ if (yoffset) ++ { ++ /* First filter 1-D horizontally... */ ++ vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData, ++ src_pixels_per_line, 9, xoffset, 4); ++ /* then filter vertically...
*/ ++ vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset); ++ } ++ else ++ /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ ++ vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line, ++ 4, xoffset, dst_pitch); ++} ++ ++ ++void vp8_sixtap_predict8x8_dspr2 ++( ++ unsigned char *RESTRICT src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char *RESTRICT dst_ptr, ++ int dst_pitch ++) ++{ ++ ++ unsigned char FData[13 * 8]; /* Temp data buffer used in filtering */ ++ unsigned int pos, Temp1, Temp2; ++ ++ pos = 16; ++ ++ /* bit position for extract from acc */ ++ __asm__ __volatile__ ( ++ "wrdsp %[pos], 1 \n\t" ++ : ++ : [pos] "r" (pos) ++ ); ++ ++ if (yoffset) ++ { ++ ++ src_ptr = src_ptr - (2 * src_pixels_per_line); ++ ++ if (xoffset) ++ /* filter 1-D horizontally... */ ++ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, ++ 13, xoffset, 8); ++ ++ else ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + 2 * src_pixels_per_line); ++ ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 0(%[FData]) \n\t" ++ "sw %[Temp2], 4(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 8(%[FData]) \n\t" ++ "sw %[Temp2], 12(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 16(%[FData]) \n\t" ++ "sw %[Temp2], 20(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 24(%[FData]) \n\t" ++ "sw %[Temp2], 28(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 32(%[FData]) \n\t" ++ "sw %[Temp2], 36(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 40(%[FData]) \n\t" ++ "sw %[Temp2], 44(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 48(%[FData]) \n\t" ++ "sw %[Temp2], 52(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 56(%[FData]) \n\t" ++ "sw %[Temp2], 60(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 64(%[FData]) \n\t" ++ "sw %[Temp2], 68(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 72(%[FData]) \n\t" ++ "sw %[Temp2], 76(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 80(%[FData]) \n\t" ++ "sw %[Temp2], 84(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 88(%[FData])
\n\t" ++ "sw %[Temp2], 92(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 96(%[FData]) \n\t" ++ "sw %[Temp2], 100(%[FData]) \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) ++ : [FData] "r" (FData), [src_ptr] "r" (src_ptr), ++ [src_pixels_per_line] "r" (src_pixels_per_line) ++ ); ++ } ++ ++ /* filter vertically... */ ++ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset); ++ } ++ ++ /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ ++ else ++ { ++ if (xoffset) ++ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, ++ 8, xoffset, dst_pitch); ++ ++ else ++ { ++ /* copy from src buffer to dst buffer */ ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 0(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 4(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 8(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 12(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 16(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 20(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 24(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 28(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 32(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 36(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 40(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 44(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 48(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 52(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 56(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 60(%[dst_ptr]) \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) ++ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), ++ [src_pixels_per_line] "r" (src_pixels_per_line) ++ ); ++ } ++ } ++} ++ ++ ++void vp8_sixtap_predict8x4_dspr2 ++( ++ unsigned char *RESTRICT src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char *RESTRICT dst_ptr, ++ int dst_pitch ++) ++{ ++ unsigned char FData[9 * 8]; /* Temp data buffer used in filtering */ ++ unsigned int pos, Temp1, Temp2; ++ ++ pos = 16; ++ ++ /* bit position for extract from acc */ ++ __asm__ __volatile__ ( ++ "wrdsp %[pos], 1 \n\t" ++ : ++ : [pos] "r" (pos) ++ ); ++ ++ if (yoffset) ++ { ++ ++ src_ptr = src_ptr - (2 * src_pixels_per_line); ++ ++ if (xoffset) ++ /* filter 1-D horizontally...
*/ ++ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, ++ 9, xoffset, 8); ++ ++ else ++ { ++ /* prefetch src_ptr data to cache memory */ ++ prefetch_load(src_ptr + 2 * src_pixels_per_line); ++ ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 0(%[FData]) \n\t" ++ "sw %[Temp2], 4(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 8(%[FData]) \n\t" ++ "sw %[Temp2], 12(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 16(%[FData]) \n\t" ++ "sw %[Temp2], 20(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 24(%[FData]) \n\t" ++ "sw %[Temp2], 28(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 32(%[FData]) \n\t" ++ "sw %[Temp2], 36(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 40(%[FData]) \n\t" ++ "sw %[Temp2], 44(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 48(%[FData]) \n\t" ++ "sw %[Temp2], 52(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 56(%[FData]) \n\t" ++ "sw %[Temp2], 60(%[FData]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 64(%[FData]) \n\t" ++ "sw %[Temp2], 68(%[FData]) \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) ++ : [FData] "r" (FData), [src_ptr] "r" (src_ptr), ++ [src_pixels_per_line] "r" (src_pixels_per_line) ++ ); ++ } ++ ++ /* filter vertically...
*/ ++ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset); ++ } ++ ++ /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ ++ else ++ { ++ if (xoffset) ++ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, ++ 4, xoffset, dst_pitch); ++ ++ else ++ { ++ /* copy from src buffer to dst buffer */ ++ __asm__ __volatile__ ( ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 0(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 4(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 8(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 12(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 16(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 20(%[dst_ptr]) \n\t" ++ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" ++ ++ "ulw %[Temp1], 0(%[src_ptr]) \n\t" ++ "ulw %[Temp2], 4(%[src_ptr]) \n\t" ++ "sw %[Temp1], 24(%[dst_ptr]) \n\t" ++ "sw %[Temp2], 28(%[dst_ptr]) \n\t" ++ ++ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) ++ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), ++ [src_pixels_per_line] "r" (src_pixels_per_line) ++ ); ++ } ++ } ++} ++ ++ ++void vp8_sixtap_predict16x16_dspr2 ++( ++ unsigned char *RESTRICT src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char *RESTRICT dst_ptr, ++ int dst_pitch ++) ++{ ++ const unsigned short *VFilter; ++ unsigned char FData[21 * 16]; /* Temp data buffer used in filtering */ ++ unsigned int pos; ++ ++ VFilter = sub_pel_filterss[yoffset]; ++ ++ pos = 16; ++ ++ /* bit position for extract from acc */ ++ __asm__ __volatile__ ( ++ "wrdsp %[pos], 1 \n\t" ++ : ++ : [pos] "r" (pos) ++ ); ++ ++ if (yoffset) ++ { ++ ++ src_ptr = src_ptr - (2 * src_pixels_per_line); ++ ++ switch (xoffset) ++ { ++ /* filter 1-D horizontally... */ ++ case 2: ++ case 4: ++ case 6: ++ /* 6 tap filter */ ++ vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line, ++ 21, xoffset, 16); ++ break; ++ ++ case 0: ++ /* only copy buffer */ ++ vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line); ++ break; ++ ++ case 1: ++ case 3: ++ case 5: ++ case 7: ++ /* 4 tap filter */ ++ vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16, ++ 21, xoffset, yoffset, dst_ptr, dst_pitch); ++ break; ++ } ++ ++ /* filter vertically... */ ++ vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter); ++ } ++ else ++ { ++ /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ ++ switch (xoffset) ++ { ++ case 2: ++ case 4: ++ case 6: ++ /* 6 tap filter */ ++ vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line, ++ 16, xoffset, dst_pitch); ++ break; ++ ++ case 1: ++ case 3: ++ case 5: ++ case 7: ++ /* 4 tap filter */ ++ vp8_filter_block2d_first_pass16_4tap(src_ptr, dst_ptr, src_pixels_per_line, 16, ++ 21, xoffset, yoffset, dst_ptr, dst_pitch); ++ break; ++ } ++ } ++} ++ ++#endif +diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c +new file mode 100644 +index 0000000..1e0ebd1 +--- /dev/null ++++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++#include "vpx_config.h" ++#include "vpx_rtcd.h" ++ ++#if HAVE_DSPR2 ++ ++void vp8_dequant_idct_add_y_block_dspr2 ++(short *q, short *dq, ++ unsigned char *dst, int stride, char *eobs) ++{ ++ int i, j; ++ ++ for (i = 0; i < 4; i++) ++ { ++ for (j = 0; j < 4; j++) ++ { ++ if (*eobs++ > 1) ++ vp8_dequant_idct_add_dspr2(q, dq, dst, stride); ++ else ++ { ++ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride); ++ ((int *)q)[0] = 0; ++ } ++ ++ q += 16; ++ dst += 4; ++ } ++ ++ dst += 4 * stride - 16; ++ } ++} ++ ++void vp8_dequant_idct_add_uv_block_dspr2 ++(short *q, short *dq, ++ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) ++{ ++ int i, j; ++ ++ for (i = 0; i < 2; i++) ++ { ++ for (j = 0; j < 2; j++) ++ { ++ if (*eobs++ > 1) ++ vp8_dequant_idct_add_dspr2(q, dq, dstu, stride); ++ else ++ { ++ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride); ++ ((int *)q)[0] = 0; ++ } ++ ++ q += 16; ++ dstu += 4; ++ } ++ ++ dstu += 4 * stride - 8; ++ } ++ ++ for (i = 0; i < 2; i++) ++ { ++ for (j = 0; j < 2; j++) ++ { ++ if (*eobs++ > 1) ++ vp8_dequant_idct_add_dspr2(q, dq, dstv, stride); ++ else ++ { ++ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride); ++ ((int *)q)[0] = 0; ++ } ++ ++ q += 16; ++ dstv += 4; ++ } ++ ++ dstv += 4 * stride - 8; ++ } ++} ++ ++#endif ++ +diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c +new file mode 100644 +index 0000000..25b7936 +--- /dev/null ++++ b/vp8/common/mips/dspr2/idctllm_dspr2.c +@@ -0,0 +1,369 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++#include "vpx_rtcd.h" ++ ++#if HAVE_DSPR2 ++#define CROP_WIDTH 256 ++ ++/****************************************************************************** ++ * Notes: ++ * ++ * This implementation makes use of 16 bit fixed point version of two multiply ++ * constants: ++ * 1. sqrt(2) * cos (pi/8) ++ * 2. sqrt(2) * sin (pi/8) ++ * Since the first constant is bigger than 1, to maintain the same 16 bit ++ * fixed point precision as the second one, we use a trick of ++ * x * a = x + x*(a-1) ++ * so ++ * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). 
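++ * For example, sqrt(2) * cos(pi/8) is about 1.30656, so it is stored as ++ * (1.30656 - 1) * 2^16, which rounds to 20091 (cospi8sqrt2minus1 below), ++ * while sqrt(2) * sin(pi/8) is about 0.541196 and is stored directly as ++ * 0.541196 * 2^16, which rounds to 35468 (sinpi8sqrt2).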
++ ****************************************************************************/ ++extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; ++static const int cospi8sqrt2minus1 = 20091; ++static const int sinpi8sqrt2 = 35468; ++ ++inline void prefetch_load_short(short *src) ++{ ++ __asm__ __volatile__ ( ++ "pref 0, 0(%[src]) \n\t" ++ : ++ : [src] "r" (src) ++ ); ++} ++ ++void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr, ++ int pred_stride, unsigned char *dst_ptr, ++ int dst_stride) ++{ ++ int r, c; ++ int a1, b1, c1, d1; ++ short output[16]; ++ short *ip = input; ++ short *op = output; ++ int temp1, temp2; ++ int shortpitch = 4; ++ ++ int c2, d2; ++ int temp3, temp4; ++ unsigned char *cm = ff_cropTbl + CROP_WIDTH; ++ ++ /* prepare data for load */ ++ prefetch_load_short(ip + 8); ++ ++ /* first loop is unrolled */ ++ a1 = ip[0] + ip[8]; ++ b1 = ip[0] - ip[8]; ++ ++ temp1 = (ip[4] * sinpi8sqrt2) >> 16; ++ temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); ++ c1 = temp1 - temp2; ++ ++ temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); ++ temp2 = (ip[12] * sinpi8sqrt2) >> 16; ++ d1 = temp1 + temp2; ++ ++ temp3 = (ip[5] * sinpi8sqrt2) >> 16; ++ temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); ++ c2 = temp3 - temp4; ++ ++ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); ++ temp4 = (ip[13] * sinpi8sqrt2) >> 16; ++ d2 = temp3 + temp4; ++ ++ op[0] = a1 + d1; ++ op[12] = a1 - d1; ++ op[4] = b1 + c1; ++ op[8] = b1 - c1; ++ ++ a1 = ip[1] + ip[9]; ++ b1 = ip[1] - ip[9]; ++ ++ op[1] = a1 + d2; ++ op[13] = a1 - d2; ++ op[5] = b1 + c2; ++ op[9] = b1 - c2; ++ ++ a1 = ip[2] + ip[10]; ++ b1 = ip[2] - ip[10]; ++ ++ temp1 = (ip[6] * sinpi8sqrt2) >> 16; ++ temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16); ++ c1 = temp1 - temp2; ++ ++ temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16); ++ temp2 = (ip[14] * sinpi8sqrt2) >> 16; ++ d1 = temp1 + temp2; ++ ++ temp3 = (ip[7] * sinpi8sqrt2) >> 16; ++ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); ++ c2 = temp3 - temp4; ++ ++ temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); ++ temp4 = (ip[15] * sinpi8sqrt2) >> 16; ++ d2 = temp3 + temp4; ++ ++ op[2] = a1 + d1; ++ op[14] = a1 - d1; ++ op[6] = b1 + c1; ++ op[10] = b1 - c1; ++ ++ a1 = ip[3] + ip[11]; ++ b1 = ip[3] - ip[11]; ++ ++ op[3] = a1 + d2; ++ op[15] = a1 - d2; ++ op[7] = b1 + c2; ++ op[11] = b1 - c2; ++ ++ ip = output; ++ ++ /* prepare data for load */ ++ prefetch_load_short(ip + shortpitch); ++ ++ /* second loop is unrolled */ ++ a1 = ip[0] + ip[2]; ++ b1 = ip[0] - ip[2]; ++ ++ temp1 = (ip[1] * sinpi8sqrt2) >> 16; ++ temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); ++ c1 = temp1 - temp2; ++ ++ temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); ++ temp2 = (ip[3] * sinpi8sqrt2) >> 16; ++ d1 = temp1 + temp2; ++ ++ temp3 = (ip[5] * sinpi8sqrt2) >> 16; ++ temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); ++ c2 = temp3 - temp4; ++ ++ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); ++ temp4 = (ip[7] * sinpi8sqrt2) >> 16; ++ d2 = temp3 + temp4; ++ ++ op[0] = (a1 + d1 + 4) >> 3; ++ op[3] = (a1 - d1 + 4) >> 3; ++ op[1] = (b1 + c1 + 4) >> 3; ++ op[2] = (b1 - c1 + 4) >> 3; ++ ++ a1 = ip[4] + ip[6]; ++ b1 = ip[4] - ip[6]; ++ ++ op[4] = (a1 + d2 + 4) >> 3; ++ op[7] = (a1 - d2 + 4) >> 3; ++ op[5] = (b1 + c2 + 4) >> 3; ++ op[6] = (b1 - c2 + 4) >> 3; ++ ++ a1 = ip[8] + ip[10]; ++ b1 = ip[8] - ip[10]; ++ ++ temp1 = (ip[9] * sinpi8sqrt2) >> 16; ++ temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16); ++ c1 = temp1 - temp2; ++ ++ temp1 = ip[9] + ((ip[9] * 
cospi8sqrt2minus1) >> 16); ++ temp2 = (ip[11] * sinpi8sqrt2) >> 16; ++ d1 = temp1 + temp2; ++ ++ temp3 = (ip[13] * sinpi8sqrt2) >> 16; ++ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); ++ c2 = temp3 - temp4; ++ ++ temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); ++ temp4 = (ip[15] * sinpi8sqrt2) >> 16; ++ d2 = temp3 + temp4; ++ ++ op[8] = (a1 + d1 + 4) >> 3; ++ op[11] = (a1 - d1 + 4) >> 3; ++ op[9] = (b1 + c1 + 4) >> 3; ++ op[10] = (b1 - c1 + 4) >> 3; ++ ++ a1 = ip[12] + ip[14]; ++ b1 = ip[12] - ip[14]; ++ ++ op[12] = (a1 + d2 + 4) >> 3; ++ op[15] = (a1 - d2 + 4) >> 3; ++ op[13] = (b1 + c2 + 4) >> 3; ++ op[14] = (b1 - c2 + 4) >> 3; ++ ++ ip = output; ++ ++ for (r = 0; r < 4; r++) ++ { ++ for (c = 0; c < 4; c++) ++ { ++ short a = ip[c] + pred_ptr[c] ; ++ dst_ptr[c] = cm[a] ; ++ } ++ ++ ip += 4; ++ dst_ptr += dst_stride; ++ pred_ptr += pred_stride; ++ } ++} ++ ++void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) ++{ ++ int a1; ++ int i, absa1; ++ int t2, vector_a1, vector_a; ++ ++ /* a1 = ((input_dc + 4) >> 3); */ ++ __asm__ __volatile__ ( ++ "addi %[a1], %[input_dc], 4 \n\t" ++ "sra %[a1], %[a1], 3 \n\t" ++ : [a1] "=r" (a1) ++ : [input_dc] "r" (input_dc) ++ ); ++ ++ if (a1 < 0) ++ { ++ /* use quad-byte ++ * input and output memory are four byte aligned ++ */ ++ __asm__ __volatile__ ( ++ "abs %[absa1], %[a1] \n\t" ++ "replv.qb %[vector_a1], %[absa1] \n\t" ++ : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1) ++ : [a1] "r" (a1) ++ ); ++ ++ /* use (a1 - pred_ptr[c]) instead of (a1 + pred_ptr[c]) */ ++ for (i = 4; i--;) ++ { ++ __asm__ __volatile__ ( ++ "lw %[t2], 0(%[pred_ptr]) \n\t" ++ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" ++ "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" ++ "sw %[vector_a], 0(%[dst_ptr]) \n\t" ++ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" ++ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), ++ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr) ++ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1) ++ ); ++ } ++ } ++ else ++ { ++ /* use quad-byte ++ * input and output memory are four byte aligned ++ */ ++ __asm__ __volatile__ ( ++ "replv.qb %[vector_a1], %[a1] \n\t" ++ : [vector_a1] "=r" (vector_a1) ++ : [a1] "r" (a1) ++ ); ++ ++ for (i = 4; i--;) ++ { ++ __asm__ __volatile__ ( ++ "lw %[t2], 0(%[pred_ptr]) \n\t" ++ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" ++ "addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t" ++ "sw %[vector_a], 0(%[dst_ptr]) \n\t" ++ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" ++ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), ++ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr) ++ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1) ++ ); ++ } ++ } ++ ++} ++ ++void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff) ++{ ++ short output[16]; ++ int i; ++ int a1, b1, c1, d1; ++ int a2, b2, c2, d2; ++ short *ip = input; ++ short *op = output; ++ ++ prefetch_load_short(ip); ++ ++ for (i = 4; i--;) ++ { ++ a1 = ip[0] + ip[12]; ++ b1 = ip[4] + ip[8]; ++ c1 = ip[4] - ip[8]; ++ d1 = ip[0] - ip[12]; ++ ++ op[0] = a1 + b1; ++ op[4] = c1 + d1; ++ op[8] = a1 - b1; ++ op[12] = d1 - c1; ++ ++ ip++; ++ op++; ++ } ++ ++ ip = output; ++ op = output; ++ ++ prefetch_load_short(ip); ++ ++ for (i = 4; i--;) ++ { ++ a1 = ip[0] + ip[3] + 3; ++ b1 = ip[1] + ip[2]; ++ c1 = ip[1] - ip[2]; ++ d1 = ip[0] - ip[3] + 3; ++ ++ a2 = a1 + b1; ++ b2 = d1 + c1; ++ c2 = a1
- b1; ++ d2 = d1 - c1; ++ ++ op[0] = a2 >> 3; ++ op[1] = b2 >> 3; ++ op[2] = c2 >> 3; ++ op[3] = d2 >> 3; ++ ++ ip += 4; ++ op += 4; ++ } ++ ++ for (i = 0; i < 16; i++) ++ { ++ mb_dqcoeff[i * 16] = output[i]; ++ } ++} ++ ++void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff) ++{ ++ int a1; ++ ++ a1 = ((input[0] + 3) >> 3); ++ ++ __asm__ __volatile__ ( ++ "sh %[a1], 0(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 32(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 64(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 96(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 128(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 160(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 192(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 224(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 256(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 288(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 320(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 352(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 384(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 416(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 448(%[mb_dqcoeff]) \n\t" ++ "sh %[a1], 480(%[mb_dqcoeff]) \n\t" ++ ++ : ++ : [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff) ++ ); ++} ++ ++#endif +diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c +new file mode 100644 +index 0000000..b8e5e4d +--- /dev/null ++++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c +@@ -0,0 +1,2622 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++ ++#include <stdlib.h> ++#include "vpx_rtcd.h" ++#include "vp8/common/onyxc_int.h" ++ ++#if HAVE_DSPR2 ++typedef unsigned char uc; ++ ++/* prefetch data for load */ ++inline void prefetch_load_lf(unsigned char *src) ++{ ++ __asm__ __volatile__ ( ++ "pref 0, 0(%[src]) \n\t" ++ : ++ : [src] "r" (src) ++ ); ++} ++ ++ ++/* prefetch data for store */ ++inline void prefetch_store_lf(unsigned char *dst) ++{ ++ __asm__ __volatile__ ( ++ "pref 1, 0(%[dst]) \n\t" ++ : ++ : [dst] "r" (dst) ++ ); ++} ++ ++/* processing 4 pixels at the same time ++ * compute hev and mask in the same function ++ */ ++static __inline void vp8_filter_mask_vec_mips ++( ++ uint32_t limit, ++ uint32_t flimit, ++ uint32_t p1, ++ uint32_t p0, ++ uint32_t p3, ++ uint32_t p2, ++ uint32_t q0, ++ uint32_t q1, ++ uint32_t q2, ++ uint32_t q3, ++ uint32_t thresh, ++ uint32_t *hev, ++ uint32_t *mask ++) ++{ ++ uint32_t c, r, r3, r_k; ++ uint32_t s1, s2, s3; ++ uint32_t ones = 0xFFFFFFFF; ++ uint32_t hev1; ++ ++ __asm__ __volatile__ ( ++ /* mask |= (abs(p3 - p2) > limit) */ ++ "subu_s.qb %[c], %[p3], %[p2] \n\t" ++ "subu_s.qb %[r_k], %[p2], %[p3] \n\t" ++ "or %[r_k], %[r_k], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" ++ "or %[r], $0, %[c] \n\t" ++ ++ /* mask |= (abs(p2 - p1) > limit) */ ++ "subu_s.qb %[c], %[p2], %[p1] \n\t" ++ "subu_s.qb %[r_k], %[p1], %[p2] \n\t" ++ "or %[r_k], %[r_k], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" ++ "or %[r], %[r], %[c] \n\t" ++ ++ /* mask |= (abs(p1 - p0) > limit) ++ * hev |= (abs(p1 - p0) > thresh) ++ */ ++ "subu_s.qb %[c], %[p1], %[p0] \n\t" ++ "subu_s.qb %[r_k], %[p0], %[p1] \n\t" ++ "or %[r_k], %[r_k], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" ++ "or %[r3], $0, %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" ++ "or %[r], %[r], %[c] \n\t" ++ ++ /*
mask |= (abs(q1 - q0) > limit) ++ * hev |= (abs(q1 - q0) > thresh) ++ */ ++ "subu_s.qb %[c], %[q1], %[q0] \n\t" ++ "subu_s.qb %[r_k], %[q0], %[q1] \n\t" ++ "or %[r_k], %[r_k], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" ++ "or %[r3], %[r3], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" ++ "or %[r], %[r], %[c] \n\t" ++ ++ /* mask |= (abs(q2 - q1) > limit) */ ++ "subu_s.qb %[c], %[q2], %[q1] \n\t" ++ "subu_s.qb %[r_k], %[q1], %[q2] \n\t" ++ "or %[r_k], %[r_k], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" ++ "or %[r], %[r], %[c] \n\t" ++ "sll %[r3], %[r3], 24 \n\t" ++ ++ /* mask |= (abs(q3 - q2) > limit) */ ++ "subu_s.qb %[c], %[q3], %[q2] \n\t" ++ "subu_s.qb %[r_k], %[q2], %[q3] \n\t" ++ "or %[r_k], %[r_k], %[c] \n\t" ++ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" ++ "or %[r], %[r], %[c] \n\t" ++ ++ : [c] "=&r" (c), [r_k] "=&r" (r_k), ++ [r] "=&r" (r), [r3] "=&r" (r3) ++ : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), ++ [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), ++ [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) ++ ); ++ ++ __asm__ __volatile__ ( ++ /* abs(p0 - q0) */ ++ "subu_s.qb %[c], %[p0], %[q0] \n\t" ++ "subu_s.qb %[r_k], %[q0], %[p0] \n\t" ++ "wrdsp %[r3] \n\t" ++ "or %[s1], %[r_k], %[c] \n\t" ++ ++ /* abs(p1 - q1) */ ++ "subu_s.qb %[c], %[p1], %[q1] \n\t" ++ "addu_s.qb %[s3], %[s1], %[s1] \n\t" ++ "pick.qb %[hev1], %[ones], $0 \n\t" ++ "subu_s.qb %[r_k], %[q1], %[p1] \n\t" ++ "or %[s2], %[r_k], %[c] \n\t" ++ ++ /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ ++ "shrl.qb %[s2], %[s2], 1 \n\t" ++ "addu_s.qb %[s1], %[s2], %[s3] \n\t" ++ "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" ++ "or %[r], %[r], %[c] \n\t" ++ "sll %[r], %[r], 24 \n\t" ++ ++ "wrdsp %[r] \n\t" ++ "pick.qb %[s2], $0, %[ones] \n\t" ++ ++ : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), ++ [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) ++ : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), ++ [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) ++ ); ++ ++ *hev = hev1; ++ *mask = s2; ++} ++ ++ ++/* inputs & outputs are quad-byte vectors */ ++static __inline void vp8_filter_mips ++( ++ uint32_t mask, ++ uint32_t hev, ++ uint32_t *ps1, ++ uint32_t *ps0, ++ uint32_t *qs0, ++ uint32_t *qs1 ++) ++{ ++ int32_t vp8_filter_l, vp8_filter_r; ++ int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; ++ int32_t subr_r, subr_l; ++ uint32_t t1, t2, HWM, t3; ++ uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; ++ ++ int32_t vps1, vps0, vqs0, vqs1; ++ int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; ++ uint32_t N128; ++ ++ N128 = 0x80808080; ++ t1 = 0x03000300; ++ t2 = 0x04000400; ++ t3 = 0x01000100; ++ HWM = 0xFF00FF00; ++ ++ vps0 = (*ps0) ^ N128; ++ vps1 = (*ps1) ^ N128; ++ vqs0 = (*qs0) ^ N128; ++ vqs1 = (*qs1) ^ N128; ++ ++ /* use halfword pairs instead of quad-bytes because of accuracy */ ++ vps0_l = vps0 & HWM; ++ vps0_r = vps0 << 8; ++ vps0_r = vps0_r & HWM; ++ ++ vps1_l = vps1 & HWM; ++ vps1_r = vps1 << 8; ++ vps1_r = vps1_r & HWM; ++ ++ vqs0_l = vqs0 & HWM; ++ vqs0_r = vqs0 << 8; ++ vqs0_r = vqs0_r & HWM; ++ ++ vqs1_l = vqs1 & HWM; ++ vqs1_r = vqs1 << 8; ++ vqs1_r = vqs1_r & HWM; ++ ++ mask_l = mask & HWM; ++ mask_r = mask << 8; ++ mask_r = mask_r & HWM; ++ ++ hev_l = hev & HWM; ++ hev_r = hev << 8; ++ hev_r = hev_r & HWM; ++ ++ __asm__ __volatile__ ( ++ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ ++ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" ++ "subq_s.ph %[vp8_filter_r],
%[vps1_r], %[vqs1_r] \n\t" ++ ++ /* qs0 - ps0 */ ++ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" ++ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" ++ ++ /* vp8_filter &= hev; */ ++ "and %[vp8_filter_l], %[vp8_filter_l], %[hev_l] \n\t" ++ "and %[vp8_filter_r], %[vp8_filter_r], %[hev_r] \n\t" ++ ++ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ ++ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" ++ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" ++ "xor %[invhev_l], %[hev_l], %[HWM] \n\t" ++ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" ++ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" ++ "xor %[invhev_r], %[hev_r], %[HWM] \n\t" ++ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" ++ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" ++ ++ /* vp8_filter &= mask; */ ++ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" ++ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" ++ ++ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=&r" (vp8_filter_r), ++ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r), ++ [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r) ++ ++ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), ++ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), ++ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r), ++ [mask_l] "r" (mask_l), [mask_r] "r" (mask_r), ++ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r), ++ [HWM] "r" (HWM) ++ ); ++ ++ /* save bottom 3 bits so that we round one side +4 and the other +3 */ ++ __asm__ __volatile__ ( ++ /* Filter1 = vp8_signed_char_clamp(vp8_filter + 4) >>= 3; */ ++ "addq_s.ph %[Filter1_l], %[vp8_filter_l], %[t2] \n\t" ++ "addq_s.ph %[Filter1_r], %[vp8_filter_r], %[t2] \n\t" ++ ++ /* Filter2 = vp8_signed_char_clamp(vp8_filter + 3) >>= 3; */ ++ "addq_s.ph %[Filter2_l], %[vp8_filter_l], %[t1] \n\t" ++ "addq_s.ph %[Filter2_r], %[vp8_filter_r], %[t1] \n\t" ++ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" ++ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" ++ ++ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" ++ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" ++ ++ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" ++ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" ++ ++ /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ ++ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" ++ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" ++ ++ /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ ++ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" ++ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" ++ ++ : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), ++ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r), ++ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), ++ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) ++ ++ : [t1] "r" (t1), [t2] "r" (t2), ++ [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), ++ [HWM] "r" (HWM) ++ ); ++ ++ __asm__ __volatile__ ( ++ /* (vp8_filter += 1) >>= 1 */ ++ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" ++ "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" ++ ++ /* vp8_filter &= ~hev; */ ++ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" ++ "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" ++ ++ /* vps1 = vp8_signed_char_clamp(ps1 + vp8_filter); */ ++ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" ++ "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" ++ ++ /* vqs1 = vp8_signed_char_clamp(qs1 - vp8_filter); */ ++ "subq_s.ph %[vqs1_l], %[vqs1_l],
%[Filter1_l] \n\t" ++ "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" ++ ++ : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r), ++ [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), ++ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) ++ ++ : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) ++ ); ++ ++ /* Create quad-bytes from halfword pairs */ ++ vqs0_l = vqs0_l & HWM; ++ vqs1_l = vqs1_l & HWM; ++ vps0_l = vps0_l & HWM; ++ vps1_l = vps1_l & HWM; ++ ++ __asm__ __volatile__ ( ++ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" ++ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" ++ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" ++ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" ++ ++ : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r), ++ [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r) ++ : ++ ); ++ ++ vqs0 = vqs0_l | vqs0_r; ++ vqs1 = vqs1_l | vqs1_r; ++ vps0 = vps0_l | vps0_r; ++ vps1 = vps1_l | vps1_r; ++ ++ *ps0 = vps0 ^ N128; ++ *ps1 = vps1 ^ N128; ++ *qs0 = vqs0 ^ N128; ++ *qs1 = vqs1 ^ N128; ++} ++ ++void vp8_loop_filter_horizontal_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ uint32_t mask; ++ uint32_t hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; ++ ++ mask = 0; ++ hev = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ ++ /* prefetch data for store */ ++ prefetch_store_lf(s); ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. ++ */ ++ ++ sm1 = s - (p << 2); ++ s0 = s - p - p - p; ++ s1 = s - p - p ; ++ s2 = s - p; ++ s3 = s; ++ s4 = s + p; ++ s5 = s + p + p; ++ s6 = s + p + p + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood */ ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ } ++ } ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood */ ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ } 
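++
++ /* the load / mask / filter / store step above repeats once per
++  * 4-pixel group across the 16-pixel macroblock row; the rough
++  * scalar shape of one step is
++  *
++  *     if (p1 != p4 || p2 != p3)
++  *     {
++  *         vp8_filter_mask_vec_mips(..., &hev, &mask);
++  *         if (mask)
++  *             vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4);
++  *     }
++  *
++  * when both row pairs across the edge are equal, the filter output
++  * would match the input, so the whole step is skipped up front.
++  */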
++ } ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood */ ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ } ++ } ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood */ ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ } ++ } ++} ++ ++void vp8_loop_filter_uvhorizontal_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ uint32_t mask; ++ uint32_t hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; ++ ++ mask = 0; ++ hev = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. 
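++ *
++ * the chroma block is only 8 pixels wide, so the edge is handled as
++ * two straight-line 4-pixel groups (no do/while loop) and the count
++ * argument is not used here.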
++ */ ++ ++ sm1 = s - (p << 2); ++ s0 = s - p - p - p; ++ s1 = s - p - p ; ++ s2 = s - p; ++ s3 = s; ++ s4 = s + p; ++ s5 = s + p + p; ++ s6 = s + p + p + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood */ ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ } ++ } ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood */ ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ } ++ } ++} ++ ++void vp8_loop_filter_vertical_edge_mips ++( ++ unsigned char *s, ++ int p, ++ const unsigned int flimit, ++ const unsigned int limit, ++ const unsigned int thresh, ++ int count ++) ++{ ++ int i; ++ uint32_t mask, hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *s1, *s2, *s3, *s4; ++ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; ++ ++ hev = 0; ++ mask = 0; ++ i = 0; ++ pm1 = 0; ++ p0 = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ p5 = 0; ++ p6 = 0; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. 
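++ *
++ * for a vertical edge the four pixels of one vector lie in different
++ * rows, so each pass loads two words per row and transposes the two
++ * 4x4 byte blocks in registers (the precrq/precr/append sequences
++ * below) before the same quad-byte mask and filter code can run.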
++ */ ++ ++ /* apply filter on 4 pixesl at the same time */ ++ do ++ { ++ ++ /* prefetch data for store */ ++ prefetch_store_lf(s + p); ++ ++ s1 = s; ++ s2 = s + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ s = s4 + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood ++ * don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ : [p1] "+r" (p1) ++ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ 
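++ /* each 8-bit shift retires the row just stored and brings the
++    next row's byte into the low position for the following sb */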
"srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ } ++ } ++ ++ s1 = s; ++ s2 = s + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ s = s4 + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood ++ * don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) 
\n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ : [p1] "+r" (p1) ++ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ } ++ } ++ ++ i += 8; ++ } ++ ++ while (i < count); ++} ++ ++void vp8_loop_filter_uvvertical_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ uint32_t mask, hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *s1, *s2, *s3, *s4; ++ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. 
++ */ ++ ++ /* apply filter on 4 pixesl at the same time */ ++ ++ s1 = s; ++ s2 = s + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood ++ * don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ : [p1] "+r" (p1) ++ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" 
(p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ } ++ } ++ ++ s1 = s4 + p; ++ s2 = s1 + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); ++ ++ /* unpack processed 4x4 neighborhood ++ * don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ 
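++ /* rows are written in reverse order (s4 first, then s3, s2, s1),
++  * one byte lane per row, peeled off by the srl blocks
++  */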
++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ : [p1] "+r" (p1) ++ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ : ++ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), ++ [p2] "r" (p2), [p1] "r" (p1) ++ ); ++ } ++ } ++} ++ ++/* inputs & outputs are quad-byte vectors */ ++static __inline void vp8_mbfilter_mips ++( ++ uint32_t mask, ++ uint32_t hev, ++ uint32_t *ps2, ++ uint32_t *ps1, ++ uint32_t *ps0, ++ uint32_t *qs0, ++ uint32_t *qs1, ++ uint32_t *qs2 ++) ++{ ++ int32_t vps2, vps1, vps0, vqs0, vqs1, vqs2; ++ int32_t vps2_l, vps1_l, vps0_l, vqs0_l, vqs1_l, vqs2_l; ++ int32_t vps2_r, vps1_r, vps0_r, vqs0_r, vqs1_r, vqs2_r; ++ uint32_t HWM, vp8_filter_l, vp8_filter_r, mask_l, mask_r, hev_l, hev_r, subr_r, subr_l; ++ uint32_t Filter2_l, Filter2_r, t1, t2, Filter1_l, Filter1_r, invhev_l, invhev_r; ++ uint32_t N128, R63; ++ uint32_t u1_l, u1_r, u2_l, u2_r, u3_l, u3_r; ++ ++ R63 = 0x003F003F; ++ HWM = 0xFF00FF00; ++ N128 = 0x80808080; ++ t1 = 0x03000300; ++ t2 = 0x04000400; ++ ++ vps0 = (*ps0) ^ N128; ++ vps1 = (*ps1) ^ N128; ++ vps2 = (*ps2) ^ N128; ++ vqs0 = (*qs0) ^ N128; ++ vqs1 = (*qs1) ^ N128; ++ vqs2 = (*qs2) ^ N128; ++ ++ /* use halfword pairs instead quad-bytes because of accuracy */ ++ vps0_l = vps0 & HWM; ++ vps0_r = vps0 << 8; ++ vps0_r = vps0_r & HWM; ++ ++ vqs0_l = vqs0 & HWM; ++ vqs0_r = vqs0 << 8; ++ vqs0_r = vqs0_r & HWM; ++ ++ vps1_l = vps1 & HWM; ++ vps1_r = vps1 << 8; ++ vps1_r = vps1_r & HWM; ++ ++ vqs1_l = vqs1 & HWM; ++ vqs1_r = vqs1 << 8; ++ vqs1_r = vqs1_r & HWM; ++ ++ vqs2_l = vqs2 & HWM; ++ vqs2_r = vqs2 << 8; ++ vqs2_r = vqs2_r & HWM; ++ ++ __asm__ __volatile__ ( ++ /* qs0 - ps0 */ ++ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" ++ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" ++ ++ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ ++ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" ++ "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" ++ ++ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=r" (vp8_filter_r), ++ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r) ++ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), ++ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), ++ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r) ++ ); ++ ++ vps2_l = vps2 & HWM; ++ vps2_r = vps2 << 8; ++ vps2_r = vps2_r & HWM; ++ ++ /* add outer taps if we have high 
edge variance */ ++ __asm__ __volatile__ ( ++ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ ++ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" ++ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" ++ "and %[mask_l], %[HWM], %[mask] \n\t" ++ "sll %[mask_r], %[mask], 8 \n\t" ++ "and %[mask_r], %[HWM], %[mask_r] \n\t" ++ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" ++ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" ++ "and %[hev_l], %[HWM], %[hev] \n\t" ++ "sll %[hev_r], %[hev], 8 \n\t" ++ "and %[hev_r], %[HWM], %[hev_r] \n\t" ++ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" ++ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" ++ ++ /* vp8_filter &= mask; */ ++ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" ++ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" ++ ++ /* Filter2 = vp8_filter & hev; */ ++ "and %[Filter2_l], %[vp8_filter_l], %[hev_l] \n\t" ++ "and %[Filter2_r], %[vp8_filter_r], %[hev_r] \n\t" ++ ++ : [vp8_filter_l] "+r" (vp8_filter_l), [vp8_filter_r] "+r" (vp8_filter_r), ++ [hev_l] "=&r" (hev_l), [hev_r] "=&r" (hev_r), ++ [mask_l] "=&r" (mask_l), [mask_r] "=&r" (mask_r), ++ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) ++ : [subr_l] "r" (subr_l), [subr_r] "r" (subr_r), ++ [HWM] "r" (HWM), [hev] "r" (hev), [mask] "r" (mask) ++ ); ++ ++ /* save bottom 3 bits so that we round one side +4 and the other +3 */ ++ __asm__ __volatile__ ( ++ /* Filter1 = vp8_signed_char_clamp(Filter2 + 4) >>= 3; */ ++ "addq_s.ph %[Filter1_l], %[Filter2_l], %[t2] \n\t" ++ "xor %[invhev_l], %[hev_l], %[HWM] \n\t" ++ "addq_s.ph %[Filter1_r], %[Filter2_r], %[t2] \n\t" ++ ++ /* Filter2 = vp8_signed_char_clamp(Filter2 + 3) >>= 3; */ ++ "addq_s.ph %[Filter2_l], %[Filter2_l], %[t1] \n\t" ++ "addq_s.ph %[Filter2_r], %[Filter2_r], %[t1] \n\t" ++ ++ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" ++ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" ++ ++ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" ++ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" ++ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" ++ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" ++ "xor %[invhev_r], %[hev_r], %[HWM] \n\t" ++ ++ /* qs0 = vp8_signed_char_clamp(qs0 - Filter1); */ ++ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" ++ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" ++ ++ /* ps0 = vp8_signed_char_clamp(ps0 + Filter2); */ ++ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" ++ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" ++ ++ : [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r), ++ [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), ++ [Filter2_l] "+r" (Filter2_l), [Filter2_r] "+r" (Filter2_r), ++ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), ++ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) ++ : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM), ++ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r) ++ ); ++ ++ /* only apply wider filter if not high edge variance */ ++ __asm__ __volatile__ ( ++ /* vp8_filter &= ~hev; */ ++ "and %[Filter2_l], %[vp8_filter_l], %[invhev_l] \n\t" ++ "and %[Filter2_r], %[vp8_filter_r], %[invhev_r] \n\t" ++ ++ "shra.ph %[Filter2_l], %[Filter2_l], 8 \n\t" ++ "shra.ph %[Filter2_r], %[Filter2_r], 8 \n\t" ++ ++ : [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) ++ : [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), ++ [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) ++ ); ++ ++ /* roughly 3/7th difference across boundary */ ++ __asm__ 
__volatile__ ( ++ "shll.ph %[u3_l], %[Filter2_l], 3 \n\t" ++ "shll.ph %[u3_r], %[Filter2_r], 3 \n\t" ++ ++ "addq.ph %[u3_l], %[u3_l], %[Filter2_l] \n\t" ++ "addq.ph %[u3_r], %[u3_r], %[Filter2_r] \n\t" ++ ++ "shll.ph %[u2_l], %[u3_l], 1 \n\t" ++ "shll.ph %[u2_r], %[u3_r], 1 \n\t" ++ ++ "addq.ph %[u1_l], %[u3_l], %[u2_l] \n\t" ++ "addq.ph %[u1_r], %[u3_r], %[u2_r] \n\t" ++ ++ "addq.ph %[u2_l], %[u2_l], %[R63] \n\t" ++ "addq.ph %[u2_r], %[u2_r], %[R63] \n\t" ++ ++ "addq.ph %[u3_l], %[u3_l], %[R63] \n\t" ++ "addq.ph %[u3_r], %[u3_r], %[R63] \n\t" ++ ++ /* vp8_signed_char_clamp((63 + Filter2 * 27) >> 7) ++ * vp8_signed_char_clamp((63 + Filter2 * 18) >> 7) ++ */ ++ "addq.ph %[u1_l], %[u1_l], %[R63] \n\t" ++ "addq.ph %[u1_r], %[u1_r], %[R63] \n\t" ++ "shra.ph %[u1_l], %[u1_l], 7 \n\t" ++ "shra.ph %[u1_r], %[u1_r], 7 \n\t" ++ "shra.ph %[u2_l], %[u2_l], 7 \n\t" ++ "shra.ph %[u2_r], %[u2_r], 7 \n\t" ++ "shll.ph %[u1_l], %[u1_l], 8 \n\t" ++ "shll.ph %[u1_r], %[u1_r], 8 \n\t" ++ "shll.ph %[u2_l], %[u2_l], 8 \n\t" ++ "shll.ph %[u2_r], %[u2_r], 8 \n\t" ++ ++ /* vqs0 = vp8_signed_char_clamp(qs0 - u); */ ++ "subq_s.ph %[vqs0_l], %[vqs0_l], %[u1_l] \n\t" ++ "subq_s.ph %[vqs0_r], %[vqs0_r], %[u1_r] \n\t" ++ ++ /* vps0 = vp8_signed_char_clamp(ps0 + u); */ ++ "addq_s.ph %[vps0_l], %[vps0_l], %[u1_l] \n\t" ++ "addq_s.ph %[vps0_r], %[vps0_r], %[u1_r] \n\t" ++ ++ : [u1_l] "=&r" (u1_l), [u1_r] "=&r" (u1_r), [u2_l] "=&r" (u2_l), ++ [u2_r] "=&r" (u2_r), [u3_l] "=&r" (u3_l), [u3_r] "=&r" (u3_r), ++ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), ++ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) ++ : [R63] "r" (R63), ++ [Filter2_l] "r" (Filter2_l), [Filter2_r] "r" (Filter2_r) ++ ); ++ ++ __asm__ __volatile__ ( ++ /* vqs1 = vp8_signed_char_clamp(qs1 - u); */ ++ "subq_s.ph %[vqs1_l], %[vqs1_l], %[u2_l] \n\t" ++ "addq_s.ph %[vps1_l], %[vps1_l], %[u2_l] \n\t" ++ ++ /* vps1 = vp8_signed_char_clamp(ps1 + u); */ ++ "addq_s.ph %[vps1_r], %[vps1_r], %[u2_r] \n\t" ++ "subq_s.ph %[vqs1_r], %[vqs1_r], %[u2_r] \n\t" ++ ++ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), ++ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) ++ : [u2_l] "r" (u2_l), [u2_r] "r" (u2_r) ++ ); ++ ++ /* roughly 1/7th difference across boundary */ ++ __asm__ __volatile__ ( ++ /* u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); */ ++ "shra.ph %[u3_l], %[u3_l], 7 \n\t" ++ "shra.ph %[u3_r], %[u3_r], 7 \n\t" ++ "shll.ph %[u3_l], %[u3_l], 8 \n\t" ++ "shll.ph %[u3_r], %[u3_r], 8 \n\t" ++ ++ /* vqs2 = vp8_signed_char_clamp(qs2 - u); */ ++ "subq_s.ph %[vqs2_l], %[vqs2_l], %[u3_l] \n\t" ++ "subq_s.ph %[vqs2_r], %[vqs2_r], %[u3_r] \n\t" ++ ++ /* vps2 = vp8_signed_char_clamp(ps2 + u); */ ++ "addq_s.ph %[vps2_l], %[vps2_l], %[u3_l] \n\t" ++ "addq_s.ph %[vps2_r], %[vps2_r], %[u3_r] \n\t" ++ ++ : [u3_l] "+r" (u3_l), [u3_r] "+r" (u3_r), [vps2_l] "+r" (vps2_l), ++ [vps2_r] "+r" (vps2_r), [vqs2_l] "+r" (vqs2_l), [vqs2_r] "+r" (vqs2_r) ++ : ++ ); ++ ++ /* Create quad-bytes from halfword pairs */ ++ __asm__ __volatile__ ( ++ "and %[vqs0_l], %[vqs0_l], %[HWM] \n\t" ++ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" ++ ++ "and %[vps0_l], %[vps0_l], %[HWM] \n\t" ++ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" ++ ++ "and %[vqs1_l], %[vqs1_l], %[HWM] \n\t" ++ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" ++ ++ "and %[vps1_l], %[vps1_l], %[HWM] \n\t" ++ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" ++ ++ "and %[vqs2_l], %[vqs2_l], %[HWM] \n\t" ++ "shrl.ph %[vqs2_r], %[vqs2_r], 8 \n\t" ++ ++ "and %[vps2_l], %[vps2_l], %[HWM] \n\t" ++ "shrl.ph %[vps2_r], %[vps2_r], 8 \n\t" ++ ++ "or %[vqs0_r], %[vqs0_l], 
%[vqs0_r] \n\t" ++ "or %[vps0_r], %[vps0_l], %[vps0_r] \n\t" ++ "or %[vqs1_r], %[vqs1_l], %[vqs1_r] \n\t" ++ "or %[vps1_r], %[vps1_l], %[vps1_r] \n\t" ++ "or %[vqs2_r], %[vqs2_l], %[vqs2_r] \n\t" ++ "or %[vps2_r], %[vps2_l], %[vps2_r] \n\t" ++ ++ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), [vqs1_l] "+r" (vqs1_l), ++ [vqs1_r] "+r" (vqs1_r), [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), ++ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r), [vqs2_l] "+r" (vqs2_l), ++ [vqs2_r] "+r" (vqs2_r), [vps2_r] "+r" (vps2_r), [vps2_l] "+r" (vps2_l) ++ : [HWM] "r" (HWM) ++ ); ++ ++ *ps0 = vps0_r ^ N128; ++ *ps1 = vps1_r ^ N128; ++ *ps2 = vps2_r ^ N128; ++ *qs0 = vqs0_r ^ N128; ++ *qs1 = vqs1_r ^ N128; ++ *qs2 = vqs2_r ^ N128; ++} ++ ++void vp8_mbloop_filter_horizontal_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ int i; ++ uint32_t mask, hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; ++ ++ mask = 0; ++ hev = 0; ++ i = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. ++ */ ++ ++ sm1 = s - (p << 2); ++ s0 = s - p - p - p; ++ s1 = s - p - p; ++ s2 = s - p; ++ s3 = s; ++ s4 = s + p; ++ s5 = s + p + p; ++ s6 = s + p + p + p; ++ ++ /* prefetch data for load */ ++ prefetch_load_lf(s + p); ++ ++ /* apply filter on 4 pixesl at the same time */ ++ do ++ { ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* unpack processed 4x4 neighborhood ++ * memory is 4 byte aligned ++ */ ++ *((uint32_t *)s0) = p0; ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ *((uint32_t *)s5) = p5; ++ } ++ } ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* unpack processed 4x4 neighborhood ++ * memory is 4 byte aligned ++ */ ++ *((uint32_t *)s0) = p0; ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ *((uint32_t *)s5) = p5; ++ } ++ 
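++
++ /* note that the macroblock filter rewrites six rows per group
++  * (p0..p5, three pixels on each side of the edge), which is why
++  * s0 and s5 are stored here as well.
++  */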
} ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ i += 8; ++ } ++ ++ while (i < count); ++} ++ ++void vp8_mbloop_filter_uvhorizontal_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ uint32_t mask, hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; ++ ++ mask = 0; ++ hev = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. ++ */ ++ ++ sm1 = s - (p << 2); ++ s0 = s - p - p - p; ++ s1 = s - p - p; ++ s2 = s - p; ++ s3 = s; ++ s4 = s + p; ++ s5 = s + p + p; ++ s6 = s + p + p + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ /* if mask == 0 do filtering is not needed */ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* unpack processed 4x4 neighborhood ++ * memory is 4 byte aligned ++ */ ++ *((uint32_t *)s0) = p0; ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ *((uint32_t *)s5) = p5; ++ } ++ } ++ ++ sm1 += 4; ++ s0 += 4; ++ s1 += 4; ++ s2 += 4; ++ s3 += 4; ++ s4 += 4; ++ s5 += 4; ++ s6 += 4; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p1 = *((uint32_t *)(s1)); ++ p2 = *((uint32_t *)(s2)); ++ p3 = *((uint32_t *)(s3)); ++ p4 = *((uint32_t *)(s4)); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ pm1 = *((uint32_t *)(sm1)); ++ p0 = *((uint32_t *)(s0)); ++ p5 = *((uint32_t *)(s5)); ++ p6 = *((uint32_t *)(s6)); ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* unpack processed 4x4 neighborhood ++ * memory is 4 byte aligned ++ */ ++ *((uint32_t *)s0) = p0; ++ *((uint32_t *)s1) = p1; ++ *((uint32_t *)s2) = p2; ++ *((uint32_t *)s3) = p3; ++ *((uint32_t *)s4) = p4; ++ *((uint32_t *)s5) = p5; ++ } ++ } ++} ++ ++ ++void vp8_mbloop_filter_vertical_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ ++ int i; ++ uint32_t mask, hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *s1, *s2, *s3, *s4; ++ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; ++ ++ mask = 0; ++ hev = 0; ++ i = 0; ++ pm1 = 0; ++ p0 = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ p5 = 0; ++ p6 = 0; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. 
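++ *
++ * as in the luma function above, three pixels on each side of the
++ * edge are rewritten; the 8-pixel chroma width again fits in two
++ * straight-line 4-pixel groups, so the count argument is unused.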
++ */ ++ ++ /* apply filter on 4 pixesl at the same time */ ++ do ++ { ++ s1 = s; ++ s2 = s + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ s = s4 + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s4]) \n\t" ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) \n\t" ++ "sb %[p0], -3(%[s4]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s3]) \n\t" ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ "sb %[p0], -3(%[s3]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" 
(s3), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s2]) \n\t" ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ "sb %[p0], -3(%[s2]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s1]) \n\t" ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ "sb %[p0], -3(%[s1]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ } ++ } ++ ++ i += 4; ++ } ++ ++ while (i < count); ++} ++ ++void vp8_mbloop_filter_uvvertical_edge_mips ++( ++ unsigned char *s, ++ int p, ++ unsigned int flimit, ++ unsigned int limit, ++ unsigned int thresh, ++ int count ++) ++{ ++ uint32_t mask, hev; ++ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; ++ unsigned char *s1, *s2, *s3, *s4; ++ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; ++ ++ mask = 0; ++ hev = 0; ++ pm1 = 0; ++ p0 = 0; ++ p1 = 0; ++ p2 = 0; ++ p3 = 0; ++ p4 = 0; ++ p5 = 0; ++ p6 = 0; ++ ++ /* loop filter designed to work using chars so that we can make maximum use ++ * of 8 bit simd instructions. 
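++ *
++ * each filtered row is written back as six individual sb stores at
++ * byte offsets -3..2 around the edge column, again because the
++ * transposed output is not word aligned.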
++ */ ++ ++ /* apply filter on 4 pixesl at the same time */ ++ ++ s1 = s; ++ s2 = s + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ ++ /* prefetch data for load */ ++ prefetch_load_lf(s + 2 * p); ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, ++ thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s4]) \n\t" ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) \n\t" ++ "sb %[p0], -3(%[s4]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s3]) \n\t" ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ "sb %[p0], -3(%[s3]) \n\t" ++ : ++ : [p5] "r" 
(p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s2]) \n\t" ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ "sb %[p0], -3(%[s2]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s1]) \n\t" ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ "sb %[p0], -3(%[s1]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ } ++ } ++ ++ s1 = s4 + p; ++ s2 = s1 + p; ++ s3 = s2 + p; ++ s4 = s3 + p; ++ ++ /* load quad-byte vectors ++ * memory is 4 byte aligned ++ */ ++ p2 = *((uint32_t *)(s1 - 4)); ++ p6 = *((uint32_t *)(s1)); ++ p1 = *((uint32_t *)(s2 - 4)); ++ p5 = *((uint32_t *)(s2)); ++ p0 = *((uint32_t *)(s3 - 4)); ++ p4 = *((uint32_t *)(s3)); ++ pm1 = *((uint32_t *)(s4 - 4)); ++ p3 = *((uint32_t *)(s4)); ++ ++ /* transpose pm1, p0, p1, p2 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" ++ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" ++ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" ++ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" ++ ++ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" ++ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" ++ "append %[p1], %[sec3], 16 \n\t" ++ "append %[pm1], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* transpose p3, p4, p5, p6 */ ++ __asm__ __volatile__ ( ++ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" ++ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" ++ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" ++ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" ++ ++ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" ++ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" ++ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" ++ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" ++ ++ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" ++ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" ++ "append %[p5], %[sec3], 16 \n\t" ++ "append %[p3], %[sec4], 16 \n\t" ++ ++ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), ++ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), ++ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) ++ : ++ ); ++ ++ /* if (p1 - p4 == 0) and (p2 - p3 == 0) ++ * mask will be zero and 
filtering is not needed ++ */ ++ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) ++ { ++ ++ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); ++ ++ /* if mask == 0 do filtering is not needed */ ++ if (mask) ++ { ++ /* filtering */ ++ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); ++ ++ /* don't use transpose on output data ++ * because memory isn't aligned ++ */ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s4]) \n\t" ++ "sb %[p4], 1(%[s4]) \n\t" ++ "sb %[p3], 0(%[s4]) \n\t" ++ "sb %[p2], -1(%[s4]) \n\t" ++ "sb %[p1], -2(%[s4]) \n\t" ++ "sb %[p0], -3(%[s4]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s3]) \n\t" ++ "sb %[p4], 1(%[s3]) \n\t" ++ "sb %[p3], 0(%[s3]) \n\t" ++ "sb %[p2], -1(%[s3]) \n\t" ++ "sb %[p1], -2(%[s3]) \n\t" ++ "sb %[p0], -3(%[s3]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s2]) \n\t" ++ "sb %[p4], 1(%[s2]) \n\t" ++ "sb %[p3], 0(%[s2]) \n\t" ++ "sb %[p2], -1(%[s2]) \n\t" ++ "sb %[p1], -2(%[s2]) \n\t" ++ "sb %[p0], -3(%[s2]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ ++ __asm__ __volatile__ ( ++ "srl %[p5], %[p5], 8 \n\t" ++ "srl %[p4], %[p4], 8 \n\t" ++ "srl %[p3], %[p3], 8 \n\t" ++ "srl %[p2], %[p2], 8 \n\t" ++ "srl %[p1], %[p1], 8 \n\t" ++ "srl %[p0], %[p0], 8 \n\t" ++ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), ++ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) ++ : ++ ); ++ ++ __asm__ __volatile__ ( ++ "sb %[p5], 2(%[s1]) \n\t" ++ "sb %[p4], 1(%[s1]) \n\t" ++ "sb %[p3], 0(%[s1]) \n\t" ++ "sb %[p2], -1(%[s1]) \n\t" ++ "sb %[p1], -2(%[s1]) \n\t" ++ "sb %[p0], -3(%[s1]) \n\t" ++ : ++ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), ++ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) ++ ); ++ } ++ } ++} ++ ++/* Horizontal MB filtering */ ++void vp8_loop_filter_mbh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, ++ int y_stride, int uv_stride, loop_filter_info *lfi) ++{ ++ unsigned int thresh_vec, flimit_vec, limit_vec; ++ unsigned char thresh, flimit, limit, flimit_temp; ++ ++ /* use direct value instead pointers */ ++ limit = *(lfi->lim); ++ flimit_temp = *(lfi->mblim); ++ thresh = *(lfi->hev_thr); ++ flimit = flimit_temp; ++ ++ /* create quad-byte */ ++ __asm__ __volatile__ ( ++ "replv.qb %[thresh_vec], %[thresh] \n\t" ++ "replv.qb %[flimit_vec], %[flimit] \n\t" ++ "replv.qb %[limit_vec], %[limit] \n\t" ++ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) ++ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) ++ ); ++ ++ 
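++
++ /* replv.qb copies the low byte into all four byte lanes; a plain-C
++  * equivalent of the broadcast above would be
++  *
++  *     thresh_vec = thresh * 0x01010101U;
++  *
++  * so the cmpgu.lt.qb compares in the filters can test four pixels
++  * per instruction against the same threshold.
++  */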
vp8_mbloop_filter_horizontal_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ ++ if (u_ptr) ++ { ++ vp8_mbloop_filter_uvhorizontal_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++ } ++ ++ if (v_ptr) ++ { ++ vp8_mbloop_filter_uvhorizontal_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++ } ++} ++ ++ ++/* Vertical MB Filtering */ ++void vp8_loop_filter_mbv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, ++ int y_stride, int uv_stride, loop_filter_info *lfi) ++{ ++ unsigned int thresh_vec, flimit_vec, limit_vec; ++ unsigned char thresh, flimit, limit, flimit_temp; ++ ++ /* use direct value instead pointers */ ++ limit = *(lfi->lim); ++ flimit_temp = *(lfi->mblim); ++ thresh = *(lfi->hev_thr); ++ flimit = flimit_temp; ++ ++ /* create quad-byte */ ++ __asm__ __volatile__ ( ++ "replv.qb %[thresh_vec], %[thresh] \n\t" ++ "replv.qb %[flimit_vec], %[flimit] \n\t" ++ "replv.qb %[limit_vec], %[limit] \n\t" ++ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) ++ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) ++ ); ++ ++ vp8_mbloop_filter_vertical_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ ++ if (u_ptr) ++ vp8_mbloop_filter_uvvertical_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++ ++ if (v_ptr) ++ vp8_mbloop_filter_uvvertical_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++} ++ ++ ++/* Horizontal B Filtering */ ++void vp8_loop_filter_bh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, ++ int y_stride, int uv_stride, loop_filter_info *lfi) ++{ ++ unsigned int thresh_vec, flimit_vec, limit_vec; ++ unsigned char thresh, flimit, limit, flimit_temp; ++ ++ /* use direct value instead pointers */ ++ limit = *(lfi->lim); ++ flimit_temp = *(lfi->blim); ++ thresh = *(lfi->hev_thr); ++ flimit = flimit_temp; ++ ++ /* create quad-byte */ ++ __asm__ __volatile__ ( ++ "replv.qb %[thresh_vec], %[thresh] \n\t" ++ "replv.qb %[flimit_vec], %[flimit] \n\t" ++ "replv.qb %[limit_vec], %[limit] \n\t" ++ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) ++ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) ++ ); ++ ++ vp8_loop_filter_horizontal_edge_mips(y_ptr + 4 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ vp8_loop_filter_horizontal_edge_mips(y_ptr + 8 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ vp8_loop_filter_horizontal_edge_mips(y_ptr + 12 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ ++ if (u_ptr) ++ vp8_loop_filter_uvhorizontal_edge_mips(u_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++ ++ if (v_ptr) ++ vp8_loop_filter_uvhorizontal_edge_mips(v_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++} ++ ++ ++/* Vertical B Filtering */ ++void vp8_loop_filter_bv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, ++ int y_stride, int uv_stride, loop_filter_info *lfi) ++{ ++ unsigned int thresh_vec, flimit_vec, limit_vec; ++ unsigned char thresh, flimit, limit, flimit_temp; ++ ++ /* use direct value instead pointers */ ++ limit = *(lfi->lim); ++ flimit_temp = *(lfi->blim); ++ thresh = *(lfi->hev_thr); ++ flimit = flimit_temp; ++ ++ /* create quad-byte */ ++ __asm__ __volatile__ ( ++ "replv.qb %[thresh_vec], %[thresh] \n\t" ++ "replv.qb %[flimit_vec], %[flimit] \n\t" ++ "replv.qb %[limit_vec], 
%[limit] \n\t" ++ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) ++ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) ++ ); ++ ++ vp8_loop_filter_vertical_edge_mips(y_ptr + 4, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ vp8_loop_filter_vertical_edge_mips(y_ptr + 8, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ vp8_loop_filter_vertical_edge_mips(y_ptr + 12, y_stride, flimit_vec, limit_vec, thresh_vec, 16); ++ ++ if (u_ptr) ++ vp8_loop_filter_uvvertical_edge_mips(u_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++ ++ if (v_ptr) ++ vp8_loop_filter_uvvertical_edge_mips(v_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); ++} ++ ++#endif +diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c +new file mode 100644 +index 0000000..a5239a3 +--- /dev/null ++++ b/vp8/common/mips/dspr2/reconinter_dspr2.c +@@ -0,0 +1,121 @@ ++/* ++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++ * ++ * Use of this source code is governed by a BSD-style license ++ * that can be found in the LICENSE file in the root of the source ++ * tree. An additional intellectual property rights grant can be found ++ * in the file PATENTS. All contributing project authors may ++ * be found in the AUTHORS file in the root of the source tree. ++ */ ++ ++ ++#include "vpx_config.h" ++#include "vpx_rtcd.h" ++#include "vpx/vpx_integer.h" ++ ++#if HAVE_DSPR2 ++inline void prefetch_load_int(unsigned char *src) ++{ ++ __asm__ __volatile__ ( ++ "pref 0, 0(%[src]) \n\t" ++ : ++ : [src] "r" (src) ++ ); ++} ++ ++ ++__inline void vp8_copy_mem16x16_dspr2( ++ unsigned char *RESTRICT src, ++ int src_stride, ++ unsigned char *RESTRICT dst, ++ int dst_stride) ++{ ++ int r; ++ unsigned int a0, a1, a2, a3; ++ ++ for (r = 16; r--;) ++ { ++ /* load src data in cache memory */ ++ prefetch_load_int(src + src_stride); ++ ++ /* use unaligned memory load and store */ ++ __asm__ __volatile__ ( ++ "ulw %[a0], 0(%[src]) \n\t" ++ "ulw %[a1], 4(%[src]) \n\t" ++ "ulw %[a2], 8(%[src]) \n\t" ++ "ulw %[a3], 12(%[src]) \n\t" ++ "sw %[a0], 0(%[dst]) \n\t" ++ "sw %[a1], 4(%[dst]) \n\t" ++ "sw %[a2], 8(%[dst]) \n\t" ++ "sw %[a3], 12(%[dst]) \n\t" ++ : [a0] "=&r" (a0), [a1] "=&r" (a1), ++ [a2] "=&r" (a2), [a3] "=&r" (a3) ++ : [src] "r" (src), [dst] "r" (dst) ++ ); ++ ++ src += src_stride; ++ dst += dst_stride; ++ } ++} ++ ++ ++__inline void vp8_copy_mem8x8_dspr2( ++ unsigned char *RESTRICT src, ++ int src_stride, ++ unsigned char *RESTRICT dst, ++ int dst_stride) ++{ ++ int r; ++ unsigned int a0, a1; ++ ++ /* load src data in cache memory */ ++ prefetch_load_int(src + src_stride); ++ ++ for (r = 8; r--;) ++ { ++ /* use unaligned memory load and store */ ++ __asm__ __volatile__ ( ++ "ulw %[a0], 0(%[src]) \n\t" ++ "ulw %[a1], 4(%[src]) \n\t" ++ "sw %[a0], 0(%[dst]) \n\t" ++ "sw %[a1], 4(%[dst]) \n\t" ++ : [a0] "=&r" (a0), [a1] "=&r" (a1) ++ : [src] "r" (src), [dst] "r" (dst) ++ ); ++ ++ src += src_stride; ++ dst += dst_stride; ++ } ++} ++ ++ ++__inline void vp8_copy_mem8x4_dspr2( ++ unsigned char *RESTRICT src, ++ int src_stride, ++ unsigned char *RESTRICT dst, ++ int dst_stride) ++{ ++ int r; ++ unsigned int a0, a1; ++ ++ /* load src data in cache memory */ ++ prefetch_load_int(src + src_stride); ++ ++ for (r = 4; r--;) ++ { ++ /* use unaligned memory load and store */ ++ __asm__ __volatile__ ( ++ "ulw %[a0], 0(%[src]) \n\t" ++ "ulw %[a1], 4(%[src]) \n\t" ++ "sw %[a0], 0(%[dst]) \n\t" ++ "sw %[a1], 4(%[dst]) 
\n\t" ++ : [a0] "=&r" (a0), [a1] "=&r" (a1) ++ : [src] "r" (src), [dst] "r" (dst) ++ ); ++ ++ src += src_stride; ++ dst += dst_stride; ++ } ++} ++ ++#endif +diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h +index 2e282f6..766b4ea 100644 +--- a/vp8/common/onyx.h ++++ b/vp8/common/onyx.h +@@ -39,14 +39,6 @@ extern "C" + + typedef enum + { +- VP8_LAST_FLAG = 1, +- VP8_GOLD_FLAG = 2, +- VP8_ALT_FLAG = 4 +- } VP8_REFFRAME; +- +- +- typedef enum +- { + USAGE_STREAM_FROM_SERVER = 0x0, + USAGE_LOCAL_FILE_PLAYBACK = 0x1, + USAGE_CONSTRAINED_QUALITY = 0x2 +@@ -102,83 +94,101 @@ extern "C" + + typedef struct + { +- int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode +- int Width; // width of data passed to the compressor +- int Height; // height of data passed to the compressor ++ /* 4 versions of bitstream defined: ++ * 0 best quality/slowest decode, 3 lowest quality/fastest decode ++ */ ++ int Version; ++ int Width; ++ int Height; + struct vpx_rational timebase; +- int target_bandwidth; // bandwidth to be used in kilobits per second ++ unsigned int target_bandwidth; /* kilobits per second */ ++ ++ /* parameter used for applying pre processing blur: recommendation 0 */ ++ int noise_sensitivity; + +- int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 +- int Sharpness; // parameter used for sharpening output: recommendation 0: ++ /* parameter used for sharpening output: recommendation 0: */ ++ int Sharpness; + int cpu_used; + unsigned int rc_max_intra_bitrate_pct; + +- // mode -> +- //(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing +- // a television signal or feed from a live camera). ( speed setting controls how fast ) +- //(1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to +- // encode the output. ( speed setting controls how fast ) +- //(2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding +- // speed. The output is compressed at the highest possible quality. This option takes the longest +- // amount of time to encode. ( speed setting ignored ) +- //(3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding +- // pass. ( speed setting controls how fast ) +- //(4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding +- // pass to create the compressed output. ( speed setting controls how fast ) +- //(5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first +- // encoding pass to create the compressed output using the highest possible quality, and taking a +- // longer amount of time to encode.. ( speed setting ignored ) +- int Mode; // +- +- // Key Framing Operations +- int auto_key; // automatically detect cut scenes and set the keyframes +- int key_freq; // maximum distance to key frame. +- +- int allow_lag; // allow lagged compression (if 0 lagin frames is ignored) +- int lag_in_frames; // how many frames lag before we start encoding +- +- //---------------------------------------------------------------- +- // DATARATE CONTROL OPTIONS +- +- int end_usage; // vbr or cbr +- +- // buffer targeting aggressiveness ++ /* mode -> ++ *(0)=Realtime/Live Encoding. This mode is optimized for realtim ++ * encoding (for example, capturing a television signal or feed ++ * from a live camera). 
( speed setting controls how fast ) ++ *(1)=Good Quality Fast Encoding. The encoder balances quality with ++ * the amount of time it takes to encode the output. ( speed ++ * setting controls how fast ) ++ *(2)=One Pass - Best Quality. The encoder places priority on the ++ * quality of the output over encoding speed. The output is ++ * compressed at the highest possible quality. This option takes ++ * the longest amount of time to encode. ( speed setting ignored ++ * ) ++ *(3)=Two Pass - First Pass. The encoder generates a file of ++ * statistics for use in the second encoding pass. ( speed ++ * setting controls how fast ) ++ *(4)=Two Pass - Second Pass. The encoder uses the statistics that ++ * were generated in the first encoding pass to create the ++ * compressed output. ( speed setting controls how fast ) ++ *(5)=Two Pass - Second Pass Best. The encoder uses the statistics ++ * that were generated in the first encoding pass to create the ++ * compressed output using the highest possible quality, and ++ * taking a longer amount of time to encode.. ( speed setting ++ * ignored ) ++ */ ++ int Mode; ++ ++ /* Key Framing Operations */ ++ int auto_key; /* automatically detect cut scenes */ ++ int key_freq; /* maximum distance to key frame. */ ++ ++ /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */ ++ int allow_lag; ++ int lag_in_frames; /* how many frames lag before we start encoding */ ++ ++ /* ++ * DATARATE CONTROL OPTIONS ++ */ ++ ++ int end_usage; /* vbr or cbr */ ++ ++ /* buffer targeting aggressiveness */ + int under_shoot_pct; + int over_shoot_pct; + +- // buffering parameters +- int64_t starting_buffer_level; // in bytes ++ /* buffering parameters */ ++ int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; + +- int64_t starting_buffer_level_in_ms; // in milli-seconds ++ int64_t starting_buffer_level_in_ms; + int64_t optimal_buffer_level_in_ms; + int64_t maximum_buffer_size_in_ms; + +- // controlling quality ++ /* controlling quality */ + int fixed_q; + int worst_allowed_q; + int best_allowed_q; + int cq_level; + +- // allow internal resizing ( currently disabled in the build !!!!!) ++ /* allow internal resizing */ + int allow_spatial_resampling; + int resample_down_water_mark; + int resample_up_water_mark; + +- // allow internal frame rate alterations ++ /* allow internal frame rate alterations */ + int allow_df; + int drop_frames_water_mark; + +- // two pass datarate control +- int two_pass_vbrbias; // two pass datarate control tweaks ++ /* two pass datarate control */ ++ int two_pass_vbrbias; + int two_pass_vbrmin_section; + int two_pass_vbrmax_section; +- // END DATARATE CONTROL OPTIONS +- //---------------------------------------------------------------- + ++ /* ++ * END DATARATE CONTROL OPTIONS ++ */ + +- // these parameters aren't to be used in final build don't use!!! ++ /* these parameters aren't to be used in final build don't use!!! 
*/ + int play_alternate; + int alt_freq; + int alt_q; +@@ -186,26 +196,28 @@ extern "C" + int gold_q; + + +- int multi_threaded; // how many threads to run the encoder on +- int token_partitions; // how many token partitions to create for multi core decoding +- int encode_breakout; // early breakout encode threshold : for video conf recommend 800 ++ int multi_threaded; /* how many threads to run the encoder on */ ++ int token_partitions; /* how many token partitions to create */ ++ ++ /* early breakout threshold: for video conf recommend 800 */ ++ int encode_breakout; + +- unsigned int error_resilient_mode; // Bitfield defining the error +- // resiliency features to enable. Can provide +- // decodable frames after losses in previous +- // frames and decodable partitions after +- // losses in the same frame. ++ /* Bitfield defining the error resiliency features to enable. ++ * Can provide decodable frames after losses in previous ++ * frames and decodable partitions after losses in the same frame. ++ */ ++ unsigned int error_resilient_mode; + + int arnr_max_frames; +- int arnr_strength ; +- int arnr_type ; ++ int arnr_strength; ++ int arnr_type; + +- struct vpx_fixed_buf two_pass_stats_in; ++ struct vpx_fixed_buf two_pass_stats_in; + struct vpx_codec_pkt_list *output_pkt_list; + + vp8e_tuning tuning; + +- // Temporal scaling parameters ++ /* Temporal scaling parameters */ + unsigned int number_of_layers; + unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; + unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; +@@ -236,16 +248,14 @@ extern "C" + void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); + void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); + +-// receive a frames worth of data caller can assume that a copy of this frame is made +-// and not just a copy of the pointer.. 
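
A concrete configuration makes the temporal-scaling fields a few lines up easier to read. The following is a minimal sketch, not part of the patch: VP8_CONFIG is the struct from this header, while the helper name and the bitrate values are purely illustrative.

    #include "vp8/common/onyx.h"

    /* Illustrative two-layer temporal scaling setup. target_bitrate[] is
     * assumed cumulative per layer and rate_decimator[] divides the input
     * frame rate for that layer; the numbers are examples, not
     * recommendations. */
    static void example_two_layer_config(VP8_CONFIG *oxcf)
    {
        oxcf->number_of_layers  = 2;
        oxcf->target_bitrate[0] = 200;  /* kbit/s, base layer          */
        oxcf->target_bitrate[1] = 400;  /* kbit/s, base + enhancement  */
        oxcf->rate_decimator[0] = 2;    /* base layer: every 2nd frame */
        oxcf->rate_decimator[1] = 1;    /* enhancement: every frame    */
    }

With this setup, decoding only the base layer yields half the frame rate at 200 kbit/s, and both layers together restore the full rate.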
+ int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); + int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); + int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); + + int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags); + int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags); +- int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); +- int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); ++ int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); ++ int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); + int vp8_update_entropy(struct VP8_COMP* comp, int update); + int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); + int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols); +diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h +index c3215c0..5325bac 100644 +--- a/vp8/common/onyxc_int.h ++++ b/vp8/common/onyxc_int.h +@@ -42,7 +42,6 @@ typedef struct frame_contexts + vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; + vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + MV_CONTEXT mvc[2]; +- MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ + } FRAME_CONTEXT; + + typedef enum +@@ -59,12 +58,6 @@ typedef enum + RECON_CLAMP_NOTREQUIRED = 1 + } CLAMP_TYPE; + +-typedef enum +-{ +- SIXTAP = 0, +- BILINEAR = 1 +-} INTERPOLATIONFILTERTYPE; +- + typedef struct VP8Common + + { +@@ -94,6 +87,7 @@ typedef struct VP8Common + YV12_BUFFER_CONFIG post_proc_buffer; + YV12_BUFFER_CONFIG post_proc_buffer_int; + int post_proc_buffer_int_used; ++ unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ + #endif + + FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. 
*/ +@@ -114,7 +108,6 @@ typedef struct VP8Common + int full_pixel; + + int base_qindex; +- int last_kf_gf_q; /* Q used on the last GF or KF */ + + int y1dc_delta_q; + int y2dc_delta_q; +@@ -130,11 +123,11 @@ typedef struct VP8Common + + MODE_INFO *mip; /* Base of allocated array */ + MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ ++#if CONFIG_ERROR_CONCEALMENT + MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ + MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ ++#endif + +- +- INTERPOLATIONFILTERTYPE mcomp_filter_type; + LOOPFILTERTYPE filter_type; + + loop_filter_info_n lf_info; +@@ -158,14 +151,6 @@ typedef struct VP8Common + ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ + ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ + +- +- /* keyframe block modes are predicted by their above, left neighbors */ +- +- vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]; +- vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */ +- vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1]; +- +- + FRAME_CONTEXT lfc; /* last frame entropy */ + FRAME_CONTEXT fc; /* this frame entropy */ + +diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h +index 35a8b6e..fd7e051 100644 +--- a/vp8/common/onyxd.h ++++ b/vp8/common/onyxd.h +@@ -22,6 +22,7 @@ extern "C" + #include "ppflags.h" + #include "vpx_ports/mem.h" + #include "vpx/vpx_codec.h" ++#include "vpx/vp8.h" + + struct VP8D_COMP; + +@@ -35,12 +36,6 @@ extern "C" + int error_concealment; + int input_fragments; + } VP8D_CONFIG; +- typedef enum +- { +- VP8_LAST_FLAG = 1, +- VP8_GOLD_FLAG = 2, +- VP8_ALT_FLAG = 4 +- } VP8_REFFRAME; + + typedef enum + { +@@ -53,11 +48,13 @@ extern "C" + + int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); + +- int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp); ++ int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, ++ size_t size, const uint8_t *dest, ++ int64_t time_stamp); + int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); + +- vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); +- vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); ++ vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); ++ vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); + + struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf); + +diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c +index ccf6ad7..80fa530 100644 +--- a/vp8/common/postproc.c ++++ b/vp8/common/postproc.c +@@ -127,27 +127,24 @@ extern void vp8_blit_text(const char *msg, unsigned char *address, const int pit + extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); + /*********************************************************************************************************** + */ +-void vp8_post_proc_down_and_across_c ++void vp8_post_proc_down_and_across_mb_row_c + ( + unsigned char *src_ptr, + unsigned char *dst_ptr, + int src_pixels_per_line, + int dst_pixels_per_line, +- int rows, + int cols, +- int flimit ++ unsigned char *f, ++ int size + ) + { + unsigned char 
*p_src, *p_dst;
+     int row;
+     int col;
+-    int i;
+-    int v;
+-    int pitch = src_pixels_per_line;
+-    unsigned char d[8];
+-    (void)dst_pixels_per_line;
++    unsigned char v;
++    unsigned char d[4];
+
+-    for (row = 0; row < rows; row++)
++    for (row = 0; row < size; row++)
+     {
+         /* post_proc_down for one row */
+         p_src = src_ptr;
+@@ -155,20 +152,23 @@ void vp8_post_proc_down_and_across_c
+
+         for (col = 0; col < cols; col++)
+         {
++            unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line];
++            unsigned char p_above1 = p_src[col - src_pixels_per_line];
++            unsigned char p_below1 = p_src[col + src_pixels_per_line];
++            unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line];
+
+-            int kernel = 4;
+-            int v = p_src[col];
++            v = p_src[col];
+
+-            for (i = -2; i <= 2; i++)
++            if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col])
++                && (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col]))
+             {
+-                if (abs(v - p_src[col+i*pitch]) > flimit)
+-                    goto down_skip_convolve;
+-
+-                kernel += kernel5[2+i] * p_src[col+i*pitch];
++                unsigned char k1, k2, k3;
++                k1 = (p_above2 + p_above1 + 1) >> 1;
++                k2 = (p_below2 + p_below1 + 1) >> 1;
++                k3 = (k1 + k2 + 1) >> 1;
++                v = (k3 + v + 1) >> 1;
+             }
+
+-            v = (kernel >> 3);
+-            down_skip_convolve:
+             p_dst[col] = v;
+         }
+
+@@ -176,45 +176,38 @@ void vp8_post_proc_down_and_across_c
+         p_src = dst_ptr;
+         p_dst = dst_ptr;
+
+-        for (i = -8; i<0; i++)
+-            p_src[i]=p_src[0];
+-
+-        for (i = cols; i<cols+8; i++)
+-            p_src[i]=p_src[cols-1];
++        p_src[-2] = p_src[-1] = p_src[0];
++        p_src[cols] = p_src[cols + 1] = p_src[cols - 1];
+
+         for (col = 0; col < cols; col++)
+         {
+-            int kernel = 4;
+-            int v = p_src[col];
++            v = p_src[col];
+
+-            d[col&7] = v;
+-
+-            for (i = -2; i <= 2; i++)
++            if ((abs(v - p_src[col - 2]) < f[col]) && (abs(v - p_src[col - 1]) < f[col])
++                && (abs(v - p_src[col + 1]) < f[col]) && (abs(v - p_src[col + 2]) < f[col]))
+             {
+-                if (abs(v - p_src[col+i]) > flimit)
+-                    goto across_skip_convolve;
+-
+-                kernel += kernel5[2+i] * p_src[col+i];
++                unsigned char k1, k2, k3;
++                k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1;
++                k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1;
++                k3 = (k1 + k2 + 1) >> 1;
++                v = (k3 + v + 1) >> 1;
+             }
+
+-            d[col&7] = (kernel >> 3);
+-            across_skip_convolve:
++            d[col & 3] = v;
+
+             if (col >= 2)
+-                p_dst[col-2] = d[(col-2)&7];
++                p_dst[col - 2] = d[(col - 2) & 3];
+         }
+
+         /* handle the last two pixels */
+-        p_dst[col-2] = d[(col-2)&7];
+-        p_dst[col-1] = d[(col-1)&7];
+-
++        p_dst[col - 2] = d[(col - 2) & 3];
++        p_dst[col - 1] = d[(col - 1) & 3];
+
+         /* next row */
+-        src_ptr += pitch;
+-        dst_ptr += pitch;
++        src_ptr += src_pixels_per_line;
++        dst_ptr += dst_pixels_per_line;
+     }
+ }
+
+@@ -240,8 +233,9 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co
+     for (i = -8; i<0; i++)
+         s[i]=s[0];
+
+-    // 17 avoids valgrind warning - we buffer values in c in d
+-    // and only write them when we've read 8 ahead...
++    /* 17 avoids valgrind warning - we buffer values in c in d
++     * and only write them when we've read 8 ahead...
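
Before the hunk continues, it is worth spelling out the arithmetic of the new deblocking kernel above. The chain of rounded half-sums is an add-and-shift evaluation of the old 5-tap (1,1,4,1,1)/8 filter: k3 approximates (p2 + p1 + n1 + n2) / 4, so the output is, up to rounding, (p2 + p1 + 4*v + n1 + n2) / 8, with no multiplies. A self-contained sketch of one tap, not taken from the patch:

    #include <stdlib.h>

    /* One tap of the new deblocking kernel, lifted out of the loops above.
     * p2/p1 and n1/n2 are the two neighbours on either side of v; f is the
     * per-column flatness threshold from the limits buffer. */
    static unsigned char pp_filter_tap(unsigned char p2, unsigned char p1,
                                       unsigned char v,
                                       unsigned char n1, unsigned char n2,
                                       unsigned char f)
    {
        if (abs(v - p2) < f && abs(v - p1) < f &&
            abs(v - n1) < f && abs(v - n2) < f)
        {
            unsigned char k1 = (p2 + p1 + 1) >> 1;  /* mean of one side   */
            unsigned char k2 = (n2 + n1 + 1) >> 1;  /* mean of the other  */
            unsigned char k3 = (k1 + k2 + 1) >> 1;  /* mean of neighbours */
            v = (k3 + v + 1) >> 1;                  /* blend with centre  */
        }
        return v;
    }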
++ */ + for (i = cols; iy_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); +- vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); +- vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); +- +- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); +- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); +- ++ vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, ++ post->y_width, q2mbl(q)); ++ vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, ++ post->y_width, q2mbl(q)); + } + +-void vp8_deblock(YV12_BUFFER_CONFIG *source, ++void vp8_deblock(VP8_COMMON *cm, ++ YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *post, + int q, + int low_var_thresh, +@@ -351,16 +332,64 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source, + { + double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; + int ppl = (int)(level + .5); ++ ++ const MODE_INFO *mode_info_context = cm->mi; ++ int mbr, mbc; ++ ++ /* The pixel thresholds are adjusted according to if or not the macroblock ++ * is a skipped block. */ ++ unsigned char *ylimits = cm->pp_limits_buffer; ++ unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols; + (void) low_var_thresh; + (void) flag; + +- vp8_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); +- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); +- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); ++ if (ppl > 0) ++ { ++ for (mbr = 0; mbr < cm->mb_rows; mbr++) ++ { ++ unsigned char *ylptr = ylimits; ++ unsigned char *uvlptr = uvlimits; ++ for (mbc = 0; mbc < cm->mb_cols; mbc++) ++ { ++ unsigned char mb_ppl; ++ ++ if (mode_info_context->mbmi.mb_skip_coeff) ++ mb_ppl = (unsigned char)ppl >> 1; ++ else ++ mb_ppl = (unsigned char)ppl; ++ ++ vpx_memset(ylptr, mb_ppl, 16); ++ vpx_memset(uvlptr, mb_ppl, 8); ++ ++ ylptr += 16; ++ uvlptr += 8; ++ mode_info_context++; ++ } ++ mode_info_context++; ++ ++ vp8_post_proc_down_and_across_mb_row( ++ source->y_buffer + 16 * mbr * source->y_stride, ++ post->y_buffer + 16 * mbr * post->y_stride, source->y_stride, ++ post->y_stride, source->y_width, ylimits, 16); ++ ++ vp8_post_proc_down_and_across_mb_row( ++ source->u_buffer + 8 * mbr * source->uv_stride, ++ post->u_buffer + 8 * mbr * post->uv_stride, source->uv_stride, ++ post->uv_stride, source->uv_width, uvlimits, 8); ++ vp8_post_proc_down_and_across_mb_row( ++ source->v_buffer + 8 * mbr * source->uv_stride, ++ post->v_buffer + 8 * mbr * post->uv_stride, source->uv_stride, ++ post->uv_stride, source->uv_width, uvlimits, 8); ++ } ++ } else ++ { ++ vp8_yv12_copy_frame(source, post); ++ } + } + + #if !(CONFIG_TEMPORAL_DENOISING) +-void vp8_de_noise(YV12_BUFFER_CONFIG *source, ++void vp8_de_noise(VP8_COMMON *cm, ++ YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *post, + int q, + int low_var_thresh, +@@ -368,33 +397,33 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source, + { + double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; + int ppl = (int)(level + .5); ++ int 
mb_rows = source->y_width >> 4; ++ int mb_cols = source->y_height >> 4; ++ unsigned char *limits = cm->pp_limits_buffer;; ++ int mbr, mbc; + (void) post; + (void) low_var_thresh; + (void) flag; + +- vp8_post_proc_down_and_across( +- source->y_buffer + 2 * source->y_stride + 2, +- source->y_buffer + 2 * source->y_stride + 2, +- source->y_stride, +- source->y_stride, +- source->y_height - 4, +- source->y_width - 4, +- ppl); +- vp8_post_proc_down_and_across( +- source->u_buffer + 2 * source->uv_stride + 2, +- source->u_buffer + 2 * source->uv_stride + 2, +- source->uv_stride, +- source->uv_stride, +- source->uv_height - 4, +- source->uv_width - 4, ppl); +- vp8_post_proc_down_and_across( +- source->v_buffer + 2 * source->uv_stride + 2, +- source->v_buffer + 2 * source->uv_stride + 2, +- source->uv_stride, +- source->uv_stride, +- source->uv_height - 4, +- source->uv_width - 4, ppl); ++ vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols); + ++ /* TODO: The original code don't filter the 2 outer rows and columns. */ ++ for (mbr = 0; mbr < mb_rows; mbr++) ++ { ++ vp8_post_proc_down_and_across_mb_row( ++ source->y_buffer + 16 * mbr * source->y_stride, ++ source->y_buffer + 16 * mbr * source->y_stride, ++ source->y_stride, source->y_stride, source->y_width, limits, 16); ++ ++ vp8_post_proc_down_and_across_mb_row( ++ source->u_buffer + 8 * mbr * source->uv_stride, ++ source->u_buffer + 8 * mbr * source->uv_stride, ++ source->uv_stride, source->uv_stride, source->uv_width, limits, 8); ++ vp8_post_proc_down_and_across_mb_row( ++ source->v_buffer + 8 * mbr * source->uv_stride, ++ source->v_buffer + 8 * mbr * source->uv_stride, ++ source->uv_stride, source->uv_stride, source->uv_width, limits, 8); ++ } + } + #endif + +@@ -441,7 +470,7 @@ static void fillrd(struct postproc_state *state, int q, int a) + + } + +- for (next = next; next < 256; next++) ++ for (; next < 256; next++) + char_dist[next] = 0; + + } +@@ -731,21 +760,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t + + oci->post_proc_buffer_int_used = 1; + +- // insure that postproc is set to all 0's so that post proc +- // doesn't pull random data in from edge ++ /* insure that postproc is set to all 0's so that post proc ++ * doesn't pull random data in from edge ++ */ + vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); + + } + } + +-#if ARCH_X86||ARCH_X86_64 +- vpx_reset_mmx_state(); +-#endif ++ vp8_clear_system_state(); + + if ((flags & VP8D_MFQE) && + oci->postproc_state.last_frame_valid && + oci->current_video_frame >= 2 && +- oci->base_qindex - oci->postproc_state.last_base_qindex >= 10) ++ oci->postproc_state.last_base_qindex < 60 && ++ oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) + { + vp8_multiframe_quality_enhance(oci); + if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) && +@@ -754,12 +783,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t + vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int); + if (flags & VP8D_DEMACROBLOCK) + { +- vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer, ++ vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, + q + (deblock_level - 5) * 10, 1, 0); ++ vp8_de_mblock(&oci->post_proc_buffer, ++ q + (deblock_level - 5) * 10); + } + else if (flags & VP8D_DEBLOCK) + { +- vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer, ++ vp8_deblock(oci, &oci->post_proc_buffer_int, 
&oci->post_proc_buffer, + q, 1, 0); + } + } +@@ -768,13 +799,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t + } + else if (flags & VP8D_DEMACROBLOCK) + { +- vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer, +- q + (deblock_level - 5) * 10, 1, 0); ++ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, ++ q + (deblock_level - 5) * 10, 1, 0); ++ vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); ++ + oci->postproc_state.last_base_qindex = oci->base_qindex; + } + else if (flags & VP8D_DEBLOCK) + { +- vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer, ++ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, + q, 1, 0); + oci->postproc_state.last_base_qindex = oci->base_qindex; + } +diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h +index 6ac788c..495a2c9 100644 +--- a/vp8/common/postproc.h ++++ b/vp8/common/postproc.h +@@ -30,13 +30,15 @@ int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, + vp8_ppflags_t *flags); + + +-void vp8_de_noise(YV12_BUFFER_CONFIG *source, ++void vp8_de_noise(struct VP8Common *oci, ++ YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *post, + int q, + int low_var_thresh, + int flag); + +-void vp8_deblock(YV12_BUFFER_CONFIG *source, ++void vp8_deblock(struct VP8Common *oci, ++ YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *post, + int q, + int low_var_thresh, +diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c +index 7046a63..87f4cac 100644 +--- a/vp8/common/ppc/systemdependent.c ++++ b/vp8/common/ppc/systemdependent.c +@@ -19,14 +19,14 @@ void (*vp8_short_idct4x4)(short *input, short *output, int pitch); + void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch); + void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch); + +-extern void (*vp8_post_proc_down_and_across)( ++extern void (*vp8_post_proc_down_and_across_mb_row)( + unsigned char *src_ptr, + unsigned char *dst_ptr, + int src_pixels_per_line, + int dst_pixels_per_line, +- int rows, + int cols, +- int flimit ++ unsigned char *f, ++ int size + ); + + extern void (*vp8_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit); +@@ -34,15 +34,15 @@ extern void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int + extern void (*vp8_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit); + extern void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit); + +-extern void vp8_post_proc_down_and_across_c ++extern void vp8_post_proc_down_and_across_mb_row_c + ( + unsigned char *src_ptr, + unsigned char *dst_ptr, + int src_pixels_per_line, + int dst_pixels_per_line, +- int rows, + int cols, +- int flimit ++ unsigned char *f, ++ int size + ); + void vp8_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a); + +@@ -158,7 +158,7 @@ void vp8_machine_specific_config(void) + vp8_lf_mbhsimple = loop_filter_mbhs_ppc; + vp8_lf_bhsimple = loop_filter_bhs_ppc; + +- vp8_post_proc_down_and_across = vp8_post_proc_down_and_across_c; ++ vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; + vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; + vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; + vp8_plane_add_noise = vp8_plane_add_noise_c; +diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c +index e9833fe..05f9210 100644 +--- a/vp8/common/quant_common.c 
++++ b/vp8/common/quant_common.c
+@@ -109,7 +109,10 @@ int vp8_ac2quant(int QIndex, int Delta)
+     else if (QIndex < 0)
+         QIndex = 0;
+
+-    retval = (ac_qlookup[ QIndex ] * 155) / 100;
++    /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
++     * The smallest precision for that is '(x*6349) >> 12' but 16 is a good
++     * word size. */
++    retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
+
+     if (retval < 8)
+         retval = 8;
+diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
+index dcc35ec..7bb8d0a 100644
+--- a/vp8/common/reconintra4x4.c
++++ b/vp8/common/reconintra4x4.c
+@@ -13,11 +13,11 @@
+ #include "vpx_rtcd.h"
+ #include "blockd.h"
+
+-void vp8_intra4x4_predict_d_c(unsigned char *Above,
+-                              unsigned char *yleft, int left_stride,
+-                              int b_mode,
+-                              unsigned char *dst, int dst_stride,
+-                              unsigned char top_left)
++void vp8_intra4x4_predict_c(unsigned char *Above,
++                            unsigned char *yleft, int left_stride,
++                            B_PREDICTION_MODE b_mode,
++                            unsigned char *dst, int dst_stride,
++                            unsigned char top_left)
+ {
+     int i, r, c;
+
+@@ -290,19 +290,8 @@ void vp8_intra4x4_predict_d_c(unsigned char *Above,
+     }
+     break;
+
++    default:
++        break;
+
+     }
+ }
+-
+-void vp8_intra4x4_predict_c(unsigned char *src, int src_stride,
+-                            int b_mode,
+-                            unsigned char *dst, int dst_stride)
+-{
+-    unsigned char *Above = src - src_stride;
+-
+-    vp8_intra4x4_predict_d_c(Above,
+-                             src - 1, src_stride,
+-                             b_mode,
+-                             dst, dst_stride,
+-                             Above[-1]);
+-}
+diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
+index 232640d..01dad46 100644
+--- a/vp8/common/rtcd.c
++++ b/vp8/common/rtcd.c
+@@ -10,3 +10,96 @@
+ #include "vpx_config.h"
+ #define RTCD_C
+ #include "vpx_rtcd.h"
++
++#if CONFIG_MULTITHREAD && defined(_WIN32)
++#include <windows.h>
++#include <stdlib.h>
++static void once(void (*func)(void))
++{
++    static CRITICAL_SECTION *lock;
++    static LONG waiters;
++    static int done;
++    void *lock_ptr = &lock;
++
++    /* If the initialization is complete, return early. This isn't just an
++     * optimization, it prevents races on the destruction of the global
++     * lock.
++     */
++    if(done)
++        return;
++
++    InterlockedIncrement(&waiters);
++
++    /* Get a lock. We create one and try to make it the one-true-lock,
++     * throwing it away if we lost the race.
++     */
++
++    {
++        /* Scope to protect access to new_lock */
++        CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
++        InitializeCriticalSection(new_lock);
++        if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
++        {
++            DeleteCriticalSection(new_lock);
++            free(new_lock);
++        }
++    }
++
++    /* At this point, we have a lock that can be synchronized on. We don't
++     * care which thread actually performed the allocation.
++     */
++
++    EnterCriticalSection(lock);
++
++    if (!done)
++    {
++        func();
++        done = 1;
++    }
++
++    LeaveCriticalSection(lock);
++
++    /* Last one out should free resources. The destructed objects are
++     * protected by checking if(done) above.
++     */
++    if(!InterlockedDecrement(&waiters))
++    {
++        DeleteCriticalSection(lock);
++        free(lock);
++        lock = NULL;
++    }
++}
++
++
++#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
++#include <pthread.h>
++static void once(void (*func)(void))
++{
++    static pthread_once_t lock = PTHREAD_ONCE_INIT;
++    pthread_once(&lock, func);
++}
++
++
++#else
++/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
++ * so as long as your platform provides atomic loads/stores of pointers
++ * no synchronization is strictly necessary.
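
For contrast with the hand-rolled Win32 path above, the pthread branch delegates everything to the standard primitive. A minimal usage sketch of that pattern follows; the names are hypothetical and not part of the patch.

    #include <pthread.h>
    #include <stdio.h>

    /* pthread_once guarantees init_once() runs exactly once no matter how
     * many threads race into entry(), which is exactly the property the
     * rtcd wrapper needs. */
    static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;

    static void init_once(void)
    {
        puts("dispatch table initialised exactly once");
    }

    void entry(void)
    {
        pthread_once(&once_ctrl, init_once);
    }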
++ */ ++ ++static void once(void (*func)(void)) ++{ ++ static int done; ++ ++ if(!done) ++ { ++ func(); ++ done = 1; ++ } ++} ++#endif ++ ++ ++void vpx_rtcd() ++{ ++ once(setup_rtcd_internal); ++} +diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh +index 33bf08b..0f950f8 100644 +--- a/vp8/common/rtcd_defs.sh ++++ b/vp8/common/rtcd_defs.sh +@@ -1,5 +1,7 @@ + common_forward_decls() { + cat < + #include + #include "vpx_config.h" + #include "vpx/vpx_integer.h" + +-static +-unsigned int sad_mx_n_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- int max_sad, +- int m, +- int n) ++static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int max_sad, int m, int n) + { +- + int r, c; + unsigned int sad = 0; + +@@ -48,298 +42,211 @@ unsigned int sad_mx_n_c( + * implementations of these functions are not required to check it. + */ + +-unsigned int vp8_sad16x16_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- int max_sad) ++unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int max_sad) + { +- + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16); + } + +- +-unsigned int vp8_sad8x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- int max_sad) ++unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int max_sad) + { +- + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8); + } + +- +-unsigned int vp8_sad16x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- int max_sad) ++unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int max_sad) + { +- + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8); + + } + +- +-unsigned int vp8_sad8x16_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- int max_sad) ++unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int max_sad) + { +- + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16); + } + +- +-unsigned int vp8_sad4x4_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- int max_sad) ++unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int max_sad) + { +- + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4); + } + +-void vp8_sad16x16x3_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); ++ sad_array[0] 
= vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + } + +-void vp8_sad16x16x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned short *sad_array +-) ++void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned short *sad_array) + { +- sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); +- sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); +- sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); +- sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); +- sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); +- sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); ++ sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); ++ sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); ++ sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); ++ sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); ++ sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); ++ sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); + } + +-void vp8_sad16x8x3_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + } + +-void vp8_sad16x8x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned short *sad_array +-) ++void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned short *sad_array) + { +- sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 
0x7fffffff); +- sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); +- sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); +- sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); +- sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); +- sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); +- sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); ++ sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); ++ sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); ++ sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); ++ sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); ++ sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); ++ sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); + } + +-void vp8_sad8x8x3_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + } + +-void vp8_sad8x8x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned short *sad_array +-) ++void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned short *sad_array) + { +- sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); +- sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); +- sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); +- sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); +- sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); +- sad_array[7] = (unsigned 
short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); ++ sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); ++ sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); ++ sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); ++ sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); ++ sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); ++ sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); + } + +-void vp8_sad8x16x3_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + } + +-void vp8_sad8x16x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned short *sad_array +-) ++void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned short *sad_array) + { +- sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); +- sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); +- sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); +- sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); +- sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); +- sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); ++ sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); ++ sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); ++ sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); ++ sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 
UINT_MAX); ++ sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); ++ sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); + } + +-void vp8_sad4x4x3_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + } + +-void vp8_sad4x4x8_c( +- const unsigned char *src_ptr, +- int src_stride, +- const unsigned char *ref_ptr, +- int ref_stride, +- unsigned short *sad_array +-) ++void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char *ref_ptr, int ref_stride, ++ unsigned short *sad_array) + { +- sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); +- sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); +- sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); +- sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); +- sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); +- sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); +- sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); +- sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); ++ sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); ++ sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); ++ sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); ++ sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); ++ sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); ++ sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); ++ sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); ++ sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); + } + +-void vp8_sad16x16x4d_c( +- const unsigned char *src_ptr, +- int src_stride, +- unsigned char *ref_ptr[], +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char * const ref_ptr[], int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 
0x7fffffff); +- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); +- sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); ++ sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); + } + +-void vp8_sad16x8x4d_c( +- const unsigned char *src_ptr, +- int src_stride, +- unsigned char *ref_ptr[], +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char * const ref_ptr[], int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); +- sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); ++ sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); + } + +-void vp8_sad8x8x4d_c( +- const unsigned char *src_ptr, +- int src_stride, +- unsigned char *ref_ptr[], +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char * const ref_ptr[], int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); +- sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); ++ sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); + } + +-void vp8_sad8x16x4d_c( +- const unsigned char *src_ptr, +- int src_stride, +- unsigned char *ref_ptr[], +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char * const ref_ptr[], int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); +- sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); 
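
The max_sad argument these wrappers now pass as UINT_MAX exists so motion search can abandon a SAD computation once it exceeds the best cost found so far; UINT_MAX simply disables the bail-out. A sketch of that pattern, restating sad_mx_n_c rather than quoting it, with the same caveat as the comment near the top of this file (the value returned after an early exit is unspecified):

    #include <limits.h>
    #include <stdlib.h>

    /* Row-wise SAD with early termination: once the running sum exceeds
     * max_sad the caller will reject this candidate anyway, so stop. */
    static unsigned int sad_8x8_early_exit(const unsigned char *src,
                                           int src_stride,
                                           const unsigned char *ref,
                                           int ref_stride,
                                           unsigned int max_sad)
    {
        unsigned int sad = 0;
        int r, c;

        for (r = 0; r < 8; r++)
        {
            for (c = 0; c < 8; c++)
                sad += abs(src[c] - ref[c]);

            if (sad > max_sad)
                break;  /* cannot beat the current best match */

            src += src_stride;
            ref += ref_stride;
        }

        return sad;
    }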
++ sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); + } + +-void vp8_sad4x4x4d_c( +- const unsigned char *src_ptr, +- int src_stride, +- unsigned char *ref_ptr[], +- int ref_stride, +- unsigned int *sad_array +-) ++void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, ++ const unsigned char * const ref_ptr[], int ref_stride, ++ unsigned int *sad_array) + { +- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); +- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); +- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); +- sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); ++ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); ++ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); ++ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); ++ sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); + } + + /* Copy 2 macroblocks to a buffer */ +-void vp8_copy32xn_c( +- unsigned char *src_ptr, +- int src_stride, +- unsigned char *dst_ptr, +- int dst_stride, +- int height) ++void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride, ++ unsigned char *dst_ptr, int dst_stride, ++ int height) + { + int r; + +diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c +index 7976e25..60afe51 100644 +--- a/vp8/common/setupintrarecon.c ++++ b/vp8/common/setupintrarecon.c +@@ -30,3 +30,10 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) + ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; + + } ++ ++void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) ++{ ++ vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); ++ vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); ++ vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); ++} +diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h +index 5264fd0..e515c3a 100644 +--- a/vp8/common/setupintrarecon.h ++++ b/vp8/common/setupintrarecon.h +@@ -11,3 +11,23 @@ + + #include "vpx_scale/yv12config.h" + extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); ++extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); ++ ++static ++void setup_intra_recon_left(unsigned char *y_buffer, ++ unsigned char *u_buffer, ++ unsigned char *v_buffer, ++ int y_stride, ++ int uv_stride) ++{ ++ int i; ++ ++ for (i = 0; i < 16; i++) ++ y_buffer[y_stride *i] = (unsigned char) 129; ++ ++ for (i = 0; i < 8; i++) ++ u_buffer[uv_stride *i] = (unsigned char) 129; ++ ++ for (i = 0; i < 8; i++) ++ v_buffer[uv_stride *i] = (unsigned char) 129; ++} +diff --git a/vp8/common/variance.h b/vp8/common/variance.h +index b77aa28..01193b8 100644 +--- a/vp8/common/variance.h ++++ b/vp8/common/variance.h +@@ -12,14 +12,14 @@ + #ifndef VARIANCE_H + #define VARIANCE_H + +-typedef unsigned int(*vp8_sad_fn_t) +- ( ++#include "vpx_config.h" ++ ++typedef unsigned int(*vp8_sad_fn_t)( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, +- int max_sad +- ); ++ unsigned int max_sad); + + typedef void (*vp8_copy32xn_fn_t)( + const unsigned char *src_ptr, +@@ -48,7 +48,7 @@ typedef void (*vp8_sad_multi_d_fn_t) + ( + const unsigned char *src_ptr, + int source_stride, +- unsigned char *ref_ptr[4], ++ const unsigned char * const ref_ptr[], + int 
ref_stride, + unsigned int *sad_array + ); +diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c +index 996404d..da08aff 100644 +--- a/vp8/common/variance_c.c ++++ b/vp8/common/variance_c.c +@@ -205,14 +205,14 @@ static void var_filter_block2d_bil_first_pass + { + for (j = 0; j < output_width; j++) + { +- // Apply bilinear filter ++ /* Apply bilinear filter */ + output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[pixel_step] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; + src_ptr++; + } + +- // Next row... ++ /* Next row... */ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +@@ -264,15 +264,15 @@ static void var_filter_block2d_bil_second_pass + { + for (j = 0; j < output_width; j++) + { +- // Apply filter +- Temp = ((int)src_ptr[0] * vp8_filter[0]) + ++ /* Apply filter */ ++ Temp = ((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[pixel_step] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2); + output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); + src_ptr++; + } + +- // Next row... ++ /* Next row... */ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +@@ -292,15 +292,15 @@ unsigned int vp8_sub_pixel_variance4x4_c + { + unsigned char temp2[20*16]; + const short *HFilter, *VFilter; +- unsigned short FData3[5*4]; // Temp data bufffer used in filtering ++ unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + +- // First filter 1d Horizontal ++ /* First filter 1d Horizontal */ + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); + +- // Now filter Verticaly ++ /* Now filter Verticaly */ + var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); + + return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); +@@ -318,7 +318,7 @@ unsigned int vp8_sub_pixel_variance8x8_c + unsigned int *sse + ) + { +- unsigned short FData3[9*8]; // Temp data bufffer used in filtering ++ unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ + unsigned char temp2[20*16]; + const short *HFilter, *VFilter; + +@@ -342,7 +342,7 @@ unsigned int vp8_sub_pixel_variance16x16_c + unsigned int *sse + ) + { +- unsigned short FData3[17*16]; // Temp data bufffer used in filtering ++ unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ + unsigned char temp2[20*16]; + const short *HFilter, *VFilter; + +@@ -418,7 +418,7 @@ unsigned int vp8_sub_pixel_variance16x8_c + unsigned int *sse + ) + { +- unsigned short FData3[16*9]; // Temp data bufffer used in filtering ++ unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ + unsigned char temp2[20*16]; + const short *HFilter, *VFilter; + +@@ -442,7 +442,7 @@ unsigned int vp8_sub_pixel_variance8x16_c + unsigned int *sse + ) + { +- unsigned short FData3[9*16]; // Temp data bufffer used in filtering ++ unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ + unsigned char temp2[20*16]; + const short *HFilter, *VFilter; + +diff --git a/vp8/common/vp8_entropymodedata.h b/vp8/common/vp8_entropymodedata.h +old mode 100755 +new mode 100644 +diff --git a/vp8/common/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm +index de9eba8..4e551f0 100644 +--- a/vp8/common/x86/dequantize_mmx.asm ++++ b/vp8/common/x86/dequantize_mmx.asm +@@ -13,7 +13,7 @@ + + + ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) +-global 
sym(vp8_dequantize_b_impl_mmx) ++global sym(vp8_dequantize_b_impl_mmx) PRIVATE + sym(vp8_dequantize_b_impl_mmx): + push rbp + mov rbp, rsp +@@ -55,7 +55,7 @@ sym(vp8_dequantize_b_impl_mmx): + ;short *dq, 1 + ;unsigned char *dest, 2 + ;int stride) 3 +-global sym(vp8_dequant_idct_add_mmx) ++global sym(vp8_dequant_idct_add_mmx) PRIVATE + sym(vp8_dequant_idct_add_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm +index 0c9c205..96fa2c6 100644 +--- a/vp8/common/x86/idctllm_mmx.asm ++++ b/vp8/common/x86/idctllm_mmx.asm +@@ -34,7 +34,7 @@ + + ;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, + ;int pitch, unsigned char *dest,int stride) +-global sym(vp8_short_idct4x4llm_mmx) ++global sym(vp8_short_idct4x4llm_mmx) PRIVATE + sym(vp8_short_idct4x4llm_mmx): + push rbp + mov rbp, rsp +@@ -224,7 +224,7 @@ sym(vp8_short_idct4x4llm_mmx): + ;int pred_stride, + ;unsigned char *dst_ptr, + ;int stride) +-global sym(vp8_dc_only_idct_add_mmx) ++global sym(vp8_dc_only_idct_add_mmx) PRIVATE + sym(vp8_dc_only_idct_add_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/idctllm_mmx_test.cc b/vp8/common/x86/idctllm_mmx_test.cc +deleted file mode 100755 +index 8c11533..0000000 +--- a/vp8/common/x86/idctllm_mmx_test.cc ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* +- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. +- * +- * Use of this source code is governed by a BSD-style license +- * that can be found in the LICENSE file in the root of the source +- * tree. An additional intellectual property rights grant can be found +- * in the file PATENTS. All contributing project authors may +- * be found in the AUTHORS file in the root of the source tree. +- */ +- +- +- extern "C" { +- void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred_ptr, +- int pred_stride, unsigned char *dst_ptr, +- int dst_stride); +-} +- +-#include "vp8/common/idctllm_test.h" +- +-namespace +-{ +- +-INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, +- ::testing::Values(vp8_short_idct4x4llm_mmx)); +- +-} // namespace +- +-int main(int argc, char **argv) { +- ::testing::InitGoogleTest(&argc, argv); +- return RUN_ALL_TESTS(); +-} +diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm +index abeb0b6..bf8e2c4 100644 +--- a/vp8/common/x86/idctllm_sse2.asm ++++ b/vp8/common/x86/idctllm_sse2.asm +@@ -19,7 +19,7 @@ + ; int dst_stride - 3 + ; ) + +-global sym(vp8_idct_dequant_0_2x_sse2) ++global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE + sym(vp8_idct_dequant_0_2x_sse2): + push rbp + mov rbp, rsp +@@ -101,7 +101,7 @@ sym(vp8_idct_dequant_0_2x_sse2): + ; unsigned char *dst - 2 + ; int dst_stride - 3 + ; ) +-global sym(vp8_idct_dequant_full_2x_sse2) ++global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE + sym(vp8_idct_dequant_full_2x_sse2): + push rbp + mov rbp, rsp +@@ -358,7 +358,7 @@ sym(vp8_idct_dequant_full_2x_sse2): + ; int dst_stride - 3 + ; short *dc - 4 + ; ) +-global sym(vp8_idct_dequant_dc_0_2x_sse2) ++global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE + sym(vp8_idct_dequant_dc_0_2x_sse2): + push rbp + mov rbp, rsp +@@ -434,7 +434,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2): + ; int dst_stride - 3 + ; short *dc - 4 + ; ) +-global sym(vp8_idct_dequant_dc_full_2x_sse2) ++global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE + sym(vp8_idct_dequant_dc_full_2x_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm +index 6582687..4aac094 100644 +--- a/vp8/common/x86/iwalsh_mmx.asm ++++ 
b/vp8/common/x86/iwalsh_mmx.asm +@@ -12,7 +12,7 @@ + %include "vpx_ports/x86_abi_support.asm" + + ;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) +-global sym(vp8_short_inv_walsh4x4_mmx) ++global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE + sym(vp8_short_inv_walsh4x4_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm +index 51cb5e2..06e86a8 100644 +--- a/vp8/common/x86/iwalsh_sse2.asm ++++ b/vp8/common/x86/iwalsh_sse2.asm +@@ -12,7 +12,7 @@ + %include "vpx_ports/x86_abi_support.asm" + + ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) +-global sym(vp8_short_inv_walsh4x4_sse2) ++global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE + sym(vp8_short_inv_walsh4x4_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm +index 4918eb5..3d45c61 100644 +--- a/vp8/common/x86/loopfilter_block_sse2.asm ++++ b/vp8/common/x86/loopfilter_block_sse2.asm +@@ -133,7 +133,7 @@ + ; const char *limit, + ; const char *thresh + ;) +-global sym(vp8_loop_filter_bh_y_sse2) ++global sym(vp8_loop_filter_bh_y_sse2) PRIVATE + sym(vp8_loop_filter_bh_y_sse2): + + %ifidn __OUTPUT_FORMAT__,x64 +@@ -150,6 +150,7 @@ sym(vp8_loop_filter_bh_y_sse2): + + push rbp + mov rbp, rsp ++ SAVE_XMM 11 + push r12 + push r13 + mov thresh, arg(4) +@@ -258,6 +259,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 + %ifidn __OUTPUT_FORMAT__,x64 + pop r13 + pop r12 ++ RESTORE_XMM + pop rbp + %endif + +@@ -273,7 +275,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 + ; const char *thresh + ;) + +-global sym(vp8_loop_filter_bv_y_sse2) ++global sym(vp8_loop_filter_bv_y_sse2) PRIVATE + sym(vp8_loop_filter_bv_y_sse2): + + %ifidn __OUTPUT_FORMAT__,x64 +diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm +index 697a5de..f388d24 100644 +--- a/vp8/common/x86/loopfilter_mmx.asm ++++ b/vp8/common/x86/loopfilter_mmx.asm +@@ -21,7 +21,7 @@ + ; const char *thresh, + ; int count + ;) +-global sym(vp8_loop_filter_horizontal_edge_mmx) ++global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE + sym(vp8_loop_filter_horizontal_edge_mmx): + push rbp + mov rbp, rsp +@@ -233,7 +233,7 @@ sym(vp8_loop_filter_horizontal_edge_mmx): + ; const char *thresh, + ; int count + ;) +-global sym(vp8_loop_filter_vertical_edge_mmx) ++global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE + sym(vp8_loop_filter_vertical_edge_mmx): + push rbp + mov rbp, rsp +@@ -603,7 +603,7 @@ sym(vp8_loop_filter_vertical_edge_mmx): + ; const char *thresh, + ; int count + ;) +-global sym(vp8_mbloop_filter_horizontal_edge_mmx) ++global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE + sym(vp8_mbloop_filter_horizontal_edge_mmx): + push rbp + mov rbp, rsp +@@ -920,7 +920,7 @@ sym(vp8_mbloop_filter_horizontal_edge_mmx): + ; const char *thresh, + ; int count + ;) +-global sym(vp8_mbloop_filter_vertical_edge_mmx) ++global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE + sym(vp8_mbloop_filter_vertical_edge_mmx): + push rbp + mov rbp, rsp +@@ -1384,7 +1384,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx): + ; int src_pixel_step, + ; const char *blimit + ;) +-global sym(vp8_loop_filter_simple_horizontal_edge_mmx) ++global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE + sym(vp8_loop_filter_simple_horizontal_edge_mmx): + push rbp + mov rbp, rsp +@@ -1500,7 +1500,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx): + ; int src_pixel_step, + ; const char *blimit + ;) +-global 
sym(vp8_loop_filter_simple_vertical_edge_mmx) ++global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE + sym(vp8_loop_filter_simple_vertical_edge_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm +index 9944c33..a66753b 100644 +--- a/vp8/common/x86/loopfilter_sse2.asm ++++ b/vp8/common/x86/loopfilter_sse2.asm +@@ -286,7 +286,7 @@ + ; const char *limit, + ; const char *thresh, + ;) +-global sym(vp8_loop_filter_horizontal_edge_sse2) ++global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE + sym(vp8_loop_filter_horizontal_edge_sse2): + push rbp + mov rbp, rsp +@@ -334,7 +334,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): + ; const char *thresh, + ; int count + ;) +-global sym(vp8_loop_filter_horizontal_edge_uv_sse2) ++global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE + sym(vp8_loop_filter_horizontal_edge_uv_sse2): + push rbp + mov rbp, rsp +@@ -561,7 +561,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): + ; const char *limit, + ; const char *thresh, + ;) +-global sym(vp8_mbloop_filter_horizontal_edge_sse2) ++global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE + sym(vp8_mbloop_filter_horizontal_edge_sse2): + push rbp + mov rbp, rsp +@@ -607,7 +607,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): + ; const char *thresh, + ; unsigned char *v + ;) +-global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) ++global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE + sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): + push rbp + mov rbp, rsp +@@ -928,7 +928,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): + ; const char *limit, + ; const char *thresh, + ;) +-global sym(vp8_loop_filter_vertical_edge_sse2) ++global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE + sym(vp8_loop_filter_vertical_edge_sse2): + push rbp + mov rbp, rsp +@@ -993,7 +993,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): + ; const char *thresh, + ; unsigned char *v + ;) +-global sym(vp8_loop_filter_vertical_edge_uv_sse2) ++global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE + sym(vp8_loop_filter_vertical_edge_uv_sse2): + push rbp + mov rbp, rsp +@@ -1142,7 +1142,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): + ; const char *limit, + ; const char *thresh, + ;) +-global sym(vp8_mbloop_filter_vertical_edge_sse2) ++global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE + sym(vp8_mbloop_filter_vertical_edge_sse2): + push rbp + mov rbp, rsp +@@ -1209,7 +1209,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): + ; const char *thresh, + ; unsigned char *v + ;) +-global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) ++global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE + sym(vp8_mbloop_filter_vertical_edge_uv_sse2): + push rbp + mov rbp, rsp +@@ -1269,7 +1269,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): + ; int src_pixel_step, + ; const char *blimit, + ;) +-global sym(vp8_loop_filter_simple_horizontal_edge_sse2) ++global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE + sym(vp8_loop_filter_simple_horizontal_edge_sse2): + push rbp + mov rbp, rsp +@@ -1374,7 +1374,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): + ; int src_pixel_step, + ; const char *blimit, + ;) +-global sym(vp8_loop_filter_simple_vertical_edge_sse2) ++global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE + sym(vp8_loop_filter_simple_vertical_edge_sse2): + push rbp ; save old base pointer value. + mov rbp, rsp ; set new base pointer value. 
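(The recurring change in the assembly files above — appending PRIVATE to every `global sym(...)` declaration — marks those symbols with hidden visibility so they are no longer exported from the shared library. A minimal C sketch of the same idea, assuming a GCC/Clang toolchain on an ELF target; the function below is an illustrative stand-in, not a libvpx API:

/* Sketch: restricting symbol visibility, as the PRIVATE suffix does
 * for the hand-written assembly in this patch.  Assumes GCC/Clang on
 * an ELF target; the helper name is made up for illustration. */
__attribute__((visibility("hidden")))
unsigned int internal_sad_helper(const unsigned char *a,
                                 const unsigned char *b, int n)
{
    unsigned int sad = 0;
    int i;
    for (i = 0; i < n; i++)
        sad += (a[i] > b[i]) ? (unsigned int)(a[i] - b[i])
                             : (unsigned int)(b[i] - a[i]);
    return sad;  /* callable inside the .so, absent from its dynamic
                  * symbol table */
}

Hiding these internal helpers keeps them out of the dynamic export table, which avoids symbol clashes when two libraries in one process each carry a private copy of these routines — the same motivation that presumably drives the SAVE_XMM/RESTORE_XMM additions nearby: keeping the hand-written functions well-behaved under the Win64 ABI, where xmm6 and above are callee-saved.)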
+diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm +index 10d21f3..c1d2174 100644 +--- a/vp8/common/x86/mfqe_sse2.asm ++++ b/vp8/common/x86/mfqe_sse2.asm +@@ -19,7 +19,7 @@ + ; int dst_stride, + ; int src_weight + ;) +-global sym(vp8_filter_by_weight16x16_sse2) ++global sym(vp8_filter_by_weight16x16_sse2) PRIVATE + sym(vp8_filter_by_weight16x16_sse2): + push rbp + mov rbp, rsp +@@ -97,7 +97,7 @@ sym(vp8_filter_by_weight16x16_sse2): + ; int dst_stride, + ; int src_weight + ;) +-global sym(vp8_filter_by_weight8x8_sse2) ++global sym(vp8_filter_by_weight8x8_sse2) PRIVATE + sym(vp8_filter_by_weight8x8_sse2): + push rbp + mov rbp, rsp +@@ -165,7 +165,7 @@ sym(vp8_filter_by_weight8x8_sse2): + ; unsigned int *variance, 4 + ; unsigned int *sad, 5 + ;) +-global sym(vp8_variance_and_sad_16x16_sse2) ++global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE + sym(vp8_variance_and_sad_16x16_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm +index d24f740..966c586 100644 +--- a/vp8/common/x86/postproc_mmx.asm ++++ b/vp8/common/x86/postproc_mmx.asm +@@ -14,275 +14,10 @@ + %define VP8_FILTER_WEIGHT 128 + %define VP8_FILTER_SHIFT 7 + +-;void vp8_post_proc_down_and_across_mmx +-;( +-; unsigned char *src_ptr, +-; unsigned char *dst_ptr, +-; int src_pixels_per_line, +-; int dst_pixels_per_line, +-; int rows, +-; int cols, +-; int flimit +-;) +-global sym(vp8_post_proc_down_and_across_mmx) +-sym(vp8_post_proc_down_and_across_mmx): +- push rbp +- mov rbp, rsp +- SHADOW_ARGS_TO_STACK 7 +- GET_GOT rbx +- push rsi +- push rdi +- ; end prolog +- +-%if ABI_IS_32BIT=1 && CONFIG_PIC=1 +- ; move the global rd onto the stack, since we don't have enough registers +- ; to do PIC addressing +- movq mm0, [GLOBAL(rd)] +- sub rsp, 8 +- movq [rsp], mm0 +-%define RD [rsp] +-%else +-%define RD [GLOBAL(rd)] +-%endif +- +- push rbx +- lea rbx, [GLOBAL(Blur)] +- movd mm2, dword ptr arg(6) ;flimit +- punpcklwd mm2, mm2 +- punpckldq mm2, mm2 +- +- mov rsi, arg(0) ;src_ptr +- mov rdi, arg(1) ;dst_ptr +- +- movsxd rcx, DWORD PTR arg(4) ;rows +- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? 
+- pxor mm0, mm0 ; mm0 = 00000000 +- +-.nextrow: +- +- xor rdx, rdx ; clear out rdx for use as loop counter +-.nextcol: +- +- pxor mm7, mm7 ; mm7 = 00000000 +- movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps +- movq mm3, [rsi] ; mm4 = r0 p0..p7 +- punpcklbw mm3, mm0 ; mm3 = p0..p3 +- movq mm1, mm3 ; mm1 = p0..p3 +- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers +- +- movq mm6, [rbx + 48] ; mm6 = kernel 3 taps +- movq mm5, [rsi + rax] ; mm4 = r1 p0..p7 +- punpcklbw mm5, mm0 ; mm5 = r1 p0..p3 +- pmullw mm6, mm5 ; mm6 *= p0..p3 * kernel 3 modifiers +- paddusw mm3, mm6 ; mm3 += mm6 +- +- ; thresholding +- movq mm7, mm1 ; mm7 = r0 p0..p3 +- psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3 +- psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3 +- paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) +- pcmpgtw mm7, mm2 +- +- movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers +- movq mm5, [rsi + 2*rax] ; mm4 = r2 p0..p7 +- punpcklbw mm5, mm0 ; mm5 = r2 p0..p3 +- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers +- paddusw mm3, mm6 ; mm3 += mm5 +- +- ; thresholding +- movq mm6, mm1 ; mm6 = r0 p0..p3 +- psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3 +- psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r2 p0..p3 +- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) +- pcmpgtw mm6, mm2 +- por mm7, mm6 ; accumulate thresholds +- +- +- neg rax +- movq mm6, [rbx ] ; kernel 0 taps +- movq mm5, [rsi+2*rax] ; mm4 = r-2 p0..p7 +- punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3 +- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers +- paddusw mm3, mm6 ; mm3 += mm5 +- +- ; thresholding +- movq mm6, mm1 ; mm6 = r0 p0..p3 +- psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3 +- psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3 +- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) +- pcmpgtw mm6, mm2 +- por mm7, mm6 ; accumulate thresholds +- +- movq mm6, [rbx + 16] ; kernel 1 taps +- movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7 +- punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3 +- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
+- paddusw mm3, mm6 ; mm3 += mm5 +- +- ; thresholding +- movq mm6, mm1 ; mm6 = r0 p0..p3 +- psubusw mm6, mm4 ; mm6 = p0..p3 - r-2 p0..p3 +- psubusw mm4, mm1 ; mm5 = r-1 p0..p3 - p0..p3 +- paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) +- pcmpgtw mm6, mm2 +- por mm7, mm6 ; accumulate thresholds +- +- +- paddusw mm3, RD ; mm3 += round value +- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 +- +- pand mm1, mm7 ; mm1 select vals > thresh from source +- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result +- paddusw mm1, mm7 ; combination +- +- packuswb mm1, mm0 ; pack to bytes +- +- movd [rdi], mm1 ; +- neg rax ; pitch is positive +- +- +- add rsi, 4 +- add rdi, 4 +- add rdx, 4 +- +- cmp edx, dword ptr arg(5) ;cols +- jl .nextcol +- ; done with the all cols, start the across filtering in place +- sub rsi, rdx +- sub rdi, rdx +- +- ; dup the first byte into the left border 8 times +- movq mm1, [rdi] +- punpcklbw mm1, mm1 +- punpcklwd mm1, mm1 +- punpckldq mm1, mm1 +- +- mov rdx, -8 +- movq [rdi+rdx], mm1 +- +- ; dup the last byte into the right border +- movsxd rdx, dword arg(5) +- movq mm1, [rdi + rdx + -1] +- punpcklbw mm1, mm1 +- punpcklwd mm1, mm1 +- punpckldq mm1, mm1 +- movq [rdi+rdx], mm1 +- +- +- push rax +- xor rdx, rdx +- mov rax, [rdi-4]; +- +-.acrossnextcol: +- pxor mm7, mm7 ; mm7 = 00000000 +- movq mm6, [rbx + 32 ] ; +- movq mm4, [rdi+rdx] ; mm4 = p0..p7 +- movq mm3, mm4 ; mm3 = p0..p7 +- punpcklbw mm3, mm0 ; mm3 = p0..p3 +- movq mm1, mm3 ; mm1 = p0..p3 +- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers +- +- movq mm6, [rbx + 48] +- psrlq mm4, 8 ; mm4 = p1..p7 +- movq mm5, mm4 ; mm5 = p1..p7 +- punpcklbw mm5, mm0 ; mm5 = p1..p4 +- pmullw mm6, mm5 ; mm6 *= p1..p4 * kernel 3 modifiers +- paddusw mm3, mm6 ; mm3 += mm6 +- +- ; thresholding +- movq mm7, mm1 ; mm7 = p0..p3 +- psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4 +- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 +- paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4) +- pcmpgtw mm7, mm2 +- +- movq mm6, [rbx + 64 ] +- psrlq mm4, 8 ; mm4 = p2..p7 +- movq mm5, mm4 ; mm5 = p2..p7 +- punpcklbw mm5, mm0 ; mm5 = p2..p5 +- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers +- paddusw mm3, mm6 ; mm3 += mm5 +- +- ; thresholding +- movq mm6, mm1 ; mm6 = p0..p3 +- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 +- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 +- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) +- pcmpgtw mm6, mm2 +- por mm7, mm6 ; accumulate thresholds +- +- +- movq mm6, [rbx ] +- movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5 +- movq mm5, mm4 ; mm5 = p-2..p5 +- punpcklbw mm5, mm0 ; mm5 = p-2..p1 +- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers +- paddusw mm3, mm6 ; mm3 += mm5 +- +- ; thresholding +- movq mm6, mm1 ; mm6 = p0..p3 +- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 +- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 +- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) +- pcmpgtw mm6, mm2 +- por mm7, mm6 ; accumulate thresholds +- +- movq mm6, [rbx + 16] +- psrlq mm4, 8 ; mm4 = p-1..p5 +- punpcklbw mm4, mm0 ; mm4 = p-1..p2 +- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
+- paddusw mm3, mm6 ; mm3 += mm5 +- +- ; thresholding +- movq mm6, mm1 ; mm6 = p0..p3 +- psubusw mm6, mm4 ; mm6 = p0..p3 - p1..p4 +- psubusw mm4, mm1 ; mm5 = p1..p4 - p0..p3 +- paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p1..p4) +- pcmpgtw mm6, mm2 +- por mm7, mm6 ; accumulate thresholds +- +- paddusw mm3, RD ; mm3 += round value +- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 +- +- pand mm1, mm7 ; mm1 select vals > thresh from source +- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result +- paddusw mm1, mm7 ; combination +- +- packuswb mm1, mm0 ; pack to bytes +- mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes +- movd eax, mm1 +- +- add rdx, 4 +- cmp edx, dword ptr arg(5) ;cols +- jl .acrossnextcol; +- +- mov DWORD PTR [rdi+rdx-4], eax +- pop rax +- +- ; done with this rwo +- add rsi,rax ; next line +- movsxd rax, dword ptr arg(3) ;dst_pixels_per_line ; destination pitch? +- add rdi,rax ; next destination +- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; destination pitch? +- +- dec rcx ; decrement count +- jnz .nextrow ; next row +- pop rbx +- +- ; begin epilog +- pop rdi +- pop rsi +- RESTORE_GOT +- UNSHADOW_ARGS +- pop rbp +- ret +-%undef RD +- +- + ;void vp8_mbpost_proc_down_mmx(unsigned char *dst, + ; int pitch, int rows, int cols,int flimit) + extern sym(vp8_rv) +-global sym(vp8_mbpost_proc_down_mmx) ++global sym(vp8_mbpost_proc_down_mmx) PRIVATE + sym(vp8_mbpost_proc_down_mmx): + push rbp + mov rbp, rsp +@@ -510,7 +245,7 @@ sym(vp8_mbpost_proc_down_mmx): + ; unsigned char bothclamp[16], + ; unsigned int Width, unsigned int Height, int Pitch) + extern sym(rand) +-global sym(vp8_plane_add_noise_mmx) ++global sym(vp8_plane_add_noise_mmx) PRIVATE + sym(vp8_plane_add_noise_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm +index 966aafd..00f84a3 100644 +--- a/vp8/common/x86/postproc_sse2.asm ++++ b/vp8/common/x86/postproc_sse2.asm +@@ -11,146 +11,159 @@ + + %include "vpx_ports/x86_abi_support.asm" + +-;void vp8_post_proc_down_and_across_xmm ++;macro in deblock functions ++%macro FIRST_2_ROWS 0 ++ movdqa xmm4, xmm0 ++ movdqa xmm6, xmm0 ++ movdqa xmm5, xmm1 ++ pavgb xmm5, xmm3 ++ ++ ;calculate absolute value ++ psubusb xmm4, xmm1 ++ psubusb xmm1, xmm0 ++ psubusb xmm6, xmm3 ++ psubusb xmm3, xmm0 ++ paddusb xmm4, xmm1 ++ paddusb xmm6, xmm3 ++ ++ ;get threshold ++ movdqa xmm2, flimit ++ pxor xmm1, xmm1 ++ movdqa xmm7, xmm2 ++ ++ ;get mask ++ psubusb xmm2, xmm4 ++ psubusb xmm7, xmm6 ++ pcmpeqb xmm2, xmm1 ++ pcmpeqb xmm7, xmm1 ++ por xmm7, xmm2 ++%endmacro ++ ++%macro SECOND_2_ROWS 0 ++ movdqa xmm6, xmm0 ++ movdqa xmm4, xmm0 ++ movdqa xmm2, xmm1 ++ pavgb xmm1, xmm3 ++ ++ ;calculate absolute value ++ psubusb xmm6, xmm2 ++ psubusb xmm2, xmm0 ++ psubusb xmm4, xmm3 ++ psubusb xmm3, xmm0 ++ paddusb xmm6, xmm2 ++ paddusb xmm4, xmm3 ++ ++ pavgb xmm5, xmm1 ++ ++ ;get threshold ++ movdqa xmm2, flimit ++ pxor xmm1, xmm1 ++ movdqa xmm3, xmm2 ++ ++ ;get mask ++ psubusb xmm2, xmm6 ++ psubusb xmm3, xmm4 ++ pcmpeqb xmm2, xmm1 ++ pcmpeqb xmm3, xmm1 ++ ++ por xmm7, xmm2 ++ por xmm7, xmm3 ++ ++ pavgb xmm5, xmm0 ++ ++ ;decide if or not to use filtered value ++ pand xmm0, xmm7 ++ pandn xmm7, xmm5 ++ paddusb xmm0, xmm7 ++%endmacro ++ ++%macro UPDATE_FLIMIT 0 ++ movdqa xmm2, XMMWORD PTR [rbx] ++ movdqa [rsp], xmm2 ++ add rbx, 16 ++%endmacro ++ ++;void vp8_post_proc_down_and_across_mb_row_sse2 + ;( + ; unsigned char *src_ptr, + ; unsigned char *dst_ptr, + ; int src_pixels_per_line, + ; int dst_pixels_per_line, +-; int rows, + ; int cols, 
+-; int flimit ++; int *flimits, ++; int size + ;) +-global sym(vp8_post_proc_down_and_across_xmm) +-sym(vp8_post_proc_down_and_across_xmm): ++global sym(vp8_post_proc_down_and_across_mb_row_sse2) PRIVATE ++sym(vp8_post_proc_down_and_across_mb_row_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 +- GET_GOT rbx ++ push rbx + push rsi + push rdi + ; end prolog +- +-%if ABI_IS_32BIT=1 && CONFIG_PIC=1 + ALIGN_STACK 16, rax +- ; move the global rd onto the stack, since we don't have enough registers +- ; to do PIC addressing +- movdqa xmm0, [GLOBAL(rd42)] + sub rsp, 16 +- movdqa [rsp], xmm0 +-%define RD42 [rsp] +-%else +-%define RD42 [GLOBAL(rd42)] +-%endif +- + +- movd xmm2, dword ptr arg(6) ;flimit +- punpcklwd xmm2, xmm2 +- punpckldq xmm2, xmm2 +- punpcklqdq xmm2, xmm2 ++ ; put flimit on stack ++ mov rbx, arg(5) ;flimits ptr ++ UPDATE_FLIMIT + +- mov rsi, arg(0) ;src_ptr +- mov rdi, arg(1) ;dst_ptr ++%define flimit [rsp] + +- movsxd rcx, DWORD PTR arg(4) ;rows +- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? +- pxor xmm0, xmm0 ; mm0 = 00000000 ++ mov rsi, arg(0) ;src_ptr ++ mov rdi, arg(1) ;dst_ptr + ++ movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ++ movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock + .nextrow: +- +- xor rdx, rdx ; clear out rdx for use as loop counter ++ xor rdx, rdx ;col + .nextcol: +- movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7 +- punpcklbw xmm3, xmm0 ; mm3 = p0..p3 +- movdqa xmm1, xmm3 ; mm1 = p0..p3 +- psllw xmm3, 2 ; +- +- movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7 +- punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3 +- paddusw xmm3, xmm5 ; mm3 += mm6 +- +- ; thresholding +- movdqa xmm7, xmm1 ; mm7 = r0 p0..p3 +- psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3 +- psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3 +- paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) +- pcmpgtw xmm7, xmm2 +- +- movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7 +- punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3 +- paddusw xmm3, xmm5 ; mm3 += mm5 +- +- ; thresholding +- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 +- psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3 +- psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3 +- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) +- pcmpgtw xmm6, xmm2 +- por xmm7, xmm6 ; accumulate thresholds ++ ;load current and next 2 rows ++ movdqu xmm0, XMMWORD PTR [rsi] ++ movdqu xmm1, XMMWORD PTR [rsi + rax] ++ movdqu xmm3, XMMWORD PTR [rsi + 2*rax] + ++ FIRST_2_ROWS + ++ ;load above 2 rows + neg rax +- movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7 +- punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3 +- paddusw xmm3, xmm5 ; mm3 += mm5 +- +- ; thresholding +- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 +- psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3 +- psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3 +- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) +- pcmpgtw xmm6, xmm2 +- por xmm7, xmm6 ; accumulate thresholds +- +- movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7 +- punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3 +- paddusw xmm3, xmm4 ; mm3 += mm5 +- +- ; thresholding +- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 +- psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3 +- psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3 +- paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) +- pcmpgtw xmm6, xmm2 +- por xmm7, xmm6 ; accumulate thresholds +- +- +- paddusw xmm3, RD42 ; mm3 += round value +- psraw xmm3, 3 ; mm3 /= 8 +- +- pand xmm1, xmm7 ; mm1 select vals > thresh from source +- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result +- 
paddusw xmm1, xmm7 ; combination ++ movdqu xmm1, XMMWORD PTR [rsi + 2*rax] ++ movdqu xmm3, XMMWORD PTR [rsi + rax] + +- packuswb xmm1, xmm0 ; pack to bytes +- movq QWORD PTR [rdi], xmm1 ; ++ SECOND_2_ROWS + +- neg rax ; pitch is positive +- add rsi, 8 +- add rdi, 8 ++ movdqu XMMWORD PTR [rdi], xmm0 + +- add rdx, 8 +- cmp edx, dword arg(5) ;cols ++ neg rax ; positive stride ++ add rsi, 16 ++ add rdi, 16 + +- jl .nextcol ++ add rdx, 16 ++ cmp edx, dword arg(4) ;cols ++ jge .downdone ++ UPDATE_FLIMIT ++ jmp .nextcol + ++.downdone: + ; done with the all cols, start the across filtering in place + sub rsi, rdx + sub rdi, rdx + ++ mov rbx, arg(5) ; flimits ++ UPDATE_FLIMIT + + ; dup the first byte into the left border 8 times + movq mm1, [rdi] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 +- + mov rdx, -8 + movq [rdi+rdx], mm1 + + ; dup the last byte into the right border +- movsxd rdx, dword arg(5) ++ movsxd rdx, dword arg(4) + movq mm1, [rdi + rdx + -1] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 +@@ -158,118 +171,69 @@ sym(vp8_post_proc_down_and_across_xmm): + movq [rdi+rdx], mm1 + + xor rdx, rdx +- movq mm0, QWORD PTR [rdi-8]; ++ movq mm0, QWORD PTR [rdi-16]; ++ movq mm1, QWORD PTR [rdi-8]; + + .acrossnextcol: +- movq xmm7, QWORD PTR [rdi +rdx -2] +- movd xmm4, DWORD PTR [rdi +rdx +6] +- +- pslldq xmm4, 8 +- por xmm4, xmm7 +- +- movdqa xmm3, xmm4 +- psrldq xmm3, 2 +- punpcklbw xmm3, xmm0 ; mm3 = p0..p3 +- movdqa xmm1, xmm3 ; mm1 = p0..p3 +- psllw xmm3, 2 +- +- +- movdqa xmm5, xmm4 +- psrldq xmm5, 3 +- punpcklbw xmm5, xmm0 ; mm5 = p1..p4 +- paddusw xmm3, xmm5 ; mm3 += mm6 +- +- ; thresholding +- movdqa xmm7, xmm1 ; mm7 = p0..p3 +- psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4 +- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 +- paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4) +- pcmpgtw xmm7, xmm2 +- +- movdqa xmm5, xmm4 +- psrldq xmm5, 4 +- punpcklbw xmm5, xmm0 ; mm5 = p2..p5 +- paddusw xmm3, xmm5 ; mm3 += mm5 +- +- ; thresholding +- movdqa xmm6, xmm1 ; mm6 = p0..p3 +- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 +- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 +- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) +- pcmpgtw xmm6, xmm2 +- por xmm7, xmm6 ; accumulate thresholds +- +- +- movdqa xmm5, xmm4 ; mm5 = p-2..p5 +- punpcklbw xmm5, xmm0 ; mm5 = p-2..p1 +- paddusw xmm3, xmm5 ; mm3 += mm5 +- +- ; thresholding +- movdqa xmm6, xmm1 ; mm6 = p0..p3 +- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 +- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 +- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) +- pcmpgtw xmm6, xmm2 +- por xmm7, xmm6 ; accumulate thresholds +- +- psrldq xmm4, 1 ; mm4 = p-1..p5 +- punpcklbw xmm4, xmm0 ; mm4 = p-1..p2 +- paddusw xmm3, xmm4 ; mm3 += mm5 +- +- ; thresholding +- movdqa xmm6, xmm1 ; mm6 = p0..p3 +- psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4 +- psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3 +- paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4) +- pcmpgtw xmm6, xmm2 +- por xmm7, xmm6 ; accumulate thresholds +- +- paddusw xmm3, RD42 ; mm3 += round value +- psraw xmm3, 3 ; mm3 /= 8 +- +- pand xmm1, xmm7 ; mm1 select vals > thresh from source +- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result +- paddusw xmm1, xmm7 ; combination +- +- packuswb xmm1, xmm0 ; pack to bytes +- movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes +- movdq2q mm0, xmm1 +- +- add rdx, 8 +- cmp edx, dword arg(5) ;cols +- jl .acrossnextcol; +- +- ; last 8 pixels +- movq QWORD PTR [rdi+rdx-8], mm0 ++ movdqu xmm0, XMMWORD PTR [rdi + rdx] ++ movdqu xmm1, XMMWORD PTR [rdi + rdx -2] ++ movdqu 
xmm3, XMMWORD PTR [rdi + rdx -1] ++ ++ FIRST_2_ROWS ++ ++ movdqu xmm1, XMMWORD PTR [rdi + rdx +1] ++ movdqu xmm3, XMMWORD PTR [rdi + rdx +2] ++ ++ SECOND_2_ROWS ++ ++ movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes ++ movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes ++ movdq2q mm0, xmm0 ++ psrldq xmm0, 8 ++ movdq2q mm1, xmm0 ++ ++ add rdx, 16 ++ cmp edx, dword arg(4) ;cols ++ jge .acrossdone ++ UPDATE_FLIMIT ++ jmp .acrossnextcol + ++.acrossdone ++ ; last 16 pixels ++ movq QWORD PTR [rdi+rdx-16], mm0 ++ ++ cmp edx, dword arg(4) ++ jne .throw_last_8 ++ movq QWORD PTR [rdi+rdx-8], mm1 ++.throw_last_8: + ; done with this rwo +- add rsi,rax ; next line +- mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch? +- add rdi,rax ; next destination +- mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch? ++ add rsi,rax ;next src line ++ mov eax, dword arg(3) ;dst_pixels_per_line ++ add rdi,rax ;next destination ++ mov eax, dword arg(2) ;src_pixels_per_line + +- dec rcx ; decrement count +- jnz .nextrow ; next row ++ mov rbx, arg(5) ;flimits ++ UPDATE_FLIMIT + +-%if ABI_IS_32BIT=1 && CONFIG_PIC=1 +- add rsp,16 ++ dec rcx ;decrement count ++ jnz .nextrow ;next row ++ ++ add rsp, 16 + pop rsp +-%endif + ; begin epilog + pop rdi + pop rsi +- RESTORE_GOT ++ pop rbx + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret +-%undef RD42 +- ++%undef flimit + + ;void vp8_mbpost_proc_down_xmm(unsigned char *dst, + ; int pitch, int rows, int cols,int flimit) + extern sym(vp8_rv) +-global sym(vp8_mbpost_proc_down_xmm) ++global sym(vp8_mbpost_proc_down_xmm) PRIVATE + sym(vp8_mbpost_proc_down_xmm): + push rbp + mov rbp, rsp +@@ -497,7 +461,7 @@ sym(vp8_mbpost_proc_down_xmm): + + ;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src, + ; int pitch, int rows, int cols,int flimit) +-global sym(vp8_mbpost_proc_across_ip_xmm) ++global sym(vp8_mbpost_proc_across_ip_xmm) PRIVATE + sym(vp8_mbpost_proc_across_ip_xmm): + push rbp + mov rbp, rsp +@@ -694,7 +658,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): + ; unsigned char bothclamp[16], + ; unsigned int Width, unsigned int Height, int Pitch) + extern sym(rand) +-global sym(vp8_plane_add_noise_wmt) ++global sym(vp8_plane_add_noise_wmt) PRIVATE + sym(vp8_plane_add_noise_wmt): + push rbp + mov rbp, rsp +@@ -753,7 +717,5 @@ sym(vp8_plane_add_noise_wmt): + + SECTION_RODATA + align 16 +-rd42: +- times 8 dw 0x04 + four8s: + times 4 dd 8 +diff --git a/vp8/common/x86/postproc_x86.c b/vp8/common/x86/postproc_x86.c +index a25921b..3ec0106 100644 +--- a/vp8/common/x86/postproc_x86.c ++++ b/vp8/common/x86/postproc_x86.c +@@ -18,4 +18,7 @@ extern int rand(void) + { + return __rand(); + } ++#else ++/* ISO C forbids an empty translation unit. 
*/ ++int vp8_unused; + #endif +diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm +index 19c0faf..15e9871 100644 +--- a/vp8/common/x86/recon_mmx.asm ++++ b/vp8/common/x86/recon_mmx.asm +@@ -18,7 +18,7 @@ + ; unsigned char *dst, + ; int dst_stride + ; ) +-global sym(vp8_copy_mem8x8_mmx) ++global sym(vp8_copy_mem8x8_mmx) PRIVATE + sym(vp8_copy_mem8x8_mmx): + push rbp + mov rbp, rsp +@@ -81,7 +81,7 @@ sym(vp8_copy_mem8x8_mmx): + ; unsigned char *dst, + ; int dst_stride + ; ) +-global sym(vp8_copy_mem8x4_mmx) ++global sym(vp8_copy_mem8x4_mmx) PRIVATE + sym(vp8_copy_mem8x4_mmx): + push rbp + mov rbp, rsp +@@ -125,7 +125,7 @@ sym(vp8_copy_mem8x4_mmx): + ; unsigned char *dst, + ; int dst_stride + ; ) +-global sym(vp8_copy_mem16x16_mmx) ++global sym(vp8_copy_mem16x16_mmx) PRIVATE + sym(vp8_copy_mem16x16_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm +index 7b6e3cf..1434bcd 100644 +--- a/vp8/common/x86/recon_sse2.asm ++++ b/vp8/common/x86/recon_sse2.asm +@@ -17,7 +17,7 @@ + ; unsigned char *dst, + ; int dst_stride + ; ) +-global sym(vp8_copy_mem16x16_sse2) ++global sym(vp8_copy_mem16x16_sse2) PRIVATE + sym(vp8_copy_mem16x16_sse2): + push rbp + mov rbp, rsp +@@ -123,7 +123,7 @@ sym(vp8_copy_mem16x16_sse2): + ; unsigned char *left, + ; int left_stride, + ; ) +-global sym(vp8_intra_pred_uv_dc_mmx2) ++global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE + sym(vp8_intra_pred_uv_dc_mmx2): + push rbp + mov rbp, rsp +@@ -196,7 +196,7 @@ sym(vp8_intra_pred_uv_dc_mmx2): + ; unsigned char *left, + ; int left_stride, + ; ) +-global sym(vp8_intra_pred_uv_dctop_mmx2) ++global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE + sym(vp8_intra_pred_uv_dctop_mmx2): + push rbp + mov rbp, rsp +@@ -250,7 +250,7 @@ sym(vp8_intra_pred_uv_dctop_mmx2): + ; unsigned char *left, + ; int left_stride, + ; ) +-global sym(vp8_intra_pred_uv_dcleft_mmx2) ++global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE + sym(vp8_intra_pred_uv_dcleft_mmx2): + push rbp + mov rbp, rsp +@@ -317,7 +317,7 @@ sym(vp8_intra_pred_uv_dcleft_mmx2): + ; unsigned char *left, + ; int left_stride, + ; ) +-global sym(vp8_intra_pred_uv_dc128_mmx) ++global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE + sym(vp8_intra_pred_uv_dc128_mmx): + push rbp + mov rbp, rsp +@@ -357,7 +357,7 @@ sym(vp8_intra_pred_uv_dc128_mmx): + ; int left_stride, + ; ) + %macro vp8_intra_pred_uv_tm 1 +-global sym(vp8_intra_pred_uv_tm_%1) ++global sym(vp8_intra_pred_uv_tm_%1) PRIVATE + sym(vp8_intra_pred_uv_tm_%1): + push rbp + mov rbp, rsp +@@ -437,7 +437,7 @@ vp8_intra_pred_uv_tm ssse3 + ; unsigned char *left, + ; int left_stride, + ; ) +-global sym(vp8_intra_pred_uv_ve_mmx) ++global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE + sym(vp8_intra_pred_uv_ve_mmx): + push rbp + mov rbp, rsp +@@ -479,7 +479,7 @@ sym(vp8_intra_pred_uv_ve_mmx): + ; int left_stride + ; ) + %macro vp8_intra_pred_uv_ho 1 +-global sym(vp8_intra_pred_uv_ho_%1) ++global sym(vp8_intra_pred_uv_ho_%1) PRIVATE + sym(vp8_intra_pred_uv_ho_%1): + push rbp + mov rbp, rsp +@@ -577,7 +577,7 @@ vp8_intra_pred_uv_ho ssse3 + ; unsigned char *left, + ; int left_stride + ; ) +-global sym(vp8_intra_pred_y_dc_sse2) ++global sym(vp8_intra_pred_y_dc_sse2) PRIVATE + sym(vp8_intra_pred_y_dc_sse2): + push rbp + mov rbp, rsp +@@ -683,7 +683,7 @@ sym(vp8_intra_pred_y_dc_sse2): + ; unsigned char *left, + ; int left_stride + ; ) +-global sym(vp8_intra_pred_y_dctop_sse2) ++global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE + sym(vp8_intra_pred_y_dctop_sse2): + push rbp + mov rbp, rsp +@@ 
-745,7 +745,7 @@ sym(vp8_intra_pred_y_dctop_sse2): + ; unsigned char *left, + ; int left_stride + ; ) +-global sym(vp8_intra_pred_y_dcleft_sse2) ++global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE + sym(vp8_intra_pred_y_dcleft_sse2): + push rbp + mov rbp, rsp +@@ -838,7 +838,7 @@ sym(vp8_intra_pred_y_dcleft_sse2): + ; unsigned char *left, + ; int left_stride + ; ) +-global sym(vp8_intra_pred_y_dc128_sse2) ++global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE + sym(vp8_intra_pred_y_dc128_sse2): + push rbp + mov rbp, rsp +@@ -885,11 +885,12 @@ sym(vp8_intra_pred_y_dc128_sse2): + ; int left_stride + ; ) + %macro vp8_intra_pred_y_tm 1 +-global sym(vp8_intra_pred_y_tm_%1) ++global sym(vp8_intra_pred_y_tm_%1) PRIVATE + sym(vp8_intra_pred_y_tm_%1): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 ++ SAVE_XMM 7 + push rsi + push rdi + GET_GOT rbx +@@ -957,6 +958,7 @@ vp8_intra_pred_y_tm_%1_loop: + RESTORE_GOT + pop rdi + pop rsi ++ RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret +@@ -972,7 +974,7 @@ vp8_intra_pred_y_tm ssse3 + ; unsigned char *left, + ; int left_stride + ; ) +-global sym(vp8_intra_pred_y_ve_sse2) ++global sym(vp8_intra_pred_y_ve_sse2) PRIVATE + sym(vp8_intra_pred_y_ve_sse2): + push rbp + mov rbp, rsp +@@ -1020,7 +1022,7 @@ sym(vp8_intra_pred_y_ve_sse2): + ; unsigned char *left, + ; int left_stride, + ; ) +-global sym(vp8_intra_pred_y_ho_sse2) ++global sym(vp8_intra_pred_y_ho_sse2) PRIVATE + sym(vp8_intra_pred_y_ho_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/sad_mmx.asm b/vp8/common/x86/sad_mmx.asm +index 407b399..592112f 100644 +--- a/vp8/common/x86/sad_mmx.asm ++++ b/vp8/common/x86/sad_mmx.asm +@@ -11,11 +11,11 @@ + + %include "vpx_ports/x86_abi_support.asm" + +-global sym(vp8_sad16x16_mmx) +-global sym(vp8_sad8x16_mmx) +-global sym(vp8_sad8x8_mmx) +-global sym(vp8_sad4x4_mmx) +-global sym(vp8_sad16x8_mmx) ++global sym(vp8_sad16x16_mmx) PRIVATE ++global sym(vp8_sad8x16_mmx) PRIVATE ++global sym(vp8_sad8x8_mmx) PRIVATE ++global sym(vp8_sad4x4_mmx) PRIVATE ++global sym(vp8_sad16x8_mmx) PRIVATE + + ;unsigned int vp8_sad16x16_mmx( + ; unsigned char *src_ptr, +diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm +index 0b01d7b..8d86abc 100644 +--- a/vp8/common/x86/sad_sse2.asm ++++ b/vp8/common/x86/sad_sse2.asm +@@ -16,7 +16,7 @@ + ; int src_stride, + ; unsigned char *ref_ptr, + ; int ref_stride) +-global sym(vp8_sad16x16_wmt) ++global sym(vp8_sad16x16_wmt) PRIVATE + sym(vp8_sad16x16_wmt): + push rbp + mov rbp, rsp +@@ -90,7 +90,7 @@ sym(vp8_sad16x16_wmt): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int max_sad) +-global sym(vp8_sad8x16_wmt) ++global sym(vp8_sad8x16_wmt) PRIVATE + sym(vp8_sad8x16_wmt): + push rbp + mov rbp, rsp +@@ -115,7 +115,7 @@ sym(vp8_sad8x16_wmt): + + movq rax, mm7 + cmp eax, arg(4) +- jg .x8x16sad_wmt_early_exit ++ ja .x8x16sad_wmt_early_exit + + movq mm0, QWORD PTR [rsi] + movq mm1, QWORD PTR [rdi] +@@ -153,7 +153,7 @@ sym(vp8_sad8x16_wmt): + ; int src_stride, + ; unsigned char *ref_ptr, + ; int ref_stride) +-global sym(vp8_sad8x8_wmt) ++global sym(vp8_sad8x8_wmt) PRIVATE + sym(vp8_sad8x8_wmt): + push rbp + mov rbp, rsp +@@ -176,7 +176,7 @@ sym(vp8_sad8x8_wmt): + + movq rax, mm7 + cmp eax, arg(4) +- jg .x8x8sad_wmt_early_exit ++ ja .x8x8sad_wmt_early_exit + + movq mm0, QWORD PTR [rsi] + movq mm1, QWORD PTR [rdi] +@@ -206,7 +206,7 @@ sym(vp8_sad8x8_wmt): + ; int src_stride, + ; unsigned char *ref_ptr, + ; int ref_stride) +-global sym(vp8_sad4x4_wmt) ++global sym(vp8_sad4x4_wmt) PRIVATE + sym(vp8_sad4x4_wmt): + push rbp + mov 
rbp, rsp +@@ -261,7 +261,7 @@ sym(vp8_sad4x4_wmt): + ; int src_stride, + ; unsigned char *ref_ptr, + ; int ref_stride) +-global sym(vp8_sad16x8_wmt) ++global sym(vp8_sad16x8_wmt) PRIVATE + sym(vp8_sad16x8_wmt): + push rbp + mov rbp, rsp +@@ -285,7 +285,7 @@ sym(vp8_sad16x8_wmt): + + movq rax, mm7 + cmp eax, arg(4) +- jg .x16x8sad_wmt_early_exit ++ ja .x16x8sad_wmt_early_exit + + movq mm0, QWORD PTR [rsi] + movq mm2, QWORD PTR [rsi+8] +@@ -335,7 +335,7 @@ sym(vp8_sad16x8_wmt): + ; unsigned char *dst_ptr, + ; int dst_stride, + ; int height); +-global sym(vp8_copy32xn_sse2) ++global sym(vp8_copy32xn_sse2) PRIVATE + sym(vp8_copy32xn_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm +index c2af3c8..f90a589 100644 +--- a/vp8/common/x86/sad_sse3.asm ++++ b/vp8/common/x86/sad_sse3.asm +@@ -380,7 +380,7 @@ + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad16x16x3_sse3) ++global sym(vp8_sad16x16x3_sse3) PRIVATE + sym(vp8_sad16x16x3_sse3): + + STACK_FRAME_CREATE_X3 +@@ -422,7 +422,7 @@ sym(vp8_sad16x16x3_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad16x8x3_sse3) ++global sym(vp8_sad16x8x3_sse3) PRIVATE + sym(vp8_sad16x8x3_sse3): + + STACK_FRAME_CREATE_X3 +@@ -460,7 +460,7 @@ sym(vp8_sad16x8x3_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad8x16x3_sse3) ++global sym(vp8_sad8x16x3_sse3) PRIVATE + sym(vp8_sad8x16x3_sse3): + + STACK_FRAME_CREATE_X3 +@@ -489,7 +489,7 @@ sym(vp8_sad8x16x3_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad8x8x3_sse3) ++global sym(vp8_sad8x8x3_sse3) PRIVATE + sym(vp8_sad8x8x3_sse3): + + STACK_FRAME_CREATE_X3 +@@ -514,7 +514,7 @@ sym(vp8_sad8x8x3_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad4x4x3_sse3) ++global sym(vp8_sad4x4x3_sse3) PRIVATE + sym(vp8_sad4x4x3_sse3): + + STACK_FRAME_CREATE_X3 +@@ -589,7 +589,7 @@ sym(vp8_sad4x4x3_sse3): + ; int ref_stride, + ; int max_sad) + ;%define lddqu movdqu +-global sym(vp8_sad16x16_sse3) ++global sym(vp8_sad16x16_sse3) PRIVATE + sym(vp8_sad16x16_sse3): + + STACK_FRAME_CREATE_X3 +@@ -642,7 +642,7 @@ sym(vp8_sad16x16_sse3): + ; unsigned char *dst_ptr, + ; int dst_stride, + ; int height); +-global sym(vp8_copy32xn_sse3) ++global sym(vp8_copy32xn_sse3) PRIVATE + sym(vp8_copy32xn_sse3): + + STACK_FRAME_CREATE_X3 +@@ -703,7 +703,7 @@ sym(vp8_copy32xn_sse3): + ; unsigned char *ref_ptr_base, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad16x16x4d_sse3) ++global sym(vp8_sad16x16x4d_sse3) PRIVATE + sym(vp8_sad16x16x4d_sse3): + + STACK_FRAME_CREATE_X4 +@@ -754,7 +754,7 @@ sym(vp8_sad16x16x4d_sse3): + ; unsigned char *ref_ptr_base, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad16x8x4d_sse3) ++global sym(vp8_sad16x8x4d_sse3) PRIVATE + sym(vp8_sad16x8x4d_sse3): + + STACK_FRAME_CREATE_X4 +@@ -801,7 +801,7 @@ sym(vp8_sad16x8x4d_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad8x16x4d_sse3) ++global sym(vp8_sad8x16x4d_sse3) PRIVATE + sym(vp8_sad8x16x4d_sse3): + + STACK_FRAME_CREATE_X4 +@@ -834,7 +834,7 @@ sym(vp8_sad8x16x4d_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad8x8x4d_sse3) ++global sym(vp8_sad8x8x4d_sse3) PRIVATE + sym(vp8_sad8x8x4d_sse3): + + STACK_FRAME_CREATE_X4 +@@ -863,7 +863,7 @@ sym(vp8_sad8x8x4d_sse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; 
int *results) +-global sym(vp8_sad4x4x4d_sse3) ++global sym(vp8_sad4x4x4d_sse3) PRIVATE + sym(vp8_sad4x4x4d_sse3): + + STACK_FRAME_CREATE_X4 +diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm +index 03ecec4..f7fccd7 100644 +--- a/vp8/common/x86/sad_sse4.asm ++++ b/vp8/common/x86/sad_sse4.asm +@@ -161,7 +161,7 @@ + ; const unsigned char *ref_ptr, + ; int ref_stride, + ; unsigned short *sad_array); +-global sym(vp8_sad16x16x8_sse4) ++global sym(vp8_sad16x16x8_sse4) PRIVATE + sym(vp8_sad16x16x8_sse4): + push rbp + mov rbp, rsp +@@ -203,7 +203,7 @@ sym(vp8_sad16x16x8_sse4): + ; int ref_stride, + ; unsigned short *sad_array + ;); +-global sym(vp8_sad16x8x8_sse4) ++global sym(vp8_sad16x8x8_sse4) PRIVATE + sym(vp8_sad16x8x8_sse4): + push rbp + mov rbp, rsp +@@ -241,7 +241,7 @@ sym(vp8_sad16x8x8_sse4): + ; int ref_stride, + ; unsigned short *sad_array + ;); +-global sym(vp8_sad8x8x8_sse4) ++global sym(vp8_sad8x8x8_sse4) PRIVATE + sym(vp8_sad8x8x8_sse4): + push rbp + mov rbp, rsp +@@ -279,7 +279,7 @@ sym(vp8_sad8x8x8_sse4): + ; int ref_stride, + ; unsigned short *sad_array + ;); +-global sym(vp8_sad8x16x8_sse4) ++global sym(vp8_sad8x16x8_sse4) PRIVATE + sym(vp8_sad8x16x8_sse4): + push rbp + mov rbp, rsp +@@ -320,7 +320,7 @@ sym(vp8_sad8x16x8_sse4): + ; int ref_stride, + ; unsigned short *sad_array + ;); +-global sym(vp8_sad4x4x8_sse4) ++global sym(vp8_sad4x4x8_sse4) PRIVATE + sym(vp8_sad4x4x8_sse4): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/sad_ssse3.asm b/vp8/common/x86/sad_ssse3.asm +index 95b6c89..278fc06 100644 +--- a/vp8/common/x86/sad_ssse3.asm ++++ b/vp8/common/x86/sad_ssse3.asm +@@ -152,7 +152,7 @@ + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad16x16x3_ssse3) ++global sym(vp8_sad16x16x3_ssse3) PRIVATE + sym(vp8_sad16x16x3_ssse3): + push rbp + mov rbp, rsp +@@ -265,7 +265,7 @@ sym(vp8_sad16x16x3_ssse3): + ; unsigned char *ref_ptr, + ; int ref_stride, + ; int *results) +-global sym(vp8_sad16x8x3_ssse3) ++global sym(vp8_sad16x8x3_ssse3) PRIVATE + sym(vp8_sad16x8x3_ssse3): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm +index 5528fd0..47dd452 100644 +--- a/vp8/common/x86/subpixel_mmx.asm ++++ b/vp8/common/x86/subpixel_mmx.asm +@@ -28,7 +28,7 @@ extern sym(vp8_bilinear_filters_x86_8) + ; unsigned int output_width, + ; short * vp8_filter + ;) +-global sym(vp8_filter_block1d_h6_mmx) ++global sym(vp8_filter_block1d_h6_mmx) PRIVATE + sym(vp8_filter_block1d_h6_mmx): + push rbp + mov rbp, rsp +@@ -125,7 +125,7 @@ sym(vp8_filter_block1d_h6_mmx): + ; unsigned int output_width, + ; short * vp8_filter + ;) +-global sym(vp8_filter_block1dc_v6_mmx) ++global sym(vp8_filter_block1dc_v6_mmx) PRIVATE + sym(vp8_filter_block1dc_v6_mmx): + push rbp + mov rbp, rsp +@@ -213,7 +213,7 @@ sym(vp8_filter_block1dc_v6_mmx): + ; unsigned char *dst_ptr, + ; int dst_pitch + ;) +-global sym(vp8_bilinear_predict8x8_mmx) ++global sym(vp8_bilinear_predict8x8_mmx) PRIVATE + sym(vp8_bilinear_predict8x8_mmx): + push rbp + mov rbp, rsp +@@ -370,7 +370,7 @@ sym(vp8_bilinear_predict8x8_mmx): + ; unsigned char *dst_ptr, + ; int dst_pitch + ;) +-global sym(vp8_bilinear_predict8x4_mmx) ++global sym(vp8_bilinear_predict8x4_mmx) PRIVATE + sym(vp8_bilinear_predict8x4_mmx): + push rbp + mov rbp, rsp +@@ -525,7 +525,7 @@ sym(vp8_bilinear_predict8x4_mmx): + ; unsigned char *dst_ptr, + ; int dst_pitch + ;) +-global sym(vp8_bilinear_predict4x4_mmx) ++global sym(vp8_bilinear_predict4x4_mmx) PRIVATE + 
sym(vp8_bilinear_predict4x4_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm +index cb550af..69f8d10 100644 +--- a/vp8/common/x86/subpixel_sse2.asm ++++ b/vp8/common/x86/subpixel_sse2.asm +@@ -33,7 +33,7 @@ extern sym(vp8_bilinear_filters_x86_8) + ; unsigned int output_width, + ; short *vp8_filter + ;) +-global sym(vp8_filter_block1d8_h6_sse2) ++global sym(vp8_filter_block1d8_h6_sse2) PRIVATE + sym(vp8_filter_block1d8_h6_sse2): + push rbp + mov rbp, rsp +@@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2): + ; even number. This function handles 8 pixels in horizontal direction, calculating ONE + ; rows each iteration to take advantage of the 128 bits operations. + ;*************************************************************************************/ +-global sym(vp8_filter_block1d16_h6_sse2) ++global sym(vp8_filter_block1d16_h6_sse2) PRIVATE + sym(vp8_filter_block1d16_h6_sse2): + push rbp + mov rbp, rsp +@@ -329,7 +329,7 @@ sym(vp8_filter_block1d16_h6_sse2): + ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The + ; input pixel array has output_height rows. + ;*************************************************************************************/ +-global sym(vp8_filter_block1d8_v6_sse2) ++global sym(vp8_filter_block1d8_v6_sse2) PRIVATE + sym(vp8_filter_block1d8_v6_sse2): + push rbp + mov rbp, rsp +@@ -424,7 +424,7 @@ sym(vp8_filter_block1d8_v6_sse2): + ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The + ; input pixel array has output_height rows. + ;*************************************************************************************/ +-global sym(vp8_filter_block1d16_v6_sse2) ++global sym(vp8_filter_block1d16_v6_sse2) PRIVATE + sym(vp8_filter_block1d16_v6_sse2): + push rbp + mov rbp, rsp +@@ -534,7 +534,7 @@ sym(vp8_filter_block1d16_v6_sse2): + ; const short *vp8_filter + ;) + ; First-pass filter only when yoffset==0 +-global sym(vp8_filter_block1d8_h6_only_sse2) ++global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE + sym(vp8_filter_block1d8_h6_only_sse2): + push rbp + mov rbp, rsp +@@ -647,7 +647,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): + ; const short *vp8_filter + ;) + ; First-pass filter only when yoffset==0 +-global sym(vp8_filter_block1d16_h6_only_sse2) ++global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE + sym(vp8_filter_block1d16_h6_only_sse2): + push rbp + mov rbp, rsp +@@ -812,7 +812,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): + ; const short *vp8_filter + ;) + ; Second-pass filter only when xoffset==0 +-global sym(vp8_filter_block1d8_v6_only_sse2) ++global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE + sym(vp8_filter_block1d8_v6_only_sse2): + push rbp + mov rbp, rsp +@@ -904,7 +904,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): + ; unsigned int output_height, + ; unsigned int output_width + ;) +-global sym(vp8_unpack_block1d16_h6_sse2) ++global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE + sym(vp8_unpack_block1d16_h6_sse2): + push rbp + mov rbp, rsp +@@ -963,7 +963,7 @@ sym(vp8_unpack_block1d16_h6_sse2): + ; int dst_pitch + ;) + extern sym(vp8_bilinear_filters_x86_8) +-global sym(vp8_bilinear_predict16x16_sse2) ++global sym(vp8_bilinear_predict16x16_sse2) PRIVATE + sym(vp8_bilinear_predict16x16_sse2): + push rbp + mov rbp, rsp +@@ -1231,7 +1231,7 @@ sym(vp8_bilinear_predict16x16_sse2): + ; unsigned char *dst_ptr, + ; int dst_pitch + ;) +-global sym(vp8_bilinear_predict8x8_sse2) ++global sym(vp8_bilinear_predict8x8_sse2) PRIVATE + 
sym(vp8_bilinear_predict8x8_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm +index 6bca82b..c06f245 100644 +--- a/vp8/common/x86/subpixel_ssse3.asm ++++ b/vp8/common/x86/subpixel_ssse3.asm +@@ -34,7 +34,7 @@ + ; unsigned int output_height, + ; unsigned int vp8_filter_index + ;) +-global sym(vp8_filter_block1d8_h6_ssse3) ++global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE + sym(vp8_filter_block1d8_h6_ssse3): + push rbp + mov rbp, rsp +@@ -177,7 +177,7 @@ vp8_filter_block1d8_h4_ssse3: + ; unsigned int output_height, + ; unsigned int vp8_filter_index + ;) +-global sym(vp8_filter_block1d16_h6_ssse3) ++global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE + sym(vp8_filter_block1d16_h6_ssse3): + push rbp + mov rbp, rsp +@@ -284,7 +284,7 @@ sym(vp8_filter_block1d16_h6_ssse3): + ; unsigned int output_height, + ; unsigned int vp8_filter_index + ;) +-global sym(vp8_filter_block1d4_h6_ssse3) ++global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE + sym(vp8_filter_block1d4_h6_ssse3): + push rbp + mov rbp, rsp +@@ -352,6 +352,7 @@ sym(vp8_filter_block1d4_h6_ssse3): + pop rdi + pop rsi + RESTORE_GOT ++ RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret +@@ -413,7 +414,7 @@ sym(vp8_filter_block1d4_h6_ssse3): + ; unsigned int output_height, + ; unsigned int vp8_filter_index + ;) +-global sym(vp8_filter_block1d16_v6_ssse3) ++global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE + sym(vp8_filter_block1d16_v6_ssse3): + push rbp + mov rbp, rsp +@@ -601,7 +602,7 @@ sym(vp8_filter_block1d16_v6_ssse3): + ; unsigned int output_height, + ; unsigned int vp8_filter_index + ;) +-global sym(vp8_filter_block1d8_v6_ssse3) ++global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE + sym(vp8_filter_block1d8_v6_ssse3): + push rbp + mov rbp, rsp +@@ -741,7 +742,7 @@ sym(vp8_filter_block1d8_v6_ssse3): + ; unsigned int output_height, + ; unsigned int vp8_filter_index + ;) +-global sym(vp8_filter_block1d4_v6_ssse3) ++global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE + sym(vp8_filter_block1d4_v6_ssse3): + push rbp + mov rbp, rsp +@@ -880,7 +881,7 @@ sym(vp8_filter_block1d4_v6_ssse3): + ; unsigned char *dst_ptr, + ; int dst_pitch + ;) +-global sym(vp8_bilinear_predict16x16_ssse3) ++global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE + sym(vp8_bilinear_predict16x16_ssse3): + push rbp + mov rbp, rsp +@@ -1143,7 +1144,7 @@ sym(vp8_bilinear_predict16x16_ssse3): + ; unsigned char *dst_ptr, + ; int dst_pitch + ;) +-global sym(vp8_bilinear_predict8x8_ssse3) ++global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE + sym(vp8_bilinear_predict8x8_ssse3): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm +index 2be8bbe..d9120d0 100644 +--- a/vp8/common/x86/variance_impl_mmx.asm ++++ b/vp8/common/x86/variance_impl_mmx.asm +@@ -12,7 +12,7 @@ + %include "vpx_ports/x86_abi_support.asm" + + ;unsigned int vp8_get_mb_ss_mmx( short *src_ptr ) +-global sym(vp8_get_mb_ss_mmx) ++global sym(vp8_get_mb_ss_mmx) PRIVATE + sym(vp8_get_mb_ss_mmx): + push rbp + mov rbp, rsp +@@ -72,7 +72,7 @@ sym(vp8_get_mb_ss_mmx): + ; unsigned int *SSE, + ; int *Sum + ;) +-global sym(vp8_get8x8var_mmx) ++global sym(vp8_get8x8var_mmx) PRIVATE + sym(vp8_get8x8var_mmx): + push rbp + mov rbp, rsp +@@ -320,7 +320,7 @@ sym(vp8_get8x8var_mmx): + ; unsigned int *SSE, + ; int *Sum + ;) +-global sym(vp8_get4x4var_mmx) ++global sym(vp8_get4x4var_mmx) PRIVATE + sym(vp8_get4x4var_mmx): + push rbp + mov rbp, rsp +@@ -433,7 +433,7 @@ sym(vp8_get4x4var_mmx): + ; unsigned char *ref_ptr, + ; 
int recon_stride + ;) +-global sym(vp8_get4x4sse_cs_mmx) ++global sym(vp8_get4x4sse_cs_mmx) PRIVATE + sym(vp8_get4x4sse_cs_mmx): + push rbp + mov rbp, rsp +@@ -522,7 +522,7 @@ sym(vp8_get4x4sse_cs_mmx): + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_filter_block2d_bil4x4_var_mmx) ++global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE + sym(vp8_filter_block2d_bil4x4_var_mmx): + push rbp + mov rbp, rsp +@@ -667,7 +667,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx): + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_filter_block2d_bil_var_mmx) ++global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE + sym(vp8_filter_block2d_bil_var_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/variance_impl_sse2.asm b/vp8/common/x86/variance_impl_sse2.asm +index 7629220..761433c 100644 +--- a/vp8/common/x86/variance_impl_sse2.asm ++++ b/vp8/common/x86/variance_impl_sse2.asm +@@ -17,7 +17,7 @@ + ;( + ; short *src_ptr + ;) +-global sym(vp8_get_mb_ss_sse2) ++global sym(vp8_get_mb_ss_sse2) PRIVATE + sym(vp8_get_mb_ss_sse2): + push rbp + mov rbp, rsp +@@ -80,7 +80,7 @@ sym(vp8_get_mb_ss_sse2): + ; unsigned int * SSE, + ; int * Sum + ;) +-global sym(vp8_get16x16var_sse2) ++global sym(vp8_get16x16var_sse2) PRIVATE + sym(vp8_get16x16var_sse2): + push rbp + mov rbp, rsp +@@ -224,7 +224,7 @@ sym(vp8_get16x16var_sse2): + ; unsigned int * SSE, + ; int * Sum + ;) +-global sym(vp8_get8x8var_sse2) ++global sym(vp8_get8x8var_sse2) PRIVATE + sym(vp8_get8x8var_sse2): + push rbp + mov rbp, rsp +@@ -413,7 +413,7 @@ sym(vp8_get8x8var_sse2): + ; unsigned int *sumsquared;; + ; + ;) +-global sym(vp8_filter_block2d_bil_var_sse2) ++global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE + sym(vp8_filter_block2d_bil_var_sse2): + push rbp + mov rbp, rsp +@@ -690,7 +690,7 @@ filter_block2d_bil_variance: + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_half_horiz_vert_variance8x_h_sse2) ++global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE + sym(vp8_half_horiz_vert_variance8x_h_sse2): + push rbp + mov rbp, rsp +@@ -812,7 +812,7 @@ vp8_half_horiz_vert_variance8x_h_1: + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_half_horiz_vert_variance16x_h_sse2) ++global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE + sym(vp8_half_horiz_vert_variance16x_h_sse2): + push rbp + mov rbp, rsp +@@ -928,7 +928,7 @@ vp8_half_horiz_vert_variance16x_h_1: + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_half_vert_variance8x_h_sse2) ++global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE + sym(vp8_half_vert_variance8x_h_sse2): + push rbp + mov rbp, rsp +@@ -1035,7 +1035,7 @@ vp8_half_vert_variance8x_h_1: + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_half_vert_variance16x_h_sse2) ++global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE + sym(vp8_half_vert_variance16x_h_sse2): + push rbp + mov rbp, rsp +@@ -1143,7 +1143,7 @@ vp8_half_vert_variance16x_h_1: + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_half_horiz_variance8x_h_sse2) ++global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE + sym(vp8_half_horiz_variance8x_h_sse2): + push rbp + mov rbp, rsp +@@ -1248,7 +1248,7 @@ vp8_half_horiz_variance8x_h_1: + ; int *sum, + ; unsigned int *sumsquared + ;) +-global sym(vp8_half_horiz_variance16x_h_sse2) ++global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE + sym(vp8_half_horiz_variance16x_h_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/variance_impl_ssse3.asm b/vp8/common/x86/variance_impl_ssse3.asm +index 97e8b0e..686b4a9 
100644 +--- a/vp8/common/x86/variance_impl_ssse3.asm ++++ b/vp8/common/x86/variance_impl_ssse3.asm +@@ -29,7 +29,7 @@ + ;) + ;Note: The filter coefficient at offset=0 is 128. Since the second register + ;for Pmaddubsw is signed bytes, we must calculate zero offset seperately. +-global sym(vp8_filter_block2d_bil_var_ssse3) ++global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE + sym(vp8_filter_block2d_bil_var_ssse3): + push rbp + mov rbp, rsp +diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c +index 2769a30..afd6429 100644 +--- a/vp8/common/x86/variance_sse2.c ++++ b/vp8/common/x86/variance_sse2.c +@@ -332,8 +332,9 @@ unsigned int vp8_sub_pixel_variance16x16_wmt + unsigned int xxsum0, xxsum1; + + +- // note we could avoid these if statements if the calling function +- // just called the appropriate functions inside. ++ /* note we could avoid these if statements if the calling function ++ * just called the appropriate functions inside. ++ */ + if (xoffset == 4 && yoffset == 0) + { + vp8_half_horiz_variance16x_h_sse2( +diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c +index 1be0d92..ba2055c 100644 +--- a/vp8/common/x86/variance_ssse3.c ++++ b/vp8/common/x86/variance_ssse3.c +@@ -79,8 +79,9 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3 + int xsum0; + unsigned int xxsum0; + +- // note we could avoid these if statements if the calling function +- // just called the appropriate functions inside. ++ /* note we could avoid these if statements if the calling function ++ * just called the appropriate functions inside. ++ */ + if (xoffset == 4 && yoffset == 0) + { + vp8_half_horiz_variance16x_h_sse2( +diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c +index 23a7fdc..3437a23 100644 +--- a/vp8/common/x86/vp8_asm_stubs.c ++++ b/vp8/common/x86/vp8_asm_stubs.c +@@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3 + { + if (yoffset) + { +- vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); +- vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset); ++ vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, FData2, ++ 16, 21, xoffset); ++ vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, ++ 16, yoffset); + } + else + { + /* First-pass only */ +- vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); ++ vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, ++ dst_ptr, dst_pitch, 16, xoffset); + } + } + else + { +- /* Second-pass only */ +- vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); ++ if (yoffset) ++ { ++ /* Second-pass only */ ++ vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, ++ dst_ptr, dst_pitch, 16, yoffset); ++ } ++ else ++ { ++ /* ssse3 second-pass only function couldn't handle (xoffset==0 && ++ * yoffset==0) case correctly. Add copy function here to guarantee ++ * six-tap function handles all possible offsets. 
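The comment that just closed above recurs in each of the sixtap stubs that follow, so the dispatch it describes is worth spelling out once. The sketch below is illustrative only, with invented helper names rather than libvpx functions: a separable six-tap predictor runs the horizontal pass only when xoffset is non-zero and the vertical pass only when yoffset is non-zero, so the (xoffset==0 && yoffset==0) case has nothing to filter and a plain block copy is the correct result.

    /* Illustrative dispatch for a separable sub-pixel predictor.
     * hpass/vpass/copy_block are hypothetical stand-ins. */
    void hpass(const unsigned char *s, int sp, unsigned char *d, int dp,
               int height, int xoffset);
    void vpass(const unsigned char *s, int sp, unsigned char *d, int dp,
               int height, int yoffset);
    void copy_block(const unsigned char *s, int sp,
                    unsigned char *d, int dp);

    void sixtap_sketch(const unsigned char *src, int src_stride,
                       unsigned char *dst, int dst_stride,
                       int xoffset, int yoffset)
    {
        unsigned char temp[21 * 16];  /* height + 5 tap rows, 16x16 case */

        if (xoffset && yoffset)       /* both passes, via the temp buffer */
        {
            hpass(src - 2 * src_stride, src_stride, temp, 16, 21, xoffset);
            vpass(temp, 16, dst, dst_stride, 16, yoffset);
        }
        else if (xoffset)             /* first pass only */
            hpass(src, src_stride, dst, dst_stride, 16, xoffset);
        else if (yoffset)             /* second pass only */
            vpass(src - 2 * src_stride, src_stride,
                  dst, dst_stride, 16, yoffset);
        else                          /* (0,0): degenerate, just copy */
            copy_block(src, src_stride, dst, dst_stride);
    }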
*/ ++ vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); ++ } + } + } + +@@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3 + { + if (yoffset) + { +- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); +- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); ++ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, FData2, ++ 8, 13, xoffset); ++ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, ++ 8, yoffset); + } + else + { +- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); ++ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, ++ dst_ptr, dst_pitch, 8, xoffset); + } + } + else + { +- /* Second-pass only */ +- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); ++ if (yoffset) ++ { ++ /* Second-pass only */ ++ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, ++ dst_ptr, dst_pitch, 8, yoffset); ++ } ++ else ++ { ++ /* ssse3 second-pass only function couldn't handle (xoffset==0 && ++ * yoffset==0) case correctly. Add copy function here to guarantee ++ * six-tap function handles all possible offsets. */ ++ vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); ++ } + } + } + +@@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3 + { + if (yoffset) + { +- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); +- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); ++ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, FData2, ++ 8, 9, xoffset); ++ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, ++ 4, yoffset); + } + else + { + /* First-pass only */ +- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); ++ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, ++ dst_ptr, dst_pitch, 4, xoffset); + } + } + else + { +- /* Second-pass only */ +- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); ++ if (yoffset) ++ { ++ /* Second-pass only */ ++ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, ++ dst_ptr, dst_pitch, 4, yoffset); ++ } ++ else ++ { ++ /* ssse3 second-pass only function couldn't handle (xoffset==0 && ++ * yoffset==0) case correctly. Add copy function here to guarantee ++ * six-tap function handles all possible offsets. 
*/ ++ vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); ++ } + } + } + +@@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3 + { + if (yoffset) + { +- vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); +- vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); ++ vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, ++ FData2, 4, 9, xoffset); ++ vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, ++ 4, yoffset); + } + else + { +- vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); ++ vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, ++ dst_ptr, dst_pitch, 4, xoffset); + } + } + else + { +- vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); ++ if (yoffset) ++ { ++ vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), ++ src_pixels_per_line, ++ dst_ptr, dst_pitch, 4, yoffset); ++ } ++ else ++ { ++ /* ssse3 second-pass only function couldn't handle (xoffset==0 && ++ * yoffset==0) case correctly. Add copy function here to guarantee ++ * six-tap function handles all possible offsets. */ ++ int r; ++ ++ for (r = 0; r < 4; r++) ++ { ++ #if !(CONFIG_FAST_UNALIGNED) ++ dst_ptr[0] = src_ptr[0]; ++ dst_ptr[1] = src_ptr[1]; ++ dst_ptr[2] = src_ptr[2]; ++ dst_ptr[3] = src_ptr[3]; ++ #else ++ *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ; ++ #endif ++ dst_ptr += dst_pitch; ++ src_ptr += src_pixels_per_line; ++ } ++ } + } +- + } + + #endif +diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h +index 880c185..1a08c05 100644 +--- a/vp8/decoder/dboolhuff.h ++++ b/vp8/decoder/dboolhuff.h +@@ -55,7 +55,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br); + int loop_end, x; \ + size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \ + \ +- x = shift + CHAR_BIT - bits_left; \ ++ x = (int)(shift + CHAR_BIT - bits_left); \ + loop_end = 0; \ + if(x >= 0) \ + { \ +diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c +index 51e2420..8027a07 100644 +--- a/vp8/decoder/decodemv.c ++++ b/vp8/decoder/decodemv.c +@@ -48,11 +48,11 @@ static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) + + static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) + { +- vp8_reader *const bc = & pbi->bc; ++ vp8_reader *const bc = & pbi->mbc[8]; + const int mis = pbi->common.mode_info_stride; + + mi->mbmi.ref_frame = INTRA_FRAME; +- mi->mbmi.mode = read_kf_ymode(bc, pbi->common.kf_ymode_prob); ++ mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); + + if (mi->mbmi.mode == B_PRED) + { +@@ -65,12 +65,12 @@ static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) + const B_PREDICTION_MODE L = left_block_mode(mi, i); + + mi->bmi[i].as_mode = +- read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]); ++ read_bmode(bc, vp8_kf_bmode_prob [A] [L]); + } + while (++i < 16); + } + +- mi->mbmi.uv_mode = read_uv_mode(bc, pbi->common.kf_uv_mode_prob); ++ mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); + } + + static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) +@@ -150,7 +150,7 @@ static const unsigned char mbsplit_fill_offset[4][16] = { + + static void mb_mode_mv_init(VP8D_COMP *pbi) + { +- vp8_reader *const bc = & pbi->bc; ++ vp8_reader *const bc = & pbi->mbc[8]; + MV_CONTEXT *const mvc = pbi->common.fc.mvc; + + #if CONFIG_ERROR_CONCEALMENT +@@ -159,6 +159,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) + * outside the frame. 
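Stepping back to the dboolhuff.h hunk above: bits_left is a size_t, so shift + CHAR_BIT - bits_left is evaluated in unsigned arithmetic, and a logically negative result wraps around before being narrowed back into the int x. The new explicit cast does not change the computed value; it documents that narrowing and silences conversion warnings. A standalone demonstration, with values invented for illustration:

    #include <limits.h>
    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        int shift = 16;
        size_t bits_left = 32;  /* plenty of bits still in the buffer */

        /* Evaluated in size_t: 16 + 8 - 32 wraps to SIZE_MAX - 7.
         * Narrowing that back to int recovers the intended -8 on the
         * usual two's-complement targets, so the decoder's
         * "if (x >= 0)" refill test keeps working as designed. */
        int x = (int)(shift + CHAR_BIT - bits_left);

        printf("x = %d\n", x);  /* prints: x = -8 */
        return 0;
    }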
*/ + pbi->mvs_corrupt_from_mb = UINT_MAX; + #endif ++ /* Read the mb_no_coeff_skip flag */ ++ pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); ++ + pbi->prob_skip_false = 0; + if (pbi->common.mb_no_coeff_skip) + pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); +@@ -293,26 +296,24 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, + blockmv.as_mv.row += best_mv.as_mv.row; + blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1; + blockmv.as_mv.col += best_mv.as_mv.col; +- +- mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, +- mb_to_left_edge, +- mb_to_right_edge, +- mb_to_top_edge, +- mb_to_bottom_edge); + } + } + else + { + blockmv.as_int = abovemv.as_int; +- mbmi->need_to_clamp_mvs |= above_mb->mbmi.need_to_clamp_mvs; + } + } + else + { + blockmv.as_int = leftmv.as_int; +- mbmi->need_to_clamp_mvs |= left_mb->mbmi.need_to_clamp_mvs; + } + ++ mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, ++ mb_to_left_edge, ++ mb_to_right_edge, ++ mb_to_top_edge, ++ mb_to_bottom_edge); ++ + { + /* Fill (uniform) modes, mvs of jth subset. + Must do it here because ensuing subsets can +@@ -337,7 +338,7 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, + + static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) + { +- vp8_reader *const bc = & pbi->bc; ++ vp8_reader *const bc = & pbi->mbc[8]; + mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); + if (mbmi->ref_frame) /* inter MB */ + { +@@ -595,14 +596,14 @@ static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, + * By default on a key frame reset all MBs to segment 0 + */ + if (pbi->mb.update_mb_segmentation_map) +- read_mb_features(&pbi->bc, &mi->mbmi, &pbi->mb); ++ read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); + else if(pbi->common.frame_type == KEY_FRAME) + mi->mbmi.segment_id = 0; + + /* Read the macroblock coeff skip flag if this feature is in use, + * else default to 0 */ + if (pbi->common.mb_no_coeff_skip) +- mi->mbmi.mb_skip_coeff = vp8_read(&pbi->bc, pbi->prob_skip_false); ++ mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); + else + mi->mbmi.mb_skip_coeff = 0; + +@@ -644,7 +645,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) + #if CONFIG_ERROR_CONCEALMENT + /* look for corruption. set mvs_corrupt_from_mb to the current + * mb_num if the frame is corrupt from this macroblock. 
*/ +- if (vp8dx_bool_error(&pbi->bc) && mb_num < pbi->mvs_corrupt_from_mb) ++ if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < ++ (int)pbi->mvs_corrupt_from_mb) + { + pbi->mvs_corrupt_from_mb = mb_num; + /* no need to continue since the partition is corrupt from +diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c +index 62a068b..a4a00f6 100644 +--- a/vp8/decoder/decodframe.c ++++ b/vp8/decoder/decodframe.c +@@ -177,7 +177,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, + { + short *DQC = xd->dequant_y1; + int dst_stride = xd->dst.y_stride; +- unsigned char *base_dst = xd->dst.y_buffer; + + /* clear out residual eob info */ + if(xd->mode_info_context->mbmi.mb_skip_coeff) +@@ -188,38 +187,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, + for (i = 0; i < 16; i++) + { + BLOCKD *b = &xd->block[i]; +- int b_mode = xd->mode_info_context->bmi[i].as_mode; +- unsigned char *yabove; +- unsigned char *yleft; +- int left_stride; +- unsigned char top_left; +- +- yabove = base_dst + b->offset - dst_stride; +- yleft = base_dst + b->offset - 1; +- left_stride = dst_stride; +- top_left = yabove[-1]; +- +- // vp8_intra4x4_predict (base_dst + b->offset, dst_stride, b_mode, +- // base_dst + b->offset, dst_stride ); +- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, +- b_mode, +- base_dst + b->offset, dst_stride, +- top_left); ++ unsigned char *dst = xd->dst.y_buffer + b->offset; ++ B_PREDICTION_MODE b_mode = ++ xd->mode_info_context->bmi[i].as_mode; ++ unsigned char *Above = dst - dst_stride; ++ unsigned char *yleft = dst - 1; ++ int left_stride = dst_stride; ++ unsigned char top_left = Above[-1]; ++ ++ vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, ++ dst, dst_stride, top_left); + + if (xd->eobs[i]) + { + if (xd->eobs[i] > 1) + { +- vp8_dequant_idct_add +- (b->qcoeff, DQC, +- base_dst + b->offset, dst_stride); ++ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); + } + else + { + vp8_dc_only_idct_add + (b->qcoeff[0] * DQC[0], +- base_dst + b->offset, dst_stride, +- base_dst + b->offset, dst_stride); ++ dst, dst_stride, ++ dst, dst_stride); + ((int *)b->qcoeff)[0] = 0; + } + } +@@ -317,48 +307,253 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update) + FILE *vpxlog = 0; + #endif + ++static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) ++{ ++ int i; ++ unsigned char *src_ptr1; ++ unsigned char *dest_ptr1; ++ ++ unsigned int Border; ++ int plane_stride; ++ ++ /***********/ ++ /* Y Plane */ ++ /***********/ ++ Border = ybf->border; ++ plane_stride = ybf->y_stride; ++ src_ptr1 = ybf->y_buffer - Border; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ ++ for (i = 0; i < (int)Border; i++) ++ { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ dest_ptr1 += plane_stride; ++ } ++ ++ ++ /***********/ ++ /* U Plane */ ++ /***********/ ++ plane_stride = ybf->uv_stride; ++ Border /= 2; ++ src_ptr1 = ybf->u_buffer - Border; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ ++ for (i = 0; i < (int)(Border); i++) ++ { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ dest_ptr1 += plane_stride; ++ } ++ ++ /***********/ ++ /* V Plane */ ++ /***********/ ++ ++ src_ptr1 = ybf->v_buffer - Border; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ ++ for (i = 0; i < (int)(Border); i++) ++ { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ dest_ptr1 += plane_stride; ++ } ++} ++ ++static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) ++{ ++ int i; ++ unsigned char *src_ptr1, *src_ptr2; ++ unsigned char 
*dest_ptr2; ++ ++ unsigned int Border; ++ int plane_stride; ++ int plane_height; ++ ++ /***********/ ++ /* Y Plane */ ++ /***********/ ++ Border = ybf->border; ++ plane_stride = ybf->y_stride; ++ plane_height = ybf->y_height; ++ ++ src_ptr1 = ybf->y_buffer - Border; ++ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)Border; i++) ++ { ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ dest_ptr2 += plane_stride; ++ } ++ ++ ++ /***********/ ++ /* U Plane */ ++ /***********/ ++ plane_stride = ybf->uv_stride; ++ plane_height = ybf->uv_height; ++ Border /= 2; ++ ++ src_ptr1 = ybf->u_buffer - Border; ++ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)(Border); i++) ++ { ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ dest_ptr2 += plane_stride; ++ } ++ ++ /***********/ ++ /* V Plane */ ++ /***********/ ++ ++ src_ptr1 = ybf->v_buffer - Border; ++ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)(Border); i++) ++ { ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ dest_ptr2 += plane_stride; ++ } ++} ++ ++static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, ++ unsigned char *y_src, ++ unsigned char *u_src, ++ unsigned char *v_src) ++{ ++ int i; ++ unsigned char *src_ptr1, *src_ptr2; ++ unsigned char *dest_ptr1, *dest_ptr2; ++ ++ unsigned int Border; ++ int plane_stride; ++ int plane_height; ++ int plane_width; ++ ++ /***********/ ++ /* Y Plane */ ++ /***********/ ++ Border = ybf->border; ++ plane_stride = ybf->y_stride; ++ plane_height = 16; ++ plane_width = ybf->y_width; ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = y_src; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) ++ { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /***********/ ++ /* U Plane */ ++ /***********/ ++ plane_stride = ybf->uv_stride; ++ plane_height = 8; ++ plane_width = ybf->uv_width; ++ Border /= 2; ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = u_src; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) ++ { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /***********/ ++ /* V Plane */ ++ /***********/ ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = v_src; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) ++ { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++} ++ + static void decode_mb_rows(VP8D_COMP *pbi) + { + VP8_COMMON *const pc = & pbi->common; + MACROBLOCKD *const xd = & pbi->mb; + ++ MODE_INFO *lf_mic = xd->mode_info_context; ++ + int ibc = 0; + int num_part = 1 << 
pc->multi_token_partition; + + int recon_yoffset, recon_uvoffset; + int mb_row, mb_col; + int mb_idx = 0; +- int dst_fb_idx = pc->new_fb_idx; +- int recon_y_stride = pc->yv12_fb[dst_fb_idx].y_stride; +- int recon_uv_stride = pc->yv12_fb[dst_fb_idx].uv_stride; ++ ++ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; ++ ++ int recon_y_stride = yv12_fb_new->y_stride; ++ int recon_uv_stride = yv12_fb_new->uv_stride; + + unsigned char *ref_buffer[MAX_REF_FRAMES][3]; + unsigned char *dst_buffer[3]; ++ unsigned char *lf_dst[3]; ++ unsigned char *eb_dst[3]; + int i; +- int ref_fb_index[MAX_REF_FRAMES]; + int ref_fb_corrupted[MAX_REF_FRAMES]; + + ref_fb_corrupted[INTRA_FRAME] = 0; + +- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; +- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; +- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; +- + for(i = 1; i < MAX_REF_FRAMES; i++) + { +- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; +- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; +- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; ++ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; ++ ++ ref_buffer[i][0] = this_fb->y_buffer; ++ ref_buffer[i][1] = this_fb->u_buffer; ++ ref_buffer[i][2] = this_fb->v_buffer; + +- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; ++ ref_fb_corrupted[i] = this_fb->corrupted; + } + +- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; +- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; +- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; ++ /* Set up the buffer pointers */ ++ eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; ++ eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; ++ eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; + + xd->up_available = 0; + ++ /* Initialize the loop filter for this frame. */ ++ if(pc->filter_level) ++ vp8_loop_filter_frame_init(pc, xd, pc->filter_level); ++ ++ vp8_setup_intra_recon_top_line(yv12_fb_new); ++ + /* Decode the individual macro block */ + for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) + { +@@ -395,10 +590,14 @@ static void decode_mb_rows(VP8D_COMP *pbi) + xd->recon_above[1] -= xd->dst.uv_stride; + xd->recon_above[2] -= xd->dst.uv_stride; + +- //TODO: move to outside row loop ++ /* TODO: move to outside row loop */ + xd->recon_left_stride[0] = xd->dst.y_stride; + xd->recon_left_stride[1] = xd->dst.uv_stride; + ++ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], ++ xd->recon_left[2], xd->dst.y_stride, ++ xd->dst.uv_stride); ++ + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) + { + /* Distance of Mb to the various image edges. 
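The long hunk that follows is the heart of this decoder change: decode_mb_rows() stops running three whole-frame passes (decode every row, then loop-filter the frame, then extend its borders) and instead streams the three stages, with filtering trailing decode by one macroblock row and border extension trailing filtering by one more. In outline, with invented helper names standing in for the real calls:

    /* Outline only; decode_row/loop_filter_row/extend_row_borders are
     * hypothetical stand-ins for the code in the hunk below. */
    for (mb_row = 0; mb_row < mb_rows; mb_row++)
    {
        decode_row(mb_row);                 /* rows enter the pipeline */

        if (filter_level && mb_row > 0)
            loop_filter_row(mb_row - 1);    /* filtered one row behind */

        if (mb_row > 1)
            extend_row_borders(mb_row - 2); /* extended one row later  */
    }
    /* Drain: filter the final row, extend the remaining rows, then
     * fill in the frame's top and bottom borders. */

Each stage lags the previous one because filtering a row's top edges still touches pixels of the row above; only after that may the row's borders be replicated. When the loop filter is off, the middle stage drops out and extension trails decode by a single row, which is what the else branch in the hunk does.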
+@@ -460,26 +659,103 @@ static void decode_mb_rows(VP8D_COMP *pbi) + xd->recon_left[1] += 8; + xd->recon_left[2] += 8; + +- + recon_yoffset += 16; + recon_uvoffset += 8; + + ++xd->mode_info_context; /* next mb */ + + xd->above_context++; +- + } + + /* adjust to the next row of mbs */ +- vp8_extend_mb_row( +- &pc->yv12_fb[dst_fb_idx], +- xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 +- ); ++ vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, ++ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + + ++xd->mode_info_context; /* skip prediction column */ + xd->up_available = 1; + ++ if(pc->filter_level) ++ { ++ if(mb_row > 0) ++ { ++ if (pc->filter_type == NORMAL_LOOPFILTER) ++ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, ++ recon_y_stride, recon_uv_stride, ++ lf_dst[0], lf_dst[1], lf_dst[2]); ++ else ++ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, ++ recon_y_stride, recon_uv_stride, ++ lf_dst[0], lf_dst[1], lf_dst[2]); ++ ++ if(mb_row > 1) ++ { ++ yv12_extend_frame_left_right_c(yv12_fb_new, ++ eb_dst[0], ++ eb_dst[1], ++ eb_dst[2]); ++ ++ eb_dst[0] += recon_y_stride * 16; ++ eb_dst[1] += recon_uv_stride * 8; ++ eb_dst[2] += recon_uv_stride * 8; ++ ++ if(mb_row == 2) ++ yv12_extend_frame_top_c(yv12_fb_new); ++ ++ } ++ ++ lf_dst[0] += recon_y_stride * 16; ++ lf_dst[1] += recon_uv_stride * 8; ++ lf_dst[2] += recon_uv_stride * 8; ++ lf_mic += pc->mb_cols; ++ lf_mic++; /* Skip border mb */ ++ } ++ } ++ else ++ { ++ if(mb_row > 0) ++ { ++ /**/ ++ yv12_extend_frame_left_right_c(yv12_fb_new, ++ eb_dst[0], ++ eb_dst[1], ++ eb_dst[2]); ++ ++ eb_dst[0] += recon_y_stride * 16; ++ eb_dst[1] += recon_uv_stride * 8; ++ eb_dst[2] += recon_uv_stride * 8; ++ ++ if(mb_row == 1) ++ yv12_extend_frame_top_c(yv12_fb_new); ++ } ++ } ++ } ++ ++ if(pc->filter_level) ++ { ++ if (pc->filter_type == NORMAL_LOOPFILTER) ++ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, ++ recon_uv_stride, lf_dst[0], lf_dst[1], ++ lf_dst[2]); ++ else ++ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, ++ recon_uv_stride, lf_dst[0], lf_dst[1], ++ lf_dst[2]); ++ ++ yv12_extend_frame_left_right_c(yv12_fb_new, ++ eb_dst[0], ++ eb_dst[1], ++ eb_dst[2]); ++ eb_dst[0] += recon_y_stride * 16; ++ eb_dst[1] += recon_uv_stride * 8; ++ eb_dst[2] += recon_uv_stride * 8; + } ++ yv12_extend_frame_left_right_c(yv12_fb_new, ++ eb_dst[0], ++ eb_dst[1], ++ eb_dst[2]); ++ ++ yv12_extend_frame_bottom_c(yv12_fb_new); ++ + } + + static unsigned int read_partition_size(const unsigned char *cx_size) +@@ -519,13 +795,13 @@ static unsigned int read_available_partition_size( + if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) + partition_size = read_partition_size(partition_size_ptr); + else if (pbi->ec_active) +- partition_size = bytes_left; ++ partition_size = (unsigned int)bytes_left; + else + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated partition size data"); + } + else +- partition_size = bytes_left; ++ partition_size = (unsigned int)bytes_left; + + /* Validate the calculated partition length. 
If the buffer + * described by the partition can't be fully read, then restrict +@@ -534,7 +810,7 @@ static unsigned int read_available_partition_size( + if (!read_is_valid(fragment_start, partition_size, fragment_end)) + { + if (pbi->ec_active) +- partition_size = bytes_left; ++ partition_size = (unsigned int)bytes_left; + else + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt partition " +@@ -547,24 +823,18 @@ static unsigned int read_available_partition_size( + static void setup_token_decoder(VP8D_COMP *pbi, + const unsigned char* token_part_sizes) + { +- vp8_reader *bool_decoder = &pbi->bc2; ++ vp8_reader *bool_decoder = &pbi->mbc[0]; + unsigned int partition_idx; +- int fragment_idx; +- int num_token_partitions; ++ unsigned int fragment_idx; ++ unsigned int num_token_partitions; + const unsigned char *first_fragment_end = pbi->fragments[0] + + pbi->fragment_sizes[0]; + + TOKEN_PARTITION multi_token_partition = +- (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2); +- if (!vp8dx_bool_error(&pbi->bc)) ++ (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); ++ if (!vp8dx_bool_error(&pbi->mbc[8])) + pbi->common.multi_token_partition = multi_token_partition; + num_token_partitions = 1 << pbi->common.multi_token_partition; +- if (num_token_partitions > 1) +- { +- CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_token_partitions * +- sizeof(vp8_reader))); +- bool_decoder = pbi->mbc; +- } + + /* Check for partitions within the fragments and unpack the fragments + * so that each fragment pointer points to its corresponding partition. */ +@@ -580,10 +850,10 @@ static void setup_token_decoder(VP8D_COMP *pbi, + /* Size of first partition + token partition sizes element */ + ptrdiff_t ext_first_part_size = token_part_sizes - + pbi->fragments[0] + 3 * (num_token_partitions - 1); +- fragment_size -= ext_first_part_size; ++ fragment_size -= (unsigned int)ext_first_part_size; + if (fragment_size > 0) + { +- pbi->fragment_sizes[0] = ext_first_part_size; ++ pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size; + /* The fragment contains an additional partition. Move to + * next. */ + fragment_idx++; +@@ -602,8 +872,8 @@ static void setup_token_decoder(VP8D_COMP *pbi, + fragment_end, + fragment_idx - 1, + num_token_partitions); +- pbi->fragment_sizes[fragment_idx] = partition_size; +- fragment_size -= partition_size; ++ pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size; ++ fragment_size -= (unsigned int)partition_size; + assert(fragment_idx <= num_token_partitions); + if (fragment_size > 0) + { +@@ -637,16 +907,6 @@ static void setup_token_decoder(VP8D_COMP *pbi, + #endif + } + +-static void stop_token_decoder(VP8D_COMP *pbi) +-{ +- VP8_COMMON *pc = &pbi->common; +- +- if (pc->multi_token_partition != ONE_PARTITION) +- { +- vpx_free(pbi->mbc); +- pbi->mbc = NULL; +- } +-} + + static void init_frame(VP8D_COMP *pbi) + { +@@ -661,7 +921,6 @@ static void init_frame(VP8D_COMP *pbi) + vp8_init_mbmode_probs(pc); + + vp8_default_coef_probs(pc); +- vp8_kf_default_bmode_probs(pc->kf_bmode_prob); + + /* reset the segment feature data to 0 with delta coding (Default state). 
*/ + vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); +@@ -685,13 +944,8 @@ static void init_frame(VP8D_COMP *pbi) + } + else + { +- if (!pc->use_bilinear_mc_filter) +- pc->mcomp_filter_type = SIXTAP; +- else +- pc->mcomp_filter_type = BILINEAR; +- + /* To enable choice of different interploation filters */ +- if (pc->mcomp_filter_type == SIXTAP) ++ if (!pc->use_bilinear_mc_filter) + { + xd->subpixel_predict = vp8_sixtap_predict4x4; + xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; +@@ -725,7 +979,7 @@ static void init_frame(VP8D_COMP *pbi) + + int vp8_decode_frame(VP8D_COMP *pbi) + { +- vp8_reader *const bc = & pbi->bc; ++ vp8_reader *const bc = & pbi->mbc[8]; + VP8_COMMON *const pc = & pbi->common; + MACROBLOCKD *const xd = & pbi->mb; + const unsigned char *data = pbi->fragments[0]; +@@ -737,9 +991,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) + int corrupt_tokens = 0; + int prev_independent_partitions = pbi->independent_partitions; + ++ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; ++ + /* start with no corruption of current frame */ + xd->corrupted = 0; +- pc->yv12_fb[pc->new_fb_idx].corrupted = 0; ++ yv12_fb_new->corrupted = 0; + + if (data_end - data < 3) + { +@@ -774,11 +1030,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) + + vp8_setup_version(pc); + ++ + if (pc->frame_type == KEY_FRAME) + { +- const int Width = pc->Width; +- const int Height = pc->Height; +- + /* vet via sync code */ + /* When error concealment is enabled we should only check the sync + * code if we have enough bits available +@@ -803,56 +1057,21 @@ int vp8_decode_frame(VP8D_COMP *pbi) + } + data += 7; + +- if (Width != pc->Width || Height != pc->Height) +- { +- int prev_mb_rows = pc->mb_rows; +- +- if (pc->Width <= 0) +- { +- pc->Width = Width; +- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, +- "Invalid frame width"); +- } +- +- if (pc->Height <= 0) +- { +- pc->Height = Height; +- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, +- "Invalid frame height"); +- } +- +- if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) +- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, +- "Failed to allocate frame buffers"); +- +-#if CONFIG_ERROR_CONCEALMENT +- pbi->overlaps = NULL; +- if (pbi->ec_enabled) +- { +- if (vp8_alloc_overlap_lists(pbi)) +- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, +- "Failed to allocate overlap lists " +- "for error concealment"); +- } +-#endif +- +-#if CONFIG_MULTITHREAD +- if (pbi->b_multithreaded_rd) +- vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); +-#endif +- } ++ } ++ else ++ { ++ vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); ++ vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); + } + } +- +- if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || +- pc->Width == 0 || pc->Height == 0) ++ if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) + { + return -1; + } + + init_frame(pbi); + +- if (vp8dx_start_decode(bc, data, data_end - data)) ++ if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data))) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder 0"); + if (pc->frame_type == KEY_FRAME) { +@@ -961,7 +1180,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) + + setup_token_decoder(pbi, data + first_partition_length_in_bytes); + +- xd->current_bc = &pbi->bc2; ++ xd->current_bc = &pbi->mbc[0]; + + /* Read the default quantizers. 
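The block elided behind this comment is untouched by the diff; for orientation, upstream it reads a 7-bit base quantizer index followed by five optional signed deltas, using the get_delta_q() helper whose signature is visible in an earlier hunk header. Roughly as follows, quoted from memory of the surrounding upstream code, so treat the field and function names as approximate and verify against decodframe.c before relying on them:

    int q_update = 0;

    pc->base_qindex  = vp8_read_literal(bc, 7);  /* 7-bit base Q index */
    pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update);
    pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update);
    pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update);
    pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update);
    pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update);

    if (q_update)  /* any delta changed: rebuild the dequant tables */
        vp8cx_init_de_quantizer(pbi);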
*/ + { +@@ -1094,26 +1313,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) + } + } + +- vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); +- vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); +- +- /* set up frame new frame for intra coded blocks */ +-#if CONFIG_MULTITHREAD +- if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) +-#endif +- vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); +- +- vp8_setup_block_dptrs(xd); +- +- vp8_build_block_doffsets(xd); +- + /* clear out the coeff buffer */ + vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + +- /* Read the mb_no_coeff_skip flag */ +- pc->mb_no_coeff_skip = (int)vp8_read_bit(bc); +- +- + vp8_decode_mode_mvs(pbi); + + #if CONFIG_ERROR_CONCEALMENT +@@ -1132,9 +1334,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) + #if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) + { +- int i; ++ unsigned int i; + vp8mt_decode_mb_rows(pbi, xd); +- vp8_yv12_extend_frame_borders(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ ++ vp8_yv12_extend_frame_borders(yv12_fb_new); + for (i = 0; i < pbi->decoding_thread_count; ++i) + corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted; + } +@@ -1145,18 +1347,16 @@ int vp8_decode_frame(VP8D_COMP *pbi) + corrupt_tokens |= xd->corrupted; + } + +- stop_token_decoder(pbi); +- + /* Collect information about decoder corruption. */ + /* 1. Check first boolean decoder for errors. */ +- pc->yv12_fb[pc->new_fb_idx].corrupted = vp8dx_bool_error(bc); ++ yv12_fb_new->corrupted = vp8dx_bool_error(bc); + /* 2. Check the macroblock information */ +- pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens; ++ yv12_fb_new->corrupted |= corrupt_tokens; + + if (!pbi->decoded_key_frame) + { + if (pc->frame_type == KEY_FRAME && +- !pc->yv12_fb[pc->new_fb_idx].corrupted) ++ !yv12_fb_new->corrupted) + pbi->decoded_key_frame = 1; + else + vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, +@@ -1165,13 +1365,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) + + /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ + +- /* If this was a kf or Gf note the Q used */ +- if ((pc->frame_type == KEY_FRAME) || +- pc->refresh_golden_frame || pc->refresh_alt_ref_frame) +- { +- pc->last_kf_gf_q = pc->base_qindex; +- } +- + if (pc->refresh_entropy_probs == 0) + { + vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); +diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c +index 0c39848..452ff6c 100644 +--- a/vp8/decoder/detokenize.c ++++ b/vp8/decoder/detokenize.c +@@ -53,7 +53,8 @@ static const uint8_t kZigzag[16] = { + #define NUM_PROBAS 11 + #define NUM_CTX 3 + +-typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting ++/* for const-casting */ ++typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; + + static int GetSigned(BOOL_DECODER *br, int value_to_sign) + { +diff --git a/vp8/decoder/error_concealment.c b/vp8/decoder/error_concealment.c +index 7750728..8b2e32b 100644 +--- a/vp8/decoder/error_concealment.c ++++ b/vp8/decoder/error_concealment.c +@@ -51,12 +51,13 @@ int vp8_alloc_overlap_lists(VP8D_COMP *pbi) + vpx_free(pbi->overlaps); + pbi->overlaps = NULL; + } ++ + pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols, + sizeof(MB_OVERLAP)); ++ + if (pbi->overlaps == NULL) + return -1; +- vpx_memset(pbi->overlaps, 0, +- sizeof(MB_OVERLAP) * pbi->common.mb_rows * pbi->common.mb_cols); ++ + return 0; + } + +diff 
--git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c +index c59ce25..8d6871b 100644 +--- a/vp8/decoder/onyxd_if.c ++++ b/vp8/decoder/onyxd_if.c +@@ -80,6 +80,7 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) + + #if CONFIG_ERROR_CONCEALMENT + pbi->ec_enabled = oxcf->error_concealment; ++ pbi->overlaps = NULL; + #else + pbi->ec_enabled = 0; + #endif +@@ -99,6 +100,8 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) + */ + pbi->independent_partitions = 0; + ++ vp8_setup_block_dptrs(&pbi->mb); ++ + return pbi; + } + +@@ -117,21 +120,20 @@ void vp8dx_remove_decompressor(VP8D_COMP *pbi) + vp8_de_alloc_overlap_lists(pbi); + #endif + vp8_remove_common(&pbi->common); +- vpx_free(pbi->mbc); + vpx_free(pbi); + } + + +-vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) ++vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) + { + VP8_COMMON *cm = &pbi->common; + int ref_fb_idx; + +- if (ref_frame_flag == VP8_LAST_FLAG) ++ if (ref_frame_flag == VP8_LAST_FRAME) + ref_fb_idx = cm->lst_fb_idx; +- else if (ref_frame_flag == VP8_GOLD_FLAG) ++ else if (ref_frame_flag == VP8_GOLD_FRAME) + ref_fb_idx = cm->gld_fb_idx; +- else if (ref_frame_flag == VP8_ALT_FLAG) ++ else if (ref_frame_flag == VP8_ALTR_FRAME) + ref_fb_idx = cm->alt_fb_idx; + else{ + vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, +@@ -153,17 +155,17 @@ vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, + } + + +-vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) ++vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) + { + VP8_COMMON *cm = &pbi->common; + int *ref_fb_ptr = NULL; + int free_fb; + +- if (ref_frame_flag == VP8_LAST_FLAG) ++ if (ref_frame_flag == VP8_LAST_FRAME) + ref_fb_ptr = &cm->lst_fb_idx; +- else if (ref_frame_flag == VP8_GOLD_FLAG) ++ else if (ref_frame_flag == VP8_GOLD_FRAME) + ref_fb_ptr = &cm->gld_fb_idx; +- else if (ref_frame_flag == VP8_ALT_FLAG) ++ else if (ref_frame_flag == VP8_ALTR_FRAME) + ref_fb_ptr = &cm->alt_fb_idx; + else{ + vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, +@@ -279,28 +281,22 @@ static int swap_frame_buffers (VP8_COMMON *cm) + return err; + } + +-int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsigned char *source, int64_t time_stamp) ++int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, ++ const uint8_t *source, ++ int64_t time_stamp) + { + #if HAVE_NEON + int64_t dx_store_reg[8]; + #endif + VP8_COMMON *cm = &pbi->common; +- int retcode = 0; +- +- /*if(pbi->ready_for_new_data == 0) +- return -1;*/ +- +- if (pbi == 0) +- { +- return -1; +- } ++ int retcode = -1; + + pbi->common.error.error_code = VPX_CODEC_OK; + + if (pbi->num_fragments == 0) + { + /* New frame, reset fragment pointers and sizes */ +- vpx_memset(pbi->fragments, 0, sizeof(pbi->fragments)); ++ vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments)); + vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes)); + } + if (pbi->input_fragments && !(source == NULL && size == 0)) +@@ -381,20 +377,14 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi + + cm->new_fb_idx = get_free_fb (cm); + ++ /* setup reference frames for vp8_decode_frame */ ++ pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; ++ pbi->dec_fb_ref[LAST_FRAME] = 
&cm->yv12_fb[cm->lst_fb_idx]; ++ pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; ++ pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; ++ + if (setjmp(pbi->common.error.jmp)) + { +-#if HAVE_NEON +-#if CONFIG_RUNTIME_CPU_DETECT +- if (cm->cpu_caps & HAS_NEON) +-#endif +- { +- vp8_pop_neon(dx_store_reg); +- } +-#endif +- pbi->common.error.setjmp = 0; +- +- pbi->num_fragments = 0; +- + /* We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. +@@ -403,7 +393,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi + + if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; +- return -1; ++ ++ goto decode_exit; + } + + pbi->common.error.setjmp = 1; +@@ -412,68 +403,19 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi + + if (retcode < 0) + { +-#if HAVE_NEON +-#if CONFIG_RUNTIME_CPU_DETECT +- if (cm->cpu_caps & HAS_NEON) +-#endif +- { +- vp8_pop_neon(dx_store_reg); +- } +-#endif +- pbi->common.error.error_code = VPX_CODEC_ERROR; +- pbi->common.error.setjmp = 0; +- pbi->num_fragments = 0; + if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; +- return retcode; ++ ++ pbi->common.error.error_code = VPX_CODEC_ERROR; ++ goto decode_exit; + } + +-#if CONFIG_MULTITHREAD +- if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) +- { +- if (swap_frame_buffers (cm)) +- { +-#if HAVE_NEON +-#if CONFIG_RUNTIME_CPU_DETECT +- if (cm->cpu_caps & HAS_NEON) +-#endif +- { +- vp8_pop_neon(dx_store_reg); +- } +-#endif +- pbi->common.error.error_code = VPX_CODEC_ERROR; +- pbi->common.error.setjmp = 0; +- pbi->num_fragments = 0; +- return -1; +- } +- } else +-#endif ++ if (swap_frame_buffers (cm)) + { +- if (swap_frame_buffers (cm)) +- { +-#if HAVE_NEON +-#if CONFIG_RUNTIME_CPU_DETECT +- if (cm->cpu_caps & HAS_NEON) +-#endif +- { +- vp8_pop_neon(dx_store_reg); +- } +-#endif +- pbi->common.error.error_code = VPX_CODEC_ERROR; +- pbi->common.error.setjmp = 0; +- pbi->num_fragments = 0; +- return -1; +- } +- +- if(cm->filter_level) +- { +- /* Apply the loop filter if appropriate. 
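The rework of vp8dx_receive_compressed_data() above and below replaces four duplicated error exits, each restoring the NEON registers, clearing the setjmp flag and resetting the fragment count, with a single decode_exit label. Reduced to its essential shape, with hypothetical helpers rather than the real calls:

    int  setup_frame(void);
    int  decode_frame_body(void);
    void restore_registers_and_state(void);

    int receive_data_sketch(void)
    {
        int retcode = -1;               /* pessimistic default */

        if (setup_frame() < 0)
            goto decode_exit;           /* every failure, one exit path */

        retcode = decode_frame_body();  /* >= 0 on success */

    decode_exit:
        restore_registers_and_state();  /* cleanup runs exactly once,
                                         * on success and failure alike */
        return retcode;
    }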
*/ +- vp8_loop_filter_frame(cm, &pbi->mb); +- } +- vp8_yv12_extend_frame_borders(cm->frame_to_show); ++ pbi->common.error.error_code = VPX_CODEC_ERROR; ++ goto decode_exit; + } + +- + vp8_clear_system_state(); + + #if CONFIG_ERROR_CONCEALMENT +@@ -498,49 +440,13 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi + } + #endif + +- /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/ +- + if (cm->show_frame) + cm->current_video_frame++; + + pbi->ready_for_new_data = 0; + pbi->last_time_stamp = time_stamp; +- pbi->num_fragments = 0; +- +-#if 0 +- { +- int i; +- int64_t earliest_time = pbi->dr[0].time_stamp; +- int64_t latest_time = pbi->dr[0].time_stamp; +- int64_t time_diff = 0; +- int bytes = 0; +- +- pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;; +- pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp; +- +- for (i = 0; i < 16; i++) +- { +- +- bytes += pbi->dr[i].size; +- +- if (pbi->dr[i].time_stamp < earliest_time) +- earliest_time = pbi->dr[i].time_stamp; +- +- if (pbi->dr[i].time_stamp > latest_time) +- latest_time = pbi->dr[i].time_stamp; +- } +- +- time_diff = latest_time - earliest_time; +- +- if (time_diff > 0) +- { +- pbi->common.bitrate = 80000.00 * bytes / time_diff ; +- pbi->common.framerate = 160000000.00 / time_diff ; +- } +- +- } +-#endif + ++decode_exit: + #if HAVE_NEON + #if CONFIG_RUNTIME_CPU_DETECT + if (cm->cpu_caps & HAS_NEON) +@@ -549,7 +455,9 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi + vp8_pop_neon(dx_store_reg); + } + #endif ++ + pbi->common.error.setjmp = 0; ++ pbi->num_fragments = 0; + return retcode; + } + int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) +diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h +index 97cf0dc..0063beb 100644 +--- a/vp8/decoder/onyxd_int.h ++++ b/vp8/decoder/onyxd_int.h +@@ -31,23 +31,18 @@ typedef struct + typedef struct + { + MACROBLOCKD mbd; +- int mb_row; + } MB_ROW_DEC; + +-typedef struct +-{ +- int64_t time_stamp; +- int size; +-} DATARATE; +- +- + typedef struct VP8D_COMP + { + DECLARE_ALIGNED(16, MACROBLOCKD, mb); + ++ YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; ++ + DECLARE_ALIGNED(16, VP8_COMMON, common); + +- vp8_reader bc, bc2; ++ /* the last partition will be used for the modes/mvs */ ++ vp8_reader mbc[MAX_PARTITIONS]; + + VP8D_CONFIG oxcf; + +@@ -62,7 +57,7 @@ typedef struct VP8D_COMP + volatile int b_multithreaded_rd; + int max_threads; + int current_mb_col_main; +- int decoding_thread_count; ++ unsigned int decoding_thread_count; + int allocated_decoding_thread_count; + + int mt_baseline_filter_level[MAX_MB_SEGMENTS]; +@@ -85,12 +80,9 @@ typedef struct VP8D_COMP + /* end of threading data */ + #endif + +- vp8_reader *mbc; + int64_t last_time_stamp; + int ready_for_new_data; + +- DATARATE dr[16]; +- + vp8_prob prob_intra; + vp8_prob prob_last; + vp8_prob prob_gf; +diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c +index 47a0349..88c06be 100644 +--- a/vp8/decoder/threading.c ++++ b/vp8/decoder/threading.c +@@ -24,10 +24,18 @@ + #include "detokenize.h" + #include "vp8/common/reconintra4x4.h" + #include "vp8/common/reconinter.h" ++#include "vp8/common/setupintrarecon.h" + #if CONFIG_ERROR_CONCEALMENT + #include "error_concealment.h" + #endif + ++#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) ++#define 
CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ ++ CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ ++ memset((p), 0, (n) * sizeof(*(p))); \ ++} while (0) ++ ++ + extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); + + static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) +@@ -47,11 +55,9 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D + mbd->mode_info_stride = pc->mode_info_stride; + + mbd->frame_type = pc->frame_type; +- mbd->pre = pc->yv12_fb[pc->lst_fb_idx]; +- mbd->dst = pc->yv12_fb[pc->new_fb_idx]; ++ mbd->pre = xd->pre; ++ mbd->dst = xd->dst; + +- vp8_setup_block_dptrs(mbd); +- vp8_build_block_doffsets(mbd); + mbd->segmentation_enabled = xd->segmentation_enabled; + mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; + vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); +@@ -65,7 +71,7 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D + mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; + mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; + +- mbd->current_bc = &pbi->bc2; ++ mbd->current_bc = &pbi->mbc[0]; + + vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); +@@ -73,16 +79,18 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D + vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); + + mbd->fullpixel_mask = 0xffffffff; +- if(pc->full_pixel) ++ ++ if (pc->full_pixel) + mbd->fullpixel_mask = 0xfffffff8; + + } + +- for (i=0; i< pc->mb_rows; i++) +- pbi->mt_current_mb_col[i]=-1; ++ for (i = 0; i < pc->mb_rows; i++) ++ pbi->mt_current_mb_col[i] = -1; + } + +-static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) ++static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, ++ unsigned int mb_idx) + { + MB_PREDICTION_MODE mode; + int i; +@@ -166,7 +174,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i + { + short *DQC = xd->dequant_y1; + int dst_stride = xd->dst.y_stride; +- unsigned char *base_dst = xd->dst.y_buffer; + + /* clear out residual eob info */ + if(xd->mode_info_context->mbmi.mb_skip_coeff) +@@ -177,17 +184,19 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i + for (i = 0; i < 16; i++) + { + BLOCKD *b = &xd->block[i]; +- int b_mode = xd->mode_info_context->bmi[i].as_mode; +- unsigned char *yabove; ++ unsigned char *dst = xd->dst.y_buffer + b->offset; ++ B_PREDICTION_MODE b_mode = ++ xd->mode_info_context->bmi[i].as_mode; ++ unsigned char *Above; + unsigned char *yleft; + int left_stride; + unsigned char top_left; + + /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ + if (i < 4 && pbi->common.filter_level) +- yabove = xd->recon_above[0] + b->offset; //i*4; ++ Above = xd->recon_above[0] + b->offset; + else +- yabove = (base_dst - dst_stride) + b->offset; ++ Above = dst - dst_stride; + + if (i%4==0 && pbi->common.filter_level) + { +@@ -196,34 +205,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i + } + else + { +- yleft = (base_dst - 1) + b->offset; ++ yleft = dst - 1; + left_stride = dst_stride; + } + + if ((i==4 || i==8 || i==12) && pbi->common.filter_level) + top_left = *(xd->recon_left[0] + i - 1); + else +- top_left = yabove[-1]; ++ top_left = 
Above[-1]; + +- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, +- b_mode, +- base_dst + b->offset, dst_stride, +- top_left); ++ vp8_intra4x4_predict(Above, yleft, left_stride, ++ b_mode, dst, dst_stride, top_left); + + if (xd->eobs[i] ) + { + if (xd->eobs[i] > 1) + { +- vp8_dequant_idct_add +- (b->qcoeff, DQC, +- base_dst + b->offset, dst_stride); ++ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); + } + else + { +- vp8_dc_only_idct_add +- (b->qcoeff[0] * DQC[0], +- base_dst + b->offset, dst_stride, +- base_dst + b->offset, dst_stride); ++ vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], ++ dst, dst_stride, dst, dst_stride); + ((int *)b->qcoeff)[0] = 0; + } + } +@@ -297,60 +300,44 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i + } + } + +-typedef void (*init_current_bc_fn_t)(VP8D_COMP *pbi, MACROBLOCKD *xd, +- int start_mb_row, int mb_row, int num_part); +- +-static void init_current_bc(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, +- int mb_row, int num_part) ++static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) + { +- (void) start_mb_row; +- +- xd->current_bc = &pbi->mbc[mb_row%num_part]; +-} +- +-static void init_current_bc_threads(VP8D_COMP *pbi, MACROBLOCKD *xd, +- int start_mb_row, int mb_row, int num_part) +-{ +- (void) xd; +- pbi->mb_row_di[start_mb_row - 1].mb_row = mb_row; +- pbi->mb_row_di[start_mb_row - 1].mbd.current_bc = &pbi->mbc[mb_row%num_part]; +-} +- +- +-static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, +- init_current_bc_fn_t init_current_bc_fn) +-{ +- volatile int *last_row_current_mb_col = NULL; ++ volatile const int *last_row_current_mb_col; ++ volatile int *current_mb_col; + int mb_row; + VP8_COMMON *pc = &pbi->common; +- int nsync = pbi->sync_range; ++ const int nsync = pbi->sync_range; ++ const int first_row_no_sync_above = pc->mb_cols + nsync; + int num_part = 1 << pbi->common.multi_token_partition; ++ int last_mb_row = start_mb_row; ++ ++ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; ++ YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; ++ ++ int recon_y_stride = yv12_fb_new->y_stride; ++ int recon_uv_stride = yv12_fb_new->uv_stride; + +- int dst_fb_idx = pc->new_fb_idx; + unsigned char *ref_buffer[MAX_REF_FRAMES][3]; + unsigned char *dst_buffer[3]; + int i; +- int ref_fb_index[MAX_REF_FRAMES]; + int ref_fb_corrupted[MAX_REF_FRAMES]; + + ref_fb_corrupted[INTRA_FRAME] = 0; + +- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; +- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; +- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; +- + for(i = 1; i < MAX_REF_FRAMES; i++) + { +- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; +- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; +- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; ++ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; + +- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; ++ ref_buffer[i][0] = this_fb->y_buffer; ++ ref_buffer[i][1] = this_fb->u_buffer; ++ ref_buffer[i][2] = this_fb->v_buffer; ++ ++ ref_fb_corrupted[i] = this_fb->corrupted; + } + +- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; +- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; +- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; ++ dst_buffer[0] = yv12_fb_new->y_buffer; ++ dst_buffer[1] = yv12_fb_new->u_buffer; ++ dst_buffer[2] = yv12_fb_new->v_buffer; + + xd->up_available = (start_mb_row != 0); + +@@ -359,18 +346,20 @@ static void decode_mb_rows(VP8D_COMP *pbi, 
MACROBLOCKD *xd, int start_mb_row, + int i; + int recon_yoffset, recon_uvoffset; + int mb_col; +- int ref_fb_idx = pc->lst_fb_idx; +- int dst_fb_idx = pc->new_fb_idx; +- int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; +- int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; +- + int filter_level; + loop_filter_info_n *lfi_n = &pc->lf_info; + +- init_current_bc_fn(pbi, xd, start_mb_row, mb_row, num_part); ++ /* save last row processed by this thread */ ++ last_mb_row = mb_row; ++ /* select bool coder for current partition */ ++ xd->current_bc = &pbi->mbc[mb_row%num_part]; + + if (mb_row > 0) + last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; ++ else ++ last_row_current_mb_col = &first_row_no_sync_above; ++ ++ current_mb_col = &pbi->mt_current_mb_col[mb_row]; + + recon_yoffset = mb_row * recon_y_stride * 16; + recon_uvoffset = mb_row * recon_uv_stride * 8; +@@ -394,7 +383,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; + xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; + +- //TODO: move to outside row loop ++ /* TODO: move to outside row loop */ + xd->recon_left_stride[0] = 1; + xd->recon_left_stride[1] = 1; + } +@@ -412,16 +401,22 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + xd->recon_above[1] -= xd->dst.uv_stride; + xd->recon_above[2] -= xd->dst.uv_stride; + +- //TODO: move to outside row loop ++ /* TODO: move to outside row loop */ + xd->recon_left_stride[0] = xd->dst.y_stride; + xd->recon_left_stride[1] = xd->dst.uv_stride; ++ ++ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], ++ xd->recon_left[2], xd->dst.y_stride, ++ xd->dst.uv_stride); + } + + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) + { +- if ( mb_row > 0 && (mb_col & (nsync-1)) == 0) ++ *current_mb_col = mb_col - 1; ++ ++ if ((mb_col & (nsync - 1)) == 0) + { +- while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) ++ while (mb_col > (*last_row_current_mb_col - nsync)) + { + x86_pause_hint(); + thread_sleep(0); +@@ -477,7 +472,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + /* propagate errors from reference frames */ + xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; + +- decode_macroblock(pbi, xd, 0); ++ mt_decode_macroblock(pbi, xd, 0); + + xd->left_available = 1; + +@@ -591,9 +586,6 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + ++xd->mode_info_context; /* next mb */ + + xd->above_context++; +- +- /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/ +- pbi->mt_current_mb_col[mb_row] = mb_col; + } + + /* adjust to the next row of mbs */ +@@ -601,8 +593,8 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + { + if(mb_row != pc->mb_rows-1) + { +- int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; +- int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); ++ int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; ++ int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); + + for (i = 0; i < 4; i++) + { +@@ -611,8 +603,13 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; + } + } +- } else +- vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); ++ } ++ else ++ vp8_extend_mb_row(yv12_fb_new, 
xd->dst.y_buffer + 16, ++ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); ++ ++ /* last MB of row is ready just after extension is done */ ++ *current_mb_col = mb_col + nsync; + + ++xd->mode_info_context; /* skip prediction column */ + xd->up_available = 1; +@@ -620,6 +617,11 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, + /* since we have multithread */ + xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; + } ++ ++ /* signal end of frame decoding if this thread processed the last mb_row */ ++ if (last_mb_row == (pc->mb_rows - 1)) ++ sem_post(&pbi->h_event_end_decoding); ++ + } + + +@@ -635,7 +637,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) + if (pbi->b_multithreaded_rd == 0) + break; + +- /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/ + if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) + { + if (pbi->b_multithreaded_rd == 0) +@@ -643,21 +644,11 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) + else + { + MACROBLOCKD *xd = &mbrd->mbd; +- + xd->left_context = &mb_row_left_context; + +- decode_mb_rows(pbi, xd, ithread+1, init_current_bc_threads); ++ mt_decode_mb_rows(pbi, xd, ithread+1); + } + } +- +- /* add this to each frame */ +- if ((mbrd->mb_row == pbi->common.mb_rows-1) || +- ((mbrd->mb_row == pbi->common.mb_rows-2) && +- (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1)) +- { +- /*SetEvent(pbi->h_event_end_decoding);*/ +- sem_post(&pbi->h_event_end_decoding); +- } + } + + return 0 ; +@@ -667,7 +658,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) + void vp8_decoder_create_threads(VP8D_COMP *pbi) + { + int core_count = 0; +- int ithread; ++ unsigned int ithread; + + pbi->b_multithreaded_rd = 0; + pbi->allocated_decoding_thread_count = 0; +@@ -684,16 +675,17 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) + pbi->b_multithreaded_rd = 1; + pbi->decoding_thread_count = core_count - 1; + +- CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); +- CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); +- CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count)); +- vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count); +- CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count)); ++ CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); ++ CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); ++ CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); ++ CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); + + for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) + { + sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); + ++ vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); ++ + pbi->de_thread_data[ithread].ithread = ithread; + pbi->de_thread_data[ithread].ptr1 = (void *)pbi; + pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; +@@ -810,32 +802,32 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) + uv_width = width >>1; + + /* Allocate an int for each mb row. */ +- CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows)); ++ CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); + + /* Allocate memory for above_row buffers. 
*/
+- CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+- for (i=0; i< pc->mb_rows; i++)
++ CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
++ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1))));
+
+- CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+- for (i=0; i< pc->mb_rows; i++)
++ CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
++ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
+
+- CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+- for (i=0; i< pc->mb_rows; i++)
++ CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
++ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
+
+ /* Allocate memory for left_col buffers. */
+- CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+- for (i=0; i< pc->mb_rows; i++)
++ CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
++ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
+
+- CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+- for (i=0; i< pc->mb_rows; i++)
++ CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
++ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+
+- CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+- for (i=0; i< pc->mb_rows; i++)
++ CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
++ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+ }
+ }
+@@ -881,42 +873,46 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
+ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
+ {
+ VP8_COMMON *pc = &pbi->common;
+- int i;
++ unsigned int i;
++ int j;
+
+ int filter_level = pc->filter_level;
++ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
+
+ if (filter_level)
+ {
+ /* Set above_row buffer to 127 for decoding first MB row */
+- vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
+- vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
+- vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
++ vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
++ vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
++ vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
+
+- for (i=1; i<pc->mb_rows; i++)
++ for (j=1; j<pc->mb_rows; j++)
+ {
+- vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
+- vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+- vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
++ vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
++ vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
++ vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+ }
+
+ /* Set left_col to 129 initially */
+- for (i=0; i<pc->mb_rows; i++)
++ for (j=0; j<pc->mb_rows; j++)
+ {
+- vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
+- vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
+- vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
++ vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
++ vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
++ vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
+ }
+
+ /* Initialize the loop filter for this frame. */
+ vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
+ }
++ else
++ vp8_setup_intra_recon_top_line(yv12_fb_new);
+
+ setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
+
+ for (i = 0; i < pbi->decoding_thread_count; i++)
+ sem_post(&pbi->h_event_start_decoding[i]);
+
+- decode_mb_rows(pbi, xd, 0, init_current_bc);
++ mt_decode_mb_rows(pbi, xd, 0);
+
+ sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
+ }
+diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
+index 3824294..e666b6c 100644
+--- a/vp8/encoder/bitstream.c
++++ b/vp8/encoder/bitstream.c
+@@ -118,7 +118,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi)
+
+ update_mode(
+ w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree,
+- Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count
++ Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count
+ );
+ }
+ {
+@@ -127,7 +127,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi)
+
+ update_mode(
+ w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,
+- Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->uv_mode_count
++ Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count
+ );
+ }
+ }
+@@ -172,7 +172,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
+ while (p < stop)
+ {
+ const int t = p->Token;
+- const vp8_token *a = vp8_coef_encodings + t;
++ vp8_token *a = vp8_coef_encodings + t;
+ const vp8_extra_bit_struct *b = vp8_extra_bits + t;
+ int i = 0;
+ const unsigned char *pp = p->context_tree;
+@@ -397,7 +397,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
+ {
+ const TOKENEXTRA *p = cpi->tplist[mb_row].start;
+ const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
+- int tokens = stop - p;
++ int tokens = (int)(stop - p);
+
+ vp8_pack_tokens_c(w, p, tokens);
+ }
+@@ -416,7 +416,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
+ {
+ const TOKENEXTRA *p = cpi->tplist[mb_row].start;
+ const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
+- int tokens = stop - p;
++ int tokens = (int)(stop - p);
+
+ vp8_pack_tokens_c(w, p, tokens);
+ }
+@@ -461,7 +461,7 @@ static void write_mv
+
+ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACROBLOCKD *x)
+ {
+- // Encode the MB segment id.
++ /* Encode the MB segment id. */
+ if (x->segmentation_enabled && x->update_mb_segmentation_map)
+ {
+ switch (mi->segment_id)
+@@ -483,7 +483,7 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO
+ vp8_write(w, 1, x->mb_segment_tree_probs[2]);
+ break;
+
+- // TRAP.. This should not happen
++ /* TRAP..
This should not happen */ + default: + vp8_write(w, 0, x->mb_segment_tree_probs[0]); + vp8_write(w, 0, x->mb_segment_tree_probs[1]); +@@ -493,11 +493,11 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO + } + void vp8_convert_rfct_to_prob(VP8_COMP *const cpi) + { +- const int *const rfct = cpi->count_mb_ref_frame_usage; ++ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; + const int rf_intra = rfct[INTRA_FRAME]; + const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; + +- // Calculate the probabilities used to code the ref frame based on useage ++ /* Calculate the probabilities used to code the ref frame based on usage */ + if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) + cpi->prob_intra_coded = 1; + +@@ -539,7 +539,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) + { + int total_mbs = pc->mb_rows * pc->mb_cols; + +- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; ++ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; + + if (prob_skip_false <= 1) + prob_skip_false = 1; +@@ -571,8 +571,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) + + MACROBLOCKD *xd = &cpi->mb.e_mbd; + +- // Distance of Mb to the various image edges. +- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units ++ /* Distance of Mb to the various image edges. ++ * These specified to 8th pel as they are always compared to MV ++ * values that are in 1/8th pel units ++ */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + xd->mb_to_top_edge = -((mb_row * 16)) << 3; +@@ -728,7 +730,7 @@ static void write_kfmodes(VP8_COMP *cpi) + { + int total_mbs = c->mb_rows * c->mb_cols; + +- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; ++ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; + + if (prob_skip_false <= 1) + prob_skip_false = 1; +@@ -754,7 +756,7 @@ static void write_kfmodes(VP8_COMP *cpi) + if (c->mb_no_coeff_skip) + vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); + +- kfwrite_ymode(bc, ym, c->kf_ymode_prob); ++ kfwrite_ymode(bc, ym, vp8_kf_ymode_prob); + + if (ym == B_PRED) + { +@@ -771,15 +773,15 @@ static void write_kfmodes(VP8_COMP *cpi) + ++intra_mode_stats [A] [L] [bm]; + #endif + +- write_bmode(bc, bm, c->kf_bmode_prob [A] [L]); ++ write_bmode(bc, bm, vp8_kf_bmode_prob [A] [L]); + } + while (++i < 16); + } + +- write_uv_mode(bc, (m++)->mbmi.uv_mode, c->kf_uv_mode_prob); ++ write_uv_mode(bc, (m++)->mbmi.uv_mode, vp8_kf_uv_mode_prob); + } + +- m++; // skip L prediction border ++ m++; /* skip L prediction border */ + } + } + +@@ -849,6 +851,7 @@ static int prob_update_savings(const unsigned int *ct, + + static int independent_coef_context_savings(VP8_COMP *cpi) + { ++ MACROBLOCK *const x = & cpi->mb; + int savings = 0; + int i = 0; + do +@@ -865,7 +868,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) + */ + + probs = (const unsigned int (*)[MAX_ENTROPY_TOKENS]) +- cpi->coef_counts[i][j]; ++ x->coef_counts[i][j]; + + /* Reset to default probabilities at key frames */ + if (cpi->common.frame_type == KEY_FRAME) +@@ -878,9 +881,6 @@ static int independent_coef_context_savings(VP8_COMP *cpi) + /* at every context */ + + /* calc probs and branch cts for this frame only */ +- //vp8_prob new_p [ENTROPY_NODES]; +- //unsigned int branch_ct [ENTROPY_NODES] [2]; +- + int t = 0; /* token/prob 
index */ + + vp8_tree_probs_from_distribution( +@@ -927,6 +927,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) + + static int default_coef_context_savings(VP8_COMP *cpi) + { ++ MACROBLOCK *const x = & cpi->mb; + int savings = 0; + int i = 0; + do +@@ -940,16 +941,13 @@ static int default_coef_context_savings(VP8_COMP *cpi) + /* at every context */ + + /* calc probs and branch cts for this frame only */ +- //vp8_prob new_p [ENTROPY_NODES]; +- //unsigned int branch_ct [ENTROPY_NODES] [2]; +- + int t = 0; /* token/prob index */ + + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + cpi->frame_coef_probs [i][j][k], + cpi->frame_branch_ct [i][j][k], +- cpi->coef_counts [i][j][k], ++ x->coef_counts [i][j][k], + 256, 1 + ); + +@@ -998,13 +996,13 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) + { + int savings = 0; + +- const int *const rfct = cpi->count_mb_ref_frame_usage; ++ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; + const int rf_intra = rfct[INTRA_FRAME]; + const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; + int new_intra, new_last, new_garf, oldtotal, newtotal; + int ref_frame_cost[MAX_REF_FRAMES]; + +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + + if (cpi->common.frame_type != KEY_FRAME) + { +@@ -1026,7 +1024,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) + rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME]; + + +- // old costs ++ /* old costs */ + vp8_calc_ref_frame_costs(ref_frame_cost,cpi->prob_intra_coded, + cpi->prob_last_coded,cpi->prob_gf_coded); + +@@ -1078,7 +1076,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) + #endif + int savings = 0; + +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + + do + { +@@ -1110,21 +1108,15 @@ void vp8_update_coef_probs(VP8_COMP *cpi) + } + do + { +- //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. ++ /* note: use result from vp8_estimate_entropy_savings, so no ++ * need to call vp8_tree_probs_from_distribution here. ++ */ ++ + /* at every context */ + + /* calc probs and branch cts for this frame only */ +- //vp8_prob new_p [ENTROPY_NODES]; +- //unsigned int branch_ct [ENTROPY_NODES] [2]; +- + int t = 0; /* token/prob index */ + +- //vp8_tree_probs_from_distribution( +- // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, +- // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], +- // 256, 1 +- // ); +- + do + { + const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t]; +@@ -1295,19 +1287,16 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256; + #endif + +- //vp8_kf_default_bmode_probs() is called in vp8_setup_key_frame() once for each +- //K frame before encode frame. pc->kf_bmode_prob doesn't get changed anywhere +- //else. No need to call it again here. 
--yw +- //vp8_kf_default_bmode_probs( pc->kf_bmode_prob); +- +- // every keyframe send startcode, width, height, scale factor, clamp and color type ++ /* every keyframe send startcode, width, height, scale factor, clamp ++ * and color type ++ */ + if (oh.type == KEY_FRAME) + { + int v; + + validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error); + +- // Start / synch code ++ /* Start / synch code */ + cx_data[0] = 0x9D; + cx_data[1] = 0x01; + cx_data[2] = 0x2a; +@@ -1326,7 +1315,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + + vp8_start_encode(bc, cx_data, cx_data_end); + +- // signal clr type ++ /* signal clr type */ + vp8_write_bit(bc, pc->clr_type); + vp8_write_bit(bc, pc->clamp_type); + +@@ -1335,13 +1324,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + vp8_start_encode(bc, cx_data, cx_data_end); + + +- // Signal whether or not Segmentation is enabled ++ /* Signal whether or not Segmentation is enabled */ + vp8_write_bit(bc, xd->segmentation_enabled); + +- // Indicate which features are enabled ++ /* Indicate which features are enabled */ + if (xd->segmentation_enabled) + { +- // Signal whether or not the segmentation map is being updated. ++ /* Signal whether or not the segmentation map is being updated. */ + vp8_write_bit(bc, xd->update_mb_segmentation_map); + vp8_write_bit(bc, xd->update_mb_segmentation_data); + +@@ -1351,15 +1340,15 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + + vp8_write_bit(bc, xd->mb_segement_abs_delta); + +- // For each segmentation feature (Quant and loop filter level) ++ /* For each segmentation feature (Quant and loop filter level) */ + for (i = 0; i < MB_LVL_MAX; i++) + { +- // For each of the segments ++ /* For each of the segments */ + for (j = 0; j < MAX_MB_SEGMENTS; j++) + { + Data = xd->segment_feature_data[i][j]; + +- // Frame level data ++ /* Frame level data */ + if (Data) + { + vp8_write_bit(bc, 1); +@@ -1384,7 +1373,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + + if (xd->update_mb_segmentation_map) + { +- // Write the probs used to decode the segment id for each macro block. ++ /* Write the probs used to decode the segment id for each mb */ + for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) + { + int Data = xd->mb_segment_tree_probs[i]; +@@ -1400,17 +1389,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + } + } + +- // Code to determine whether or not to update the scan order. + vp8_write_bit(bc, pc->filter_type); + vp8_write_literal(bc, pc->filter_level, 6); + vp8_write_literal(bc, pc->sharpness_level, 3); + +- // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). ++ /* Write out loop filter deltas applied at the MB level based on mode ++ * or ref frame (if they are enabled). 
++ */ + vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled); + + if (xd->mode_ref_lf_delta_enabled) + { +- // Do the deltas need to be updated ++ /* Do the deltas need to be updated */ + int send_update = xd->mode_ref_lf_delta_update + || cpi->oxcf.error_resilient_mode; + +@@ -1419,12 +1409,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + { + int Data; + +- // Send update ++ /* Send update */ + for (i = 0; i < MAX_REF_LF_DELTAS; i++) + { + Data = xd->ref_lf_deltas[i]; + +- // Frame level data ++ /* Frame level data */ + if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i] + || cpi->oxcf.error_resilient_mode) + { +@@ -1434,20 +1424,20 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + if (Data > 0) + { + vp8_write_literal(bc, (Data & 0x3F), 6); +- vp8_write_bit(bc, 0); // sign ++ vp8_write_bit(bc, 0); /* sign */ + } + else + { + Data = -Data; + vp8_write_literal(bc, (Data & 0x3F), 6); +- vp8_write_bit(bc, 1); // sign ++ vp8_write_bit(bc, 1); /* sign */ + } + } + else + vp8_write_bit(bc, 0); + } + +- // Send update ++ /* Send update */ + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) + { + Data = xd->mode_lf_deltas[i]; +@@ -1461,13 +1451,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + if (Data > 0) + { + vp8_write_literal(bc, (Data & 0x3F), 6); +- vp8_write_bit(bc, 0); // sign ++ vp8_write_bit(bc, 0); /* sign */ + } + else + { + Data = -Data; + vp8_write_literal(bc, (Data & 0x3F), 6); +- vp8_write_bit(bc, 1); // sign ++ vp8_write_bit(bc, 1); /* sign */ + } + } + else +@@ -1476,34 +1466,42 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + } + } + +- //signal here is multi token partition is enabled ++ /* signal here is multi token partition is enabled */ + vp8_write_literal(bc, pc->multi_token_partition, 2); + +- // Frame Qbaseline quantizer index ++ /* Frame Qbaseline quantizer index */ + vp8_write_literal(bc, pc->base_qindex, 7); + +- // Transmit Dc, Second order and Uv quantizer delta information ++ /* Transmit Dc, Second order and Uv quantizer delta information */ + put_delta_q(bc, pc->y1dc_delta_q); + put_delta_q(bc, pc->y2dc_delta_q); + put_delta_q(bc, pc->y2ac_delta_q); + put_delta_q(bc, pc->uvdc_delta_q); + put_delta_q(bc, pc->uvac_delta_q); + +- // When there is a key frame all reference buffers are updated using the new key frame ++ /* When there is a key frame all reference buffers are updated using ++ * the new key frame ++ */ + if (pc->frame_type != KEY_FRAME) + { +- // Should the GF or ARF be updated using the transmitted frame or buffer ++ /* Should the GF or ARF be updated using the transmitted frame ++ * or buffer ++ */ + vp8_write_bit(bc, pc->refresh_golden_frame); + vp8_write_bit(bc, pc->refresh_alt_ref_frame); + +- // If not being updated from current frame should either GF or ARF be updated from another buffer ++ /* If not being updated from current frame should either GF or ARF ++ * be updated from another buffer ++ */ + if (!pc->refresh_golden_frame) + vp8_write_literal(bc, pc->copy_buffer_to_gf, 2); + + if (!pc->refresh_alt_ref_frame) + vp8_write_literal(bc, pc->copy_buffer_to_arf, 2); + +- // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) ++ /* Indicate reference frame sign bias for Golden and ARF frames ++ * (always 0 for last frame buffer) ++ */ + vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); + vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); + } +@@ 
-1532,14 +1530,14 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + + #endif + +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + + #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + pack_coef_probs(cpi); + #else + if (pc->refresh_entropy_probs == 0) + { +- // save a copy for later refresh ++ /* save a copy for later refresh */ + vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); + } + +@@ -1550,7 +1548,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest + active_section = 2; + #endif + +- // Write out the mb_no_coeff_skip flag ++ /* Write out the mb_no_coeff_skip flag */ + vp8_write_bit(bc, pc->mb_no_coeff_skip); + + if (pc->frame_type == KEY_FRAME) +diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h +index 6165d04..a30f888 100644 +--- a/vp8/encoder/block.h ++++ b/vp8/encoder/block.h +@@ -18,7 +18,10 @@ + #include "vp8/common/entropy.h" + #include "vpx_ports/mem.h" + +-// motion search site ++#define MAX_MODES 20 ++#define MAX_ERROR_BINS 1024 ++ ++/* motion search site */ + typedef struct + { + MV mv; +@@ -27,11 +30,11 @@ typedef struct + + typedef struct block + { +- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries ++ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ + short *src_diff; + short *coeff; + +- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries ++ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ + short *quant; + short *quant_fast; + unsigned char *quant_shift; +@@ -39,7 +42,7 @@ typedef struct block + short *zrun_zbin_boost; + short *round; + +- // Zbin Over Quant value ++ /* Zbin Over Quant value */ + short zbin_extra; + + unsigned char **base_src; +@@ -59,12 +62,12 @@ typedef struct + + typedef struct macroblock + { +- DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y +- DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y ++ DECLARE_ALIGNED(16, short, src_diff[400]); /* 25 blocks Y,U,V,Y2 */ ++ DECLARE_ALIGNED(16, short, coeff[400]); /* 25 blocks Y,U,V,Y2 */ + DECLARE_ALIGNED(16, unsigned char, thismb[256]); + + unsigned char *thismb_ptr; +- // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries ++ /* 16 Y, 4 U, 4 V, 1 DC 2nd order block */ + BLOCK block[25]; + + YV12_BUFFER_CONFIG src; +@@ -90,16 +93,18 @@ typedef struct macroblock + signed int act_zbin_adj; + signed int last_act_zbin_adj; + +- int mvcosts[2][MVvals+1]; + int *mvcost[2]; +- int mvsadcosts[2][MVfpvals+1]; + int *mvsadcost[2]; +- int mbmode_cost[2][MB_MODE_COUNT]; +- int intra_uv_mode_cost[2][MB_MODE_COUNT]; +- unsigned int bmode_costs[10][10][10]; +- unsigned int inter_bmode_costs[B_MODE_COUNT]; +- +- // These define limits to motion vector components to prevent them from extending outside the UMV borders ++ int (*mbmode_cost)[MB_MODE_COUNT]; ++ int (*intra_uv_mode_cost)[MB_MODE_COUNT]; ++ int (*bmode_costs)[10][10]; ++ int *inter_bmode_costs; ++ int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] ++ [MAX_ENTROPY_TOKENS]; ++ ++ /* These define limits to motion vector components to prevent ++ * them from extending outside the UMV borders. 
++ */ + int mv_col_min; + int mv_col_max; + int mv_row_min; +@@ -107,18 +112,45 @@ typedef struct macroblock + + int skip; + +- int encode_breakout; ++ unsigned int encode_breakout; + +- //char * gf_active_ptr; + signed char *gf_active_ptr; + + unsigned char *active_ptr; + MV_CONTEXT *mvc; + +- unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + int optimize; + int q_index; + ++#if CONFIG_TEMPORAL_DENOISING ++ MB_PREDICTION_MODE best_sse_inter_mode; ++ int_mv best_sse_mv; ++ MV_REFERENCE_FRAME best_reference_frame; ++ MV_REFERENCE_FRAME best_zeromv_reference_frame; ++ unsigned char need_to_clamp_best_mvs; ++#endif ++ ++ int skip_true_count; ++ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; ++ unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ ++ int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ ++ int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ ++ int64_t prediction_error; ++ int64_t intra_error; ++ int count_mb_ref_frame_usage[MAX_REF_FRAMES]; ++ ++ int rd_thresh_mult[MAX_MODES]; ++ int rd_threshes[MAX_MODES]; ++ unsigned int mbs_tested_so_far; ++ unsigned int mode_test_hit_counts[MAX_MODES]; ++ int zbin_mode_boost_enabled; ++ int zbin_mode_boost; ++ int last_zbin_mode_boost; ++ ++ int last_zbin_over_quant; ++ int zbin_over_quant; ++ int error_bins[MAX_ERROR_BINS]; ++ + void (*short_fdct4x4)(short *input, short *output, int pitch); + void (*short_fdct8x4)(short *input, short *output, int pitch); + void (*short_walsh4x4)(short *input, short *output, int pitch); +diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h +index fb6cbaf..8309063 100644 +--- a/vp8/encoder/boolhuff.h ++++ b/vp8/encoder/boolhuff.h +@@ -32,7 +32,7 @@ typedef struct + unsigned char *buffer_end; + struct vpx_internal_error_info *error; + +- // Variables used to track bit costs without outputing to the bitstream ++ /* Variables used to track bit costs without outputing to the bitstream */ + unsigned int measure_cost; + unsigned long bit_counter; + } BOOL_CODER; +diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c +index 09ed9dd..f3faa22 100644 +--- a/vp8/encoder/denoising.c ++++ b/vp8/encoder/denoising.c +@@ -15,198 +15,293 @@ + #include "vpx_mem/vpx_mem.h" + #include "vpx_rtcd.h" + +-static const unsigned int NOISE_MOTION_THRESHOLD = 20*20; +-static const unsigned int NOISE_DIFF2_THRESHOLD = 75; +-// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100. +-static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20; +-static const unsigned int SSE_THRESHOLD = 16*16*40; ++static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; ++/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming ++ * var(noise) ~= 100. ++ */ ++static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; ++static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; + +-static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8) +-{ +- return (uint8_t)( +- (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8 +- (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8 +- >> 8); +-} ++/* ++ * The filter function was modified to reduce the computational complexity. ++ * Step 1: ++ * Instead of applying tap coefficients for each pixel, we calculated the ++ * pixel adjustments vs. pixel diff value ahead of time. 
++ * adjustment = filtered_value - current_raw ++ * = (filter_coefficient * diff + 128) >> 8 ++ * where ++ * filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); ++ * filter_coefficient += filter_coefficient / ++ * (3 + motion_magnitude_adjustment); ++ * filter_coefficient is clamped to 0 ~ 255. ++ * ++ * Step 2: ++ * The adjustment vs. diff curve becomes flat very quick when diff increases. ++ * This allowed us to use only several levels to approximate the curve without ++ * changing the filtering algorithm too much. ++ * The adjustments were further corrected by checking the motion magnitude. ++ * The levels used are: ++ * diff adjustment w/o motion correction adjustment w/ motion correction ++ * [-255, -16] -6 -7 ++ * [-15, -8] -4 -5 ++ * [-7, -4] -3 -4 ++ * [-3, 3] diff diff ++ * [4, 7] 3 4 ++ * [8, 15] 4 5 ++ * [16, 255] 6 7 ++ */ + +-static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src, +- YV12_BUFFER_CONFIG* dst, +- MACROBLOCK* x, +- unsigned int best_sse, +- unsigned int zero_mv_sse, +- int recon_yoffset, +- int recon_uvoffset) ++int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, ++ YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, ++ unsigned int motion_magnitude, int y_offset, ++ int uv_offset) + { +- MACROBLOCKD filter_xd = x->e_mbd; +- int mv_col; +- int mv_row; +- int sse_diff = zero_mv_sse - best_sse; +- // Compensate the running average. +- filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset; +- filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset; +- filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset; +- // Write the compensated running average to the destination buffer. +- filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset; +- filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset; +- filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset; +- // Use the best MV for the compensation. +- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; +- filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode; +- filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv; +- filter_xd.mode_info_context->mbmi.need_to_clamp_mvs = +- filter_xd.need_to_clamp_best_mvs; +- mv_col = filter_xd.best_sse_mv.as_mv.col; +- mv_row = filter_xd.best_sse_mv.as_mv.row; +- if (filter_xd.mode_info_context->mbmi.mode <= B_PRED || +- (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD && +- sse_diff < SSE_DIFF_THRESHOLD)) +- { +- // Handle intra blocks as referring to last frame with zero motion and +- // let the absolute pixel difference affect the filter factor. +- // Also consider small amount of motion as being random walk due to noise, +- // if it doesn't mean that we get a much bigger error. +- // Note that any changes to the mode info only affects the denoising. 
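The level table quoted in the comment above maps a pixel difference to a fixed adjustment; a minimal standalone sketch of that mapping follows (illustrative only, not part of this diff; the helper name is hypothetical, and the thresholds are copied from the table and from vp8_denoiser_filter_c):

    #include <stdlib.h>

    /* Sketch: diff -> adjustment per the level table above. motion_is_small
     * mirrors the motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD test made
     * in the patched filter. */
    static int denoiser_adjustment_sketch(int diff, int motion_is_small)
    {
        int absdiff = abs(diff);
        int adjustment;

        if (absdiff <= 3)
            return diff;        /* [-3, 3]: pass the diff through */
        else if (absdiff <= 7)
            adjustment = 3;     /* [4, 7] */
        else if (absdiff <= 15)
            adjustment = 4;     /* [8, 15] */
        else
            adjustment = 6;     /* [16, 255] */

        if (motion_is_small)
            adjustment += 1;    /* the "w/ motion correction" column */

        return diff > 0 ? adjustment : -adjustment;
    }
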
+- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; +- filter_xd.mode_info_context->mbmi.mode = ZEROMV; +- filter_xd.mode_info_context->mbmi.mv.as_int = 0; +- x->e_mbd.best_sse_inter_mode = ZEROMV; +- x->e_mbd.best_sse_mv.as_int = 0; +- best_sse = zero_mv_sse; +- } +- if (!x->skip) +- { +- vp8_build_inter_predictors_mb(&filter_xd); +- } +- else +- { +- vp8_build_inter16x16_predictors_mb(&filter_xd, +- filter_xd.dst.y_buffer, +- filter_xd.dst.u_buffer, +- filter_xd.dst.v_buffer, +- filter_xd.dst.y_stride, +- filter_xd.dst.uv_stride); +- } +- return best_sse; +-} ++ unsigned char *sig = signal->thismb; ++ int sig_stride = 16; ++ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; ++ int mc_avg_y_stride = mc_running_avg->y_stride; ++ unsigned char *running_avg_y = running_avg->y_buffer + y_offset; ++ int avg_y_stride = running_avg->y_stride; ++ int r, c, i; ++ int sum_diff = 0; ++ int adj_val[3] = {3, 4, 6}; + +-static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg, +- YV12_BUFFER_CONFIG* running_avg, +- MACROBLOCK* signal, +- unsigned int motion_magnitude2, +- int y_offset, +- int uv_offset) +-{ +- unsigned char* sig = signal->thismb; +- int sig_stride = 16; +- unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset; +- int mc_avg_y_stride = mc_running_avg->y_stride; +- unsigned char* running_avg_y = running_avg->y_buffer + y_offset; +- int avg_y_stride = running_avg->y_stride; +- int r, c; +- for (r = 0; r < 16; r++) +- { +- for (c = 0; c < 16; c++) ++ /* If motion_magnitude is small, making the denoiser more aggressive by ++ * increasing the adjustment for each level. */ ++ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ++ { ++ for (i = 0; i < 3; i++) ++ adj_val[i] += 1; ++ } ++ ++ for (r = 0; r < 16; ++r) + { +- int diff; +- int absdiff = 0; +- unsigned int filter_coefficient; +- absdiff = sig[c] - mc_running_avg_y[c]; +- absdiff = absdiff > 0 ? absdiff : -absdiff; +- assert(absdiff >= 0 && absdiff < 256); +- filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); +- // Allow some additional filtering of static blocks, or blocks with very +- // small motion vectors. +- filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3)); +- filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient; +- +- running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient); +- diff = sig[c] - running_avg_y[c]; +- +- if (diff * diff < NOISE_DIFF2_THRESHOLD) +- { +- // Replace with mean to suppress the noise. +- sig[c] = running_avg_y[c]; +- } +- else +- { +- // Replace the filter state with the signal since the change in this +- // pixel isn't classified as noise. +- running_avg_y[c] = sig[c]; +- } ++ for (c = 0; c < 16; ++c) ++ { ++ int diff = 0; ++ int adjustment = 0; ++ int absdiff = 0; ++ ++ diff = mc_running_avg_y[c] - sig[c]; ++ absdiff = abs(diff); ++ ++ /* When |diff| < 4, use pixel value from last denoised raw. 
*/ ++ if (absdiff <= 3) ++ { ++ running_avg_y[c] = mc_running_avg_y[c]; ++ sum_diff += diff; ++ } ++ else ++ { ++ if (absdiff >= 4 && absdiff <= 7) ++ adjustment = adj_val[0]; ++ else if (absdiff >= 8 && absdiff <= 15) ++ adjustment = adj_val[1]; ++ else ++ adjustment = adj_val[2]; ++ ++ if (diff > 0) ++ { ++ if ((sig[c] + adjustment) > 255) ++ running_avg_y[c] = 255; ++ else ++ running_avg_y[c] = sig[c] + adjustment; ++ ++ sum_diff += adjustment; ++ } ++ else ++ { ++ if ((sig[c] - adjustment) < 0) ++ running_avg_y[c] = 0; ++ else ++ running_avg_y[c] = sig[c] - adjustment; ++ ++ sum_diff -= adjustment; ++ } ++ } ++ } ++ ++ /* Update pointers for next iteration. */ ++ sig += sig_stride; ++ mc_running_avg_y += mc_avg_y_stride; ++ running_avg_y += avg_y_stride; + } +- sig += sig_stride; +- mc_running_avg_y += mc_avg_y_stride; +- running_avg_y += avg_y_stride; +- } ++ ++ if (abs(sum_diff) > SUM_DIFF_THRESHOLD) ++ return COPY_BLOCK; ++ ++ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, ++ signal->thismb, sig_stride); ++ return FILTER_BLOCK; + } + + int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height) + { +- assert(denoiser); +- denoiser->yv12_running_avg.flags = 0; +- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width, +- height, VP8BORDERINPIXELS) < 0) +- { +- vp8_denoiser_free(denoiser); +- return 1; +- } +- denoiser->yv12_mc_running_avg.flags = 0; +- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, +- height, VP8BORDERINPIXELS) < 0) +- { +- vp8_denoiser_free(denoiser); +- return 1; +- } +- vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0, +- denoiser->yv12_running_avg.frame_size); +- vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, +- denoiser->yv12_mc_running_avg.frame_size); +- return 0; ++ int i; ++ assert(denoiser); ++ ++ for (i = 0; i < MAX_REF_FRAMES; i++) ++ { ++ denoiser->yv12_running_avg[i].flags = 0; ++ ++ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width, ++ height, VP8BORDERINPIXELS) ++ < 0) ++ { ++ vp8_denoiser_free(denoiser); ++ return 1; ++ } ++ vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, ++ denoiser->yv12_running_avg[i].frame_size); ++ ++ } ++ denoiser->yv12_mc_running_avg.flags = 0; ++ ++ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, ++ height, VP8BORDERINPIXELS) < 0) ++ { ++ vp8_denoiser_free(denoiser); ++ return 1; ++ } ++ ++ vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, ++ denoiser->yv12_mc_running_avg.frame_size); ++ return 0; + } + + void vp8_denoiser_free(VP8_DENOISER *denoiser) + { +- assert(denoiser); +- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg); +- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); ++ int i; ++ assert(denoiser); ++ ++ for (i = 0; i < MAX_REF_FRAMES ; i++) ++ { ++ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]); ++ } ++ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); + } + ++ + void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, + MACROBLOCK *x, + unsigned int best_sse, + unsigned int zero_mv_sse, + int recon_yoffset, +- int recon_uvoffset) { +- int mv_row; +- int mv_col; +- unsigned int motion_magnitude2; +- // Motion compensate the running average. 
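For scale, the sum_diff early-out above abandons filtering once the accumulated adjustment averages more than 2 per pixel over the 16x16 block; a sketch of that decision in isolation (the function name is hypothetical; the threshold is the one added to denoising.h later in this diff):

    #include <stdlib.h>

    #define SUM_DIFF_THRESHOLD (16 * 16 * 2) /* as added to denoising.h */

    /* Mirrors the FILTER_BLOCK / COPY_BLOCK choice at the end of
     * vp8_denoiser_filter_c: keep the filtered block only while the net
     * change across the macroblock stays small. */
    static int keep_filtered_block_sketch(int sum_diff)
    {
        return abs(sum_diff) <= SUM_DIFF_THRESHOLD;
    }
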
+- best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg, +- &denoiser->yv12_mc_running_avg, +- x, +- best_sse, +- zero_mv_sse, +- recon_yoffset, +- recon_uvoffset); +- +- mv_row = x->e_mbd.best_sse_mv.as_mv.row; +- mv_col = x->e_mbd.best_sse_mv.as_mv.col; +- motion_magnitude2 = mv_row*mv_row + mv_col*mv_col; +- if (best_sse > SSE_THRESHOLD || +- motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD) +- { +- // No filtering of this block since it differs too much from the predictor, +- // or the motion vector magnitude is considered too big. +- vp8_copy_mem16x16(x->thismb, 16, +- denoiser->yv12_running_avg.y_buffer + recon_yoffset, +- denoiser->yv12_running_avg.y_stride); +- return; +- } +- // Filter. +- denoiser_filter(&denoiser->yv12_mc_running_avg, +- &denoiser->yv12_running_avg, +- x, +- motion_magnitude2, +- recon_yoffset, +- recon_uvoffset); ++ int recon_uvoffset) ++{ ++ int mv_row; ++ int mv_col; ++ unsigned int motion_magnitude2; ++ ++ MV_REFERENCE_FRAME frame = x->best_reference_frame; ++ MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; ++ ++ enum vp8_denoiser_decision decision = FILTER_BLOCK; ++ ++ if (zero_frame) ++ { ++ YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame]; ++ YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg; ++ YV12_BUFFER_CONFIG saved_pre,saved_dst; ++ MB_MODE_INFO saved_mbmi; ++ MACROBLOCKD *filter_xd = &x->e_mbd; ++ MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; ++ int mv_col; ++ int mv_row; ++ int sse_diff = zero_mv_sse - best_sse; ++ ++ saved_mbmi = *mbmi; ++ ++ /* Use the best MV for the compensation. */ ++ mbmi->ref_frame = x->best_reference_frame; ++ mbmi->mode = x->best_sse_inter_mode; ++ mbmi->mv = x->best_sse_mv; ++ mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs; ++ mv_col = x->best_sse_mv.as_mv.col; ++ mv_row = x->best_sse_mv.as_mv.row; ++ ++ if (frame == INTRA_FRAME || ++ ((unsigned int)(mv_row *mv_row + mv_col *mv_col) ++ <= NOISE_MOTION_THRESHOLD && ++ sse_diff < (int)SSE_DIFF_THRESHOLD)) ++ { ++ /* ++ * Handle intra blocks as referring to last frame with zero motion ++ * and let the absolute pixel difference affect the filter factor. ++ * Also consider small amount of motion as being random walk due ++ * to noise, if it doesn't mean that we get a much bigger error. ++ * Note that any changes to the mode info only affects the ++ * denoising. ++ */ ++ mbmi->ref_frame = ++ x->best_zeromv_reference_frame; ++ ++ src = &denoiser->yv12_running_avg[zero_frame]; ++ ++ mbmi->mode = ZEROMV; ++ mbmi->mv.as_int = 0; ++ x->best_sse_inter_mode = ZEROMV; ++ x->best_sse_mv.as_int = 0; ++ best_sse = zero_mv_sse; ++ } ++ ++ saved_pre = filter_xd->pre; ++ saved_dst = filter_xd->dst; ++ ++ /* Compensate the running average. */ ++ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; ++ filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; ++ filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; ++ /* Write the compensated running average to the destination buffer. 
*/ ++ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; ++ filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; ++ filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; ++ ++ if (!x->skip) ++ { ++ vp8_build_inter_predictors_mb(filter_xd); ++ } ++ else ++ { ++ vp8_build_inter16x16_predictors_mb(filter_xd, ++ filter_xd->dst.y_buffer, ++ filter_xd->dst.u_buffer, ++ filter_xd->dst.v_buffer, ++ filter_xd->dst.y_stride, ++ filter_xd->dst.uv_stride); ++ } ++ filter_xd->pre = saved_pre; ++ filter_xd->dst = saved_dst; ++ *mbmi = saved_mbmi; ++ ++ } ++ ++ mv_row = x->best_sse_mv.as_mv.row; ++ mv_col = x->best_sse_mv.as_mv.col; ++ motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; ++ if (best_sse > SSE_THRESHOLD || motion_magnitude2 ++ > 8 * NOISE_MOTION_THRESHOLD) ++ { ++ decision = COPY_BLOCK; ++ } ++ ++ if (decision == FILTER_BLOCK) ++ { ++ /* Filter. */ ++ decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg, ++ &denoiser->yv12_running_avg[INTRA_FRAME], ++ x, ++ motion_magnitude2, ++ recon_yoffset, recon_uvoffset); ++ } ++ if (decision == COPY_BLOCK) ++ { ++ /* No filtering of this block; it differs too much from the predictor, ++ * or the motion vector magnitude is considered too big. ++ */ ++ vp8_copy_mem16x16( ++ x->thismb, 16, ++ denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, ++ denoiser->yv12_running_avg[INTRA_FRAME].y_stride); ++ } + } +diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h +index 343531b..b025f5c 100644 +--- a/vp8/encoder/denoising.h ++++ b/vp8/encoder/denoising.h +@@ -13,10 +13,19 @@ + + #include "block.h" + ++#define SUM_DIFF_THRESHOLD (16 * 16 * 2) ++#define MOTION_MAGNITUDE_THRESHOLD (8*3) ++ ++enum vp8_denoiser_decision ++{ ++ COPY_BLOCK, ++ FILTER_BLOCK ++}; ++ + typedef struct vp8_denoiser + { +- YV12_BUFFER_CONFIG yv12_running_avg; +- YV12_BUFFER_CONFIG yv12_mc_running_avg; ++ YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES]; ++ YV12_BUFFER_CONFIG yv12_mc_running_avg; + } VP8_DENOISER; + + int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height); +@@ -30,4 +39,4 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, + int recon_yoffset, + int recon_uvoffset); + +-#endif // VP8_ENCODER_DENOISING_H_ ++#endif /* VP8_ENCODER_DENOISING_H_ */ +diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c +index 8233873..d1b647b 100644 +--- a/vp8/encoder/encodeframe.c ++++ b/vp8/encoder/encodeframe.c +@@ -33,7 +33,7 @@ + #endif + #include "encodeframe.h" + +-extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; ++extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; + extern void vp8_calc_ref_frame_costs(int *ref_frame_cost, + int prob_intra, + int prob_last, +@@ -45,7 +45,6 @@ extern void vp8_auto_select_speed(VP8_COMP *cpi); + extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi, + MACROBLOCK *x, + MB_ROW_COMP *mbr_ei, +- int mb_row, + int count); + static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); + +@@ -77,7 +76,7 @@ static const unsigned char VP8_VAR_OFFS[16]= + }; + + +-// Original activity measure from Tim T's code. ++/* Original activity measure from Tim T's code. */ + static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) + { + unsigned int act; +@@ -100,7 +99,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) + return act; + } + +-// Stub for alternative experimental activity measures. ++/* Stub for alternative experimental activity measures. 
*/ + static unsigned int alt_activity_measure( VP8_COMP *cpi, + MACROBLOCK *x, int use_dc_pred ) + { +@@ -108,8 +107,9 @@ static unsigned int alt_activity_measure( VP8_COMP *cpi, + } + + +-// Measure the activity of the current macroblock +-// What we measure here is TBD so abstracted to this function ++/* Measure the activity of the current macroblock ++ * What we measure here is TBD so abstracted to this function ++ */ + #define ALT_ACT_MEASURE 1 + static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, + int mb_row, int mb_col) +@@ -120,12 +120,12 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, + { + int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); + +- // Or use and alternative. ++ /* Or use and alternative. */ + mb_activity = alt_activity_measure( cpi, x, use_dc_pred ); + } + else + { +- // Original activity measure from Tim T's code. ++ /* Original activity measure from Tim T's code. */ + mb_activity = tt_activity_measure( cpi, x ); + } + +@@ -135,36 +135,36 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, + return mb_activity; + } + +-// Calculate an "average" mb activity value for the frame ++/* Calculate an "average" mb activity value for the frame */ + #define ACT_MEDIAN 0 + static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) + { + #if ACT_MEDIAN +- // Find median: Simple n^2 algorithm for experimentation ++ /* Find median: Simple n^2 algorithm for experimentation */ + { + unsigned int median; + unsigned int i,j; + unsigned int * sortlist; + unsigned int tmp; + +- // Create a list to sort to ++ /* Create a list to sort to */ + CHECK_MEM_ERROR(sortlist, + vpx_calloc(sizeof(unsigned int), + cpi->common.MBs)); + +- // Copy map to sort list ++ /* Copy map to sort list */ + vpx_memcpy( sortlist, cpi->mb_activity_map, + sizeof(unsigned int) * cpi->common.MBs ); + + +- // Ripple each value down to its correct position ++ /* Ripple each value down to its correct position */ + for ( i = 1; i < cpi->common.MBs; i ++ ) + { + for ( j = i; j > 0; j -- ) + { + if ( sortlist[j] < sortlist[j-1] ) + { +- // Swap values ++ /* Swap values */ + tmp = sortlist[j-1]; + sortlist[j-1] = sortlist[j]; + sortlist[j] = tmp; +@@ -174,7 +174,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) + } + } + +- // Even number MBs so estimate median as mean of two either side. ++ /* Even number MBs so estimate median as mean of two either side. 
*/ + median = ( 1 + sortlist[cpi->common.MBs >> 1] + + sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1; + +@@ -183,14 +183,14 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) + vpx_free(sortlist); + } + #else +- // Simple mean for now ++ /* Simple mean for now */ + cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs); + #endif + + if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) + cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; + +- // Experimental code: return fixed value normalized for several clips ++ /* Experimental code: return fixed value normalized for several clips */ + if ( ALT_ACT_MEASURE ) + cpi->activity_avg = 100000; + } +@@ -199,7 +199,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) + #define OUTPUT_NORM_ACT_STATS 0 + + #if USE_ACT_INDEX +-// Calculate and activity index for each mb ++/* Calculate and activity index for each mb */ + static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) + { + VP8_COMMON *const cm = & cpi->common; +@@ -214,19 +214,19 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) + fprintf(f, "\n%12d\n", cpi->activity_avg ); + #endif + +- // Reset pointers to start of activity map ++ /* Reset pointers to start of activity map */ + x->mb_activity_ptr = cpi->mb_activity_map; + +- // Calculate normalized mb activity number. ++ /* Calculate normalized mb activity number. */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { +- // for each macroblock col in image ++ /* for each macroblock col in image */ + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { +- // Read activity from the map ++ /* Read activity from the map */ + act = *(x->mb_activity_ptr); + +- // Calculate a normalized activity number ++ /* Calculate a normalized activity number */ + a = act + 4*cpi->activity_avg; + b = 4*act + cpi->activity_avg; + +@@ -238,7 +238,7 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) + #if OUTPUT_NORM_ACT_STATS + fprintf(f, " %6d", *(x->mb_activity_ptr)); + #endif +- // Increment activity map pointers ++ /* Increment activity map pointers */ + x->mb_activity_ptr++; + } + +@@ -255,8 +255,9 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) + } + #endif + +-// Loop through all MBs. Note activity of each, average activity and +-// calculate a normalized activity for each ++/* Loop through all MBs. 
Note activity of each, average activity and ++ * calculate a normalized activity for each ++ */ + static void build_activity_map( VP8_COMP *cpi ) + { + MACROBLOCK *const x = & cpi->mb; +@@ -273,15 +274,15 @@ static void build_activity_map( VP8_COMP *cpi ) + unsigned int mb_activity; + int64_t activity_sum = 0; + +- // for each macroblock row in image ++ /* for each macroblock row in image */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + #if ALT_ACT_MEASURE +- // reset above block coeffs ++ /* reset above block coeffs */ + xd->up_available = (mb_row != 0); + recon_yoffset = (mb_row * recon_y_stride * 16); + #endif +- // for each macroblock col in image ++ /* for each macroblock col in image */ + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + #if ALT_ACT_MEASURE +@@ -289,48 +290,48 @@ static void build_activity_map( VP8_COMP *cpi ) + xd->left_available = (mb_col != 0); + recon_yoffset += 16; + #endif +- //Copy current mb to a buffer ++ /* Copy current mb to a buffer */ + vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); + +- // measure activity ++ /* measure activity */ + mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col ); + +- // Keep frame sum ++ /* Keep frame sum */ + activity_sum += mb_activity; + +- // Store MB level activity details. ++ /* Store MB level activity details. */ + *x->mb_activity_ptr = mb_activity; + +- // Increment activity map pointer ++ /* Increment activity map pointer */ + x->mb_activity_ptr++; + +- // adjust to the next column of source macroblocks ++ /* adjust to the next column of source macroblocks */ + x->src.y_buffer += 16; + } + + +- // adjust to the next row of mbs ++ /* adjust to the next row of mbs */ + x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; + + #if ALT_ACT_MEASURE +- //extend the recon for intra prediction ++ /* extend the recon for intra prediction */ + vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + #endif + + } + +- // Calculate an "average" MB activity ++ /* Calculate an "average" MB activity */ + calc_av_activity(cpi, activity_sum); + + #if USE_ACT_INDEX +- // Calculate an activity index number of each mb ++ /* Calculate an activity index number of each mb */ + calc_activity_index( cpi, x ); + #endif + + } + +-// Macroblock activity masking ++/* Macroblock activity masking */ + void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) + { + #if USE_ACT_INDEX +@@ -342,7 +343,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) + int64_t b; + int64_t act = *(x->mb_activity_ptr); + +- // Apply the masking to the RD multiplier. ++ /* Apply the masking to the RD multiplier. 
*/ + a = act + (2*cpi->activity_avg); + b = (2*act) + cpi->activity_avg; + +@@ -351,7 +352,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) + x->errorperbit += (x->errorperbit==0); + #endif + +- // Activity based Zbin adjustment ++ /* Activity based Zbin adjustment */ + adjust_act_zbin(cpi, x); + } + +@@ -398,7 +399,7 @@ void encode_mb_row(VP8_COMP *cpi, + w = &cpi->bc[1]; + #endif + +- // reset above block coeffs ++ /* reset above block coeffs */ + xd->above_context = cm->above_context; + + xd->up_available = (mb_row != 0); +@@ -406,37 +407,41 @@ void encode_mb_row(VP8_COMP *cpi, + recon_uvoffset = (mb_row * recon_uv_stride * 8); + + cpi->tplist[mb_row].start = *tp; +- //printf("Main mb_row = %d\n", mb_row); ++ /* printf("Main mb_row = %d\n", mb_row); */ + +- // Distance of Mb to the top & bottom edges, specified in 1/8th pel +- // units as they are always compared to values that are in 1/8th pel units ++ /* Distance of Mb to the top & bottom edges, specified in 1/8th pel ++ * units as they are always compared to values that are in 1/8th pel ++ */ + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; + +- // Set up limit values for vertical motion vector components +- // to prevent them extending beyond the UMV borders ++ /* Set up limit values for vertical motion vector components ++ * to prevent them extending beyond the UMV borders ++ */ + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + + (VP8BORDERINPIXELS - 16); + +- // Set the mb activity pointer to the start of the row. ++ /* Set the mb activity pointer to the start of the row. */ + x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; + +- // for each macroblock col in image ++ /* for each macroblock col in image */ + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + + #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + *tp = cpi->tok; + #endif +- // Distance of Mb to the left & right edges, specified in +- // 1/8th pel units as they are always compared to values +- // that are in 1/8th pel units ++ /* Distance of Mb to the left & right edges, specified in ++ * 1/8th pel units as they are always compared to values ++ * that are in 1/8th pel units ++ */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; + +- // Set up limit values for horizontal motion vector components +- // to prevent them extending beyond the UMV borders ++ /* Set up limit values for horizontal motion vector components ++ * to prevent them extending beyond the UMV borders ++ */ + x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + + (VP8BORDERINPIXELS - 16); +@@ -449,13 +454,13 @@ void encode_mb_row(VP8_COMP *cpi, + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + +- //Copy current mb to a buffer ++ /* Copy current mb to a buffer */ + vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); + + #if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded != 0) + { +- *current_mb_col = mb_col - 1; // set previous MB done ++ *current_mb_col = mb_col - 1; /* set previous MB done */ + + if ((mb_col & (nsync - 1)) == 0) + { +@@ -471,11 +476,13 @@ void encode_mb_row(VP8_COMP *cpi, + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp8_activity_masking(cpi, x); + +- // Is segmentation enabled +- // MB level adjustment to quantizer ++ /* Is segmentation enabled */ ++ /* MB level adjustment to quantizer */ + 
if (xd->segmentation_enabled) + { +- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) ++ /* Code to set segment id in xd->mbmi.segment_id for current MB ++ * (with range checking) ++ */ + if (cpi->segmentation_map[map_index+mb_col] <= 3) + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; + else +@@ -484,7 +491,8 @@ void encode_mb_row(VP8_COMP *cpi, + vp8cx_mb_init_quantizer(cpi, x, 1); + } + else +- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default ++ /* Set to Segment 0 by default */ ++ xd->mode_info_context->mbmi.segment_id = 0; + + x->active_ptr = cpi->active_map + map_index + mb_col; + +@@ -514,21 +522,25 @@ void encode_mb_row(VP8_COMP *cpi, + + #endif + +- // Count of last ref frame 0,0 usage +- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) +- cpi->inter_zz_count ++; +- +- // Special case code for cyclic refresh +- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode +- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map ++ /* Special case code for cyclic refresh ++ * If cyclic update enabled then copy xd->mbmi.segment_id; (which ++ * may have been updated based on mode during ++ * vp8cx_encode_inter_macroblock()) back into the global ++ * segmentation map ++ */ + if ((cpi->current_layer == 0) && +- (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) ++ (cpi->cyclic_refresh_mode_enabled && ++ xd->segmentation_enabled)) + { + cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id; + +- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): +- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) +- // else mark it as dirty (1). ++ /* If the block has been refreshed mark it as clean (the ++ * magnitude of the -ve influences how long it will be before ++ * we consider another refresh): ++ * Else if it was coded (last frame 0,0) and has not already ++ * been refreshed then mark it as a candidate for cleanup ++ * next time (marked 0) else mark it as dirty (1). ++ */ + if (xd->mode_info_context->mbmi.segment_id) + cpi->cyclic_refresh_map[map_index+mb_col] = -1; + else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) +@@ -551,13 +563,13 @@ void encode_mb_row(VP8_COMP *cpi, + pack_tokens(w, tp_start, tok_count); + } + #endif +- // Increment pointer into gf usage flags structure. ++ /* Increment pointer into gf usage flags structure. */ + x->gf_active_ptr++; + +- // Increment the activity mask pointers. ++ /* Increment the activity mask pointers. 
*/ + x->mb_activity_ptr++; + +- // adjust to the next column of macroblocks ++ /* adjust to the next column of macroblocks */ + x->src.y_buffer += 16; + x->src.u_buffer += 8; + x->src.v_buffer += 8; +@@ -565,16 +577,16 @@ void encode_mb_row(VP8_COMP *cpi, + recon_yoffset += 16; + recon_uvoffset += 8; + +- // Keep track of segment usage ++ /* Keep track of segment usage */ + segment_counts[xd->mode_info_context->mbmi.segment_id] ++; + +- // skip to next mb ++ /* skip to next mb */ + xd->mode_info_context++; + x->partition_info++; + xd->above_context++; + } + +- //extend the recon for intra prediction ++ /* extend the recon for intra prediction */ + vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], + xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, +@@ -585,7 +597,7 @@ void encode_mb_row(VP8_COMP *cpi, + *current_mb_col = rightmost_col; + #endif + +- // this is to account for the border ++ /* this is to account for the border */ + xd->mode_info_context++; + x->partition_info++; + } +@@ -596,10 +608,10 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) + VP8_COMMON *const cm = & cpi->common; + MACROBLOCKD *const xd = & x->e_mbd; + +- // GF active flags data structure ++ /* GF active flags data structure */ + x->gf_active_ptr = (signed char *)cpi->gf_active_flags; + +- // Activity map pointer ++ /* Activity map pointer */ + x->mb_activity_ptr = cpi->mb_activity_map; + + x->act_zbin_adj = 0; +@@ -611,48 +623,42 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) + + xd->frame_type = cm->frame_type; + +- // reset intra mode contexts ++ /* reset intra mode contexts */ + if (cm->frame_type == KEY_FRAME) + vp8_init_mbmode_probs(cm); + +- // Copy data over into macro block data structures. ++ /* Copy data over into macro block data structures. */ + x->src = * cpi->Source; + xd->pre = cm->yv12_fb[cm->lst_fb_idx]; + xd->dst = cm->yv12_fb[cm->new_fb_idx]; + +- // set up frame for intra coded blocks ++ /* set up frame for intra coded blocks */ + vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); + + vp8_build_block_offsets(x); + +- vp8_setup_block_dptrs(&x->e_mbd); +- +- vp8_setup_block_ptrs(x); +- + xd->mode_info_context->mbmi.mode = DC_PRED; + xd->mode_info_context->mbmi.uv_mode = DC_PRED; + + xd->left_context = &cm->left_context; + +- vp8_zero(cpi->count_mb_ref_frame_usage) +- vp8_zero(cpi->ymode_count) +- vp8_zero(cpi->uv_mode_count) +- + x->mvc = cm->fc.mvc; + + vpx_memset(cm->above_context, 0, + sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); + +- // Special case treatment when GF and ARF are not sensible options for reference +- if (cpi->ref_frame_flags == VP8_LAST_FLAG) ++ /* Special case treatment when GF and ARF are not sensible options ++ * for reference ++ */ ++ if (cpi->ref_frame_flags == VP8_LAST_FRAME) + vp8_calc_ref_frame_costs(x->ref_frame_cost, + cpi->prob_intra_coded,255,128); + else if ((cpi->oxcf.number_of_layers > 1) && +- (cpi->ref_frame_flags == VP8_GOLD_FLAG)) ++ (cpi->ref_frame_flags == VP8_GOLD_FRAME)) + vp8_calc_ref_frame_costs(x->ref_frame_cost, + cpi->prob_intra_coded,1,255); + else if ((cpi->oxcf.number_of_layers > 1) && +- (cpi->ref_frame_flags == VP8_ALT_FLAG)) ++ (cpi->ref_frame_flags == VP8_ALTR_FRAME)) + vp8_calc_ref_frame_costs(x->ref_frame_cost, + cpi->prob_intra_coded,1,1); + else +@@ -664,6 +670,43 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) + xd->fullpixel_mask = 0xffffffff; + if(cm->full_pixel) + xd->fullpixel_mask = 0xfffffff8; ++ ++ vp8_zero(x->coef_counts); ++ vp8_zero(x->ymode_count); ++ vp8_zero(x->uv_mode_count) ++ 
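Since xd->fullpixel_mask was just set above: motion vector components are kept in 1/8th-pel units, so clearing the low three bits with 0xfffffff8 snaps a vector to whole pixels when cm->full_pixel is set. A small sketch of the effect, with a made-up component value:

    #include <stdio.h>

    int main(void)
    {
        int mv_subpel = 37;   /* 4 pels + 5/8, in 1/8th-pel units */
        int snapped   = (int)(mv_subpel & 0xfffffff8);

        printf("%d -> %d (%d whole pels)\n",
               mv_subpel, snapped, snapped >> 3);
        return 0;
    }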
x->prediction_error = 0; ++ x->intra_error = 0; ++ vp8_zero(x->count_mb_ref_frame_usage); ++} ++ ++static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) ++{ ++ int i = 0; ++ do ++ { ++ int j = 0; ++ do ++ { ++ int k = 0; ++ do ++ { ++ /* at every context */ ++ ++ /* calc probs and branch cts for this frame only */ ++ int t = 0; /* token/prob index */ ++ ++ do ++ { ++ x->coef_counts [i][j][k][t] += ++ x_thread->coef_counts [i][j][k][t]; ++ } ++ while (++t < ENTROPY_NODES); ++ } ++ while (++k < PREV_COEF_CONTEXTS); ++ } ++ while (++j < COEF_BANDS); ++ } ++ while (++i < BLOCK_TYPES); + } + + void vp8_encode_frame(VP8_COMP *cpi) +@@ -676,7 +719,7 @@ void vp8_encode_frame(VP8_COMP *cpi) + int segment_counts[MAX_MB_SEGMENTS]; + int totalrate; + #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +- BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition ++ BOOL_CODER * bc = &cpi->bc[1]; /* bc[0] is for control partition */ + const int num_part = (1 << cm->multi_token_partition); + #endif + +@@ -691,8 +734,8 @@ void vp8_encode_frame(VP8_COMP *cpi) + vp8_auto_select_speed(cpi); + } + +- // Functions setup for all frame types so we can use MC in AltRef +- if (cm->mcomp_filter_type == SIXTAP) ++ /* Functions setup for all frame types so we can use MC in AltRef */ ++ if(!cm->use_bilinear_mc_filter) + { + xd->subpixel_predict = vp8_sixtap_predict4x4; + xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; +@@ -707,43 +750,36 @@ void vp8_encode_frame(VP8_COMP *cpi) + xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; + } + +- // Reset frame count of inter 0,0 motion vector usage. +- cpi->inter_zz_count = 0; +- +- cpi->prediction_error = 0; +- cpi->intra_error = 0; +- cpi->skip_true_count = 0; ++ cpi->mb.skip_true_count = 0; + cpi->tok_count = 0; + + #if 0 +- // Experimental code ++ /* Experimental code */ + cpi->frame_distortion = 0; + cpi->last_mb_distortion = 0; + #endif + + xd->mode_info_context = cm->mi; + +- vp8_zero(cpi->MVcount); +- +- vp8_zero(cpi->coef_counts); ++ vp8_zero(cpi->mb.MVcount); + + vp8cx_frame_init_quantizer(cpi); + +- vp8_initialize_rd_consts(cpi, ++ vp8_initialize_rd_consts(cpi, x, + vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); + + vp8cx_initialize_me_consts(cpi, cm->base_qindex); + + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + { +- // Initialize encode frame context. ++ /* Initialize encode frame context. */ + init_encode_frame_mb_context(cpi); + +- // Build a frame level activity map ++ /* Build a frame level activity map */ + build_activity_map(cpi); + } + +- // re-init encode frame context. ++ /* re-init encode frame context. 
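The new sum_coef_counts() shown above accumulates one worker's token statistics into the owner's array with nested do/while loops. An equivalent for-loop form may be easier to scan; the dimension sizes below are VP8's usual entropy-table sizes, assumed here rather than restated in the hunk:

    #define BLOCK_TYPES        4
    #define COEF_BANDS         8
    #define PREV_COEF_CONTEXTS 3
    #define ENTROPY_NODES      11

    typedef unsigned int coef_counts_t[BLOCK_TYPES][COEF_BANDS]
                                      [PREV_COEF_CONTEXTS][ENTROPY_NODES];

    static void sum_coef_counts_loops(coef_counts_t dst, coef_counts_t src)
    {
        int i, j, k, t;
        for (i = 0; i < BLOCK_TYPES; i++)
            for (j = 0; j < COEF_BANDS; j++)
                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
                    for (t = 0; t < ENTROPY_NODES; t++)
                        dst[i][j][k][t] += src[i][j][k][t];
    }

    int main(void)
    {
        static coef_counts_t total, thread;
        thread[0][0][0][0] = 3;
        sum_coef_counts_loops(total, thread);
        return (int)total[0][0][0][0] - 3;   /* exits 0 on success */
    }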
*/ + init_encode_frame_mb_context(cpi); + + #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +@@ -768,7 +804,8 @@ void vp8_encode_frame(VP8_COMP *cpi) + { + int i; + +- vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); ++ vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, ++ cpi->encoding_thread_count); + + for (i = 0; i < cm->mb_rows; i++) + cpi->mt_current_mb_col[i] = -1; +@@ -790,7 +827,7 @@ void vp8_encode_frame(VP8_COMP *cpi) + + encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); + +- // adjust to the next row of mbs ++ /* adjust to the next row of mbs */ + x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; + x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; + x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; +@@ -809,7 +846,8 @@ void vp8_encode_frame(VP8_COMP *cpi) + + for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) + { +- cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start; ++ cpi->tok_count += (unsigned int) ++ (cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start); + } + + if (xd->segmentation_enabled) +@@ -829,14 +867,50 @@ void vp8_encode_frame(VP8_COMP *cpi) + + for (i = 0; i < cpi->encoding_thread_count; i++) + { ++ int mode_count; ++ int c_idx; + totalrate += cpi->mb_row_ei[i].totalrate; ++ ++ cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count; ++ ++ for(mode_count = 0; mode_count < VP8_YMODES; mode_count++) ++ cpi->mb.ymode_count[mode_count] += ++ cpi->mb_row_ei[i].mb.ymode_count[mode_count]; ++ ++ for(mode_count = 0; mode_count < VP8_UV_MODES; mode_count++) ++ cpi->mb.uv_mode_count[mode_count] += ++ cpi->mb_row_ei[i].mb.uv_mode_count[mode_count]; ++ ++ for(c_idx = 0; c_idx < MVvals; c_idx++) ++ { ++ cpi->mb.MVcount[0][c_idx] += ++ cpi->mb_row_ei[i].mb.MVcount[0][c_idx]; ++ cpi->mb.MVcount[1][c_idx] += ++ cpi->mb_row_ei[i].mb.MVcount[1][c_idx]; ++ } ++ ++ cpi->mb.prediction_error += ++ cpi->mb_row_ei[i].mb.prediction_error; ++ cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error; ++ ++ for(c_idx = 0; c_idx < MAX_REF_FRAMES; c_idx++) ++ cpi->mb.count_mb_ref_frame_usage[c_idx] += ++ cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx]; ++ ++ for(c_idx = 0; c_idx < MAX_ERROR_BINS; c_idx++) ++ cpi->mb.error_bins[c_idx] += ++ cpi->mb_row_ei[i].mb.error_bins[c_idx]; ++ ++ /* add up counts for each thread */ ++ sum_coef_counts(x, &cpi->mb_row_ei[i].mb); + } + + } + else + #endif + { +- // for each macroblock row in image ++ ++ /* for each macroblock row in image */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + vp8_zero(cm->left_context) +@@ -847,13 +921,13 @@ void vp8_encode_frame(VP8_COMP *cpi) + + encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); + +- // adjust to the next row of mbs ++ /* adjust to the next row of mbs */ + x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; + x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + } + +- cpi->tok_count = tp - cpi->tok; ++ cpi->tok_count = (unsigned int)(tp - cpi->tok); + } + + #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +@@ -873,12 +947,13 @@ void vp8_encode_frame(VP8_COMP *cpi) + + + // Work out the segment probabilities if segmentation is enabled +- if (xd->segmentation_enabled) ++ // and needs to be updated ++ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) + { + int 
tot_count; + int i; + +- // Set to defaults ++ /* Set to defaults */ + vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); + + tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; +@@ -899,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi) + if (tot_count > 0) + xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; + +- // Zero probabilities not allowed ++ /* Zero probabilities not allowed */ + for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++) + { + if (xd->mb_segment_tree_probs[i] == 0) +@@ -908,10 +983,10 @@ void vp8_encode_frame(VP8_COMP *cpi) + } + } + +- // 256 rate units to the bit +- cpi->projected_frame_size = totalrate >> 8; // projected_frame_size in units of BYTES ++ /* projected_frame_size in units of BYTES */ ++ cpi->projected_frame_size = totalrate >> 8; + +- // Make a note of the percentage MBs coded Intra. ++ /* Make a note of the percentage MBs coded Intra. */ + if (cm->frame_type == KEY_FRAME) + { + cpi->this_frame_percent_intra = 100; +@@ -920,50 +995,23 @@ void vp8_encode_frame(VP8_COMP *cpi) + { + int tot_modes; + +- tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME] +- + cpi->count_mb_ref_frame_usage[LAST_FRAME] +- + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] +- + cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; ++ tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] ++ + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] ++ + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] ++ + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; + + if (tot_modes) +- cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; +- +- } +- +-#if 0 +- { +- int cnt = 0; +- int flag[2] = {0, 0}; +- +- for (cnt = 0; cnt < MVPcount; cnt++) +- { +- if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt]) +- { +- flag[0] = 1; +- vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount); +- break; +- } +- } +- +- for (cnt = 0; cnt < MVPcount; cnt++) +- { +- if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt]) +- { +- flag[1] = 1; +- vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount); +- break; +- } +- } ++ cpi->this_frame_percent_intra = ++ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; + +- if (flag[0] || flag[1]) +- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); + } +-#endif + + #if ! CONFIG_REALTIME_ONLY +- // Adjust the projected reference frame usage probability numbers to reflect +- // what we have just seen. This may be useful when we make multiple iterations +- // of the recode loop rather than continuing to use values from the previous frame. ++ /* Adjust the projected reference frame usage probability numbers to ++ * reflect what we have just seen. This may be useful when we make ++ * multiple iterations of the recode loop rather than continuing to use ++ * values from the previous frame. 
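The probability hunk above converts raw per-segment macroblock counts into the three node probabilities of VP8's segment tree, whose root separates segments {0,1} from {2,3}. A worked standalone sketch with invented counts (the real code writes into xd->mb_segment_tree_probs):

    #include <stdio.h>

    int main(void)
    {
        int c[4] = { 120, 40, 30, 10 };         /* invented usage counts */
        unsigned char p[3] = { 255, 255, 255 }; /* defaults */
        int tot, i;

        tot = c[0] + c[1] + c[2] + c[3];
        if (tot)
            p[0] = (unsigned char)(((c[0] + c[1]) * 255) / tot);

        tot = c[0] + c[1];
        if (tot)
            p[1] = (unsigned char)((c[0] * 255) / tot);

        tot = c[2] + c[3];
        if (tot)
            p[2] = (unsigned char)((c[2] * 255) / tot);

        for (i = 0; i < 3; i++)   /* zero probabilities not allowed */
            if (p[i] == 0)
                p[i] = 1;

        printf("tree probs: %u %u %u\n", p[0], p[1], p[2]);
        return 0;
    }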
++ */ + if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || + (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) + { +@@ -1017,16 +1065,13 @@ void vp8_build_block_offsets(MACROBLOCK *x) + + vp8_build_block_doffsets(&x->e_mbd); + +- // y blocks ++ /* y blocks */ + x->thismb_ptr = &x->thismb[0]; + for (br = 0; br < 4; br++) + { + for (bc = 0; bc < 4; bc++) + { + BLOCK *this_block = &x->block[block]; +- //this_block->base_src = &x->src.y_buffer; +- //this_block->src_stride = x->src.y_stride; +- //this_block->src = 4 * br * this_block->src_stride + 4 * bc; + this_block->base_src = &x->thismb_ptr; + this_block->src_stride = 16; + this_block->src = 4 * br * 16 + 4 * bc; +@@ -1034,7 +1079,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) + } + } + +- // u blocks ++ /* u blocks */ + for (br = 0; br < 2; br++) + { + for (bc = 0; bc < 2; bc++) +@@ -1047,7 +1092,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) + } + } + +- // v blocks ++ /* v blocks */ + for (br = 0; br < 2; br++) + { + for (bc = 0; bc < 2; bc++) +@@ -1087,13 +1132,14 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) + + #endif + +- ++cpi->ymode_count[m]; +- ++cpi->uv_mode_count[uvm]; ++ ++x->ymode_count[m]; ++ ++x->uv_mode_count[uvm]; + + } + +-// Experimental stub function to create a per MB zbin adjustment based on +-// some previously calculated measure of MB activity. ++/* Experimental stub function to create a per MB zbin adjustment based on ++ * some previously calculated measure of MB activity. ++ */ + static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) + { + #if USE_ACT_INDEX +@@ -1103,7 +1149,7 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) + int64_t b; + int64_t act = *(x->mb_activity_ptr); + +- // Apply the masking to the RD multiplier. ++ /* Apply the masking to the RD multiplier. */ + a = act + 4*cpi->activity_avg; + b = 4*act + cpi->activity_avg; + +@@ -1114,15 +1160,16 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) + #endif + } + +-int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ++int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, ++ TOKENEXTRA **t) + { + MACROBLOCKD *xd = &x->e_mbd; + int rate; + + if (cpi->sf.RD && cpi->compressor_speed != 2) +- vp8_rd_pick_intra_mode(cpi, x, &rate); ++ vp8_rd_pick_intra_mode(x, &rate); + else +- vp8_pick_intra_mode(cpi, x, &rate); ++ vp8_pick_intra_mode(x, &rate); + + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + { +@@ -1139,7 +1186,7 @@ int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) + + sum_intra_stats(cpi, x); + +- vp8_tokenize_mb(cpi, &x->e_mbd, t); ++ vp8_tokenize_mb(cpi, x, t); + + if (xd->mode_info_context->mbmi.mode != B_PRED) + vp8_inverse_transform_mby(xd); +@@ -1176,25 +1223,27 @@ int vp8cx_encode_inter_macroblock + x->encode_breakout = cpi->oxcf.encode_breakout; + + #if CONFIG_TEMPORAL_DENOISING +- // Reset the best sse mode/mv for each macroblock. +- x->e_mbd.best_sse_inter_mode = 0; +- x->e_mbd.best_sse_mv.as_int = 0; +- x->e_mbd.need_to_clamp_best_mvs = 0; ++ /* Reset the best sse mode/mv for each macroblock. */ ++ x->best_reference_frame = INTRA_FRAME; ++ x->best_zeromv_reference_frame = INTRA_FRAME; ++ x->best_sse_inter_mode = 0; ++ x->best_sse_mv.as_int = 0; ++ x->need_to_clamp_best_mvs = 0; + #endif + + if (cpi->sf.RD) + { +- int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; ++ int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; + + /* Are we using the fast quantizer for the mode selection? 
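In adjust_act_zbin() above, the blend a = act + 4*avg, b = 4*act + avg damps the raw activity ratio before it becomes a zbin offset. A numeric sketch of how the damping behaves; the values are invented and the final integer rounding of the real function is elided:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t avg = 1000;                    /* frame activity average */
        int64_t acts[3] = { 250, 1000, 4000 }; /* quiet, average, busy */
        int i;

        for (i = 0; i < 3; i++)
        {
            int64_t a = acts[i] + 4 * avg;   /* the 4:1 weights pull  */
            int64_t b = 4 * acts[i] + avg;   /* the ratio toward 1    */
            printf("act=%4lld  b/a=%.3f (raw ratio %.3f)\n",
                   (long long)acts[i], (double)b / (double)a,
                   (double)acts[i] / (double)avg);
        }
        return 0;
    }

A macroblock at the frame average lands exactly at 1.0, while a 4x-busier block is damped to roughly 2.1 instead of 4.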
*/ + if(cpi->sf.use_fastquant_for_pick) + { +- cpi->mb.quantize_b = vp8_fast_quantize_b; +- cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair; ++ x->quantize_b = vp8_fast_quantize_b; ++ x->quantize_b_pair = vp8_fast_quantize_b_pair; + + /* the fast quantizer does not use zbin_extra, so + * do not recalculate */ +- cpi->zbin_mode_boost_enabled = 0; ++ x->zbin_mode_boost_enabled = 0; + } + vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, + &distortion, &intra_error); +@@ -1202,12 +1251,12 @@ int vp8cx_encode_inter_macroblock + /* switch back to the regular quantizer for the encode */ + if (cpi->sf.improved_quant) + { +- cpi->mb.quantize_b = vp8_regular_quantize_b; +- cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair; ++ x->quantize_b = vp8_regular_quantize_b; ++ x->quantize_b_pair = vp8_regular_quantize_b_pair; + } + + /* restore cpi->zbin_mode_boost_enabled */ +- cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; ++ x->zbin_mode_boost_enabled = zbin_mode_boost_enabled; + + } + else +@@ -1216,28 +1265,28 @@ int vp8cx_encode_inter_macroblock + &distortion, &intra_error, mb_row, mb_col); + } + +- cpi->prediction_error += distortion; +- cpi->intra_error += intra_error; ++ x->prediction_error += distortion; ++ x->intra_error += intra_error; + + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + { +- // Adjust the zbin based on this MB rate. ++ /* Adjust the zbin based on this MB rate. */ + adjust_act_zbin( cpi, x ); + } + + #if 0 +- // Experimental RD code ++ /* Experimental RD code */ + cpi->frame_distortion += distortion; + cpi->last_mb_distortion = distortion; + #endif + +- // MB level adjutment to quantizer setup ++ /* MB level adjutment to quantizer setup */ + if (xd->segmentation_enabled) + { +- // If cyclic update enabled ++ /* If cyclic update enabled */ + if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) + { +- // Clear segment_id back to 0 if not coded (last frame 0,0) ++ /* Clear segment_id back to 0 if not coded (last frame 0,0) */ + if ((xd->mode_info_context->mbmi.segment_id == 1) && + ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV))) + { +@@ -1250,24 +1299,25 @@ int vp8cx_encode_inter_macroblock + } + + { +- // Experimental code. Special case for gf and arf zeromv modes. +- // Increase zbin size to supress noise +- cpi->zbin_mode_boost = 0; +- if (cpi->zbin_mode_boost_enabled) ++ /* Experimental code. Special case for gf and arf zeromv modes. 
++ * Increase zbin size to supress noise ++ */ ++ x->zbin_mode_boost = 0; ++ if (x->zbin_mode_boost_enabled) + { + if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME ) + { + if (xd->mode_info_context->mbmi.mode == ZEROMV) + { + if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) +- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; ++ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else +- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; ++ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } + else if (xd->mode_info_context->mbmi.mode == SPLITMV) +- cpi->zbin_mode_boost = 0; ++ x->zbin_mode_boost = 0; + else +- cpi->zbin_mode_boost = MV_ZBIN_BOOST; ++ x->zbin_mode_boost = MV_ZBIN_BOOST; + } + } + +@@ -1277,7 +1327,7 @@ int vp8cx_encode_inter_macroblock + vp8_update_zbin_extra(cpi, x); + } + +- cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; ++ x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; + + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + { +@@ -1322,7 +1372,7 @@ int vp8cx_encode_inter_macroblock + + if (!x->skip) + { +- vp8_tokenize_mb(cpi, xd, t); ++ vp8_tokenize_mb(cpi, x, t); + + if (xd->mode_info_context->mbmi.mode != B_PRED) + vp8_inverse_transform_mby(xd); +@@ -1339,12 +1389,12 @@ int vp8cx_encode_inter_macroblock + + if (cpi->common.mb_no_coeff_skip) + { +- cpi->skip_true_count ++; ++ x->skip_true_count ++; + vp8_fix_contexts(xd); + } + else + { +- vp8_stuff_mb(cpi, xd, t); ++ vp8_stuff_mb(cpi, x, t); + } + } + +diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c +index 1f445b7..340dd63 100644 +--- a/vp8/encoder/encodeintra.c ++++ b/vp8/encoder/encodeintra.c +@@ -54,10 +54,13 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) + BLOCKD *b = &x->e_mbd.block[ib]; + BLOCK *be = &x->block[ib]; + int dst_stride = x->e_mbd.dst.y_stride; +- unsigned char *base_dst = x->e_mbd.dst.y_buffer; ++ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; ++ unsigned char *Above = dst - dst_stride; ++ unsigned char *yleft = dst - 1; ++ unsigned char top_left = Above[-1]; + +- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, +- b->bmi.as_mode, b->predictor, 16); ++ vp8_intra4x4_predict(Above, yleft, dst_stride, b->bmi.as_mode, ++ b->predictor, 16, top_left); + + vp8_subtract_b(be, b, 16); + +@@ -67,14 +70,11 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) + + if (*b->eob > 1) + { +- vp8_short_idct4x4llm(b->dqcoeff, +- b->predictor, 16, base_dst + b->offset, dst_stride); ++ vp8_short_idct4x4llm(b->dqcoeff, b->predictor, 16, dst, dst_stride); + } + else + { +- vp8_dc_only_idct_add +- (b->dqcoeff[0], b->predictor, 16, base_dst + b->offset, +- dst_stride); ++ vp8_dc_only_idct_add(b->dqcoeff[0], b->predictor, 16, dst, dst_stride); + } + } + +diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c +index f89e4f7..7d494f2 100644 +--- a/vp8/encoder/encodemb.c ++++ b/vp8/encoder/encodemb.c +@@ -137,10 +137,10 @@ void vp8_transform_intra_mby(MACROBLOCK *x) + &x->block[i].coeff[0], 32); + } + +- // build dc block from 16 y dc values ++ /* build dc block from 16 y dc values */ + build_dcblock(x); + +- // do 2nd order transform on the dc block ++ /* do 2nd order transform on the dc block */ + x->short_walsh4x4(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); + +@@ -157,7 +157,7 @@ static void transform_mb(MACROBLOCK *x) + &x->block[i].coeff[0], 32); + } + +- // build dc block from 16 y dc values ++ /* build dc block from 16 y dc values */ + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + 
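The zbin_mode_boost logic just above is a small decision ladder. A sketch with the same shape; the function name and the boost values are illustrative placeholders for the real GF_ZEROMV_ZBIN_BOOST / LF_ZEROMV_ZBIN_BOOST / MV_ZBIN_BOOST constants:

    enum { BOOST_GF_ZEROMV = 12, BOOST_LF_ZEROMV = 6, BOOST_MV = 4 };

    static int pick_zbin_mode_boost(int is_intra, int is_zeromv,
                                    int is_splitmv, int ref_is_last)
    {
        if (is_intra)
            return 0;
        if (is_zeromv)
            return ref_is_last ? BOOST_LF_ZEROMV : BOOST_GF_ZEROMV;
        if (is_splitmv)
            return 0;
        return BOOST_MV;
    }

    int main(void)
    {
        /* golden/altref zero-mv blocks get the largest boost */
        return pick_zbin_mode_boost(0, 1, 0, 0) == BOOST_GF_ZEROMV ? 0 : 1;
    }

The larger boost on non-LAST zero-mv blocks widens the quantizer dead zone to suppress noise that would otherwise churn the golden and altref predictors.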
build_dcblock(x); + +@@ -167,7 +167,7 @@ static void transform_mb(MACROBLOCK *x) + &x->block[i].coeff[0], 16); + } + +- // do 2nd order transform on the dc block ++ /* do 2nd order transform on the dc block */ + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + x->short_walsh4x4(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); +@@ -185,7 +185,7 @@ static void transform_mby(MACROBLOCK *x) + &x->block[i].coeff[0], 32); + } + +- // build dc block from 16 y dc values ++ /* build dc block from 16 y dc values */ + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + { + build_dcblock(x); +@@ -208,7 +208,7 @@ struct vp8_token_state{ + short qc; + }; + +-// TODO: experiments to find optimal multiple numbers ++/* TODO: experiments to find optimal multiple numbers */ + #define Y1_RD_MULT 4 + #define UV_RD_MULT 2 + #define Y2_RD_MULT 16 +diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c +index 0145f6d..0c43d06 100644 +--- a/vp8/encoder/encodemv.c ++++ b/vp8/encoder/encodemv.c +@@ -29,15 +29,15 @@ static void encode_mvcomponent( + const vp8_prob *p = mvc->prob; + const int x = v < 0 ? -v : v; + +- if (x < mvnum_short) // Small ++ if (x < mvnum_short) /* Small */ + { + vp8_write(w, 0, p [mvpis_short]); + vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3); + + if (!x) +- return; // no sign bit ++ return; /* no sign bit */ + } +- else // Large ++ else /* Large */ + { + int i = 0; + +@@ -100,7 +100,7 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc + static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) + { + const vp8_prob *p = mvc->prob; +- const int x = v; //v<0? -v:v; ++ const int x = v; + unsigned int cost; + + if (x < mvnum_short) +@@ -132,12 +132,12 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) + cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1); + } + +- return cost; // + vp8_cost_bit( p [MVPsign], v < 0); ++ return cost; /* + vp8_cost_bit( p [MVPsign], v < 0); */ + } + + void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) + { +- int i = 1; //-mv_max; ++ int i = 1; + unsigned int cost0 = 0; + unsigned int cost1 = 0; + +@@ -151,7 +151,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m + + do + { +- //mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]); + cost0 = cost_mvcomponent(i, &mvc[0]); + + mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]); +@@ -168,7 +167,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m + + do + { +- //mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); + cost1 = cost_mvcomponent(i, &mvc[1]); + + mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]); +@@ -179,10 +177,10 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m + } + + +-// Motion vector probability table update depends on benefit. +-// Small correction allows for the fact that an update to an MV probability +-// may have benefit in subsequent frames as well as the current one. +- ++/* Motion vector probability table update depends on benefit. ++ * Small correction allows for the fact that an update to an MV probability ++ * may have benefit in subsequent frames as well as the current one. 
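The encodemv.c hunks above preserve VP8's split between short motion-vector magnitudes, coded with a small tree, and long ones coded bit-by-bit plus a sign. A classification sketch; MVNUM_SHORT and MVLONG_WIDTH are the values VP8 normally uses, assumed here since the hunk does not restate them:

    #include <stdio.h>

    #define MVNUM_SHORT  8    /* assumed VP8 value */
    #define MVLONG_WIDTH 10   /* assumed bits for a long magnitude */

    int main(void)
    {
        int vals[4] = { 0, -5, 8, 600 };
        int i;

        for (i = 0; i < 4; i++)
        {
            int x = vals[i] < 0 ? -vals[i] : vals[i];
            if (x < MVNUM_SHORT)
                printf("%4d: short, tree coded%s\n", vals[i],
                       x ? ", plus sign bit" : ", no sign bit");
            else
                printf("%4d: long, %d magnitude bits plus sign\n",
                       vals[i], MVLONG_WIDTH);
        }
        return 0;
    }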
++ */ + #define MV_PROB_UPDATE_CORRECTION -1 + + +@@ -254,22 +252,22 @@ static void write_component_probs( + vp8_zero(short_bct) + + +- //j=0 ++ /* j=0 */ + { + const int c = events [mv_max]; + +- is_short_ct [0] += c; // Short vector +- short_ct [0] += c; // Magnitude distribution ++ is_short_ct [0] += c; /* Short vector */ ++ short_ct [0] += c; /* Magnitude distribution */ + } + +- //j: 1 ~ mv_max (1023) ++ /* j: 1 ~ mv_max (1023) */ + { + int j = 1; + + do + { +- const int c1 = events [mv_max + j]; //positive +- const int c2 = events [mv_max - j]; //negative ++ const int c1 = events [mv_max + j]; /* positive */ ++ const int c2 = events [mv_max - j]; /* negative */ + const int c = c1 + c2; + int a = j; + +@@ -278,13 +276,13 @@ static void write_component_probs( + + if (a < mvnum_short) + { +- is_short_ct [0] += c; // Short vector +- short_ct [a] += c; // Magnitude distribution ++ is_short_ct [0] += c; /* Short vector */ ++ short_ct [a] += c; /* Magnitude distribution */ + } + else + { + int k = mvlong_width - 1; +- is_short_ct [1] += c; // Long vector ++ is_short_ct [1] += c; /* Long vector */ + + /* bit 3 not always encoded. */ + do +@@ -296,43 +294,6 @@ static void write_component_probs( + while (++j <= mv_max); + } + +- /* +- { +- int j = -mv_max; +- do +- { +- +- const int c = events [mv_max + j]; +- int a = j; +- +- if( j < 0) +- { +- sign_ct [1] += c; +- a = -j; +- } +- else if( j) +- sign_ct [0] += c; +- +- if( a < mvnum_short) +- { +- is_short_ct [0] += c; // Short vector +- short_ct [a] += c; // Magnitude distribution +- } +- else +- { +- int k = mvlong_width - 1; +- is_short_ct [1] += c; // Long vector +- +- // bit 3 not always encoded. +- +- do +- bit_ct [k] [(a >> k) & 1] += c; +- while( --k >= 0); +- } +- } while( ++j <= mv_max); +- } +- */ +- + calc_prob(Pnew + mvpis_short, is_short_ct); + + calc_prob(Pnew + MVPsign, sign_ct); +@@ -402,10 +363,12 @@ void vp8_write_mvprobs(VP8_COMP *cpi) + active_section = 4; + #endif + write_component_probs( +- w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->MVcount[0], 0, &flags[0] ++ w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], ++ cpi->mb.MVcount[0], 0, &flags[0] + ); + write_component_probs( +- w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->MVcount[1], 1, &flags[1] ++ w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], ++ cpi->mb.MVcount[1], 1, &flags[1] + ); + + if (flags[0] || flags[1]) +diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c +index 2a2cb2f..d4b17ce 100644 +--- a/vp8/encoder/ethreading.c ++++ b/vp8/encoder/ethreading.c +@@ -17,12 +17,6 @@ + + #if CONFIG_MULTITHREAD + +-extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, +- TOKENEXTRA **t, +- int recon_yoffset, int recon_uvoffset, +- int mb_row, int mb_col); +-extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, +- TOKENEXTRA **t); + extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); + + extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); +@@ -39,7 +33,7 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) + + if (sem_wait(&cpi->h_event_start_lpf) == 0) + { +- if (cpi->b_multi_threaded == 0) // we're shutting down ++ if (cpi->b_multi_threaded == 0) /* we're shutting down */ + break; + + vp8_loopfilter_frame(cpi, cm); +@@ -59,17 +53,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); + ENTROPY_CONTEXT_PLANES 
mb_row_left_context; + +- const int nsync = cpi->mt_sync_range; +- //printf("Started thread %d\n", ithread); +- + while (1) + { + if (cpi->b_multi_threaded == 0) + break; + +- //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0) + if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) + { ++ const int nsync = cpi->mt_sync_range; + VP8_COMMON *cm = &cpi->common; + int mb_row; + MACROBLOCK *x = &mbri->mb; +@@ -83,7 +74,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + int *segment_counts = mbri->segment_counts; + int *totalrate = &mbri->totalrate; + +- if (cpi->b_multi_threaded == 0) // we're shutting down ++ if (cpi->b_multi_threaded == 0) /* we're shutting down */ + break; + + for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) +@@ -108,7 +99,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + + last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; + +- // reset above block coeffs ++ /* reset above block coeffs */ + xd->above_context = cm->above_context; + xd->left_context = &mb_row_left_context; + +@@ -118,10 +109,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + recon_yoffset = (mb_row * recon_y_stride * 16); + recon_uvoffset = (mb_row * recon_uv_stride * 8); + +- // Set the mb activity pointer to the start of the row. ++ /* Set the mb activity pointer to the start of the row. */ + x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; + +- // for each macroblock col in image ++ /* for each macroblock col in image */ + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + *current_mb_col = mb_col - 1; +@@ -139,14 +130,18 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + tp = tp_start; + #endif + +- // Distance of Mb to the various image edges. +- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units ++ /* Distance of Mb to the various image edges. 
++ * These specified to 8th pel as they are always compared ++ * to values that are in 1/8th pel units ++ */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; + +- // Set up limit values for motion vectors used to prevent them extending outside the UMV borders ++ /* Set up limit values for motion vectors used to prevent ++ * them extending outside the UMV borders ++ */ + x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); +@@ -160,17 +155,19 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + +- //Copy current mb to a buffer ++ /* Copy current mb to a buffer */ + vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); + + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp8_activity_masking(cpi, x); + +- // Is segmentation enabled +- // MB level adjustment to quantizer ++ /* Is segmentation enabled */ ++ /* MB level adjustment to quantizer */ + if (xd->segmentation_enabled) + { +- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) ++ /* Code to set segment id in xd->mbmi.segment_id for ++ * current MB (with range checking) ++ */ + if (cpi->segmentation_map[map_index + mb_col] <= 3) + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; + else +@@ -179,7 +176,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + vp8cx_mb_init_quantizer(cpi, x, 1); + } + else +- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default ++ /* Set to Segment 0 by default */ ++ xd->mode_info_context->mbmi.segment_id = 0; + + x->active_ptr = cpi->active_map + map_index + mb_col; + +@@ -209,21 +207,28 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + + #endif + +- // Count of last ref frame 0,0 usage +- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) +- cpi->inter_zz_count++; +- +- // Special case code for cyclic refresh +- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode +- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map +- if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) ++ /* Special case code for cyclic refresh ++ * If cyclic update enabled then copy ++ * xd->mbmi.segment_id; (which may have been updated ++ * based on mode during ++ * vp8cx_encode_inter_macroblock()) back into the ++ * global segmentation map ++ */ ++ if ((cpi->current_layer == 0) && ++ (cpi->cyclic_refresh_mode_enabled && ++ xd->segmentation_enabled)) + { + const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; + +- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): +- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) +- // else mark it as dirty (1). 
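The threaded row loop above publishes progress through a shared per-row column counter and, every nsync columns, checks that the row above has advanced far enough before continuing. A hedged sketch of that handshake; the busy-wait stands in for the pause/sleep hints the real loop uses:

    /* Row above publishes its last finished column; this row polls
     * before encoding a column that depends on it. */
    static void wait_for_row_above(volatile const int *above_col,
                                   int mb_col, int nsync)
    {
        if ((mb_col & (nsync - 1)) == 0)   /* check every nsync cols */
            while (*above_col < mb_col + nsync)
                ;                          /* spin until safe */
    }

    int main(void)
    {
        int above = 8;
        wait_for_row_above(&above, 4, 4);  /* 8 >= 4+4: returns at once */
        return 0;
    }

Masking with (nsync - 1) assumes nsync is a power of two, which is why the check costs nothing on most columns.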
++ /* If the block has been refreshed mark it as clean ++ * (the magnitude of the -ve influences how long it ++ * will be before we consider another refresh): ++ * Else if it was coded (last frame 0,0) and has ++ * not already been refreshed then mark it as a ++ * candidate for cleanup next time (marked 0) else ++ * mark it as dirty (1). ++ */ + if (mbmi->segment_id) + cpi->cyclic_refresh_map[map_index + mb_col] = -1; + else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) +@@ -246,13 +251,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + #else + cpi->tplist[mb_row].stop = tp; + #endif +- // Increment pointer into gf usage flags structure. ++ /* Increment pointer into gf usage flags structure. */ + x->gf_active_ptr++; + +- // Increment the activity mask pointers. ++ /* Increment the activity mask pointers. */ + x->mb_activity_ptr++; + +- // adjust to the next column of macroblocks ++ /* adjust to the next column of macroblocks */ + x->src.y_buffer += 16; + x->src.u_buffer += 8; + x->src.v_buffer += 8; +@@ -260,10 +265,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + recon_yoffset += 16; + recon_uvoffset += 8; + +- // Keep track of segment usage ++ /* Keep track of segment usage */ + segment_counts[xd->mode_info_context->mbmi.segment_id]++; + +- // skip to next mb ++ /* skip to next mb */ + xd->mode_info_context++; + x->partition_info++; + xd->above_context++; +@@ -276,7 +281,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + + *current_mb_col = mb_col + nsync; + +- // this is to account for the border ++ /* this is to account for the border */ + xd->mode_info_context++; + x->partition_info++; + +@@ -296,7 +301,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) + } + } + +- //printf("exit thread %d\n", ithread); ++ /* printf("exit thread %d\n", ithread); */ + return 0; + } + +@@ -336,21 +341,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) + z->src.v_buffer = x->src.v_buffer; + */ + ++ z->mvcost[0] = x->mvcost[0]; ++ z->mvcost[1] = x->mvcost[1]; ++ z->mvsadcost[0] = x->mvsadcost[0]; ++ z->mvsadcost[1] = x->mvsadcost[1]; + +- vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); +- z->mvcost[0] = &z->mvcosts[0][mv_max+1]; +- z->mvcost[1] = &z->mvcosts[1][mv_max+1]; +- z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; +- z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; +- +- +- vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs)); +- vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs)); +- //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); +- //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost)); +- vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost)); +- vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost)); +- vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs)); ++ z->token_costs = x->token_costs; ++ z->inter_bmode_costs = x->inter_bmode_costs; ++ z->mbmode_cost = x->mbmode_cost; ++ z->intra_uv_mode_cost = x->intra_uv_mode_cost; ++ z->bmode_costs = x->bmode_costs; + + for (i = 0; i < 25; i++) + { +@@ -358,17 +358,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) + z->block[i].quant_fast = x->block[i].quant_fast; + z->block[i].quant_shift = x->block[i].quant_shift; + z->block[i].zbin = x->block[i].zbin; +- z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; ++ z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; + z->block[i].round = x->block[i].round; +- z->q_index = x->q_index; +- 
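setup_mbby_copy() in the hunks above switches several large cost tables from per-worker vpx_memcpy duplicates to plain pointer aliasing. The pattern in miniature; the types and names are simplified stand-ins:

    #define TABLE_SIZE 1024

    typedef struct
    {
        const int *costs;   /* alias of the owner's table */
    } worker;

    static void setup_worker(worker *w, const int *owner_table)
    {
        /* previously: a full memcpy of the table per worker; the
         * tables are read-only while a frame encodes, so aliasing
         * one copy is safe and cheaper */
        w->costs = owner_table;
    }

    int main(void)
    {
        static int table[TABLE_SIZE];
        worker w;
        setup_worker(&w, table);
        return w.costs == table ? 0 : 1;
    }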
z->act_zbin_adj = x->act_zbin_adj; +- z->last_act_zbin_adj = x->last_act_zbin_adj; +- /* +- z->block[i].src = x->block[i].src; +- */ +- z->block[i].src_stride = x->block[i].src_stride; ++ z->block[i].src_stride = x->block[i].src_stride; + } + ++ z->q_index = x->q_index; ++ z->act_zbin_adj = x->act_zbin_adj; ++ z->last_act_zbin_adj = x->last_act_zbin_adj; ++ + { + MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCKD *zd = &z->e_mbd; +@@ -400,9 +398,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) + zd->subpixel_predict16x16 = xd->subpixel_predict16x16; + zd->segmentation_enabled = xd->segmentation_enabled; + zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; +- vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); ++ vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, ++ sizeof(xd->segment_feature_data)); + +- vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); ++ vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, ++ sizeof(xd->dequant_y1_dc)); + vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); + vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); + vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); +@@ -418,13 +418,23 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) + zd->block[i].dequant = zd->dequant_uv; + zd->block[24].dequant = zd->dequant_y2; + #endif ++ ++ ++ vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); ++ vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, ++ sizeof(x->rd_thresh_mult)); ++ ++ z->zbin_over_quant = x->zbin_over_quant; ++ z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; ++ z->zbin_mode_boost = x->zbin_mode_boost; ++ ++ vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); + } + } + + void vp8cx_init_mbrthread_data(VP8_COMP *cpi, + MACROBLOCK *x, + MB_ROW_COMP *mbr_ei, +- int mb_row, + int count + ) + { +@@ -432,7 +442,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, + VP8_COMMON *const cm = & cpi->common; + MACROBLOCKD *const xd = & x->e_mbd; + int i; +- (void) mb_row; + + for (i = 0; i < count; i++) + { +@@ -465,10 +474,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, + + vp8_build_block_offsets(mb); + +- vp8_setup_block_dptrs(mbd); +- +- vp8_setup_block_ptrs(mb); +- + mbd->left_context = &cm->left_context; + mb->mvc = cm->fc.mvc; + +@@ -477,10 +482,19 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, + mbd->fullpixel_mask = 0xffffffff; + if(cm->full_pixel) + mbd->fullpixel_mask = 0xfffffff8; ++ ++ vp8_zero(mb->coef_counts); ++ vp8_zero(x->ymode_count); ++ mb->skip_true_count = 0; ++ vp8_zero(mb->MVcount); ++ mb->prediction_error = 0; ++ mb->intra_error = 0; ++ vp8_zero(mb->count_mb_ref_frame_usage); ++ mb->mbs_tested_so_far = 0; + } + } + +-void vp8cx_create_encoder_threads(VP8_COMP *cpi) ++int vp8cx_create_encoder_threads(VP8_COMP *cpi) + { + const VP8_COMMON * cm = &cpi->common; + +@@ -492,6 +506,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) + { + int ithread; + int th_count = cpi->oxcf.multi_threaded - 1; ++ int rc = 0; + + /* don't allocate more threads than cores available */ + if (cpi->oxcf.multi_threaded > cm->processor_core_count) +@@ -505,16 +520,17 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) + } + + if(th_count == 0) +- return; +- +- CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count)); +- CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count)); +- CHECK_MEM_ERROR(cpi->mb_row_ei, 
vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); ++ return 0; ++ ++ CHECK_MEM_ERROR(cpi->h_encoding_thread, ++ vpx_malloc(sizeof(pthread_t) * th_count)); ++ CHECK_MEM_ERROR(cpi->h_event_start_encoding, ++ vpx_malloc(sizeof(sem_t) * th_count)); ++ CHECK_MEM_ERROR(cpi->mb_row_ei, ++ vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); + vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); + CHECK_MEM_ERROR(cpi->en_thread_data, + vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); +- CHECK_MEM_ERROR(cpi->mt_current_mb_col, +- vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); + + sem_init(&cpi->h_event_end_encoding, 0, 0); + +@@ -528,16 +544,45 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) + + for (ithread = 0; ithread < th_count; ithread++) + { +- ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread]; ++ ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; ++ ++ /* Setup block ptrs and offsets */ ++ vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); ++ vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); + + sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); ++ + ethd->ithread = ithread; + ethd->ptr1 = (void *)cpi; + ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; + +- pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); ++ rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, ++ thread_encoding_proc, ethd); ++ if(rc) ++ break; + } + ++ if(rc) ++ { ++ /* shutdown other threads */ ++ cpi->b_multi_threaded = 0; ++ for(--ithread; ithread >= 0; ithread--) ++ { ++ pthread_join(cpi->h_encoding_thread[ithread], 0); ++ sem_destroy(&cpi->h_event_start_encoding[ithread]); ++ } ++ sem_destroy(&cpi->h_event_end_encoding); ++ ++ /* free thread related resources */ ++ vpx_free(cpi->h_event_start_encoding); ++ vpx_free(cpi->h_encoding_thread); ++ vpx_free(cpi->mb_row_ei); ++ vpx_free(cpi->en_thread_data); ++ ++ return -1; ++ } ++ ++ + { + LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; + +@@ -545,24 +590,47 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) + sem_init(&cpi->h_event_end_lpf, 0, 0); + + lpfthd->ptr1 = (void *)cpi; +- pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); ++ rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, ++ lpfthd); ++ ++ if(rc) ++ { ++ /* shutdown other threads */ ++ cpi->b_multi_threaded = 0; ++ for(--ithread; ithread >= 0; ithread--) ++ { ++ sem_post(&cpi->h_event_start_encoding[ithread]); ++ pthread_join(cpi->h_encoding_thread[ithread], 0); ++ sem_destroy(&cpi->h_event_start_encoding[ithread]); ++ } ++ sem_destroy(&cpi->h_event_end_encoding); ++ sem_destroy(&cpi->h_event_end_lpf); ++ sem_destroy(&cpi->h_event_start_lpf); ++ ++ /* free thread related resources */ ++ vpx_free(cpi->h_event_start_encoding); ++ vpx_free(cpi->h_encoding_thread); ++ vpx_free(cpi->mb_row_ei); ++ vpx_free(cpi->en_thread_data); ++ ++ return -2; ++ } + } + } +- ++ return 0; + } + + void vp8cx_remove_encoder_threads(VP8_COMP *cpi) + { + if (cpi->b_multi_threaded) + { +- //shutdown other threads ++ /* shutdown other threads */ + cpi->b_multi_threaded = 0; + { + int i; + + for (i = 0; i < cpi->encoding_thread_count; i++) + { +- //SetEvent(cpi->h_event_mbrencoding[i]); + sem_post(&cpi->h_event_start_encoding[i]); + pthread_join(cpi->h_encoding_thread[i], 0); + +@@ -577,12 +645,11 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) + sem_destroy(&cpi->h_event_end_lpf); + sem_destroy(&cpi->h_event_start_lpf); + +- //free thread related resources ++ /* free thread related resources */ + 
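The error handling introduced above follows the usual pthread_create() unwind: on the first failure, flag shutdown, join the workers already started, destroy their semaphores, free the arrays, and surface a nonzero return. A generic minimal sketch of the same unwind; all names here are placeholders, not the encoder's:

    #include <pthread.h>
    #include <stdlib.h>

    static void *worker(void *arg) { (void)arg; return NULL; }

    static int start_workers(pthread_t **out, int count)
    {
        int i, rc = 0;
        pthread_t *tids = malloc(sizeof(*tids) * count);

        if (!tids)
            return -1;

        for (i = 0; i < count; i++)
        {
            rc = pthread_create(&tids[i], NULL, worker, NULL);
            if (rc)
                break;
        }

        if (rc)
        {
            /* unwind: join everything that did start, then free */
            for (--i; i >= 0; i--)
                pthread_join(tids[i], NULL);
            free(tids);
            return -1;
        }

        *out = tids;
        return 0;
    }

    int main(void)
    {
        pthread_t *tids;
        int i;
        if (start_workers(&tids, 4))
            return 1;
        for (i = 0; i < 4; i++)
            pthread_join(tids[i], NULL);
        free(tids);
        return 0;
    }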
vpx_free(cpi->h_event_start_encoding); + vpx_free(cpi->h_encoding_thread); + vpx_free(cpi->mb_row_ei); + vpx_free(cpi->en_thread_data); +- vpx_free(cpi->mt_current_mb_col); + } + } + #endif +diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c +index 8de1a6a..30bf8a6 100644 +--- a/vp8/encoder/firstpass.c ++++ b/vp8/encoder/firstpass.c +@@ -30,14 +30,12 @@ + #include "encodemv.h" + #include "encodeframe.h" + +-//#define OUTPUT_FPF 1 ++/* #define OUTPUT_FPF 1 */ + + extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); + extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); + extern void vp8_alloc_compressor_data(VP8_COMP *cpi); + +-//#define GFQ_ADJUSTMENT (40 + ((15*Q)/10)) +-//#define GFQ_ADJUSTMENT (80 + ((15*Q)/10)) + #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] + extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; + +@@ -77,7 +75,9 @@ static const int cq_level[QINDEX_RANGE] = + + static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); + +-// Resets the first pass file to the given position using a relative seek from the current position ++/* Resets the first pass file to the given position using a relative seek ++ * from the current position ++ */ + static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) + { + cpi->twopass.stats_in = Position; +@@ -92,14 +92,14 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) + return 1; + } + +-// Read frame stats at an offset from the current position ++/* Read frame stats at an offset from the current position */ + static int read_frame_stats( VP8_COMP *cpi, + FIRSTPASS_STATS *frame_stats, + int offset ) + { + FIRSTPASS_STATS * fps_ptr = cpi->twopass.stats_in; + +- // Check legality of offset ++ /* Check legality of offset */ + if ( offset >= 0 ) + { + if ( &fps_ptr[offset] >= cpi->twopass.stats_in_end ) +@@ -136,7 +136,7 @@ static void output_stats(const VP8_COMP *cpi, + pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); + vpx_codec_pkt_list_add(pktlist, &pkt); + +-// TEMP debug code ++/* TEMP debug code */ + #if OUTPUT_FPF + + { +@@ -257,7 +257,9 @@ static void avg_stats(FIRSTPASS_STATS *section) + section->duration /= section->count; + } + +-// Calculate a modified Error used in distributing bits between easier and harder frames ++/* Calculate a modified Error used in distributing bits between easier ++ * and harder frames ++ */ + static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + { + double av_err = ( cpi->twopass.total_stats.ssim_weighted_pred_err / +@@ -315,7 +317,9 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) + unsigned char *src = source->y_buffer; + double sum_weights = 0.0; + +- // Loop throught the Y plane raw examining levels and creating a weight for the image ++ /* Loop throught the Y plane raw examining levels and creating a weight ++ * for the image ++ */ + i = source->y_height; + do + { +@@ -335,41 +339,52 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) + } + + +-// This function returns the current per frame maximum bitrate target ++/* This function returns the current per frame maximum bitrate target */ + static int frame_max_bits(VP8_COMP *cpi) + { +- // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left ++ /* Max allocation for a single frame based on the max section guidelines ++ * passed in and how many bits are left ++ */ + int max_bits; + +- // For CBR we need to also consider buffer fullness. 
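read_frame_stats() earlier in this hunk checks the legality of the offset against both ends of the stats buffer before dereferencing. The same guard as a tiny self-contained helper; the struct and names are simplified:

    #include <stddef.h>

    typedef struct { double frame; } stats;

    /* Entry at 'offset' from 'cur', or NULL when it would step
     * outside [begin, end). */
    static const stats *stats_at(const stats *begin, const stats *end,
                                 const stats *cur, ptrdiff_t offset)
    {
        if (offset >= 0)
        {
            if (offset >= end - cur)
                return NULL;
        }
        else if (offset < begin - cur)
            return NULL;
        return cur + offset;
    }

    int main(void)
    {
        stats buf[4] = { {0}, {1}, {2}, {3} };
        return stats_at(buf, buf + 4, buf + 1, 2) ? 0 : 1;
    }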
+- // If we are running below the optimal level then we need to gradually tighten up on max_bits. ++ /* For CBR we need to also consider buffer fullness. ++ * If we are running below the optimal level then we need to gradually ++ * tighten up on max_bits. ++ */ + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + { + double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); + +- // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user ++ /* For CBR base this on the target average bits per frame plus the ++ * maximum sedction rate passed in by the user ++ */ + max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + +- // If our buffer is below the optimum level ++ /* If our buffer is below the optimum level */ + if (buffer_fullness_ratio < 1.0) + { +- // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. ++ /* The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. */ + int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; + + max_bits = (int)(max_bits * buffer_fullness_ratio); + ++ /* Lowest value we will set ... which should allow the buffer to ++ * refill. ++ */ + if (max_bits < min_max_bits) +- max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil. ++ max_bits = min_max_bits; + } + } +- // VBR ++ /* VBR */ + else + { +- // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user ++ /* For VBR base this on the bits and frames left plus the ++ * two_pass_vbrmax_section rate passed in by the user ++ */ + max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + } + +- // Trap case where we are out of bits ++ /* Trap case where we are out of bits */ + if (max_bits < 0) + max_bits = 0; + +@@ -403,13 +418,13 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, + unsigned char *ref_ptr; + int ref_stride = x->e_mbd.pre.y_stride; + +- // Set up pointers for this macro block raw buffer ++ /* Set up pointers for this macro block raw buffer */ + raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + + d->offset); + vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, + (unsigned int *)(raw_motion_err)); + +- // Set up pointers for this macro block recon buffer ++ /* Set up pointers for this macro block recon buffer */ + xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; + ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); + vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, +@@ -430,19 +445,19 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, + int_mv ref_mv_full; + + int tmp_err; +- int step_param = 3; //3; // Dont search over full range for first pass +- int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3; ++ int step_param = 3; /* Dont search over full range for first pass */ ++ int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; + int n; + vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; + int new_mv_mode_penalty = 256; + +- // override the default variance function to use MSE ++ /* override the default variance function to use MSE */ + v_fn_ptr.vf = vp8_mse16x16; + +- // Set up pointers for this macro block recon buffer ++ /* Set up 
pointers for this macro block recon buffer */ + xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; + +- // Initial step/diamond search centred on best mv ++ /* Initial step/diamond search centred on best mv */ + tmp_mv.as_int = 0; + ref_mv_full.as_mv.col = ref_mv->as_mv.col>>3; + ref_mv_full.as_mv.row = ref_mv->as_mv.row>>3; +@@ -459,7 +474,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, + best_mv->col = tmp_mv.as_mv.col; + } + +- // Further step/diamond searches as necessary ++ /* Further step/diamond searches as necessary */ + n = num00; + num00 = 0; + +@@ -520,7 +535,7 @@ void vp8_first_pass(VP8_COMP *cpi) + + zero_ref_mv.as_int = 0; + +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + + x->src = * cpi->Source; + xd->pre = *lst_yv12; +@@ -530,44 +545,55 @@ void vp8_first_pass(VP8_COMP *cpi) + + xd->mode_info_context = cm->mi; + +- vp8_build_block_offsets(x); +- +- vp8_setup_block_dptrs(&x->e_mbd); ++ if(!cm->use_bilinear_mc_filter) ++ { ++ xd->subpixel_predict = vp8_sixtap_predict4x4; ++ xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; ++ xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; ++ xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; ++ } ++ else ++ { ++ xd->subpixel_predict = vp8_bilinear_predict4x4; ++ xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; ++ xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; ++ xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; ++ } + +- vp8_setup_block_ptrs(x); ++ vp8_build_block_offsets(x); + +- // set up frame new frame for intra coded blocks ++ /* set up frame new frame for intra coded blocks */ + vp8_setup_intra_recon(new_yv12); + vp8cx_frame_init_quantizer(cpi); + +- // Initialise the MV cost table to the defaults +- //if( cm->current_video_frame == 0) +- //if ( 0 ) ++ /* Initialise the MV cost table to the defaults */ + { + int flag[2] = {1, 1}; +- vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); ++ vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); + vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); + vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); + } + +- // for each macroblock row in image ++ /* for each macroblock row in image */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + int_mv best_ref_mv; + + best_ref_mv.as_int = 0; + +- // reset above block coeffs ++ /* reset above block coeffs */ + xd->up_available = (mb_row != 0); + recon_yoffset = (mb_row * recon_y_stride * 16); + recon_uvoffset = (mb_row * recon_uv_stride * 8); + +- // Set up limit values for motion vectors to prevent them extending outside the UMV borders ++ /* Set up limit values for motion vectors to prevent them extending ++ * outside the UMV borders ++ */ + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); + + +- // for each macroblock col in image ++ /* for each macroblock col in image */ + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int this_error; +@@ -579,26 +605,33 @@ void vp8_first_pass(VP8_COMP *cpi) + xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; + xd->left_available = (mb_col != 0); + +- //Copy current mb to a buffer ++ /* Copy current mb to a buffer */ + vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); + +- // do intra 16x16 prediction ++ /* do intra 16x16 prediction */ + this_error = vp8_encode_intra(cpi, x, use_dc_pred); + +- // 
"intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame) +- // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv. +- // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames. +- // This penalty adds a cost matching that of a 0,0 mv to the intra case. ++ /* "intrapenalty" below deals with situations where the intra ++ * and inter error scores are very low (eg a plain black frame) ++ * We do not have special cases in first pass for 0,0 and ++ * nearest etc so all inter modes carry an overhead cost ++ * estimate fot the mv. When the error score is very low this ++ * causes us to pick all or lots of INTRA modes and throw lots ++ * of key frames. This penalty adds a cost matching that of a ++ * 0,0 mv to the intra case. ++ */ + this_error += intrapenalty; + +- // Cumulative intra error total ++ /* Cumulative intra error total */ + intra_error += (int64_t)this_error; + +- // Set up limit values for motion vectors to prevent them extending outside the UMV borders ++ /* Set up limit values for motion vectors to prevent them ++ * extending outside the UMV borders ++ */ + x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); + +- // Other than for the first frame do a motion search ++ /* Other than for the first frame do a motion search */ + if (cm->current_video_frame > 0) + { + BLOCKD *d = &x->e_mbd.block[0]; +@@ -607,7 +640,7 @@ void vp8_first_pass(VP8_COMP *cpi) + int motion_error = INT_MAX; + int raw_motion_error = INT_MAX; + +- // Simple 0,0 motion with no mv overhead ++ /* Simple 0,0 motion with no mv overhead */ + zz_motion_search( cpi, x, cpi->last_frame_unscaled_source, + &raw_motion_error, lst_yv12, &motion_error, + recon_yoffset ); +@@ -617,13 +650,16 @@ void vp8_first_pass(VP8_COMP *cpi) + if (raw_motion_error < cpi->oxcf.encode_breakout) + goto skip_motion_search; + +- // Test last reference frame using the previous best mv as the +- // starting point (best reference) for the search ++ /* Test last reference frame using the previous best mv as the ++ * starting point (best reference) for the search ++ */ + first_pass_motion_search(cpi, x, &best_ref_mv, + &d->bmi.mv.as_mv, lst_yv12, + &motion_error, recon_yoffset); + +- // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well ++ /* If the current best reference mv is not centred on 0,0 ++ * then do a 0,0 based search as well ++ */ + if (best_ref_mv.as_int) + { + tmp_err = INT_MAX; +@@ -638,7 +674,9 @@ void vp8_first_pass(VP8_COMP *cpi) + } + } + +- // Experimental search in a second reference frame ((0,0) based only) ++ /* Experimental search in a second reference frame ((0,0) ++ * based only) ++ */ + if (cm->current_video_frame > 1) + { + first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset); +@@ -646,19 +684,9 @@ void vp8_first_pass(VP8_COMP *cpi) + if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) + { + second_ref_count++; +- //motion_error = gf_motion_error; +- //d->bmi.mv.as_mv.row = tmp_mv.row; +- //d->bmi.mv.as_mv.col = tmp_mv.col; + } +- /*else +- { +- xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset; +- xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset; +- xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset; +- }*/ +- 
+
+- // Reset to last frame as reference buffer
++ /* Reset to last frame as reference buffer */
+ xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+ xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
+@@ -670,10 +698,11 @@ skip_motion_search:
+
+ if (motion_error <= this_error)
+ {
+- // Keep a count of cases where the inter and intra were
+- // very close and very low. This helps with scene cut
+- // detection for example in cropped clips with black bars
+- // at the sides or top and bottom.
++ /* Keep a count of cases where the inter and intra were
++ * very close and very low. This helps with scene cut
++ * detection for example in cropped clips with black bars
++ * at the sides or top and bottom.
++ */
+ if( (((this_error-intrapenalty) * 9) <=
+ (motion_error*10)) &&
+ (this_error < (2*intrapenalty)) )
+@@ -696,17 +725,17 @@ skip_motion_search:
+
+ best_ref_mv.as_int = d->bmi.mv.as_int;
+
+- // Was the vector non-zero
++ /* Was the vector non-zero */
+ if (d->bmi.mv.as_int)
+ {
+ mvcount++;
+
+- // Was it different from the last non zero vector
++ /* Was it different from the last non zero vector */
+ if ( d->bmi.mv.as_int != lastmv_as_int )
+ new_mv_count++;
+ lastmv_as_int = d->bmi.mv.as_int;
+
+- // Does the Row vector point inwards or outwards
++ /* Does the Row vector point inwards or outwards */
+ if (mb_row < cm->mb_rows / 2)
+ {
+ if (d->bmi.mv.as_mv.row > 0)
+@@ -722,7 +751,7 @@ skip_motion_search:
+ sum_in_vectors--;
+ }
+
+- // Does the Row vector point inwards or outwards
++ /* Does the Col vector point inwards or outwards */
+ if (mb_col < cm->mb_cols / 2)
+ {
+ if (d->bmi.mv.as_mv.col > 0)
+@@ -743,7 +772,7 @@ skip_motion_search:
+
+ coded_error += (int64_t)this_error;
+
+- // adjust to the next column of macroblocks
++ /* adjust to the next column of macroblocks */
+ x->src.y_buffer += 16;
+ x->src.u_buffer += 8;
+ x->src.v_buffer += 8;
+@@ -752,25 +781,25 @@ skip_motion_search:
+ recon_uvoffset += 8;
+ }
+
+- // adjust to the next row of mbs
++ /* adjust to the next row of mbs */
+ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
+ x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
+ x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
+
+- //extend the recon for intra prediction
++ /* extend the recon for intra prediction */
+ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+- vp8_clear_system_state(); //__asm emms;
++ vp8_clear_system_state();
+ }
+
+- vp8_clear_system_state(); //__asm emms;
++ vp8_clear_system_state();
+ {
+ double weight = 0.0;
+
+ FIRSTPASS_STATS fps;
+
+ fps.frame = cm->current_video_frame ;
+- fps.intra_error = intra_error >> 8;
+- fps.coded_error = coded_error >> 8;
++ fps.intra_error = (double)(intra_error >> 8);
++ fps.coded_error = (double)(coded_error >> 8);
+ weight = simple_weight(cpi->Source);
+
+
+@@ -809,12 +838,13 @@ skip_motion_search:
+ fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs;
+ }
+
+- // TODO: handle the case when duration is set to 0, or something less
+- // than the full time between subsequent cpi->source_time_stamp s .
++ /* TODO: handle the case when duration is set to 0, or something less
++ * than the full time between subsequent cpi->source_time_stamps
++ */
+- fps.duration = cpi->source->ts_end
+- - cpi->source->ts_start;
++ fps.duration = (double)(cpi->source->ts_end
++ - cpi->source->ts_start);
+
+- // don't want to do output stats with a stack variable!
++ /* don't want to do output stats with a stack variable! */ + memcpy(&cpi->twopass.this_frame_stats, + &fps, + sizeof(FIRSTPASS_STATS)); +@@ -822,7 +852,9 @@ skip_motion_search: + accumulate_stats(&cpi->twopass.total_stats, &fps); + } + +- // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met ++ /* Copy the previous Last Frame into the GF buffer if specific ++ * conditions for doing so are met ++ */ + if ((cm->current_video_frame > 0) && + (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && + ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0)) +@@ -830,18 +862,22 @@ skip_motion_search: + vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } + +- // swap frame pointers so last frame refers to the frame we just compressed ++ /* swap frame pointers so last frame refers to the frame we just ++ * compressed ++ */ + vp8_swap_yv12_buffer(lst_yv12, new_yv12); + vp8_yv12_extend_frame_borders(lst_yv12); + +- // Special case for the first frame. Copy into the GF buffer as a second reference. ++ /* Special case for the first frame. Copy into the GF buffer as a ++ * second reference. ++ */ + if (cm->current_video_frame == 0) + { + vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } + + +- // use this to see what the first pass reconstruction looks like ++ /* use this to see what the first pass reconstruction looks like */ + if (0) + { + char filename[512]; +@@ -853,7 +889,8 @@ skip_motion_search: + else + recon_file = fopen(filename, "ab"); + +- if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file)); ++ (void) fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, ++ recon_file); + fclose(recon_file); + } + +@@ -862,11 +899,10 @@ skip_motion_search: + } + extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; + +-// Estimate a cost per mb attributable to overheads such as the coding of +-// modes and motion vectors. +-// Currently simplistic in its assumptions for testing. +-// +- ++/* Estimate a cost per mb attributable to overheads such as the coding of ++ * modes and motion vectors. ++ * Currently simplistic in its assumptions for testing. ++ */ + + static double bitcost( double prob ) + { +@@ -890,12 +926,14 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi, + motion_cost = bitcost(av_pct_motion); + intra_cost = bitcost(av_intra); + +- // Estimate of extra bits per mv overhead for mbs +- // << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb ++ /* Estimate of extra bits per mv overhead for mbs ++ * << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb ++ */ + mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; + +- // Crude estimate of overhead cost from modes +- // << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb ++ /* Crude estimate of overhead cost from modes ++ * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb ++ */ + mode_cost = + (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) + + (av_pct_motion * motion_cost) + +@@ -914,17 +952,17 @@ static double calc_correction_factor( double err_per_mb, + double error_term = err_per_mb / err_devisor; + double correction_factor; + +- // Adjustment based on Q to power term. ++ /* Adjustment based on Q to power term. */ + power_term = pt_low + (Q * 0.01); + power_term = (power_term > pt_high) ? 
pt_high : power_term; + +- // Adjustments to error term +- // TBD ++ /* Adjustments to error term */ ++ /* TBD */ + +- // Calculate correction factor ++ /* Calculate correction factor */ + correction_factor = pow(error_term, power_term); + +- // Clip range ++ /* Clip range */ + correction_factor = + (correction_factor < 0.05) + ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor; +@@ -948,15 +986,16 @@ static int estimate_max_q(VP8_COMP *cpi, + int overhead_bits_per_mb; + + if (section_target_bandwitdh <= 0) +- return cpi->twopass.maxq_max_limit; // Highest value allowed ++ return cpi->twopass.maxq_max_limit; /* Highest value allowed */ + + target_norm_bits_per_mb = + (section_target_bandwitdh < (1 << 20)) + ? (512 * section_target_bandwitdh) / num_mbs + : 512 * (section_target_bandwitdh / num_mbs); + +- // Calculate a corrective factor based on a rolling ratio of bits spent +- // vs target bits ++ /* Calculate a corrective factor based on a rolling ratio of bits spent ++ * vs target bits ++ */ + if ((cpi->rolling_target_bits > 0) && + (cpi->active_worst_quality < cpi->worst_quality)) + { +@@ -977,8 +1016,9 @@ static int estimate_max_q(VP8_COMP *cpi, + ? 10.0 : cpi->twopass.est_max_qcorrection_factor; + } + +- // Corrections for higher compression speed settings +- // (reduced compression expected) ++ /* Corrections for higher compression speed settings ++ * (reduced compression expected) ++ */ + if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) + { + if (cpi->oxcf.cpu_used <= 5) +@@ -987,18 +1027,20 @@ static int estimate_max_q(VP8_COMP *cpi, + speed_correction = 1.25; + } + +- // Estimate of overhead bits per mb +- // Correction to overhead bits for min allowed Q. ++ /* Estimate of overhead bits per mb */ ++ /* Correction to overhead bits for min allowed Q. */ + overhead_bits_per_mb = overhead_bits / num_mbs; +- overhead_bits_per_mb *= pow( 0.98, (double)cpi->twopass.maxq_min_limit ); ++ overhead_bits_per_mb = (int)(overhead_bits_per_mb * ++ pow( 0.98, (double)cpi->twopass.maxq_min_limit )); + +- // Try and pick a max Q that will be high enough to encode the +- // content at the given rate. ++ /* Try and pick a max Q that will be high enough to encode the ++ * content at the given rate. ++ */ + for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) + { + int bits_per_mb_at_this_q; + +- // Error per MB based correction factor ++ /* Error per MB based correction factor */ + err_correction_factor = + calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); + +@@ -1010,27 +1052,29 @@ static int estimate_max_q(VP8_COMP *cpi, + * cpi->twopass.section_max_qfactor + * (double)bits_per_mb_at_this_q); + +- // Mode and motion overhead +- // As Q rises in real encode loop rd code will force overhead down +- // We make a crude adjustment for this here as *.98 per Q step. ++ /* Mode and motion overhead */ ++ /* As Q rises in real encode loop rd code will force overhead down ++ * We make a crude adjustment for this here as *.98 per Q step. ++ */ + overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); + + if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) + break; + } + +- // Restriction on active max q for constrained quality mode. ++ /* Restriction on active max q for constrained quality mode. 
*/ + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < cpi->cq_target_quality) ) + { + Q = cpi->cq_target_quality; + } + +- // Adjust maxq_min_limit and maxq_max_limit limits based on +- // averaga q observed in clip for non kf/gf.arf frames +- // Give average a chance to settle though. ++ /* Adjust maxq_min_limit and maxq_max_limit limits based on ++ * average q observed in clip for non kf/gf.arf frames ++ * Give average a chance to settle though. ++ */ + if ( (cpi->ni_frames > +- ((unsigned int)cpi->twopass.total_stats.count >> 8)) && ++ ((int)cpi->twopass.total_stats.count >> 8)) && + (cpi->ni_frames > 150) ) + { + cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) +@@ -1042,8 +1086,9 @@ static int estimate_max_q(VP8_COMP *cpi, + return Q; + } + +-// For cq mode estimate a cq level that matches the observed +-// complexity and data rate. ++/* For cq mode estimate a cq level that matches the observed ++ * complexity and data rate. ++ */ + static int estimate_cq( VP8_COMP *cpi, + FIRSTPASS_STATS * fpstats, + int section_target_bandwitdh, +@@ -1072,11 +1117,12 @@ static int estimate_cq( VP8_COMP *cpi, + ? (512 * section_target_bandwitdh) / num_mbs + : 512 * (section_target_bandwitdh / num_mbs); + +- // Estimate of overhead bits per mb ++ /* Estimate of overhead bits per mb */ + overhead_bits_per_mb = overhead_bits / num_mbs; + +- // Corrections for higher compression speed settings +- // (reduced compression expected) ++ /* Corrections for higher compression speed settings ++ * (reduced compression expected) ++ */ + if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) + { + if (cpi->oxcf.cpu_used <= 5) +@@ -1085,19 +1131,19 @@ static int estimate_cq( VP8_COMP *cpi, + speed_correction = 1.25; + } + +- // II ratio correction factor for clip as a whole ++ /* II ratio correction factor for clip as a whole */ + clip_iiratio = cpi->twopass.total_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); + clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); + if (clip_iifactor < 0.80) + clip_iifactor = 0.80; + +- // Try and pick a Q that can encode the content at the given rate. ++ /* Try and pick a Q that can encode the content at the given rate. */ + for (Q = 0; Q < MAXQ; Q++) + { + int bits_per_mb_at_this_q; + +- // Error per MB based correction factor ++ /* Error per MB based correction factor */ + err_correction_factor = + calc_correction_factor(err_per_mb, 100.0, 0.40, 0.90, Q); + +@@ -1110,16 +1156,17 @@ static int estimate_cq( VP8_COMP *cpi, + clip_iifactor * + (double)bits_per_mb_at_this_q); + +- // Mode and motion overhead +- // As Q rises in real encode loop rd code will force overhead down +- // We make a crude adjustment for this here as *.98 per Q step. ++ /* Mode and motion overhead */ ++ /* As Q rises in real encode loop rd code will force overhead down ++ * We make a crude adjustment for this here as *.98 per Q step. ++ */ + overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); + + if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) + break; + } + +- // Clip value to range "best allowed to (worst allowed - 1)" ++ /* Clip value to range "best allowed to (worst allowed - 1)" */ + Q = cq_level[Q]; + if ( Q >= cpi->worst_quality ) + Q = cpi->worst_quality - 1; +@@ -1141,7 +1188,9 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band + + target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? 
(512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs);
+
+- // Corrections for higher compression speed settings (reduced compression expected)
++ /* Corrections for higher compression speed settings
++ * (reduced compression expected)
++ */
+ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
+ {
+ if (cpi->oxcf.cpu_used <= 5)
+@@ -1150,12 +1199,12 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
+ speed_correction = 1.25;
+ }
+
+- // Try and pick a Q that can encode the content at the given rate.
++ /* Try and pick a Q that can encode the content at the given rate. */
+ for (Q = 0; Q < MAXQ; Q++)
+ {
+ int bits_per_mb_at_this_q;
+
+- // Error per MB based correction factor
++ /* Error per MB based correction factor */
+ err_correction_factor =
+ calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q);
+
+@@ -1172,7 +1221,7 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
+ return Q;
+ }
+
+-// Estimate a worst case Q for a KF group
++/* Estimate a worst case Q for a KF group */
+ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio)
+ {
+ int Q;
+@@ -1192,12 +1241,14 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
+
+ double combined_correction_factor;
+
+- // Trap special case where the target is <= 0
++ /* Trap special case where the target is <= 0 */
+ if (target_norm_bits_per_mb <= 0)
+ return MAXQ * 2;
+
+- // Calculate a corrective factor based on a rolling ratio of bits spent vs target bits
+- // This is clamped to the range 0.1 to 10.0
++ /* Calculate a corrective factor based on a rolling ratio of bits spent
++ * vs target bits
++ * This is clamped to the range 0.1 to 10.0
++ */
+ if (cpi->long_rolling_target_bits <= 0)
+ current_spend_ratio = 10.0;
+ else
+@@ -1206,14 +1257,19 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
+ current_spend_ratio = (current_spend_ratio > 10.0) ? 10.0 : (current_spend_ratio < 0.1) ? 0.1 : current_spend_ratio;
+ }
+
+- // Calculate a correction factor based on the quality of prediction in the sequence as indicated by intra_inter error score ratio (IIRatio)
+- // The idea here is to favour subsampling in the hardest sections vs the easyest.
++ /* Calculate a correction factor based on the quality of prediction in
++ * the sequence as indicated by intra_inter error score ratio (IIRatio)
++ * The idea here is to favour subsampling in the hardest sections vs
++ * the easiest.
++ */
+ iiratio_correction_factor = 1.0 - ((group_iiratio - 6.0) * 0.1);
+
+ if (iiratio_correction_factor < 0.5)
+ iiratio_correction_factor = 0.5;
+
+- // Corrections for higher compression speed settings (reduced compression expected)
++ /* Corrections for higher compression speed settings
++ * (reduced compression expected)
++ */
+ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
+ {
+ if (cpi->oxcf.cpu_used <= 5)
+@@ -1222,13 +1278,15 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
+ speed_correction = 1.25;
+ }
+
+- // Combine the various factors calculated above
++ /* Combine the various factors calculated above */
+ combined_correction_factor = speed_correction * iiratio_correction_factor * current_spend_ratio;
+
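For orientation, the two clamps in the hunk above combine as follows. This is a minimal sketch with invented names (combined_factor and its parameters), not the patch's function; the spend ratio is limited to [0.1, 10.0] and the II-ratio factor is floored at 0.5 before the product is taken:

#include <stdio.h>

static double combined_factor(double rolling_actual, double rolling_target,
                              double group_iiratio, double speed_correction)
{
    /* Rolling ratio of bits spent vs target bits, clamped to [0.1, 10.0]. */
    double spend = (rolling_target <= 0.0)
        ? 10.0 : rolling_actual / rolling_target;
    spend = (spend > 10.0) ? 10.0 : (spend < 0.1) ? 0.1 : spend;

    /* II-ratio factor: favour the hardest sections, floored at 0.5. */
    double iifactor = 1.0 - ((group_iiratio - 6.0) * 0.1);
    if (iifactor < 0.5)
        iifactor = 0.5;

    return speed_correction * iifactor * spend;
}

int main(void)
{
    /* 20% overspend, II ratio of 8, speed correction 1.25 -> 1.2 */
    printf("%f\n", combined_factor(1.2e6, 1.0e6, 8.0, 1.25));
    return 0;
}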
+- // Try and pick a Q that should be high enough to encode the content at the given rate.
++ /* Try and pick a Q that should be high enough to encode the content at
++ * the given rate.
++ */
+ for (Q = 0; Q < MAXQ; Q++)
+ {
+- // Error per MB based correction factor
++ /* Error per MB based correction factor */
+ err_correction_factor =
+ calc_correction_factor(err_per_mb, 150.0, pow_lowq, pow_highq, Q);
+
+@@ -1241,7 +1299,9 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
+ break;
+ }
+
+- // If we could not hit the target even at Max Q then estimate what Q would have bee required
++ /* If we could not hit the target even at Max Q then estimate what Q
++ * would have been required
++ */
+ while ((bits_per_mb_at_this_q > target_norm_bits_per_mb) && (Q < (MAXQ * 2)))
+ {
+
+@@ -1280,30 +1340,34 @@ void vp8_init_second_pass(VP8_COMP *cpi)
+ cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
+ cpi->twopass.total_left_stats = cpi->twopass.total_stats;
+
+- // each frame can have a different duration, as the frame rate in the source
+- // isn't guaranteed to be constant. The frame rate prior to the first frame
+- // encoded in the second pass is a guess. However the sum duration is not.
+- // Its calculated based on the actual durations of all frames from the first
+- // pass.
++ /* each frame can have a different duration, as the frame rate in the
++ * source isn't guaranteed to be constant. The frame rate prior to
++ * the first frame encoded in the second pass is a guess. However the
++ * sum duration is not. It's calculated based on the actual durations of
++ * all frames from the first pass.
++ */
+ vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
+
+ cpi->output_frame_rate = cpi->frame_rate;
+ cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
+ cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
+
+- // Calculate a minimum intra value to be used in determining the IIratio
+- // scores used in the second pass. We have this minimum to make sure
+- // that clips that are static but "low complexity" in the intra domain
+- // are still boosted appropriately for KF/GF/ARF
++ /* Calculate a minimum intra value to be used in determining the IIratio
++ * scores used in the second pass. We have this minimum to make sure
++ * that clips that are static but "low complexity" in the intra domain
++ * are still boosted appropriately for KF/GF/ARF
++ */
+ cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
+ cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
+
+- // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
++ /* Scan the first pass file and calculate an average Intra / Inter error
++ * score ratio for the sequence
++ */
+ {
+ double sum_iiratio = 0.0;
+ double IIRatio;
+
+- start_pos = cpi->twopass.stats_in; // Note starting "file" position
++ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */
+
+ while (input_stats(cpi, &this_frame) != EOF)
+ {
+@@ -1314,14 +1378,15 @@ void vp8_init_second_pass(VP8_COMP *cpi)
+
+ cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count);
+
+- // Reset file position
++ /* Reset file position */
+ reset_fpf_position(cpi, start_pos);
+ }
+
+- // Scan the first pass file and calculate a modified total error based upon the bias/power function
+- // used to allocate bits
++ /* Scan the first pass file and calculate a modified total error based
++ * upon the bias/power function used to allocate bits
++ */
+ {
+- start_pos = cpi->twopass.stats_in; // Note starting "file" position
++ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */
+
+ cpi->twopass.modified_error_total = 0.0;
+ cpi->twopass.modified_error_used = 0.0;
+@@ -1332,7 +1397,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
+ }
+ cpi->twopass.modified_error_left = cpi->twopass.modified_error_total;
+
+- reset_fpf_position(cpi, start_pos); // Reset file position
++ reset_fpf_position(cpi, start_pos); /* Reset file position */
+
+ }
+ }
+@@ -1341,23 +1406,24 @@ void vp8_end_second_pass(VP8_COMP *cpi)
+ {
+ }
+
+-// This function gives and estimate of how badly we believe
+-// the prediction quality is decaying from frame to frame.
++/* This function gives an estimate of how badly we believe the prediction
++ * quality is decaying from frame to frame.
++ */
+ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
+ {
+ double prediction_decay_rate;
+ double motion_decay;
+ double motion_pct = next_frame->pcnt_motion;
+
+- // Initial basis is the % mbs inter coded
++ /* Initial basis is the % mbs inter coded */
+ prediction_decay_rate = next_frame->pcnt_inter;
+
+- // High % motion -> somewhat higher decay rate
++ /* High % motion -> somewhat higher decay rate */
+ motion_decay = (1.0 - (motion_pct / 20.0));
+ if (motion_decay < prediction_decay_rate)
+ prediction_decay_rate = motion_decay;
+
+- // Adjustment to decay rate based on speed of motion
++ /* Adjustment to decay rate based on speed of motion */
+ {
+ double this_mv_rabs;
+ double this_mv_cabs;
+@@ -1377,9 +1443,10 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra
+ return prediction_decay_rate;
+ }
+
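The decay-rate bound above restates as a small pure function; a minimal sketch with made-up percentages (the patch's further adjustment for motion speed is deliberately left out):

#include <stdio.h>

static double decay_rate(double pcnt_inter, double pcnt_motion)
{
    double rate = pcnt_inter;                   /* % mbs inter coded */
    double motion_decay = 1.0 - (pcnt_motion / 20.0);
    if (motion_decay < rate)                    /* high motion decays faster */
        rate = motion_decay;
    return rate;  /* the patch additionally adjusts for motion speed */
}

int main(void)
{
    /* 98% inter, 1.5% motion: the motion term dominates -> 0.925 */
    printf("%f\n", decay_rate(0.98, 1.5));
    return 0;
}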
+-// Function to test for a condition where a complex transition is followed
+-// by a static section. For example in slide shows where there is a fade
+-// between slides. This is to help with more optimal kf and gf positioning.
++/* Function to test for a condition where a complex transition is followed
++ * by a static section. For example in slide shows where there is a fade
++ * between slides. This is to help with more optimal kf and gf positioning.
++ */
+ static int detect_transition_to_still(
+ VP8_COMP *cpi,
+ int frame_interval,
+@@ -1389,9 +1456,10 @@ static int detect_transition_to_still(
+ {
+ int trans_to_still = 0;
+
+- // Break clause to detect very still sections after motion
+- // For example a static image after a fade or other transition
+- // instead of a clean scene cut.
++ /* Break clause to detect very still sections after motion
++ * For example a static image after a fade or other transition
++ * instead of a clean scene cut.
++ */
+ if ( (frame_interval > MIN_GF_INTERVAL) &&
+ (loop_decay_rate >= 0.999) &&
+ (decay_accumulator < 0.9) )
+@@ -1401,8 +1469,7 @@ static int detect_transition_to_still(
+ FIRSTPASS_STATS tmp_next_frame;
+ double decay_rate;
+
+- // Look ahead a few frames to see if static condition
+- // persists...
++ /* Look ahead a few frames to see if static condition persists... */
+ for ( j = 0; j < still_interval; j++ )
+ {
+ if (EOF == input_stats(cpi, &tmp_next_frame))
+@@ -1412,10 +1479,10 @@ static int detect_transition_to_still(
+ if ( decay_rate < 0.999 )
+ break;
+ }
+- // Reset file position
++ /* Reset file position */
+ reset_fpf_position(cpi, position);
+
+- // Only if it does do we signal a transition to still
++ /* Only if it does do we signal a transition to still */
+ if ( j == still_interval )
+ trans_to_still = 1;
+ }
+@@ -1423,24 +1490,26 @@ static int detect_transition_to_still(
+ return trans_to_still;
+ }
+
+-// This function detects a flash through the high relative pcnt_second_ref
+-// score in the frame following a flash frame. The offset passed in should
+-// reflect this
++/* This function detects a flash through the high relative pcnt_second_ref
++ * score in the frame following a flash frame. The offset passed in should
++ * reflect this
++ */
+ static int detect_flash( VP8_COMP *cpi, int offset )
+ {
+ FIRSTPASS_STATS next_frame;
+
+ int flash_detected = 0;
+
+- // Read the frame data.
+- // The return is 0 (no flash detected) if not a valid frame
++ /* Read the frame data. */
++ /* The return is 0 (no flash detected) if not a valid frame */
+ if ( read_frame_stats(cpi, &next_frame, offset) != EOF )
+ {
+- // What we are looking for here is a situation where there is a
+- // brief break in prediction (such as a flash) but subsequent frames
+- // are reasonably well predicted by an earlier (pre flash) frame.
+- // The recovery after a flash is indicated by a high pcnt_second_ref
+- // comapred to pcnt_inter.
++ /* What we are looking for here is a situation where there is a
++ * brief break in prediction (such as a flash) but subsequent frames
++ * are reasonably well predicted by an earlier (pre flash) frame.
++ * The recovery after a flash is indicated by a high pcnt_second_ref
++ * compared to pcnt_inter.
++ */
+ if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
+ (next_frame.pcnt_second_ref >= 0.5 ) )
+ {
+@@ -1461,7 +1530,7 @@ static int detect_flash( VP8_COMP *cpi, int offset )
+ return flash_detected;
+ }
+
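The flash test above boils down to a two-term predicate on the first-pass stats. A minimal sketch, with a stand-in struct for the two FIRSTPASS_STATS fields it reads (the 0.5 threshold is the one from the hunk):

#include <stdio.h>

/* Illustrative stand-in for the two FIRSTPASS_STATS fields used. */
struct stats { double pcnt_inter; double pcnt_second_ref; };

static int flash_recovery(const struct stats *next_frame)
{
    /* Recovery after a flash: the second (pre-flash) reference predicts
     * better than the last frame, and covers at least half the MBs. */
    return (next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
           (next_frame->pcnt_second_ref >= 0.5);
}

int main(void)
{
    struct stats s = { 0.30, 0.65 };
    printf("flash detected: %d\n", flash_recovery(&s));  /* prints 1 */
    return 0;
}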
+-// Update the motion related elements to the GF arf boost calculation
++/* Update the motion related elements to the GF arf boost calculation */
+ static void accumulate_frame_motion_stats(
+ VP8_COMP *cpi,
+ FIRSTPASS_STATS * this_frame,
+@@ -1470,22 +1539,22 @@ static void accumulate_frame_motion_stats(
+ double * abs_mv_in_out_accumulator,
+ double * mv_ratio_accumulator )
+ {
+- //double this_frame_mv_in_out;
+ double this_frame_mvr_ratio;
+ double this_frame_mvc_ratio;
+ double motion_pct;
+
+- // Accumulate motion stats.
++ /* Accumulate motion stats. */
+ motion_pct = this_frame->pcnt_motion;
+
+- // Accumulate Motion In/Out of frame stats
++ /* Accumulate Motion In/Out of frame stats */
+ *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
+ *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
+ *abs_mv_in_out_accumulator +=
+ fabs(this_frame->mv_in_out_count * motion_pct);
+
+- // Accumulate a measure of how uniform (or conversely how random)
+- // the motion field is. (A ratio of absmv / mv)
++ /* Accumulate a measure of how uniform (or conversely how random)
++ * the motion field is. (A ratio of absmv / mv)
++ */
+ if (motion_pct > 0.05)
+ {
+ this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
+@@ -1507,7 +1576,7 @@ static void accumulate_frame_motion_stats(
+ }
+ }
+
+-// Calculate a baseline boost number for the current frame.
++/* Calculate a baseline boost number for the current frame. */
+ static double calc_frame_boost(
+ VP8_COMP *cpi,
+ FIRSTPASS_STATS * this_frame,
+@@ -1515,7 +1584,7 @@ static double calc_frame_boost(
+ {
+ double frame_boost;
+
+- // Underlying boost factor is based on inter intra error ratio
++ /* Underlying boost factor is based on inter intra error ratio */
+ if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
+ frame_boost = (IIFACTOR * this_frame->intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
+@@ -1523,17 +1592,18 @@ static double calc_frame_boost(
+ frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
+
+- // Increase boost for frames where new data coming into frame
+- // (eg zoom out). Slightly reduce boost if there is a net balance
+- // of motion out of the frame (zoom in).
+- // The range for this_frame_mv_in_out is -1.0 to +1.0
++ /* Increase boost for frames where new data coming into frame
++ * (eg zoom out). Slightly reduce boost if there is a net balance
++ * of motion out of the frame (zoom in).
++ * The range for this_frame_mv_in_out is -1.0 to +1.0
++ */
+ if (this_frame_mv_in_out > 0.0)
+ frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
+- // In extreme case boost is halved
++ /* In extreme case boost is halved */
+ else
+ frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
+
+- // Clip to maximum
++ /* Clip to maximum */
+ if (frame_boost > GF_RMAX)
+ frame_boost = GF_RMAX;
+
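A minimal sketch of the boost shaping above; IIFACTOR and GF_RMAX here are assumed illustrative constants, not the values defined elsewhere in the encoder:

#include <stdio.h>

#define IIFACTOR 1.5   /* assumed, for illustration only */
#define GF_RMAX  96.0  /* assumed, for illustration only */

static double frame_boost(double intra_err, double coded_err, double mv_in_out)
{
    double boost = IIFACTOR * intra_err / (coded_err + 1e-10);
    if (mv_in_out > 0.0)
        boost += boost * (mv_in_out * 2.0);  /* new data entering: raise */
    else
        boost += boost * (mv_in_out / 2.0);  /* net motion out: halved at -1.0 */
    return (boost > GF_RMAX) ? GF_RMAX : boost;  /* clip to maximum */
}

int main(void)
{
    printf("%f\n", frame_boost(4000.0, 250.0, 0.1));  /* 24 raised to 28.8 */
    return 0;
}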
+@@ -1561,26 +1631,27 @@ static int calc_arf_boost(
+ double r;
+ int flash_detected = 0;
+
+- // Search forward from the proposed arf/next gf position
++ /* Search forward from the proposed arf/next gf position */
+ for ( i = 0; i < f_frames; i++ )
+ {
+ if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF )
+ break;
+
+- // Update the motion related elements to the boost calculation
++ /* Update the motion related elements to the boost calculation */
+ accumulate_frame_motion_stats( cpi, &this_frame,
+ &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator );
+
+- // Calculate the baseline boost number for this frame
++ /* Calculate the baseline boost number for this frame */
+ r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out );
+
+- // We want to discount the the flash frame itself and the recovery
+- // frame that follows as both will have poor scores.
++ /* We want to discount the flash frame itself and the recovery
++ * frame that follows as both will have poor scores.
++ */
+ flash_detected = detect_flash(cpi, (i+offset)) ||
+ detect_flash(cpi, (i+offset+1));
+
+- // Cumulative effect of prediction quality decay
++ /* Cumulative effect of prediction quality decay */
+ if ( !flash_detected )
+ {
+ decay_accumulator =
+@@ -1591,7 +1662,7 @@ static int calc_arf_boost(
+ }
+ boost_score += (decay_accumulator * r);
+
+- // Break out conditions.
++ /* Break out conditions. */
+ if ( (!flash_detected) &&
+ ((mv_ratio_accumulator > 100.0) ||
+ (abs_mv_in_out_accumulator > 3.0) ||
+@@ -1603,7 +1674,7 @@ static int calc_arf_boost(
+
+ *f_boost = (int)(boost_score * 100.0) >> 4;
+
+- // Reset for backward looking loop
++ /* Reset for backward looking loop */
+ boost_score = 0.0;
+ mv_ratio_accumulator = 0.0;
+ decay_accumulator = 1.0;
+@@ -1611,26 +1682,27 @@ static int calc_arf_boost(
+ mv_in_out_accumulator = 0.0;
+ abs_mv_in_out_accumulator = 0.0;
+
+- // Search forward from the proposed arf/next gf position
++ /* Search backward from the proposed arf/next gf position */
+ for ( i = -1; i >= -b_frames; i-- )
+ {
+ if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF )
+ break;
+
+- // Update the motion related elements to the boost calculation
++ /* Update the motion related elements to the boost calculation */
+ accumulate_frame_motion_stats( cpi, &this_frame,
+ &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator );
+
+- // Calculate the baseline boost number for this frame
++ /* Calculate the baseline boost number for this frame */
+ r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out );
+
+- // We want to discount the the flash frame itself and the recovery
+- // frame that follows as both will have poor scores.
++ /* We want to discount the flash frame itself and the recovery
++ * frame that follows as both will have poor scores.
++ */
+ flash_detected = detect_flash(cpi, (i+offset)) ||
+ detect_flash(cpi, (i+offset+1));
+
+- // Cumulative effect of prediction quality decay
++ /* Cumulative effect of prediction quality decay */
+ if ( !flash_detected )
+ {
+ decay_accumulator =
+@@ -1642,7 +1714,7 @@ static int calc_arf_boost(
+
+ boost_score += (decay_accumulator * r);
+
+- // Break out conditions.
++ /* Break out conditions. */
+ if ( (!flash_detected) &&
+ ((mv_ratio_accumulator > 100.0) ||
+ (abs_mv_in_out_accumulator > 3.0) ||
+@@ -1657,7 +1729,7 @@ static int calc_arf_boost(
+ }
+ #endif
+
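Both search loops above accumulate boost the same way: each frame's raw boost is weighted by the running product of per-frame decay rates, then scaled as in the patch. A minimal sketch with made-up numbers:

#include <stdio.h>

int main(void)
{
    /* Illustrative per-frame (raw boost, decay rate) pairs. */
    double r[]     = { 24.0, 22.0, 20.0, 18.0 };
    double decay[] = { 0.98, 0.95, 0.90, 0.85 };
    double decay_accumulator = 1.0, boost_score = 0.0;
    int i;

    for (i = 0; i < 4; i++)
    {
        decay_accumulator *= decay[i];        /* compounding quality decay */
        boost_score += decay_accumulator * r[i];
    }
    /* Mirrors the patch's final scaling: (int)(score * 100.0) >> 4 */
    printf("boost_score=%.2f -> boost=%d\n",
           boost_score, (int)(boost_score * 100.0) >> 4);
    return 0;
}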
+-// Analyse and define a gf/arf group .
++/* Analyse and define a gf/arf group. */
+ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ {
+ FIRSTPASS_STATS next_frame;
+@@ -1673,14 +1745,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ double mv_ratio_accumulator = 0.0;
+ double decay_accumulator = 1.0;
+
+- double loop_decay_rate = 1.00; // Starting decay rate
++ double loop_decay_rate = 1.00; /* Starting decay rate */
+
+ double this_frame_mv_in_out = 0.0;
+ double mv_in_out_accumulator = 0.0;
+ double abs_mv_in_out_accumulator = 0.0;
+ double mod_err_per_mb_accumulator = 0.0;
+
+- int max_bits = frame_max_bits(cpi); // Max for a single frame
++ int max_bits = frame_max_bits(cpi); /* Max for a single frame */
+
+ unsigned int allow_alt_ref =
+ cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
+@@ -1693,37 +1765,40 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ cpi->twopass.gf_group_bits = 0;
+ cpi->twopass.gf_decay_rate = 0;
+
+- vp8_clear_system_state(); //__asm emms;
++ vp8_clear_system_state();
+
+ start_pos = cpi->twopass.stats_in;
+
+- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
++ vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
+
+- // Load stats for the current frame.
++ /* Load stats for the current frame. */
+ mod_frame_err = calculate_modified_err(cpi, this_frame);
+
+- // Note the error of the frame at the start of the group (this will be
+- // the GF frame error if we code a normal gf
++ /* Note the error of the frame at the start of the group (this will be
++ * the GF frame error if we code a normal gf)
++ */
+ gf_first_frame_err = mod_frame_err;
+
+- // Special treatment if the current frame is a key frame (which is also
+- // a gf). If it is then its error score (and hence bit allocation) need
+- // to be subtracted out from the calculation for the GF group
++ /* Special treatment if the current frame is a key frame (which is also
++ * a gf). If it is then its error score (and hence bit allocation) needs
++ * to be subtracted out from the calculation for the GF group
++ */
+ if (cpi->common.frame_type == KEY_FRAME)
+ gf_group_err -= gf_first_frame_err;
+
+- // Scan forward to try and work out how many frames the next gf group
+- // should contain and what level of boost is appropriate for the GF
+- // or ARF that will be coded with the group
++ /* Scan forward to try and work out how many frames the next gf group
++ * should contain and what level of boost is appropriate for the GF
++ * or ARF that will be coded with the group
++ */
+ i = 0;
+
+ while (((i < cpi->twopass.static_scene_max_gf_interval) ||
+ ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) &&
+ (i < cpi->twopass.frames_to_key))
+ {
+- i++; // Increment the loop counter
++ i++;
+
+- // Accumulate error score of frames in this gf group
++ /* Accumulate error score of frames in this gf group */
+ mod_frame_err = calculate_modified_err(cpi, this_frame);
+
+ gf_group_err += mod_frame_err;
+@@ -1734,19 +1809,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ if (EOF == input_stats(cpi, &next_frame))
+ break;
+
+- // Test for the case where there is a brief flash but the prediction
+- // quality back to an earlier frame is then restored.
++ /* Test for the case where there is a brief flash but the prediction
++ * quality back to an earlier frame is then restored.
++ */
+ flash_detected = detect_flash(cpi, 0);
+
+- // Update the motion related elements to the boost calculation
++ /* Update the motion related elements to the boost calculation */
+ accumulate_frame_motion_stats( cpi, &next_frame,
+ &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator );
+
+- // Calculate a baseline boost number for this frame
++ /* Calculate a baseline boost number for this frame */
+ r = calc_frame_boost( cpi, &next_frame, this_frame_mv_in_out );
+
+- // Cumulative effect of prediction quality decay
++ /* Cumulative effect of prediction quality decay */
+ if ( !flash_detected )
+ {
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+@@ -1756,8 +1832,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ }
+ boost_score += (decay_accumulator * r);
+
+- // Break clause to detect very still sections after motion
+- // For example a staic image after a fade or other transition.
++ /* Break clause to detect very still sections after motion
++ * For example a static image after a fade or other transition.
++ */
+ if ( detect_transition_to_still( cpi, i, 5,
+ loop_decay_rate,
+ decay_accumulator ) )
+@@ -1767,14 +1844,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ break;
+ }
+
+- // Break out conditions.
++ /* Break out conditions. */
+ if (
+- // Break at cpi->max_gf_interval unless almost totally static
++ /* Break at cpi->max_gf_interval unless almost totally static */
+ (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) ||
+ (
+- // Dont break out with a very short interval
++ /* Don't break out with a very short interval */
+ (i > MIN_GF_INTERVAL) &&
+- // Dont break out very close to a key frame
++ /* Don't break out very close to a key frame */
+ ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
+ ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
+ (!flash_detected) &&
+@@ -1796,15 +1873,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ cpi->twopass.gf_decay_rate =
+ (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0;
+
+- // When using CBR apply additional buffer related upper limits
++ /* When using CBR apply additional buffer related upper limits */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+ double max_boost;
+
+- // For cbr apply buffer related limits
++ /* For cbr apply buffer related limits */
+ if (cpi->drop_frames_allowed)
+ {
+- int df_buffer_level = cpi->oxcf.drop_frames_water_mark *
++ int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark *
+ (cpi->oxcf.optimal_buffer_level / 100);
+
+ if (cpi->buffer_level > df_buffer_level)
+@@ -1825,7 +1902,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ boost_score = max_boost;
+ }
+
+- // Dont allow conventional gf too near the next kf
++ /* Don't allow conventional gf too near the next kf */
+ if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)
+ {
+ while (i < cpi->twopass.frames_to_key)
+@@ -1846,14 +1923,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ cpi->gfu_boost = (int)(boost_score * 100.0) >> 4;
+
+ #if NEW_BOOST
+- // Alterrnative boost calculation for alt ref
++ /* Alternative boost calculation for alt ref */
+ alt_boost = calc_arf_boost( cpi, 0, (i-1), (i-1), &f_boost, &b_boost );
+ #endif
+
+- // Should we use the alternate refernce frame
++ /* Should we use the alternate reference frame */
+ if (allow_alt_ref &&
+ (i >= MIN_GF_INTERVAL) &&
+- // dont use ARF very near next kf
++ /* don't use ARF very near next kf */
+ (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) &&
+ #if NEW_BOOST
+ ((next_frame.pcnt_inter > 0.75) ||
+@@ -1883,7 +1960,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ cpi->gfu_boost = alt_boost;
+ #endif
+
+- // Estimate the bits to be allocated to the group as a whole
++ /* Estimate the bits to be allocated to the group as a whole */
+ if ((cpi->twopass.kf_group_bits > 0) &&
+ (cpi->twopass.kf_group_error_left > 0))
+ {
+@@ -1893,7 +1970,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ else
+ group_bits = 0;
+
+- // Boost for arf frame
++ /* Boost for arf frame */
+ #if NEW_BOOST
+ Boost = (alt_boost * GFQ_ADJUSTMENT) / 100;
+ #else
+@@ -1901,7 +1978,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ #endif
+ Boost += (i * 50);
+
+- // Set max and minimum boost and hence minimum allocation
++ /* Set max and minimum boost and hence minimum allocation */
+ if (Boost > ((cpi->baseline_gf_interval + 1) * 200))
+ Boost = ((cpi->baseline_gf_interval + 1) * 200);
+ else if (Boost < 125)
+@@ -1909,24 +1986,27 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+
+ allocation_chunks = (i * 100) + Boost;
+
+- // Normalize Altboost and allocations chunck down to prevent overflow
++ /* Normalize Altboost and allocation chunks down to prevent overflow */
+ while (Boost > 1000)
+ {
+ Boost /= 2;
+ allocation_chunks /= 2;
+ }
+
+- // Calculate the number of bits to be spent on the arf based on the
+- // boost number
++ /* Calculate the number of bits to be spent on the arf based on the
++ * boost number
++ */
+ arf_frame_bits = (int)((double)Boost * (group_bits /
+ (double)allocation_chunks));
+
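The normalization loop above halves Boost and allocation_chunks together, so the arf's share of the group bits is unchanged while the intermediate integer math stays small. A minimal sketch with invented values:

#include <stdio.h>

int main(void)
{
    int boost = 4200, allocation_chunks = 12100;  /* illustrative */
    long long group_bits = 900000;                /* illustrative */

    while (boost > 1000)        /* keep Boost small enough for int math */
    {
        boost /= 2;
        allocation_chunks /= 2;
    }
    int arf_bits = (int)((double)boost *
                         ((double)group_bits / allocation_chunks));
    printf("boost=%d chunks=%d arf_bits=%d\n",
           boost, allocation_chunks, arf_bits);
    return 0;
}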
+- // Estimate if there are enough bits available to make worthwhile use
+- // of an arf.
++ /* Estimate if there are enough bits available to make worthwhile use
++ * of an arf.
++ */
+ tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits);
+
+- // Only use an arf if it is likely we will be able to code
+- // it at a lower Q than the surrounding frames.
++ /* Only use an arf if it is likely we will be able to code
++ * it at a lower Q than the surrounding frames.
++ */
+ if (tmp_q < cpi->worst_quality)
+ {
+ int half_gf_int;
+@@ -1936,42 +2016,46 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+
+ cpi->source_alt_ref_pending = 1;
+
+- // For alt ref frames the error score for the end frame of the
+- // group (the alt ref frame) should not contribute to the group
+- // total and hence the number of bit allocated to the group.
+- // Rather it forms part of the next group (it is the GF at the
+- // start of the next group)
+- // gf_group_err -= mod_frame_err;
+-
+- // For alt ref frames alt ref frame is technically part of the
+- // GF frame for the next group but we always base the error
+- // calculation and bit allocation on the current group of frames.
+-
+- // Set the interval till the next gf or arf.
+- // For ARFs this is the number of frames to be coded before the
+- // future frame that is coded as an ARF.
+- // The future frame itself is part of the next group
++ /*
++ * For alt ref frames the error score for the end frame of the
++ * group (the alt ref frame) should not contribute to the group
++ * total and hence the number of bits allocated to the group.
++ * Rather it forms part of the next group (it is the GF at the
++ * start of the next group)
++ * gf_group_err -= mod_frame_err;
++ *
++ * For alt ref frames alt ref frame is technically part of the
++ * GF frame for the next group but we always base the error
++ * calculation and bit allocation on the current group of frames.
++ *
++ * Set the interval till the next gf or arf.
++ * For ARFs this is the number of frames to be coded before the
++ * future frame that is coded as an ARF.
++ * The future frame itself is part of the next group
++ */
+ cpi->baseline_gf_interval = i;
+
+- // Define the arnr filter width for this group of frames:
+- // We only filter frames that lie within a distance of half
+- // the GF interval from the ARF frame. We also have to trap
+- // cases where the filter extends beyond the end of clip.
+- // Note: this_frame->frame has been updated in the loop
+- // so it now points at the ARF frame.
++ /*
++ * Define the arnr filter width for this group of frames:
++ * We only filter frames that lie within a distance of half
++ * the GF interval from the ARF frame. We also have to trap
++ * cases where the filter extends beyond the end of clip.
++ * Note: this_frame->frame has been updated in the loop
++ * so it now points at the ARF frame.
++ */ + half_gf_int = cpi->baseline_gf_interval >> 1; +- frames_after_arf = cpi->twopass.total_stats.count - +- this_frame->frame - 1; ++ frames_after_arf = (int)(cpi->twopass.total_stats.count - ++ this_frame->frame - 1); + + switch (cpi->oxcf.arnr_type) + { +- case 1: // Backward filter ++ case 1: /* Backward filter */ + frames_fwd = 0; + if (frames_bwd > half_gf_int) + frames_bwd = half_gf_int; + break; + +- case 2: // Forward filter ++ case 2: /* Forward filter */ + if (frames_fwd > half_gf_int) + frames_fwd = half_gf_int; + if (frames_fwd > frames_after_arf) +@@ -1979,7 +2063,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + frames_bwd = 0; + break; + +- case 3: // Centered filter ++ case 3: /* Centered filter */ + default: + frames_fwd >>= 1; + if (frames_fwd > frames_after_arf) +@@ -1989,8 +2073,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + + frames_bwd = frames_fwd; + +- // For even length filter there is one more frame backward +- // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. ++ /* For even length filter there is one more frame backward ++ * than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. ++ */ + if (frames_bwd < half_gf_int) + frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1; + break; +@@ -2010,12 +2095,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + cpi->baseline_gf_interval = i; + } + +- // Now decide how many bits should be allocated to the GF group as a +- // proportion of those remaining in the kf group. +- // The final key frame group in the clip is treated as a special case +- // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. +- // This is also important for short clips where there may only be one +- // key frame. ++ /* ++ * Now decide how many bits should be allocated to the GF group as a ++ * proportion of those remaining in the kf group. ++ * The final key frame group in the clip is treated as a special case ++ * where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. ++ * This is also important for short clips where there may only be one ++ * key frame. ++ */ + if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - + cpi->common.current_video_frame)) + { +@@ -2023,7 +2110,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; + } + +- // Calculate the bits to be allocated to the group as a whole ++ /* Calculate the bits to be allocated to the group as a whole */ + if ((cpi->twopass.kf_group_bits > 0) && + (cpi->twopass.kf_group_error_left > 0)) + { +@@ -2034,31 +2121,32 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + else + cpi->twopass.gf_group_bits = 0; + +- cpi->twopass.gf_group_bits = ++ cpi->twopass.gf_group_bits = (int)( + (cpi->twopass.gf_group_bits < 0) + ? 0 + : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) +- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; ++ ? 
cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits);
+
+- // Clip cpi->twopass.gf_group_bits based on user supplied data rate
+- // variability limit (cpi->oxcf.two_pass_vbrmax_section)
++ /* Clip cpi->twopass.gf_group_bits based on user supplied data rate
++ * variability limit (cpi->oxcf.two_pass_vbrmax_section)
++ */
+ if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
+ cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
+
+- // Reset the file position
++ /* Reset the file position */
+ reset_fpf_position(cpi, start_pos);
+
+- // Update the record of error used so far (only done once per gf group)
++ /* Update the record of error used so far (only done once per gf group) */
+ cpi->twopass.modified_error_used += gf_group_err;
+
+- // Assign bits to the arf or gf.
++ /* Assign bits to the arf or gf. */
+ for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) {
+ int Boost;
+ int allocation_chunks;
+ int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
+ int gf_bits;
+
+- // For ARF frames
++ /* For ARF frames */
+ if (cpi->source_alt_ref_pending && i == 0)
+ {
+ #if NEW_BOOST
+@@ -2068,7 +2156,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ #endif
+ Boost += (cpi->baseline_gf_interval * 50);
+
+- // Set max and minimum boost and hence minimum allocation
++ /* Set max and minimum boost and hence minimum allocation */
+ if (Boost > ((cpi->baseline_gf_interval + 1) * 200))
+ Boost = ((cpi->baseline_gf_interval + 1) * 200);
+ else if (Boost < 125)
+@@ -2077,13 +2165,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ allocation_chunks =
+ ((cpi->baseline_gf_interval + 1) * 100) + Boost;
+ }
+- // Else for standard golden frames
++ /* Else for standard golden frames */
+ else
+ {
+- // boost based on inter / intra ratio of subsequent frames
++ /* boost based on inter / intra ratio of subsequent frames */
+ Boost = (cpi->gfu_boost * GFQ_ADJUSTMENT) / 100;
+
+- // Set max and minimum boost and hence minimum allocation
++ /* Set max and minimum boost and hence minimum allocation */
+ if (Boost > (cpi->baseline_gf_interval * 150))
+ Boost = (cpi->baseline_gf_interval * 150);
+ else if (Boost < 125)
+@@ -2093,22 +2181,24 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ (cpi->baseline_gf_interval * 100) + (Boost - 100);
+ }
+
+- // Normalize Altboost and allocations chunck down to prevent overflow
++ /* Normalize Altboost and allocation chunks down to prevent overflow */
+ while (Boost > 1000)
+ {
+ Boost /= 2;
+ allocation_chunks /= 2;
+ }
+
+- // Calculate the number of bits to be spent on the gf or arf based on
+- // the boost number
++ /* Calculate the number of bits to be spent on the gf or arf based on
++ * the boost number
++ */
+ gf_bits = (int)((double)Boost *
+ (cpi->twopass.gf_group_bits /
+ (double)allocation_chunks));
+
+- // If the frame that is to be boosted is simpler than the average for
+- // the gf/arf group then use an alternative calculation
+- // based on the error score of the frame itself
++ /* If the frame that is to be boosted is simpler than the average for
++ * the gf/arf group then use an alternative calculation
++ * based on the error score of the frame itself
++ */
+ if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval)
+ {
+ double alt_gf_grp_bits;
+@@ -2127,9 +2217,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ gf_bits = alt_gf_bits;
+ }
+ }
+- // Else if it is harder than other frames in the group make sure it at
+- // least receives an allocation in keeping with its relative error
+- // score, otherwise it may be worse off than an "un-boosted" frame
++ /* Else if it is harder than other frames in the group make sure it at
++ * least receives an allocation in keeping with its relative error
++ * score, otherwise it may be worse off than an "un-boosted" frame
++ */
+ else
+ {
+ int alt_gf_bits =
+@@ -2143,18 +2234,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ }
+ }
+
+- // Apply an additional limit for CBR
++ /* Apply an additional limit for CBR */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+- if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1))
+- cpi->twopass.gf_bits = cpi->buffer_level >> 1;
++ if (cpi->twopass.gf_bits > (int)(cpi->buffer_level >> 1))
++ cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1);
+ }
+
+- // Dont allow a negative value for gf_bits
++ /* Don't allow a negative value for gf_bits */
+ if (gf_bits < 0)
+ gf_bits = 0;
+
+- gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
++ /* Add in minimum for a frame */
++ gf_bits += cpi->min_frame_bandwidth;
+
+ if (i == 0)
+ {
+@@ -2162,33 +2254,39 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ }
+ if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)))
+ {
+- cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame
++ /* Per frame bit target for this frame */
++ cpi->per_frame_bandwidth = gf_bits;
+ }
+ }
+
+ {
+- // Adjust KF group bits and error remainin
+- cpi->twopass.kf_group_error_left -= gf_group_err;
++ /* Adjust KF group bits and error remaining */
++ cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;
+ cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
+
+ if (cpi->twopass.kf_group_bits < 0)
+ cpi->twopass.kf_group_bits = 0;
+
+- // Note the error score left in the remaining frames of the group.
+- // For normal GFs we want to remove the error score for the first frame of the group (except in Key frame case where this has already happened)
++ /* Note the error score left in the remaining frames of the group.
++ * For normal GFs we want to remove the error score for the first
++ * frame of the group (except in Key frame case where this has
++ * already happened)
++ */
+ if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME)
+- cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err;
++ cpi->twopass.gf_group_error_left = (int)(gf_group_err -
++ gf_first_frame_err);
+ else
+- cpi->twopass.gf_group_error_left = gf_group_err;
++ cpi->twopass.gf_group_error_left = (int) gf_group_err;
+
+ cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;
+
+ if (cpi->twopass.gf_group_bits < 0)
+ cpi->twopass.gf_group_bits = 0;
+
+- // This condition could fail if there are two kfs very close together
+- // despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the
+- // calculation of cpi->twopass.alt_extra_bits.
++ /* This condition could fail if there are two kfs very close together
++ * despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the
++ * calculation of cpi->twopass.alt_extra_bits.
++ */ + if ( cpi->baseline_gf_interval >= 3 ) + { + #if NEW_BOOST +@@ -2217,7 +2315,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + cpi->twopass.alt_extra_bits = 0; + } + +- // Adjustments based on a measure of complexity of the section ++ /* Adjustments based on a measure of complexity of the section */ + if (cpi->common.frame_type != KEY_FRAME) + { + FIRSTPASS_STATS sectionstats; +@@ -2234,47 +2332,45 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + + avg_stats(§ionstats); + +- cpi->twopass.section_intra_rating = +- sectionstats.intra_error / +- DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); ++ cpi->twopass.section_intra_rating = (unsigned int) ++ (sectionstats.intra_error / ++ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); + + Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); +- //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) +- //{ + cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); + + if (cpi->twopass.section_max_qfactor < 0.80) + cpi->twopass.section_max_qfactor = 0.80; + +- //} +- //else +- // cpi->twopass.section_max_qfactor = 1.0; +- + reset_fpf_position(cpi, start_pos); + } + } + +-// Allocate bits to a normal frame that is neither a gf an arf or a key frame. ++/* Allocate bits to a normal frame that is neither a gf an arf or a key frame. */ + static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) + { +- int target_frame_size; // gf_group_error_left ++ int target_frame_size; + + double modified_err; +- double err_fraction; // What portion of the remaining GF group error is used by this frame ++ double err_fraction; + +- int max_bits = frame_max_bits(cpi); // Max for a single frame ++ int max_bits = frame_max_bits(cpi); /* Max for a single frame */ + +- // Calculate modified prediction error used in bit allocation ++ /* Calculate modified prediction error used in bit allocation */ + modified_err = calculate_modified_err(cpi, this_frame); + ++ /* What portion of the remaining GF group error is used by this frame */ + if (cpi->twopass.gf_group_error_left > 0) +- err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame ++ err_fraction = modified_err / cpi->twopass.gf_group_error_left; + else + err_fraction = 0.0; + +- target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it? ++ /* How many of those bits available for allocation should we give it? */ ++ target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); + +- // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. ++ /* Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) ++ * at the top end. 
+ if (target_frame_size < 0)
+ target_frame_size = 0;
+ else
+@@ -2286,22 +2382,25 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ target_frame_size = cpi->twopass.gf_group_bits;
+ }
+
+- cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining
+- cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining
++ /* Adjust error and bits remaining */
++ cpi->twopass.gf_group_error_left -= (int)modified_err;
++ cpi->twopass.gf_group_bits -= target_frame_size;
+
+ if (cpi->twopass.gf_group_bits < 0)
+ cpi->twopass.gf_group_bits = 0;
+
+- target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame.
++ /* Add in the minimum number of bits that is set aside for every frame. */
++ target_frame_size += cpi->min_frame_bandwidth;
+
+- // Every other frame gets a few extra bits
++ /* Every other frame gets a few extra bits */
+ if ( (cpi->common.frames_since_golden & 0x01) &&
+ (cpi->frames_till_gf_update_due > 0) )
+ {
+ target_frame_size += cpi->twopass.alt_extra_bits;
+ }
+
+- cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame
++ /* Per frame bit target for this frame */
++ cpi->per_frame_bandwidth = target_frame_size;
+ }
+
+ void vp8_second_pass(VP8_COMP *cpi)
+@@ -2330,20 +2429,25 @@ void vp8_second_pass(VP8_COMP *cpi)
+ this_frame_intra_error = this_frame.intra_error;
+ this_frame_coded_error = this_frame.coded_error;
+
+- // keyframe and section processing !
++ /* keyframe and section processing! */
+ if (cpi->twopass.frames_to_key == 0)
+ {
+- // Define next KF group and assign bits to it
++ /* Define next KF group and assign bits to it */
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ find_next_key_frame(cpi, &this_frame_copy);
+
+- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop
+- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups.
+- // This is temporary code till we decide what should really happen in this case.
++ /* Special case: error_resilient_mode does not make much sense for
++ * two pass with its current meaning, but this code is designed to
++ * stop outlandish behaviour if someone does set it when using two
++ * pass. It effectively disables GF groups. This is temporary code
++ * till we decide what should really happen in this case.
++ */
+ if (cpi->oxcf.error_resilient_mode)
+ {
+- cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits;
+- cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left;
++ cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits;
++ cpi->twopass.gf_group_error_left =
++ (int)cpi->twopass.kf_group_error_left;
+ cpi->baseline_gf_interval = cpi->twopass.frames_to_key;
+ cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
+ cpi->source_alt_ref_pending = 0;
+@@ -2351,19 +2455,25 @@ void vp8_second_pass(VP8_COMP *cpi)
+
+ }
+
+- // Is this a GF / ARF (Note that a KF is always also a GF)
++ /* Is this a GF / ARF (Note that a KF is always also a GF) */
+ if (cpi->frames_till_gf_update_due == 0)
+ {
+- // Define next gf group and assign bits to it
++ /* Define next gf group and assign bits to it */
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ define_gf_group(cpi, &this_frame_copy);
+
+- // If we are going to code an altref frame at the end of the group and the current frame is not a key frame....
+- // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits
+- // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well
++ /* If we are going to code an altref frame at the end of the group
++ * and the current frame is not a key frame.... If the previous
++ * group used an arf this frame has already benefited from that arf
++ * boost and it should not be given extra bits. If the previous
++ * group was NOT coded using arf we may want to apply some boost to
++ * this GF as well
++ */
+ if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))
+ {
+- // Assign a standard frames worth of bits from those allocated to the GF group
++ /* Assign a standard frame's worth of bits from those allocated
++ * to the GF group
++ */
+ int bak = cpi->per_frame_bandwidth;
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+@@ -2371,59 +2481,64 @@ void vp8_second_pass(VP8_COMP *cpi)
+ }
+ }
+
+- // Otherwise this is an ordinary frame
++ /* Otherwise this is an ordinary frame */
+ else
+ {
+- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop
+- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups.
+- // This is temporary code till we decide what should really happen in this case.
++ /* Special case: error_resilient_mode does not make much sense for
++ * two pass with its current meaning, but this code is designed to
++ * stop outlandish behaviour if someone does set it when using two
++ * pass. It effectively disables GF groups. This is temporary code
++ * till we decide what should really happen in this case.
++ */
+ if (cpi->oxcf.error_resilient_mode)
+ {
+ cpi->frames_till_gf_update_due = cpi->twopass.frames_to_key;
+
+ if (cpi->common.frame_type != KEY_FRAME)
+ {
+- // Assign bits from those allocated to the GF group
++ /* Assign bits from those allocated to the GF group */
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+ }
+ }
+ else
+ {
+- // Assign bits from those allocated to the GF group
++ /* Assign bits from those allocated to the GF group */
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+ }
+ }
+
+- // Keep a globally available copy of this and the next frame's iiratio.
+- cpi->twopass.this_iiratio = this_frame_intra_error /
+- DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
++ /* Keep a globally available copy of this and the next frame's iiratio. */
++ cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error /
++ DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
+ {
+ FIRSTPASS_STATS next_frame;
+ if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
+ {
+- cpi->twopass.next_iiratio = next_frame.intra_error /
+- DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
++ cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error /
++ DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
+ }
+ }
+
+- // Set nominal per second bandwidth for this frame
+- cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;
++ /* Set nominal per second bandwidth for this frame */
++ cpi->target_bandwidth = (int)
++ (cpi->per_frame_bandwidth * cpi->output_frame_rate);
+ if (cpi->target_bandwidth < 0)
+ cpi->target_bandwidth = 0;
+
+
+- // Account for mv, mode and other overheads.
+- overhead_bits = estimate_modemvcost(
++ /* Account for mv, mode and other overheads. */
++ overhead_bits = (int)estimate_modemvcost(
+ cpi, &cpi->twopass.total_left_stats );
+
+- // Special case code for first frame.
++ /* Special case code for first frame. */
+ if (cpi->common.current_video_frame == 0)
+ {
+ cpi->twopass.est_max_qcorrection_factor = 1.0;
+
+- // Set a cq_level in constrained quality mode.
++ /* Set a cq_level in constrained quality mode. */
+ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY )
+ {
+ int est_cq;
+@@ -2439,7 +2554,7 @@ void vp8_second_pass(VP8_COMP *cpi)
+ cpi->cq_target_quality = est_cq;
+ }
+
+- // guess at maxq needed in 2nd pass
++ /* guess at maxq needed in 2nd pass */
+ cpi->twopass.maxq_max_limit = cpi->worst_quality;
+ cpi->twopass.maxq_min_limit = cpi->best_quality;
+
+@@ -2449,11 +2564,12 @@ void vp8_second_pass(VP8_COMP *cpi)
+ (int)(cpi->twopass.bits_left / frames_left),
+ overhead_bits );
+
+- // Limit the maxq value returned subsequently.
+- // This increases the risk of overspend or underspend if the initial
+- // estimate for the clip is bad, but helps prevent excessive
+- // variation in Q, especially near the end of a clip
+- // where for example a small overspend may cause Q to crash
++ /* Limit the maxq value returned subsequently.
++ * This increases the risk of overspend or underspend if the initial
++ * estimate for the clip is bad, but helps prevent excessive
++ * variation in Q, especially near the end of a clip
++ * where for example a small overspend may cause Q to crash
++ */
+ cpi->twopass.maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality)
+ ? (tmp_q + 32) : cpi->worst_quality;
+ cpi->twopass.maxq_min_limit = ((tmp_q - 32) > cpi->best_quality)
+@@ -2463,10 +2579,11 @@ void vp8_second_pass(VP8_COMP *cpi)
+ cpi->ni_av_qi = tmp_q;
+ }
+
+- // The last few frames of a clip almost always have to few or too many
+- // bits and for the sake of over exact rate control we dont want to make
+- // radical adjustments to the allowed quantizer range just to use up a
+- // few surplus bits or get beneath the target rate.
++ /* The last few frames of a clip almost always have too few or too many
++ * bits and for the sake of over-exact rate control we don't want to make
++ * radical adjustments to the allowed quantizer range just to use up a
++ * few surplus bits or get beneath the target rate.
++ */
+ else if ( (cpi->common.current_video_frame <
+ (((unsigned int)cpi->twopass.total_stats.count * 255)>>8)) &&
+ ((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
+@@ -2481,7 +2598,7 @@ void vp8_second_pass(VP8_COMP *cpi)
+ (int)(cpi->twopass.bits_left / frames_left),
+ overhead_bits );
+
+- // Move active_worst_quality but in a damped way
++ /* Move active_worst_quality but in a damped way */
+ if (tmp_q > cpi->active_worst_quality)
+ cpi->active_worst_quality ++;
+ else if (tmp_q < cpi->active_worst_quality)
+@@ -2493,7 +2610,7 @@ void vp8_second_pass(VP8_COMP *cpi)
+
+ cpi->twopass.frames_to_key --;
+
+- // Update the total stats remaining sturcture
++ /* Update the total stats remaining structure */
+ subtract_stats(&cpi->twopass.total_left_stats, &this_frame );
+ }
+
+@@ -2502,8 +2619,9 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
+ {
+ int is_viable_kf = 0;
+
+- // Does the frame satisfy the primary criteria of a key frame
+- // If so, then examine how well it predicts subsequent frames
++ /* Does the frame satisfy the primary criteria of a key frame
++ * If so, then examine how well it predicts subsequent frames
++ */
+ if ((this_frame->pcnt_second_ref < 0.10) &&
+ (next_frame->pcnt_second_ref < 0.10) &&
+ ((this_frame->pcnt_inter < 0.05) ||
+@@ -2530,10 +2648,10 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
+
+ vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
+
+- // Note the starting file position so we can reset to it
++ /* Note the starting file position so we can reset to it */
+ start_pos = cpi->twopass.stats_in;
+
+- // Examine how well the key frame predicts subsequent frames
++ /* Examine how well the key frame predicts subsequent frames */
+ for (i = 0 ; i < 16; i++)
+ {
+ next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)) ;
+@@ -2541,18 +2659,16 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
+ if (next_iiratio > RMAX)
+ next_iiratio = RMAX;
+
+- // Cumulative effect of decay in prediction quality
++ /* Cumulative effect of decay in prediction quality */
+ if (local_next_frame.pcnt_inter > 0.85)
+ decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
+ else
+ decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0);
+
+- //decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
+-
+- // Keep a running total
++ /* Keep a running total */
+ boost_score += (decay_accumulator * next_iiratio);
+
+- // Test various breakout clauses
++ /* Test various breakout clauses */
+ if ((local_next_frame.pcnt_inter < 0.05) ||
+ (next_iiratio < 1.5) ||
+ (((local_next_frame.pcnt_inter -
+@@ -2567,17 +2683,19 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
+
+ old_boost_score = boost_score;
+
+- // Get the next frame details
++ /* Get the next frame details */
+ if (EOF == input_stats(cpi, &local_next_frame))
+ break;
+ }
+
+- // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on
++ /* If there is tolerable prediction for at least the next 3 frames
++ * then break out else discard this potential key frame and move on
++ */
+ if (boost_score > 5.0 && (i > 3))
+ is_viable_kf = 1;
+ else
+ {
+- // Reset the file position
++ /* Reset the file position */
+ reset_fpf_position(cpi, start_pos);
+
+ is_viable_kf = 0;
+@@ -2605,65 +2723,71 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ double kf_group_coded_err = 0.0;
+ double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
+
+- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
++ vpx_memset(&next_frame, 0, sizeof(next_frame));
+
+- vp8_clear_system_state(); //__asm emms;
++ vp8_clear_system_state();
+ start_position = cpi->twopass.stats_in;
+
+ cpi->common.frame_type = KEY_FRAME;
+
+- // is this a forced key frame by interval
++ /* is this a forced key frame by interval */
+ cpi->this_key_frame_forced = cpi->next_key_frame_forced;
+
+- // Clear the alt ref active flag as this can never be active on a key frame
++ /* Clear the alt ref active flag as this can never be active on a key
++ * frame
++ */
+ cpi->source_alt_ref_active = 0;
+
+- // Kf is always a gf so clear frames till next gf counter
++ /* Kf is always a gf so clear frames till next gf counter */
+ cpi->frames_till_gf_update_due = 0;
+
+ cpi->twopass.frames_to_key = 1;
+
+- // Take a copy of the initial frame details
++ /* Take a copy of the initial frame details */
+ vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
+
+- cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group
+- cpi->twopass.kf_group_error_left = 0; // Group modified error score.
++ cpi->twopass.kf_group_bits = 0;
++ cpi->twopass.kf_group_error_left = 0;
+
+ kf_mod_err = calculate_modified_err(cpi, this_frame);
+
+- // find the next keyframe
++ /* find the next keyframe */
+ i = 0;
+ while (cpi->twopass.stats_in < cpi->twopass.stats_in_end)
+ {
+- // Accumulate kf group error
++ /* Accumulate kf group error */
+ kf_group_err += calculate_modified_err(cpi, this_frame);
+
+- // These figures keep intra and coded error counts for all frames including key frames in the group.
+- // The effect of the key frame itself can be subtracted out using the first_frame data collected above
++ /* These figures keep intra and coded error counts for all frames
++ * including key frames in the group. The effect of the key frame
++ * itself can be subtracted out using the first_frame data
++ * collected above
++ */
+ kf_group_intra_err += this_frame->intra_error;
+ kf_group_coded_err += this_frame->coded_error;
+
+- // load a the next frame's stats
++ /* load the next frame's stats */
+ vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
+ input_stats(cpi, this_frame);
+
+- // Provided that we are not at the end of the file...
++ /* Provided that we are not at the end of the file... */
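
The scene-cut checks that follow keep a rolling product of the last eight per-frame prediction decay rates. Read as a standalone helper it is simply this (an illustrative sketch with a hypothetical name, not code from the diff):

    /* Sketch: product of the last eight prediction decay rates, matching
     * the recent_loop_decay[] bookkeeping in the loop below. */
    static double recent_decay(const double recent_loop_decay[8])
    {
        double decay_accumulator = 1.0;
        int j;
        for (j = 0; j < 8; j++)
            decay_accumulator *= recent_loop_decay[j];
        return decay_accumulator;
    }
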
+ if (cpi->oxcf.auto_key
+ && lookup_next_frame_stats(cpi, &next_frame) != EOF)
+ {
+- // Normal scene cut check
++ /* Normal scene cut check */
+ if ( ( i >= MIN_GF_INTERVAL ) &&
+ test_candidate_kf(cpi, &last_frame, this_frame, &next_frame) )
+ {
+ break;
+ }
+
+- // How fast is prediction quality decaying
++ /* How fast is prediction quality decaying */
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+
+- // We want to know something about the recent past... rather than
+- // as used elsewhere where we are concened with decay in prediction
+- // quality since the last GF or KF.
++ /* We want to know something about the recent past... rather than
++ * as used elsewhere where we are concerned with decay in prediction
++ * quality since the last GF or KF.
++ */
+ recent_loop_decay[i%8] = loop_decay_rate;
+ decay_accumulator = 1.0;
+ for (j = 0; j < 8; j++)
+@@ -2671,8 +2795,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ decay_accumulator = decay_accumulator * recent_loop_decay[j];
+ }
+
+- // Special check for transition or high motion followed by a
+- // to a static scene.
++ /* Special check for transition or high motion followed by a
++ * static scene.
++ */
+ if ( detect_transition_to_still( cpi, i,
+ (cpi->key_frame_frequency-i),
+ loop_decay_rate,
+@@ -2682,11 +2807,12 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ }
+
+
+- // Step on to the next frame
++ /* Step on to the next frame */
+ cpi->twopass.frames_to_key ++;
+
+- // If we don't have a real key frame within the next two
+- // forcekeyframeevery intervals then break out of the loop.
++ /* If we don't have a real key frame within the next two
++ * forcekeyframeevery intervals then break out of the loop.
++ */
+ if (cpi->twopass.frames_to_key >= 2 *(int)cpi->key_frame_frequency)
+ break;
+ } else
+@@ -2695,10 +2821,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ i++;
+ }
+
+- // If there is a max kf interval set by the user we must obey it.
+- // We already breakout of the loop above at 2x max.
+- // This code centers the extra kf if the actual natural
+- // interval is between 1x and 2x
++ /* If there is a max kf interval set by the user we must obey it.
++ * We already break out of the loop above at 2x max.
++ * This code centers the extra kf if the actual natural
++ * interval is between 1x and 2x
++ */
+ if (cpi->oxcf.auto_key
+ && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency )
+ {
+@@ -2707,29 +2834,29 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+
+ cpi->twopass.frames_to_key /= 2;
+
+- // Copy first frame details
++ /* Copy first frame details */
+ vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
+
+- // Reset to the start of the group
++ /* Reset to the start of the group */
+ reset_fpf_position(cpi, start_position);
+
+ kf_group_err = 0;
+ kf_group_intra_err = 0;
+ kf_group_coded_err = 0;
+
+- // Rescan to get the correct error data for the forced kf group
++ /* Rescan to get the correct error data for the forced kf group */
+ for( i = 0; i < cpi->twopass.frames_to_key; i++ )
+ {
+- // Accumulate kf group errors
++ /* Accumulate kf group errors */
+ kf_group_err += calculate_modified_err(cpi, &tmp_frame);
+ kf_group_intra_err += tmp_frame.intra_error;
+ kf_group_coded_err += tmp_frame.coded_error;
+
+- // Load a the next frame's stats
++ /* Load the next frame's stats */
+ input_stats(cpi, &tmp_frame);
+ }
+
+- // Reset to the start of the group
++ /* Reset to the start of the group */
+ reset_fpf_position(cpi, current_pos);
+
+ cpi->next_key_frame_forced = 1;
+@@ -2737,58 +2864,63 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ else
+ cpi->next_key_frame_forced = 0;
+
+- // Special case for the last frame of the file
++ /* Special case for the last frame of the file */
+ if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end)
+ {
+- // Accumulate kf group error
++ /* Accumulate kf group error */
+ kf_group_err += calculate_modified_err(cpi, this_frame);
+
+- // These figures keep intra and coded error counts for all frames including key frames in the group.
+- // The effect of the key frame itself can be subtracted out using the first_frame data collected above
++ /* These figures keep intra and coded error counts for all frames
++ * including key frames in the group. The effect of the key frame
++ * itself can be subtracted out using the first_frame data
++ * collected above
++ */
+ kf_group_intra_err += this_frame->intra_error;
+ kf_group_coded_err += this_frame->coded_error;
+ }
+
+- // Calculate the number of bits that should be assigned to the kf group.
++ /* Calculate the number of bits that should be assigned to the kf group. */
+ if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0))
+ {
+- // Max for a single normal frame (not key frame)
++ /* Max for a single normal frame (not key frame) */
+ int max_bits = frame_max_bits(cpi);
+
+- // Maximum bits for the kf group
++ /* Maximum bits for the kf group */
+ int64_t max_grp_bits;
+
+- // Default allocation based on bits left and relative
+- // complexity of the section
++ /* Default allocation based on bits left and relative
++ * complexity of the section
++ */
+ cpi->twopass.kf_group_bits = (int64_t)( cpi->twopass.bits_left *
+ ( kf_group_err /
+ cpi->twopass.modified_error_left ));
+
+- // Clip based on maximum per frame rate defined by the user.
++ /* Clip based on maximum per frame rate defined by the user. */
+ max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key;
+ if (cpi->twopass.kf_group_bits > max_grp_bits)
+ cpi->twopass.kf_group_bits = max_grp_bits;
+
+- // Additional special case for CBR if buffer is getting full.
++ /* Additional special case for CBR if buffer is getting full. */
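
The hunk that follows implements the adjustment just announced. A simplified sketch of the idea (names mirror the diff, but this is illustrative only and omits the intermediate scaling the real code applies between the optimal level and the high water mark):

    /* Sketch: when the buffer sits at or above its optimal level, keep the
     * kf group allocation from falling below roughly the clip average. */
    static int64_t cbr_kf_group_floor(int64_t kf_group_bits,
                                      int64_t buffer_level,
                                      int64_t opt_buffer_lvl,
                                      int64_t maximum_buffer_size,
                                      int64_t av_per_frame_bandwidth,
                                      int frames_to_key)
    {
        if (buffer_level >= opt_buffer_lvl)
        {
            int64_t high_water_mark = (opt_buffer_lvl + maximum_buffer_size) >> 1;
            int64_t av_group_bits = av_per_frame_bandwidth * (int64_t)frames_to_key;

            /* At or above the high water mark: allocate at least the average */
            if (buffer_level >= high_water_mark && kf_group_bits < av_group_bits)
                kf_group_bits = av_group_bits;
        }
        return kf_group_bits;
    }
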
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+- int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
+- int buffer_lvl = cpi->buffer_level;
++ int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
++ int64_t buffer_lvl = cpi->buffer_level;
+
+- // If the buffer is near or above the optimal and this kf group is
+- // not being allocated much then increase the allocation a bit.
++ /* If the buffer is near or above the optimal and this kf group is
++ * not being allocated much then increase the allocation a bit.
++ */
+ if (buffer_lvl >= opt_buffer_lvl)
+ {
+- int high_water_mark = (opt_buffer_lvl +
++ int64_t high_water_mark = (opt_buffer_lvl +
+ cpi->oxcf.maximum_buffer_size) >> 1;
+
+ int64_t av_group_bits;
+
+- // Av bits per frame * number of frames
++ /* Av bits per frame * number of frames */
+ av_group_bits = (int64_t)cpi->av_per_frame_bandwidth *
+ (int64_t)cpi->twopass.frames_to_key;
+
+- // We are at or above the maximum.
++ /* We are at or above the maximum. */
+ if (cpi->buffer_level >= high_water_mark)
+ {
+ int64_t min_group_bits;
+@@ -2800,7 +2932,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ if (cpi->twopass.kf_group_bits < min_group_bits)
+ cpi->twopass.kf_group_bits = min_group_bits;
+ }
+- // We are above optimal but below the maximum
++ /* We are above optimal but below the maximum */
+ else if (cpi->twopass.kf_group_bits < av_group_bits)
+ {
+ int64_t bits_below_av = av_group_bits -
+@@ -2817,13 +2949,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ else
+ cpi->twopass.kf_group_bits = 0;
+
+- // Reset the first pass file position
++ /* Reset the first pass file position */
+ reset_fpf_position(cpi, start_position);
+
+- // determine how big to make this keyframe based on how well the subsequent frames use inter blocks
++ /* determine how big to make this keyframe based on how well the
++ * subsequent frames use inter blocks
++ */
+ decay_accumulator = 1.0;
+ boost_score = 0.0;
+- loop_decay_rate = 1.00; // Starting decay rate
++ loop_decay_rate = 1.00; /* Starting decay rate */
+
+ for (i = 0 ; i < cpi->twopass.frames_to_key ; i++)
+ {
+@@ -2842,7 +2976,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ if (r > RMAX)
+ r = RMAX;
+
+- // How fast is prediction quality decaying
++ /* How fast is prediction quality decaying */
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+
+ decay_accumulator = decay_accumulator * loop_decay_rate;
+@@ -2875,31 +3009,26 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+
+ avg_stats(&sectionstats);
+
+- cpi->twopass.section_intra_rating =
+- sectionstats.intra_error
+- / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
++ cpi->twopass.section_intra_rating = (unsigned int)
++ (sectionstats.intra_error
++ / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
+
+ Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+- // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
+- //{
+ cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025);
+
+ if (cpi->twopass.section_max_qfactor < 0.80)
+ cpi->twopass.section_max_qfactor = 0.80;
+-
+- //}
+- //else
+- // cpi->twopass.section_max_qfactor = 1.0;
+ }
+
+- // When using CBR apply additional buffer fullness related upper limits
++ /* When using CBR apply additional buffer fullness related upper limits */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+ double max_boost;
+
+ if (cpi->drop_frames_allowed)
+ {
+- int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100);
++ int df_buffer_level = (int)(cpi->oxcf.drop_frames_water_mark
++ * (cpi->oxcf.optimal_buffer_level / 100));
+
+ if (cpi->buffer_level > df_buffer_level)
+ max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth);
+@@ -2919,18 +3048,18 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ boost_score = max_boost;
+ }
+
+- // Reset the first pass file position
++ /* Reset the first pass file position */
+ reset_fpf_position(cpi, start_position);
+
+- // Work out how many bits to allocate for the key frame itself
++ /* Work out how many bits to allocate for the key frame itself */
+ if (1)
+ {
+- int kf_boost = boost_score;
++ int kf_boost = (int)boost_score;
+ int allocation_chunks;
+ int Counter = cpi->twopass.frames_to_key;
+ int alt_kf_bits;
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+- // Min boost based on kf interval
++ /* Min boost based on kf interval */
+ #if 0
+
+ while ((kf_boost < 48) && (Counter > 0))
+@@ -2948,32 +3077,33 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ if (kf_boost > 48) kf_boost = 48;
+ }
+
+- // bigger frame sizes need larger kf boosts, smaller frames smaller boosts...
++ /* bigger frame sizes need larger kf boosts, smaller frames smaller
++ * boosts...
++ */
+ if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240))
+ kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240);
+ else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240))
+ kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height);
+
+- kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100
+-
+- // Adjustment to boost based on recent average q
+- //kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100;
+-
+- if (kf_boost < 250) // Min KF boost
++ /* Min KF boost */
++ kf_boost = (int)((double)kf_boost * 100.0) >> 4; /* Scale 16 to 100 */
++ if (kf_boost < 250)
+ kf_boost = 250;
+
+- // We do three calculations for kf size.
+- // The first is based on the error score for the whole kf group.
+- // The second (optionaly) on the key frames own error if this is
+- // smaller than the average for the group.
+- // The final one insures that the frame receives at least the
+- // allocation it would have received based on its own error score vs
+- // the error score remaining
+- // Special case if the sequence appears almost totaly static
+- // as measured by the decay accumulator. In this case we want to
+- // spend almost all of the bits on the key frame.
+- // cpi->twopass.frames_to_key-1 because key frame itself is taken
+- // care of by kf_boost.
++ /*
++ * We do three calculations for kf size.
++ * The first is based on the error score for the whole kf group.
++ * The second (optionally) on the key frame's own error if this is
++ * smaller than the average for the group.
++ * The final one ensures that the frame receives at least the
++ * allocation it would have received based on its own error score vs
++ * the error score remaining.
++ * Special case if the sequence appears almost totally static
++ * as measured by the decay accumulator. In this case we want to
++ * spend almost all of the bits on the key frame.
++ * cpi->twopass.frames_to_key-1 because key frame itself is taken
++ * care of by kf_boost.
++ */
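
A compact restatement of the chunk arithmetic used in the code below may help: each ordinary frame counts as 100 chunks, the key frame counts as its boost, and the key frame's bits follow from the ratio. This is an illustrative sketch only (hypothetical function name; it assumes kf_boost has already been floored at 250 as above, so allocation_chunks is nonzero):

    /* Sketch of the kf_boost / allocation_chunks split described above. */
    static int kf_bits_from_boost(int kf_boost, int frames_to_key,
                                  int64_t kf_group_bits)
    {
        int allocation_chunks = ((frames_to_key - 1) * 100) + kf_boost;

        /* Normalize down, as the diff does, to keep the multiply in range */
        while (kf_boost > 1000)
        {
            kf_boost /= 2;
            allocation_chunks /= 2;
        }
        return (int)((double)kf_boost *
                     ((double)kf_group_bits / (double)allocation_chunks));
    }
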
+ if ( decay_accumulator >= 0.99 )
+ {
+ allocation_chunks =
+@@ -2985,7 +3115,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost;
+ }
+
+- // Normalize Altboost and allocations chunck down to prevent overflow
++ /* Normalize Altboost and allocation chunks down to prevent overflow */
+ while (kf_boost > 1000)
+ {
+ kf_boost /= 2;
+@@ -2994,20 +3124,21 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+
+ cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits;
+
+- // Calculate the number of bits to be spent on the key frame
++ /* Calculate the number of bits to be spent on the key frame */
+ cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks));
+
+- // Apply an additional limit for CBR
++ /* Apply an additional limit for CBR */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+- if (cpi->twopass.kf_bits > ((3 * cpi->buffer_level) >> 2))
+- cpi->twopass.kf_bits = (3 * cpi->buffer_level) >> 2;
++ if (cpi->twopass.kf_bits > (int)((3 * cpi->buffer_level) >> 2))
++ cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2);
+ }
+
+- // If the key frame is actually easier than the average for the
+- // kf group (which does sometimes happen... eg a blank intro frame)
+- // Then use an alternate calculation based on the kf error score
+- // which should give a smaller key frame.
++ /* If the key frame is actually easier than the average for the
++ * kf group (which does sometimes happen... e.g. a blank intro frame)
++ * Then use an alternate calculation based on the kf error score
++ * which should give a smaller key frame.
++ */
+ if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key)
+ {
+ double alt_kf_grp_bits =
+@@ -3023,9 +3154,10 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ cpi->twopass.kf_bits = alt_kf_bits;
+ }
+ }
+- // Else if it is much harder than other frames in the group make sure
+- // it at least receives an allocation in keeping with its relative
+- // error score
++ /* Else if it is much harder than other frames in the group make sure
++ * it at least receives an allocation in keeping with its relative
++ * error score
++ */
+ else
+ {
+ alt_kf_bits =
+@@ -3040,17 +3172,23 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ }
+
+ cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits;
+- cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance
++ /* Add in the minimum frame allowance */
++ cpi->twopass.kf_bits += cpi->min_frame_bandwidth;
++
++ /* Per frame bit target for this frame */
++ cpi->per_frame_bandwidth = cpi->twopass.kf_bits;
+
+- cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame
+- cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate; // Convert to a per second bitrate
++ /* Convert to a per second bitrate */
++ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
++ cpi->output_frame_rate);
+ }
+
+- // Note the total error score of the kf group minus the key frame itself
++ /* Note the total error score of the kf group minus the key frame itself */
+ cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err);
+
+- // Adjust the count of total modified error left.
+- // The count of bits left is adjusted elsewhere based on real coded frame sizes
++ /* Adjust the count of total modified error left. The count of bits left
++ * is adjusted elsewhere based on real coded frame sizes
++ */
+ cpi->twopass.modified_error_left -= kf_group_err;
+
+ if (cpi->oxcf.allow_spatial_resampling)
+@@ -3063,7 +3201,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ int new_width = cpi->oxcf.Width;
+ int new_height = cpi->oxcf.Height;
+
+- int projected_buffer_level = cpi->buffer_level;
++ int projected_buffer_level = (int)cpi->buffer_level;
+ int tmp_q;
+
+ double projected_bits_perframe;
+@@ -3076,40 +3214,47 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height))
+ last_kf_resampled = 1;
+
+- // Set back to unscaled by defaults
++ /* Set back to unscaled by default */
+ cpi->common.horiz_scale = NORMAL;
+ cpi->common.vert_scale = NORMAL;
+
+- // Calculate Average bits per frame.
+- //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
++ /* Calculate Average bits per frame. */
+ av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
+- //if ( av_bits_per_frame < 0.0 )
+- // av_bits_per_frame = 0.0
+
+- // CBR... Use the clip average as the target for deciding resample
++ /* CBR... Use the clip average as the target for deciding resample */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+ bits_per_frame = av_bits_per_frame;
+ }
+
+- // In VBR we want to avoid downsampling in easy section unless we are under extreme pressure
+- // So use the larger of target bitrate for this sectoion or average bitrate for sequence
++ /* In VBR we want to avoid downsampling in easy sections unless we
++ * are under extreme pressure. So use the larger of target bitrate
++ * for this section or average bitrate for sequence
++ */
+ else
+ {
+- bits_per_frame = cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key; // This accounts for how hard the section is...
++ /* This accounts for how hard the section is... */
++ bits_per_frame = (double)
++ (cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key);
+
+- if (bits_per_frame < av_bits_per_frame) // Dont turn to resampling in easy sections just because they have been assigned a small number of bits
++ /* Don't turn to resampling in easy sections just because they
++ * have been assigned a small number of bits
++ */
++ if (bits_per_frame < av_bits_per_frame)
+ bits_per_frame = av_bits_per_frame;
+ }
+
+- // bits_per_frame should comply with our minimum
++ /* bits_per_frame should comply with our minimum */
+ if (bits_per_frame < (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100))
+ bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
+
+- // Work out if spatial resampling is necessary
+- kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio);
++ /* Work out if spatial resampling is necessary */
++ kf_q = estimate_kf_group_q(cpi, err_per_frame,
++ (int)bits_per_frame, group_iiratio);
+
+- // If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section
++ /* If we project a required Q higher than the maximum allowed Q then
++ * make a guess at the actual size of frames in this section
++ */
+ projected_bits_perframe = bits_per_frame;
+ tmp_q = kf_q;
+
+@@ -3119,8 +3264,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ tmp_q--;
+ }
+
+- // Guess at buffer level at the end of the section
+- projected_buffer_level = cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key);
++ /* Guess at buffer level at the end of the section */
++ projected_buffer_level = (int)
++ (cpi->buffer_level - (int)
++ ((projected_bits_perframe - av_bits_per_frame) *
++ cpi->twopass.frames_to_key));
+
+ if (0)
+ {
+@@ -3129,15 +3277,17 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ fclose(f);
+ }
+
+- // The trigger for spatial resampling depends on the various parameters such as whether we are streaming (CBR) or VBR.
++ /* The trigger for spatial resampling depends on the various
++ * parameters such as whether we are streaming (CBR) or VBR.
++ */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+- // Trigger resample if we are projected to fall below down sample level or
+- // resampled last time and are projected to remain below the up sample level
++ /* Trigger resample if we are projected to fall below down
++ * sample level or resampled last time and are projected to
++ * remain below the up sample level
++ */
+ if ((projected_buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) ||
+ (last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))))
+- //( ((cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))) &&
+- // ((projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))) ))
+ resample_trigger = 1;
+ else
+ resample_trigger = 0;
+@@ -3147,9 +3297,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
+ int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
+
+- if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced
+- ((kf_q > cpi->worst_quality) && // Projected Q higher than allowed and ...
+- (over_spend > clip_bits / 20))) // ... Overspend > 5% of total bits
++ /* If triggered last time the threshold for triggering again is
++ * reduced:
++ *
++ * Projected Q higher than allowed and Overspend > 5% of total
++ * bits
++ */
++ if ((last_kf_resampled && (kf_q > cpi->worst_quality)) ||
++ ((kf_q > cpi->worst_quality) &&
++ (over_spend > clip_bits / 20)))
+ resample_trigger = 1;
+ else
+ resample_trigger = 0;
+@@ -3171,13 +3327,19 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+ new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs;
+ new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs;
+
+- // Reducing the area to 1/4 does not reduce the complexity (err_per_frame) to 1/4...
+- // effective_sizeratio attempts to provide a crude correction for this
++ /* Reducing the area to 1/4 does not reduce the complexity
++ * (err_per_frame) to 1/4... effective_sizeratio attempts
++ * to provide a crude correction for this
++ */
+ effective_size_ratio = (double)(new_width * new_height) / (double)(cpi->oxcf.Width * cpi->oxcf.Height);
+ effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0;
+
+- // Now try again and see what Q we get with the smaller image size
+- kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio);
++ /* Now try again and see what Q we get with the smaller
++ * image size
++ */
++ kf_q = estimate_kf_group_q(cpi,
++ err_per_frame * effective_size_ratio,
++ (int)bits_per_frame, group_iiratio);
+
+ if (0)
+ {
+diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c
+index 4c92281..ce2ce08 100644
+--- a/vp8/encoder/lookahead.c
++++ b/vp8/encoder/lookahead.c
+@@ -118,10 +118,11 @@ vp8_lookahead_push(struct lookahead_ctx *ctx,
+ ctx->sz++;
+ buf = pop(ctx, &ctx->write_idx);
+
+- // Only do this partial copy if the following conditions are all met:
+- // 1. Lookahead queue has has size of 1.
+- // 2. Active map is provided.
+- // 3. This is not a key frame, golden nor altref frame.
++ /* Only do this partial copy if the following conditions are all met:
++ * 1. Lookahead queue has a size of 1.
++ * 2. Active map is provided.
++ * 3. This is not a key frame, golden nor altref frame.
++ */
+ if (ctx->max_sz == 1 && active_map && !flags)
+ {
+ for (row = 0; row < mb_rows; ++row)
+@@ -130,18 +131,18 @@ vp8_lookahead_push(struct lookahead_ctx *ctx,
+
+ while (1)
+ {
+- // Find the first active macroblock in this row.
++ /* Find the first active macroblock in this row. */
+ for (; col < mb_cols; ++col)
+ {
+ if (active_map[col])
+ break;
+ }
+
+- // No more active macroblock in this row.
++ /* No more active macroblock in this row. */
+ if (col == mb_cols)
+ break;
+
+- // Find the end of active region in this row.
++ /* Find the end of active region in this row. */
+ active_end = col;
+
+ for (; active_end < mb_cols; ++active_end)
+ {
+ if (!active_map[active_end])
+ break;
+ }
+
+- // Only copy this active region.
++ /* Only copy this active region. */
+ vp8_copy_and_extend_frame_with_rect(src, &buf->img,
+ row << 4,
+ col << 4, 16,
+ (active_end - col) << 4);
+
+- // Start again from the end of this active region.
++ /* Start again from the end of this active region. */
+ col = active_end;
+ }
+
+diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
+index 67e4f7e..b08c7a5 100644
+--- a/vp8/encoder/mcomp.c
++++ b/vp8/encoder/mcomp.c
+@@ -25,26 +25,35 @@ static int mv_mode_cts [4] [2];
+
+ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
+ {
+- // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
+- // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
+- // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
+- // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
++ /* MV costing is based on the distribution of vectors in the previous
++ * frame and as such will tend to overstate the cost of vectors. In
++ * addition coding a new vector can have a knock-on effect on the cost
++ * of subsequent vectors and the quality of prediction from NEAR and
++ * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
++ * limited extent, for some account to be taken of these factors.
++ */
+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
+ }
+
+ static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
+ {
+- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
+- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
+- * error_per_bit + 128) >> 8;
++ /* Ignore mv costing if mvcost is NULL */
++ if (mvcost)
++ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
++ mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
++ * error_per_bit + 128) >> 8;
++ return 0;
+ }
+
+ static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
+ {
+ /* Calculate sad error cost on full pixel basis. */
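
The body that follows adds a NULL guard to this cost helper. Restated as a standalone sketch with hypothetical names (table lookups give the bit cost per vector component; +128 rounds before the >>8):

    /* Sketch: full-pixel SAD cost with the NULL guard the patch introduces. */
    static int sad_err_cost_sketch(int drow, int dcol, int *sadcost[2],
                                   int error_per_bit)
    {
        if (!sadcost)
            return 0;   /* costing disabled */
        return ((sadcost[0][drow] + sadcost[1][dcol]) * error_per_bit + 128) >> 8;
    }
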
+- return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
+- mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
+- * error_per_bit + 128) >> 8;
++ /* Ignore mv costing if mvsadcost is NULL */
++ if (mvsadcost)
++ return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
++ mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
++ * error_per_bit + 128) >> 8;
++ return 0;
+ }
+
+ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
+@@ -53,7 +62,7 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
+ int search_site_count = 0;
+
+
+- // Generate offsets for 4 search sites per step.
++ /* Generate offsets for 4 search sites per step. */
+ Len = MAX_FIRST_STEP;
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = 0;
+@@ -63,31 +72,31 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
+ while (Len > 0)
+ {
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = -Len;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = Len;
+ search_site_count++;
+
+- // Contract.
++ /* Contract. */
+ Len /= 2;
+ }
+
+@@ -100,7 +109,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
+ int Len;
+ int search_site_count = 0;
+
+- // Generate offsets for 8 search sites per step.
++ /* Generate offsets for 8 search sites per step. */
+ Len = MAX_FIRST_STEP;
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = 0;
+@@ -110,56 +119,56 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
+ while (Len > 0)
+ {
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = -Len;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = Len;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride - Len;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride + Len;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride - Len;
+ search_site_count++;
+
+- // Compute offsets for search sites.
++ /* Compute offsets for search sites. */
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride + Len;
+ search_site_count++;
+
+
+- // Contract.
++ /* Contract. */
+ Len /= 2;
+ }
+
+@@ -176,13 +185,20 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
+ * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
+ * could reduce the area.
+ */
+-#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
+-#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
+-#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
+-#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
++
++/* estimated cost of a motion vector (r,c) */
++#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
++/* pointer to predictor base of a motionvector */
++#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
++/* convert motion vector component to offset for svf calc */
++#define SP(x) (((x)&3)<<1)
++/* returns subpixel variance error function. */
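
The MVC macro above can be read as an ordinary function; the following restatement is illustrative only (hypothetical name, but it mirrors the macro, including the NULL-mvcost guard the patch adds):

    /* Function-style reading of MVC(r,c): estimated cost of vector (r,c)
     * relative to the reference (rr,rc), or 0 when costing is disabled. */
    static int mvc_sketch(int *mvcost[2], int r, int c, int rr, int rc,
                          int error_per_bit)
    {
        if (!mvcost)
            return 0;
        return ((mvcost[0][r - rr] + mvcost[1][c - rc]) * error_per_bit + 128) >> 8;
    }
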
*/ ++#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) + #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; +-#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost +-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best ++/* returns distortion + motion vector cost */ ++#define ERR(r,c) (MVC(r,c)+DIST(r,c)) ++/* checks if (r,c) has better score than previous best */ ++#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;) + + int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int_mv *bestmv, int_mv *ref_mv, +@@ -196,7 +212,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; + int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2; + int tr = br, tc = bc; +- unsigned int besterr = INT_MAX; ++ unsigned int besterr; + unsigned int left, right, up, down, diag; + unsigned int sse; + unsigned int whichdir; +@@ -221,7 +237,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + unsigned char *y; + int buf_r1, buf_r2, buf_c1, buf_c2; + +- // Clamping to avoid out-of-range data access ++ /* Clamping to avoid out-of-range data access */ + buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; + buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; + buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; +@@ -238,19 +254,21 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + + offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; + +- // central mv ++ /* central mv */ + bestmv->as_mv.row <<= 3; + bestmv->as_mv.col <<= 3; + +- // calculate central point error ++ /* calculate central point error */ + besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); + +- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) ++ /* TODO: Each subsequent iteration checks at least one point in common ++ * with the last iteration could be 2 ( if diag selected) ++ */ + while (--halfiters) + { +- // 1/2 pel ++ /* 1/2 pel */ + CHECK_BETTER(left, tr, tc - 2); + CHECK_BETTER(right, tr, tc + 2); + CHECK_BETTER(up, tr - 2, tc); +@@ -274,7 +292,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + break; + } + +- // no reason to check the same one again. ++ /* no reason to check the same one again. 
*/ + if (tr == br && tc == bc) + break; + +@@ -282,8 +300,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + tc = bc; + } + +- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) +- // 1/4 pel ++ /* TODO: Each subsequent iteration checks at least one point in common ++ * with the last iteration could be 2 ( if diag selected) ++ */ ++ ++ /* 1/4 pel */ + while (--quarteriters) + { + CHECK_BETTER(left, tr, tc - 1); +@@ -309,7 +330,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + break; + } + +- // no reason to check the same one again. ++ /* no reason to check the same one again. */ + if (tr == br && tc == bc) + break; + +@@ -367,17 +388,17 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + y_stride = pre_stride; + #endif + +- // central mv ++ /* central mv */ + bestmv->as_mv.row <<= 3; + bestmv->as_mv.col <<= 3; + startmv = *bestmv; + +- // calculate central point error ++ /* calculate central point error */ + bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); + *distortion = bestmse; + bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); + +- // go left then right and check error ++ /* go left then right and check error */ + this_mv.as_mv.row = startmv.as_mv.row; + this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); + thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); +@@ -403,7 +424,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + *sse1 = sse; + } + +- // go up then down and check error ++ /* go up then down and check error */ + this_mv.as_mv.col = startmv.as_mv.col; + this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); + thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); +@@ -430,10 +451,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + } + + +- // now check 1 more diagonal ++ /* now check 1 more diagonal */ + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); +- //for(whichdir =0;whichdir<4;whichdir++) +- //{ + this_mv = startmv; + + switch (whichdir) +@@ -471,10 +490,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + *sse1 = sse; + } + +-// } +- + +- // time to check quarter pels. ++ /* time to check quarter pels. */ + if (bestmv->as_mv.row < startmv.as_mv.row) + y -= y_stride; + +@@ -485,7 +502,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + + + +- // go left then right and check error ++ /* go left then right and check error */ + this_mv.as_mv.row = startmv.as_mv.row; + + if (startmv.as_mv.col & 7) +@@ -521,7 +538,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + *sse1 = sse; + } + +- // go up then down and check error ++ /* go up then down and check error */ + this_mv.as_mv.col = startmv.as_mv.col; + + if (startmv.as_mv.row & 7) +@@ -558,11 +575,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + } + + +- // now check 1 more diagonal ++ /* now check 1 more diagonal */ + whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); + +-// for(whichdir=0;whichdir<4;whichdir++) +-// { + this_mv = startmv; + + switch (whichdir) +@@ -684,17 +699,17 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + y_stride = pre_stride; + #endif + +- // central mv ++ /* central mv */ + bestmv->as_mv.row <<= 3; + bestmv->as_mv.col <<= 3; + startmv = *bestmv; + +- // calculate central point error ++ /* calculate central point error */ + bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); + *distortion = bestmse; + bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); + +- // go left then right and check error ++ /* go left then right and check error */ + this_mv.as_mv.row = startmv.as_mv.row; + this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); + thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); +@@ -720,7 +735,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + *sse1 = sse; + } + +- // go up then down and check error ++ /* go up then down and check error */ + this_mv.as_mv.col = startmv.as_mv.col; + this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); + thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); +@@ -746,7 +761,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + *sse1 = sse; + } + +- // now check 1 more diagonal - ++ /* now check 1 more diagonal - */ + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + this_mv = startmv; + +@@ -855,7 +870,7 @@ int vp8_hex_search + int in_what_stride = pre_stride; + int br, bc; + int_mv this_mv; +- unsigned int bestsad = 0x7fffffff; ++ unsigned int bestsad; + unsigned int thissad; + unsigned char *base_offset; + unsigned char *this_offset; +@@ -869,18 +884,17 @@ int vp8_hex_search + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +- // adjust ref_mv to make sure it is within MV range ++ /* adjust ref_mv to make sure it is within MV range */ + vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + br = ref_mv->as_mv.row; + bc = ref_mv->as_mv.col; + +- // Work out the start point for the search ++ /* Work out the start point for the search */ + base_offset = (unsigned char *)(base_pre + d->offset); + this_offset = base_offset + (br * (pre_stride)) + bc; + this_mv.as_mv.row = br; + this_mv.as_mv.col = bc; +- bestsad = vfp->sdf( what, what_stride, this_offset, +- in_what_stride, 0x7fffffff) ++ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) + + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + #if CONFIG_MULTI_RES_ENCODING +@@ -895,8 +909,7 @@ int vp8_hex_search + dia_range = 8; + #endif + +- // hex search +- //j=0 ++ /* hex search */ + CHECK_BOUNDS(2) + + if(all_in) +@@ -906,7 +919,7 @@ int vp8_hex_search + this_mv.as_mv.row = br + hex[i].row; + this_mv.as_mv.col = bc + hex[i].col; + this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; +- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); ++ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + CHECK_BETTER + } + }else +@@ -917,7 +930,7 @@ int vp8_hex_search + this_mv.as_mv.col = bc + hex[i].col; + CHECK_POINT + this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; +- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); ++ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + CHECK_BETTER + } + } +@@ -943,7 +956,7 
@@ int vp8_hex_search + this_mv.as_mv.row = br + next_chkpts[k][i].row; + this_mv.as_mv.col = bc + next_chkpts[k][i].col; + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; +- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); ++ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + CHECK_BETTER + } + }else +@@ -954,7 +967,7 @@ int vp8_hex_search + this_mv.as_mv.col = bc + next_chkpts[k][i].col; + CHECK_POINT + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; +- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); ++ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + CHECK_BETTER + } + } +@@ -971,7 +984,7 @@ int vp8_hex_search + } + } + +- // check 4 1-away neighbors ++ /* check 4 1-away neighbors */ + cal_neighbors: + for (j = 0; j < dia_range; j++) + { +@@ -985,7 +998,7 @@ cal_neighbors: + this_mv.as_mv.row = br + neighbors[i].row; + this_mv.as_mv.col = bc + neighbors[i].col; + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; +- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); ++ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + CHECK_BETTER + } + }else +@@ -996,7 +1009,7 @@ cal_neighbors: + this_mv.as_mv.col = bc + neighbors[i].col; + CHECK_POINT + this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; +- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); ++ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + CHECK_BETTER + } + } +@@ -1047,7 +1060,8 @@ int vp8_diamond_search_sad_c + int tot_steps; + int_mv this_mv; + +- int bestsad = INT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + int best_site = 0; + int last_site = 0; + +@@ -1058,10 +1072,12 @@ int vp8_diamond_search_sad_c + search_site *ss; + + unsigned char *check_here; +- int thissad; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; ++ ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +@@ -1072,17 +1088,18 @@ int vp8_diamond_search_sad_c + best_mv->as_mv.row = ref_row; + best_mv->as_mv.col = ref_col; + +- // Work out the start point for the search ++ /* Work out the start point for the search */ + in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); + best_address = in_what; + +- // Check the starting position +- bestsad = fn_ptr->sdf(what, what_stride, in_what, +- in_what_stride, 0x7fffffff) +- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); ++ /* Check the starting position */ ++ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) ++ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + +- // search_param determines the length of the initial step and hence the number of iterations +- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. ++ /* search_param determines the length of the initial step and hence ++ * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : ++ * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
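The hunks above seed the unsigned best-SAD accumulators with UINT_MAX instead of the signed-looking magic constant 0x7fffffff, and later hunks drop the now-redundant INT_MAX bail-out returns. A minimal standalone sketch of the contract involved, assuming an sdf()-style early-out hook (toy code, not the libvpx implementation):

    #include <limits.h>

    /* Toy stand-in for an sdf() hook: accumulate absolute differences,
     * but give up once the running total reaches the caller's current
     * best.  A correct "no best yet" seed must therefore be the
     * maximum of the accumulator's type, i.e. UINT_MAX here. */
    static unsigned int toy_sdf(const unsigned char *a, const unsigned char *b,
                                int n, unsigned int best)
    {
        unsigned int sad = 0;
        int i;
        for (i = 0; i < n && sad < best; i++)
            sad += (a[i] > b[i]) ? (unsigned int)(a[i] - b[i])
                                 : (unsigned int)(b[i] - a[i]);
        return sad;
    }

    /* First call per search: toy_sdf(what, ref, n, UINT_MAX), so the
     * starting measurement is never clipped; bestsad then tightens the
     * bound for every subsequent candidate. */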
++ */ + ss = &x->ss[search_param * x->searches_per_step]; + tot_steps = (x->ss_count / x->searches_per_step) - search_param; + +@@ -1092,7 +1109,7 @@ int vp8_diamond_search_sad_c + { + for (j = 0 ; j < x->searches_per_step ; j++) + { +- // Trap illegal vectors ++ /* Trap illegal vectors */ + this_row_offset = best_mv->as_mv.row + ss[i].mv.row; + this_col_offset = best_mv->as_mv.col + ss[i].mv.col; + +@@ -1101,14 +1118,14 @@ int vp8_diamond_search_sad_c + + { + check_here = ss[i].offset + best_address; +- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); ++ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.as_mv.row = this_row_offset; + this_mv.as_mv.col = this_col_offset; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, +- mvsadcost, sad_per_bit); ++ mvsadcost, sad_per_bit); + + if (thissad < bestsad) + { +@@ -1135,11 +1152,8 @@ int vp8_diamond_search_sad_c + this_mv.as_mv.row = best_mv->as_mv.row << 3; + this_mv.as_mv.col = best_mv->as_mv.col << 3; + +- if (bestsad == INT_MAX) +- return INT_MAX; +- +- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); ++ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + int vp8_diamond_search_sadx4 +@@ -1170,7 +1184,8 @@ int vp8_diamond_search_sadx4 + int tot_steps; + int_mv this_mv; + +- unsigned int bestsad = UINT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + int best_site = 0; + int last_site = 0; + +@@ -1181,10 +1196,12 @@ int vp8_diamond_search_sadx4 + search_site *ss; + + unsigned char *check_here; +- unsigned int thissad; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; ++ ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +@@ -1195,17 +1212,18 @@ int vp8_diamond_search_sadx4 + best_mv->as_mv.row = ref_row; + best_mv->as_mv.col = ref_col; + +- // Work out the start point for the search ++ /* Work out the start point for the search */ + in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); + best_address = in_what; + +- // Check the starting position +- bestsad = fn_ptr->sdf(what, what_stride, +- in_what, in_what_stride, 0x7fffffff) +- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); ++ /* Check the starting position */ ++ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) ++ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + +- // search_param determines the length of the initial step and hence the number of iterations +- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. ++ /* search_param determines the length of the initial step and hence the ++ * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = ++ * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. ++ */ + ss = &x->ss[search_param * x->searches_per_step]; + tot_steps = (x->ss_count / x->searches_per_step) - search_param; + +@@ -1215,8 +1233,10 @@ int vp8_diamond_search_sadx4 + { + int all_in = 1, t; + +- // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of +- // checking 4 bounds for each points. 
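Each of the mvsadcost hunks in this file makes the same mechanical change: a braced initializer built from runtime values becomes a bare declaration plus two assignments. This fits the C90 cleanup visible throughout the diff (// comments converted to /* */, declarations kept ahead of statements), since strict C90 only allows constant expressions in initializer lists for automatic aggregates. A minimal sketch with an illustrative stand-in type:

    /* "mb_sketch" stands in for MACROBLOCK; illustrative only. */
    struct mb_sketch { int *mvsadcost[2]; };

    static void c90_style(struct mb_sketch *x)
    {
        /* C90-hostile form the patch removes:
         *     int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
         */
        int *mvsadcost[2];

        mvsadcost[0] = x->mvsadcost[0];
        mvsadcost[1] = x->mvsadcost[1];
        (void)mvsadcost;
    }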
++ /* To know if all neighbor points are within the bounds, 4 bounds ++ * checking are enough instead of checking 4 bounds for each ++ * points. ++ */ + all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min); + all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max); + all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min); +@@ -1228,7 +1248,7 @@ int vp8_diamond_search_sadx4 + + for (j = 0 ; j < x->searches_per_step ; j += 4) + { +- unsigned char *block_offset[4]; ++ const unsigned char *block_offset[4]; + + for (t = 0; t < 4; t++) + block_offset[t] = ss[i+t].offset + best_address; +@@ -1257,7 +1277,7 @@ int vp8_diamond_search_sadx4 + { + for (j = 0 ; j < x->searches_per_step ; j++) + { +- // Trap illegal vectors ++ /* Trap illegal vectors */ + this_row_offset = best_mv->as_mv.row + ss[i].mv.row; + this_col_offset = best_mv->as_mv.col + ss[i].mv.col; + +@@ -1265,14 +1285,14 @@ int vp8_diamond_search_sadx4 + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) + { + check_here = ss[i].offset + best_address; +- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); ++ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.as_mv.row = this_row_offset; + this_mv.as_mv.col = this_col_offset; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, +- mvsadcost, sad_per_bit); ++ mvsadcost, sad_per_bit); + + if (thissad < bestsad) + { +@@ -1299,11 +1319,8 @@ int vp8_diamond_search_sadx4 + this_mv.as_mv.row = best_mv->as_mv.row << 3; + this_mv.as_mv.col = best_mv->as_mv.col << 3; + +- if (bestsad == INT_MAX) +- return INT_MAX; +- +- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); ++ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, +@@ -1321,11 +1338,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + unsigned char *bestaddress; + int_mv *best_mv = &d->bmi.mv; + int_mv this_mv; +- int bestsad = INT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + int r, c; + + unsigned char *check_here; +- int thissad; + + int ref_row = ref_mv->as_mv.row; + int ref_col = ref_mv->as_mv.col; +@@ -1335,24 +1352,29 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + int col_min = ref_col - distance; + int col_max = ref_col + distance; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; ++ ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +- // Work out the mid point for the search ++ /* Work out the mid point for the search */ + in_what = base_pre + d->offset; + bestaddress = in_what + (ref_row * pre_stride) + ref_col; + + best_mv->as_mv.row = ref_row; + best_mv->as_mv.col = ref_col; + +- // Baseline value at the centre ++ /* Baseline value at the centre */ + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, +- in_what_stride, 0x7fffffff) +- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); ++ in_what_stride, UINT_MAX) ++ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + +- // Apply further limits to prevent us looking using 
vectors that stretch beyiond the UMV border ++ /* Apply further limits to prevent us looking using vectors that ++ * stretch beyond the UMV border ++ */ + if (col_min < x->mv_col_min) + col_min = x->mv_col_min; + +@@ -1372,11 +1394,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + + for (c = col_min; c < col_max; c++) + { +- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); ++ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + + this_mv.as_mv.col = c; +- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, +- mvsadcost, sad_per_bit); ++ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, ++ mvsadcost, sad_per_bit); + + if (thissad < bestsad) + { +@@ -1393,11 +1415,8 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + this_mv.as_mv.row = best_mv->as_mv.row << 3; + this_mv.as_mv.col = best_mv->as_mv.col << 3; + +- if (bestsad < INT_MAX) +- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); +- else +- return INT_MAX; ++ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, +@@ -1415,11 +1434,11 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + unsigned char *bestaddress; + int_mv *best_mv = &d->bmi.mv; + int_mv this_mv; +- unsigned int bestsad = UINT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + int r, c; + + unsigned char *check_here; +- unsigned int thissad; + + int ref_row = ref_mv->as_mv.row; + int ref_col = ref_mv->as_mv.col; +@@ -1431,24 +1450,29 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + + unsigned int sad_array[3]; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; ++ ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +- // Work out the mid point for the search ++ /* Work out the mid point for the search */ + in_what = base_pre + d->offset; + bestaddress = in_what + (ref_row * pre_stride) + ref_col; + + best_mv->as_mv.row = ref_row; + best_mv->as_mv.col = ref_col; + +- // Baseline value at the centre +- bestsad = fn_ptr->sdf(what, what_stride, +- bestaddress, in_what_stride, 0x7fffffff) +- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); ++ /* Baseline value at the centre */ ++ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, ++ in_what_stride, UINT_MAX) ++ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + +- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border ++ /* Apply further limits to prevent us looking using vectors that stretch ++ * beyond the UMV border ++ */ + if (col_min < x->mv_col_min) + col_min = x->mv_col_min; + +@@ -1471,7 +1495,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + { + int i; + +- fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); ++ fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); + + for (i = 0; i < 3; i++) + { +@@ -1480,8 +1504,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + if (thissad < bestsad) + { +
this_mv.as_mv.col = c; +- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, +- mvsadcost, sad_per_bit); ++ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, ++ mvsadcost, sad_per_bit); + + if (thissad < bestsad) + { +@@ -1499,13 +1523,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + + while (c < col_max) + { +- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); ++ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.as_mv.col = c; +- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, +- mvsadcost, sad_per_bit); ++ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, ++ mvsadcost, sad_per_bit); + + if (thissad < bestsad) + { +@@ -1525,11 +1549,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + this_mv.as_mv.row = best_mv->as_mv.row << 3; + this_mv.as_mv.col = best_mv->as_mv.col << 3; + +- if (bestsad < INT_MAX) +- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); +- else +- return INT_MAX; ++ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, +@@ -1547,11 +1568,11 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + unsigned char *bestaddress; + int_mv *best_mv = &d->bmi.mv; + int_mv this_mv; +- unsigned int bestsad = UINT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + int r, c; + + unsigned char *check_here; +- unsigned int thissad; + + int ref_row = ref_mv->as_mv.row; + int ref_col = ref_mv->as_mv.col; +@@ -1564,24 +1585,29 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); + unsigned int sad_array[3]; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; ++ ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +- // Work out the mid point for the search ++ /* Work out the mid point for the search */ + in_what = base_pre + d->offset; + bestaddress = in_what + (ref_row * pre_stride) + ref_col; + + best_mv->as_mv.row = ref_row; + best_mv->as_mv.col = ref_col; + +- // Baseline value at the centre ++ /* Baseline value at the centre */ + bestsad = fn_ptr->sdf(what, what_stride, +- bestaddress, in_what_stride, 0x7fffffff) +- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); ++ bestaddress, in_what_stride, UINT_MAX) ++ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + +- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border ++ /* Apply further limits to prevent us looking using vectors that stretch ++ * beyond the UMV border ++ */ + if (col_min < x->mv_col_min) + col_min = x->mv_col_min; + +@@ -1604,17 +1630,17 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + { + int i; + +- fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8); ++ fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); + + for (i = 0; i < 8; i++) + { +- thissad = (unsigned int)sad_array8[i]; ++ thissad = sad_array8[i]; + + if (thissad < bestsad) + { + 
this_mv.as_mv.col = c; +- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, +- mvsadcost, sad_per_bit); ++ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, ++ mvsadcost, sad_per_bit); + + if (thissad < bestsad) + { +@@ -1687,11 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + this_mv.as_mv.row = best_mv->as_mv.row << 3; + this_mv.as_mv.col = best_mv->as_mv.col << 3; + +- if (bestsad < INT_MAX) +- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); +- else +- return INT_MAX; ++ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, +@@ -1711,17 +1734,21 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv + unsigned char *best_address = (unsigned char *)(base_pre + d->offset + + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); + unsigned char *check_here; +- unsigned int thissad; + int_mv this_mv; +- unsigned int bestsad = INT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; + ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); ++ bestsad = fn_ptr->sdf(what, what_stride, best_address, ++ in_what_stride, UINT_MAX) ++ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + + for (i=0; ias_mv.row << 3; + this_mv.as_mv.col = ref_mv->as_mv.col << 3; + +- if (bestsad < INT_MAX) +- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); +- else +- return INT_MAX; ++ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, +@@ -1790,17 +1814,21 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + unsigned char *best_address = (unsigned char *)(base_pre + d->offset + + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); + unsigned char *check_here; +- unsigned int thissad; + int_mv this_mv; +- unsigned int bestsad = INT_MAX; ++ unsigned int bestsad; ++ unsigned int thissad; + +- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; ++ int *mvsadcost[2]; + int_mv fcenter_mv; + ++ mvsadcost[0] = x->mvsadcost[0]; ++ mvsadcost[1] = x->mvsadcost[1]; + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; + fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + +- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); ++ bestsad = fn_ptr->sdf(what, what_stride, best_address, ++ in_what_stride, UINT_MAX) ++ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + + for (i=0; ias_mv.row << 3; + this_mv.as_mv.col = ref_mv->as_mv.col << 3; + +- if (bestsad < INT_MAX) +- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) +- + mv_err_cost(&this_mv, center_mv, 
mvcost, x->errorperbit); +- else +- return INT_MAX; ++ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) ++ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + } + + #ifdef ENTROPY_STATS +@@ -1900,16 +1925,16 @@ void print_mode_context(void) + + for (j = 0; j < 6; j++) + { +- fprintf(f, " { // %d \n", j); ++ fprintf(f, " { /* %d */\n", j); + fprintf(f, " "); + + for (i = 0; i < 4; i++) + { + int overal_prob; + int this_prob; +- int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1]; ++ int count; + +- // Overall probs ++ /* Overall probs */ + count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; + + if (count) +@@ -1920,7 +1945,7 @@ void print_mode_context(void) + if (overal_prob == 0) + overal_prob = 1; + +- // context probs ++ /* context probs */ + count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; + + if (count) +@@ -1932,8 +1957,6 @@ void print_mode_context(void) + this_prob = 1; + + fprintf(f, "%5d, ", this_prob); +- //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob); +- //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob); + } + + fprintf(f, " },\n"); +diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h +index cdb0cb6..890113f 100644 +--- a/vp8/encoder/mcomp.h ++++ b/vp8/encoder/mcomp.h +@@ -21,9 +21,16 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); + #endif + + +-#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step +-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units +-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units ++/* The maximum number of steps in a step search given the largest allowed ++ * initial step ++ */ ++#define MAX_MVSEARCH_STEPS 8 ++ ++/* Max full pel mv specified in 1 pel units */ ++#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) ++ ++/* Maximum size of the first step in full pel units */ ++#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) + + extern void print_mode_context(void); + extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); +diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c +index c636c48..c61563c 100644 +--- a/vp8/encoder/modecosts.c ++++ b/vp8/encoder/modecosts.c +@@ -18,6 +18,8 @@ + void vp8_init_mode_costs(VP8_COMP *c) + { + VP8_COMMON *x = &c->common; ++ struct rd_costs_struct *rd_costs = &c->rd_costs; ++ + { + const vp8_tree_p T = vp8_bmode_tree; + +@@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c) + + do + { +- vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], x->kf_bmode_prob[i][j], T); ++ vp8_cost_tokens(rd_costs->bmode_costs[i][j], ++ vp8_kf_bmode_prob[i][j], T); + } + while (++j < VP8_BINTRAMODES); + } + while (++i < VP8_BINTRAMODES); + +- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T); ++ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T); + } +- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree); ++ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob, ++ vp8_sub_mv_ref_tree); + +- vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); +- vp8_cost_tokens(c->mb.mbmode_cost[0], x->kf_ymode_prob, vp8_kf_ymode_tree); ++ vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); ++ vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob, ++ vp8_kf_ymode_tree); + +- 
vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree); +- vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob, vp8_uv_mode_tree); ++ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob, ++ vp8_uv_mode_tree); ++ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, ++ vp8_uv_mode_tree); + } +diff --git a/vp8/encoder/mr_dissim.c b/vp8/encoder/mr_dissim.c +index 7a62a06..71218cc 100644 +--- a/vp8/encoder/mr_dissim.c ++++ b/vp8/encoder/mr_dissim.c +@@ -53,6 +53,7 @@ if(x->mbmi.ref_frame !=INTRA_FRAME) \ + void vp8_cal_dissimilarity(VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; ++ int i; + + /* Note: The first row & first column in mip are outside the frame, which + * were initialized to all 0.(ref_frame, mode, mv...) +@@ -65,14 +66,25 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) + /* Store info for show/no-show frames for supporting alt_ref. + * If parent frame is alt_ref, child has one too. + */ ++ LOWER_RES_FRAME_INFO* store_info ++ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; ++ ++ store_info->frame_type = cm->frame_type; ++ ++ if(cm->frame_type != KEY_FRAME) ++ { ++ store_info->is_frame_dropped = 0; ++ for (i = 1; i < MAX_REF_FRAMES; i++) ++ store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i]; ++ } ++ + if(cm->frame_type != KEY_FRAME) + { + int mb_row; + int mb_col; + /* Point to beginning of allocated MODE_INFO arrays. */ + MODE_INFO *tmp = cm->mip + cm->mode_info_stride; +- LOWER_RES_INFO* store_mode_info +- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; ++ LOWER_RES_MB_INFO* store_mode_info = store_info->mb_info; + + for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) + { +@@ -199,3 +211,26 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) + } + } + } ++ ++/* This function is called only when this frame is dropped at current ++ resolution level. */ ++void vp8_store_drop_frame_info(VP8_COMP *cpi) ++{ ++ /* If the frame is dropped in lower-resolution encoding, this information ++ is passed to higher resolution level so that the encoder knows there ++ is no mode & motion info available. ++ */ ++ if (cpi->oxcf.mr_total_resolutions >1 ++ && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1)) ++ { ++ /* Store info for show/no-show frames for supporting alt_ref. ++ * If parent frame is alt_ref, child has one too. ++ */ ++ LOWER_RES_FRAME_INFO* store_info ++ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; ++ ++ /* Set frame_type to be INTER_FRAME since we won't drop key frame. 
*/ ++ store_info->frame_type = INTER_FRAME; ++ store_info->is_frame_dropped = 1; ++ } ++} +diff --git a/vp8/encoder/mr_dissim.h b/vp8/encoder/mr_dissim.h +index 3d2c203..f8cb135 100644 +--- a/vp8/encoder/mr_dissim.h ++++ b/vp8/encoder/mr_dissim.h +@@ -15,5 +15,6 @@ + + extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi); + extern void vp8_cal_dissimilarity(VP8_COMP *cpi); ++extern void vp8_store_drop_frame_info(VP8_COMP *cpi); + + #endif +diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c +index cee62fa..4680f39 100644 +--- a/vp8/encoder/onyx_if.c ++++ b/vp8/encoder/onyx_if.c +@@ -11,6 +11,7 @@ + + #include "vpx_config.h" + #include "vp8/common/onyxc_int.h" ++#include "vp8/common/blockd.h" + #include "onyx_int.h" + #include "vp8/common/systemdependent.h" + #include "quantize.h" +@@ -55,12 +56,8 @@ extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *po + extern void print_parms(VP8_CONFIG *ocf, char *filenam); + extern unsigned int vp8_get_processor_freq(); + extern void print_tree_update_probs(); +-extern void vp8cx_create_encoder_threads(VP8_COMP *cpi); ++extern int vp8cx_create_encoder_threads(VP8_COMP *cpi); + extern void vp8cx_remove_encoder_threads(VP8_COMP *cpi); +-#if HAVE_NEON +-extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); +-extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); +-#endif + + int vp8_estimate_entropy_savings(VP8_COMP *cpi); + +@@ -143,7 +140,7 @@ extern const int qzbin_factors[129]; + extern void vp8cx_init_quantizer(VP8_COMP *cpi); + extern const int vp8cx_base_skip_false_prob[128]; + +-// Tables relating active max Q to active min Q ++/* Tables relating active max Q to active min Q */ + static const unsigned char kf_low_motion_minq[QINDEX_RANGE] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +@@ -219,9 +216,8 @@ static void save_layer_context(VP8_COMP *cpi) + { + LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer]; + +- // Save layer dependent coding state ++ /* Save layer dependent coding state */ + lc->target_bandwidth = cpi->target_bandwidth; +- //lc->target_bandwidth = cpi->oxcf.target_bandwidth; + lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; + lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; + lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; +@@ -242,7 +238,7 @@ static void save_layer_context(VP8_COMP *cpi) + lc->rate_correction_factor = cpi->rate_correction_factor; + lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor; + lc->gf_rate_correction_factor = cpi->gf_rate_correction_factor; +- lc->zbin_over_quant = cpi->zbin_over_quant; ++ lc->zbin_over_quant = cpi->mb.zbin_over_quant; + lc->inter_frame_target = cpi->inter_frame_target; + lc->total_byte_count = cpi->total_byte_count; + lc->filter_level = cpi->common.filter_level; +@@ -250,15 +246,15 @@ static void save_layer_context(VP8_COMP *cpi) + lc->last_frame_percent_intra = cpi->last_frame_percent_intra; + + memcpy (lc->count_mb_ref_frame_usage, +- cpi->count_mb_ref_frame_usage, +- sizeof(cpi->count_mb_ref_frame_usage)); ++ cpi->mb.count_mb_ref_frame_usage, ++ sizeof(cpi->mb.count_mb_ref_frame_usage)); + } + + static void restore_layer_context(VP8_COMP *cpi, const int layer) + { + LAYER_CONTEXT *lc = &cpi->layer_context[layer]; + +- // Restore layer dependent coding state ++ /* Restore layer dependent coding state */ + cpi->current_layer = layer; + cpi->target_bandwidth = lc->target_bandwidth; + 
cpi->oxcf.target_bandwidth = lc->target_bandwidth; +@@ -271,9 +267,7 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) + cpi->buffer_level = lc->buffer_level; + cpi->bits_off_target = lc->bits_off_target; + cpi->total_actual_bits = lc->total_actual_bits; +- //cpi->worst_quality = lc->worst_quality; + cpi->active_worst_quality = lc->active_worst_quality; +- //cpi->best_quality = lc->best_quality; + cpi->active_best_quality = lc->active_best_quality; + cpi->ni_av_qi = lc->ni_av_qi; + cpi->ni_tot_qi = lc->ni_tot_qi; +@@ -282,26 +276,31 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) + cpi->rate_correction_factor = lc->rate_correction_factor; + cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor; + cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor; +- cpi->zbin_over_quant = lc->zbin_over_quant; ++ cpi->mb.zbin_over_quant = lc->zbin_over_quant; + cpi->inter_frame_target = lc->inter_frame_target; + cpi->total_byte_count = lc->total_byte_count; + cpi->common.filter_level = lc->filter_level; + + cpi->last_frame_percent_intra = lc->last_frame_percent_intra; + +- memcpy (cpi->count_mb_ref_frame_usage, ++ memcpy (cpi->mb.count_mb_ref_frame_usage, + lc->count_mb_ref_frame_usage, +- sizeof(cpi->count_mb_ref_frame_usage)); ++ sizeof(cpi->mb.count_mb_ref_frame_usage)); + } + + static void setup_features(VP8_COMP *cpi) + { +- // Set up default state for MB feature flags +- cpi->mb.e_mbd.segmentation_enabled = 0; +- cpi->mb.e_mbd.update_mb_segmentation_map = 0; +- cpi->mb.e_mbd.update_mb_segmentation_data = 0; +- vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); +- vpx_memset(cpi->mb.e_mbd.segment_feature_data, 0, sizeof(cpi->mb.e_mbd.segment_feature_data)); ++ // If segmentation enabled set the update flags ++ if ( cpi->mb.e_mbd.segmentation_enabled ) ++ { ++ cpi->mb.e_mbd.update_mb_segmentation_map = 1; ++ cpi->mb.e_mbd.update_mb_segmentation_data = 1; ++ } ++ else ++ { ++ cpi->mb.e_mbd.update_mb_segmentation_map = 0; ++ cpi->mb.e_mbd.update_mb_segmentation_data = 0; ++ } + + cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; + cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; +@@ -323,7 +322,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) + vpx_free(cpi->tplist); + cpi->tplist = NULL; + +- // Delete last frame MV storage buffers ++ /* Delete last frame MV storage buffers */ + vpx_free(cpi->lfmv); + cpi->lfmv = 0; + +@@ -333,7 +332,7 @@ + vpx_free(cpi->lf_ref_frame); + cpi->lf_ref_frame = 0; + +- // Delete sementation map ++ /* Delete segmentation map */ + vpx_free(cpi->segmentation_map); + cpi->segmentation_map = 0; + +@@ -349,53 +348,61 @@ static void dealloc_compressor_data(VP8_COMP *cpi) + vpx_free(cpi->tok); + cpi->tok = 0; + +- // Structure used to monitor GF usage ++ /* Structure used to monitor GF usage */ + vpx_free(cpi->gf_active_flags); + cpi->gf_active_flags = 0; + +- // Activity mask based per mb zbin adjustments ++ /* Activity mask based per mb zbin adjustments */ + vpx_free(cpi->mb_activity_map); + cpi->mb_activity_map = 0; +- vpx_free(cpi->mb_norm_activity_map); +- cpi->mb_norm_activity_map = 0; + + vpx_free(cpi->mb.pip); + cpi->mb.pip = 0; ++ ++#if CONFIG_MULTITHREAD ++ vpx_free(cpi->mt_current_mb_col); ++ cpi->mt_current_mb_col = NULL; ++#endif + } + + static void enable_segmentation(VP8_COMP *cpi) + { +- // Set the appropriate feature bit ++ /* Set the appropriate feature bit */ + cpi->mb.e_mbd.segmentation_enabled
= 1; + cpi->mb.e_mbd.update_mb_segmentation_map = 1; + cpi->mb.e_mbd.update_mb_segmentation_data = 1; + } + static void disable_segmentation(VP8_COMP *cpi) + { +- // Clear the appropriate feature bit ++ /* Clear the appropriate feature bit */ + cpi->mb.e_mbd.segmentation_enabled = 0; + } + +-// Valid values for a segment are 0 to 3 +-// Segmentation map is arrange as [Rows][Columns] ++/* Valid values for a segment are 0 to 3 ++ * Segmentation map is arranged as [Rows][Columns] ++ */ + static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) + { +- // Copy in the new segmentation map ++ /* Copy in the new segmentation map */ + vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); + +- // Signal that the map should be updated. ++ /* Signal that the map should be updated. */ + cpi->mb.e_mbd.update_mb_segmentation_map = 1; + cpi->mb.e_mbd.update_mb_segmentation_data = 1; + } + +-// The values given for each segment can be either deltas (from the default value chosen for the frame) or absolute values. +-// +-// Valid range for abs values is (0-127 for MB_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF) +-// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF) +-// +-// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given). +-// +-// ++/* The values given for each segment can be either deltas (from the default ++ * value chosen for the frame) or absolute values. ++ * ++ * Valid range for abs values is: ++ * (0-127 for MB_LVL_ALT_Q), (0-63 for SEGMENT_ALT_LF) ++ * Valid range for delta values is: ++ * (+/-127 for MB_LVL_ALT_Q), (+/-63 for SEGMENT_ALT_LF) ++ * ++ * abs_delta = SEGMENT_DELTADATA (deltas) ++ * abs_delta = SEGMENT_ABSDATA (use the absolute values given). ++ * ++ */ + static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta) + { + cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; +@@ -411,26 +418,6 @@ static void segmentation_test_function(VP8_COMP *cpi) + // Create a temporary map for segmentation data.
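The comment block above documents the per-segment feature ranges and the delta-versus-absolute encoding. A short sketch of how a caller fills the two feature rows, mirroring the cyclic_background_refresh() hunk further down (the 2x4 shape is assumed from MB_LVL_MAX and MAX_MB_SEGMENTS as used in that hunk):

    #include <string.h>

    /* Fill quantizer and loop-filter deltas for segment 1 only; all
     * other entries stay 0, i.e. "no change" under SEGMENT_DELTADATA. */
    static void fill_deltas_sketch(signed char feature_data[2][4],
                                   signed char q_delta, signed char lf_delta)
    {
        memset(feature_data, 0, 2 * 4 * sizeof(signed char));
        feature_data[0][1] = q_delta;   /* MB_LVL_ALT_Q,  segment 1 */
        feature_data[1][1] = lf_delta;  /* MB_LVL_ALT_LF, segment 1 */
    }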
+ CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); + +- // MB loop to set local segmentation map +- /*for ( i = 0; i < cpi->common.mb_rows; i++ ) +- { +- for ( j = 0; j < cpi->common.mb_cols; j++ ) +- { +- //seg_map[(i*cpi->common.mb_cols) + j] = (j % 2) + ((i%2)* 2); +- //if ( j < cpi->common.mb_cols/2 ) +- +- // Segment 1 around the edge else 0 +- if ( (i == 0) || (j == 0) || (i == (cpi->common.mb_rows-1)) || (j == (cpi->common.mb_cols-1)) ) +- seg_map[(i*cpi->common.mb_cols) + j] = 1; +- //else if ( (i < 2) || (j < 2) || (i > (cpi->common.mb_rows-3)) || (j > (cpi->common.mb_cols-3)) ) +- // seg_map[(i*cpi->common.mb_cols) + j] = 2; +- //else if ( (i < 5) || (j < 5) || (i > (cpi->common.mb_rows-6)) || (j > (cpi->common.mb_cols-6)) ) +- // seg_map[(i*cpi->common.mb_cols) + j] = 3; +- else +- seg_map[(i*cpi->common.mb_cols) + j] = 0; +- } +- }*/ +- + // Set the segmentation Map + set_segmentation_map(cpi, seg_map); + +@@ -453,103 +440,78 @@ static void segmentation_test_function(VP8_COMP *cpi) + set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); + + // Delete sementation map +- vpx_free(seg_map); ++ vpx_free(seg_map); + + seg_map = 0; +- + } + +-// A simple function to cyclically refresh the background at a lower Q ++/* A simple function to cyclically refresh the background at a lower Q */ + static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) + { +- unsigned char *seg_map; ++ unsigned char *seg_map = cpi->segmentation_map; + signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; + int i; + int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; + int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; + +- // Create a temporary map for segmentation data. +- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); ++ cpi->cyclic_refresh_q = Q / 2; + +- cpi->cyclic_refresh_q = Q; ++ // Set every macroblock to be eligible for update. ++ // For key frame this will reset seg map to 0. ++ vpx_memset(cpi->segmentation_map, 0, mbs_in_frame); + +- for (i = Q; i > 0; i--) +- { +- if (vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*(Q + 128)) / 64)) +- //if ( vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*((2*Q)+96))/64) ) +- { +- break; +- } +- } +- +- cpi->cyclic_refresh_q = i; +- +- // Only update for inter frames + if (cpi->common.frame_type != KEY_FRAME) + { +- // Cycle through the macro_block rows +- // MB loop to set local segmentation map +- for (i = cpi->cyclic_refresh_mode_index; i < mbs_in_frame; i++) ++ /* Cycle through the macro_block rows */ ++ /* MB loop to set local segmentation map */ ++ i = cpi->cyclic_refresh_mode_index; ++ assert(i < mbs_in_frame); ++ do + { +- // If the MB is as a candidate for clean up then mark it for possible boost/refresh (segment 1) +- // The segment id may get reset to 0 later if the MB gets coded anything other than last frame 0,0 +- // as only (last frame 0,0) MBs are eligable for refresh : that is to say Mbs likely to be background blocks. +- if (cpi->cyclic_refresh_map[i] == 0) +- { +- seg_map[i] = 1; +- } +- else +- { +- seg_map[i] = 0; +- +- // Skip blocks that have been refreshed recently anyway. 
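The cyclic_background_refresh() rewrite in progress here drops the temporary map allocation (seg_map now aliases cpi->segmentation_map) and, in the lines that follow, replaces the bounded for-loop plus separate end-of-frame reset with a single do/while that treats the macroblock map as a ring. A standalone sketch of that traversal, with illustrative names rather than the libvpx ones:

    /* Starting at `start`, mark up to `budget` candidate blocks
     * (map value 0) for refresh, age recently refreshed ones, and
     * wrap at most once around the frame. */
    static int ring_refresh_sketch(signed char *refresh_map,
                                   unsigned char *seg_map,
                                   int mbs_in_frame, int start, int budget)
    {
        int i = start;
        do
        {
            if (refresh_map[i] == 0)
            {
                seg_map[i] = 1;       /* candidate: boost it (segment 1) */
                budget--;
            }
            else if (refresh_map[i] < 0)
                refresh_map[i]++;     /* recently coded: age the counter */

            if (++i == mbs_in_frame)
                i = 0;                /* wrap to the top of the frame */
        }
        while (budget && i != start);
        return i;                     /* next frame resumes here */
    }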
+- if (cpi->cyclic_refresh_map[i] < 0) +- //cpi->cyclic_refresh_map[i] = cpi->cyclic_refresh_map[i] / 16; +- cpi->cyclic_refresh_map[i]++; +- } +- +- +- if (block_count > 0) +- block_count--; +- else +- break; ++ /* If the MB is a candidate for clean up then mark it for ++ * possible boost/refresh (segment 1). The segment id may get ++ * reset to 0 later if the MB gets coded anything other than ++ * last frame 0,0 as only (last frame 0,0) MBs are eligible for ++ * refresh : that is to say MBs likely to be background blocks. ++ */ ++ if (cpi->cyclic_refresh_map[i] == 0) ++ { ++ seg_map[i] = 1; ++ block_count --; ++ } ++ else if (cpi->cyclic_refresh_map[i] < 0) ++ cpi->cyclic_refresh_map[i]++; ++ ++ i++; ++ if (i == mbs_in_frame) ++ i = 0; + + } ++ while(block_count && i != cpi->cyclic_refresh_mode_index); + +- // If we have gone through the frame reset to the start + cpi->cyclic_refresh_mode_index = i; +- +- if (cpi->cyclic_refresh_mode_index >= mbs_in_frame) +- cpi->cyclic_refresh_mode_index = 0; + } + +- // Set the segmentation Map +- set_segmentation_map(cpi, seg_map); +- +- // Activate segmentation. ++ /* Activate segmentation. */ ++ cpi->mb.e_mbd.update_mb_segmentation_map = 1; ++ cpi->mb.e_mbd.update_mb_segmentation_data = 1; + enable_segmentation(cpi); + +- // Set up the quant segment data ++ /* Set up the quant segment data */ + feature_data[MB_LVL_ALT_Q][0] = 0; + feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); + feature_data[MB_LVL_ALT_Q][2] = 0; + feature_data[MB_LVL_ALT_Q][3] = 0; + +- // Set up the loop segment data ++ /* Set up the loop segment data */ + feature_data[MB_LVL_ALT_LF][0] = 0; + feature_data[MB_LVL_ALT_LF][1] = lf_adjustment; + feature_data[MB_LVL_ALT_LF][2] = 0; + feature_data[MB_LVL_ALT_LF][3] = 0; + +- // Initialise the feature data structure +- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 ++ /* Initialise the feature data structure */ + set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); + +- // Delete sementation map +- vpx_free(seg_map); +- +- seg_map = 0; +- + } + + static void set_default_lf_deltas(VP8_COMP *cpi) +@@ -560,16 +522,21 @@ static void set_default_lf_deltas(VP8_COMP *cpi) + vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); + vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); + +- // Test of ref frame deltas ++ /* Test of ref frame deltas */ + cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; + cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0; + cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2; + cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2; + +- cpi->mb.e_mbd.mode_lf_deltas[0] = 4; // BPRED +- cpi->mb.e_mbd.mode_lf_deltas[1] = -2; // Zero +- cpi->mb.e_mbd.mode_lf_deltas[2] = 2; // New mv +- cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv ++ cpi->mb.e_mbd.mode_lf_deltas[0] = 4; /* BPRED */ ++ ++ if(cpi->oxcf.Mode == MODE_REALTIME) ++ cpi->mb.e_mbd.mode_lf_deltas[1] = -12; /* Zero */ ++ else ++ cpi->mb.e_mbd.mode_lf_deltas[1] = -2; /* Zero */ ++ ++ cpi->mb.e_mbd.mode_lf_deltas[2] = 2; /* New mv */ ++ cpi->mb.e_mbd.mode_lf_deltas[3] = 4; /* Split mv */ + } + + /* Convenience macros for mapping speed and mode into a continuous +@@ -669,17 +636,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) + int last_improved_quant = sf->improved_quant; + int ref_frames; + +- // Initialise default mode frequency sampling variables ++ /* Initialise default mode frequency sampling variables */ + for (i = 0; i < MAX_MODES; i ++) + { + cpi->mode_check_freq[i] = 0; +-
cpi->mode_test_hit_counts[i] = 0; + cpi->mode_chosen_counts[i] = 0; + } + +- cpi->mbs_tested_so_far = 0; ++ cpi->mb.mbs_tested_so_far = 0; + +- // best quality defaults ++ /* best quality defaults */ + sf->RD = 1; + sf->search_method = NSTEP; + sf->improved_quant = 1; +@@ -697,17 +663,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->improved_mv_pred = 1; + +- // default thresholds to 0 ++ /* default thresholds to 0 */ + for (i = 0; i < MAX_MODES; i++) + sf->thresh_mult[i] = 0; + + /* Count enabled references */ + ref_frames = 1; +- if (cpi->ref_frame_flags & VP8_LAST_FLAG) ++ if (cpi->ref_frame_flags & VP8_LAST_FRAME) + ref_frames++; +- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) ++ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) + ref_frames++; +- if (cpi->ref_frame_flags & VP8_ALT_FLAG) ++ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) + ref_frames++; + + /* Convert speed to continuous range, with clamping */ +@@ -779,7 +745,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) + switch (Mode) + { + #if !(CONFIG_REALTIME_ONLY) +- case 0: // best quality mode ++ case 0: /* best quality mode */ + sf->first_step = 0; + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + break; +@@ -800,8 +766,9 @@ void vp8_set_speed_features(VP8_COMP *cpi) + sf->improved_quant = 0; + sf->improved_dct = 0; + +- // Only do recode loop on key frames, golden frames and +- // alt ref frames ++ /* Only do recode loop on key frames, golden frames and ++ * alt ref frames ++ */ + sf->recode_loop = 2; + + } +@@ -809,14 +776,14 @@ void vp8_set_speed_features(VP8_COMP *cpi) + if (Speed > 3) + { + sf->auto_filter = 1; +- sf->recode_loop = 0; // recode loop off +- sf->RD = 0; // Turn rd off ++ sf->recode_loop = 0; /* recode loop off */ ++ sf->RD = 0; /* Turn rd off */ + + } + + if (Speed > 4) + { +- sf->auto_filter = 0; // Faster selection of loop filter ++ sf->auto_filter = 0; /* Faster selection of loop filter */ + } + + break; +@@ -839,7 +806,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) + } + + if (Speed > 2) +- sf->auto_filter = 0; // Faster selection of loop filter ++ sf->auto_filter = 0; /* Faster selection of loop filter */ + + if (Speed > 3) + { +@@ -849,7 +816,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) + + if (Speed > 4) + { +- sf->auto_filter = 0; // Faster selection of loop filter ++ sf->auto_filter = 0; /* Faster selection of loop filter */ + sf->search_method = HEX; + sf->iterative_sub_pixel = 0; + } +@@ -870,16 +837,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) + + for (i = 0; i < min; i++) + { +- sum += cpi->error_bins[i]; ++ sum += cpi->mb.error_bins[i]; + } + + total_skip = sum; + sum = 0; + +- // i starts from 2 to make sure thresh started from 2048 ++ /* i starts from 2 to make sure thresh started from 2048 */ + for (; i < 1024; i++) + { +- sum += cpi->error_bins[i]; ++ sum += cpi->mb.error_bins[i]; + + if (10 * sum >= (unsigned int)(cpi->Speed - 6)*(total_mbs - total_skip)) + break; +@@ -930,16 +897,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) + cm->filter_type = SIMPLE_LOOPFILTER; + } + +- // This has a big hit on quality. Last resort ++ /* This has a big hit on quality. Last resort */ + if (Speed >= 15) + sf->half_pixel_search = 0; + +- vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins)); ++ vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); + + }; /* switch */ + +- // Slow quant, dct and trellis not worthwhile for first pass +- // so make sure they are always turned off. 
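The error_bins hunks above move the histogram from VP8_COMP into MACROBLOCK without changing the selection rule: walk the bins until the running count covers a speed-dependent share of the non-skipped macroblocks. A standalone sketch of that rule (the 1024-bin size and the Speed - 6 scaling are copied from the hunk; `min` is assumed to bound the skip region of the histogram):

    static int pick_bin_sketch(const unsigned int bins[1024], int speed,
                               unsigned int total_mbs, int min)
    {
        unsigned int sum = 0, total_skip;
        int i;

        for (i = 0; i < min; i++)
            sum += bins[i];
        total_skip = sum;
        sum = 0;

        for (; i < 1024; i++)
        {
            sum += bins[i];
            if (10 * sum >= (unsigned int)(speed - 6) * (total_mbs - total_skip))
                break;
        }
        return i;   /* first bin where the cumulative share is reached */
    }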
++ /* Slow quant, dct and trellis not worthwhile for first pass ++ * so make sure they are always turned off. ++ */ + if ( cpi->pass == 1 ) + { + sf->improved_quant = 0; +@@ -1107,27 +1075,46 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) + CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); + } + +- // Data used for real time vc mode to see if gf needs refreshing +- cpi->inter_zz_count = 0; +- cpi->gf_bad_count = 0; +- cpi->gf_update_recommended = 0; ++ /* Data used for real time vc mode to see if gf needs refreshing */ ++ cpi->zeromv_count = 0; + + +- // Structures used to minitor GF usage ++ /* Structures used to monitor GF usage */ + vpx_free(cpi->gf_active_flags); + CHECK_MEM_ERROR(cpi->gf_active_flags, +- vpx_calloc(1, cm->mb_rows * cm->mb_cols)); ++ vpx_calloc(sizeof(*cpi->gf_active_flags), ++ cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; + + vpx_free(cpi->mb_activity_map); + CHECK_MEM_ERROR(cpi->mb_activity_map, +- vpx_calloc(sizeof(unsigned int), ++ vpx_calloc(sizeof(*cpi->mb_activity_map), + cm->mb_rows * cm->mb_cols)); + +- vpx_free(cpi->mb_norm_activity_map); +- CHECK_MEM_ERROR(cpi->mb_norm_activity_map, +- vpx_calloc(sizeof(unsigned int), +- cm->mb_rows * cm->mb_cols)); ++ /* allocate memory for storing last frame's MVs for MV prediction. */ ++ vpx_free(cpi->lfmv); ++ CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), ++ sizeof(*cpi->lfmv))); ++ vpx_free(cpi->lf_ref_frame_sign_bias); ++ CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, ++ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), ++ sizeof(*cpi->lf_ref_frame_sign_bias))); ++ vpx_free(cpi->lf_ref_frame); ++ CHECK_MEM_ERROR(cpi->lf_ref_frame, ++ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), ++ sizeof(*cpi->lf_ref_frame))); ++ ++ /* Create the encoder segmentation map and set all entries to 0 */ ++ vpx_free(cpi->segmentation_map); ++ CHECK_MEM_ERROR(cpi->segmentation_map, ++ vpx_calloc(cm->mb_rows * cm->mb_cols, ++ sizeof(*cpi->segmentation_map))); ++ cpi->cyclic_refresh_mode_index = 0; ++ vpx_free(cpi->active_map); ++ CHECK_MEM_ERROR(cpi->active_map, ++ vpx_calloc(cm->mb_rows * cm->mb_cols, ++ sizeof(*cpi->active_map))); ++ vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols)); + + #if CONFIG_MULTITHREAD + if (width < 640) +@@ -1138,15 +1125,22 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) + cpi->mt_sync_range = 8; + else + cpi->mt_sync_range = 16; ++ ++ if (cpi->oxcf.multi_threaded > 1) ++ { ++ vpx_free(cpi->mt_current_mb_col); ++ CHECK_MEM_ERROR(cpi->mt_current_mb_col, ++ vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); ++ } ++ + #endif + + vpx_free(cpi->tplist); +- +- CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); ++ CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows)); + } + + +-// Quant MOD ++/* Quant MOD */ + static const int q_trans[] = + { + 0, 1, 2, 3, 4, 5, 7, 8, +@@ -1168,7 +1162,7 @@ int vp8_reverse_trans(int x) + return i; + + return 63; +-}; ++} + void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) + { + if(framerate < .1) +@@ -1182,16 +1176,16 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) + cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * + cpi->oxcf.two_pass_vbrmin_section / 100); + +- // Set Maximum gf/arf interval ++ /* Set Maximum gf/arf interval */ + cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); + + if(cpi->max_gf_interval < 12) + cpi->max_gf_interval = 12; + +- // Extended interval for genuinely static scenes 
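The rescale() hunk just below only adds an explicit (int) truncation, but the idiom is worth spelling out: widen to 64 bits before multiplying so products such as a buffer level in milliseconds times a bandwidth in bits per second cannot overflow a 32-bit int. Sketched standalone:

    #include <stdint.h>

    static int rescale_sketch(int val, int num, int denom)
    {
        int64_t llnum = num, llden = denom, llval = val;
        /* 64-bit multiply first, explicit truncation last */
        return (int)(llval * llnum / llden);
    }

    /* Example: rescale_sketch(5000, 2000000, 1000) scales a 5000 ms
     * buffer level at 2 Mbit/s; the widened product (10^10) would wrap
     * a 32-bit multiply but divides safely back down to 10^7. */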
++ /* Extended interval for genuinely static scenes */ + cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + +- // Special conditions when altr ref frame enabled in lagged compress mode ++ /* Special conditions when alt ref frame enabled in lagged compress mode */ + if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) + { + if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) +@@ -1213,7 +1207,7 @@ rescale(int val, int num, int denom) + int64_t llden = denom; + int64_t llval = val; + +- return llval * llnum / llden; ++ return (int)(llval * llnum / llden); + } + + +@@ -1225,7 +1219,6 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + + cpi->auto_gold = 1; + cpi->auto_adjust_gold_quantizer = 1; +- cpi->goldfreq = 7; + + cm->version = oxcf->Version; + vp8_setup_version(cm); +@@ -1244,15 +1237,15 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + + cpi->ref_frame_rate = cpi->frame_rate; + +- // change includes all joint functionality ++ /* change includes all joint functionality */ + vp8_change_config(cpi, oxcf); + +- // Initialize active best and worst q and average q values. ++ /* Initialize active best and worst q and average q values. */ + cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; + cpi->active_best_quality = cpi->oxcf.best_allowed_q; + cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; + +- // Initialise the starting buffer levels ++ /* Initialise the starting buffer levels */ + cpi->buffer_level = cpi->oxcf.starting_buffer_level; + cpi->bits_off_target = cpi->oxcf.starting_buffer_level; + +@@ -1264,7 +1257,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + cpi->total_actual_bits = 0; + cpi->total_target_vs_actual = 0; + +- // Temporal scalabilty ++ /* Temporal scalability */ + if (cpi->oxcf.number_of_layers > 1) + { + unsigned int i; +@@ -1274,7 +1267,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + +- // Layer configuration ++ /* Layer configuration */ + lc->frame_rate = + cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; + lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; +@@ -1284,28 +1277,29 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; + + lc->starting_buffer_level = +- rescale(oxcf->starting_buffer_level, ++ rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); + + if (oxcf->optimal_buffer_level == 0) + lc->optimal_buffer_level = lc->target_bandwidth / 8; + else + lc->optimal_buffer_level = +- rescale(oxcf->optimal_buffer_level, ++ rescale((int)(oxcf->optimal_buffer_level), + lc->target_bandwidth, 1000); + + if (oxcf->maximum_buffer_size == 0) + lc->maximum_buffer_size = lc->target_bandwidth / 8; + else + lc->maximum_buffer_size = +- rescale(oxcf->maximum_buffer_size, ++ rescale((int)oxcf->maximum_buffer_size, + lc->target_bandwidth, 1000); + +- // Work out the average size of a frame within this layer ++ /* Work out the average size of a frame within this layer */ + if (i > 0) +- lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] - +- cpi->oxcf.target_bitrate[i-1]) * 1000 / +- (lc->frame_rate - prev_layer_frame_rate); ++ lc->avg_frame_size_for_layer = ++ (int)((cpi->oxcf.target_bitrate[i] - ++ cpi->oxcf.target_bitrate[i-1]) * 1000 / ++ (lc->frame_rate - prev_layer_frame_rate)); + + lc->active_worst_quality = cpi->oxcf.worst_allowed_q; + lc->active_best_quality = cpi->oxcf.best_allowed_q; +@@ -1321,7 +1315,7 @@ static void
init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + lc->rate_correction_factor = 1.0; + lc->key_frame_rate_correction_factor = 1.0; + lc->gf_rate_correction_factor = 1.0; +- lc->inter_frame_target = 0.0; ++ lc->inter_frame_target = 0; + + prev_layer_frame_rate = lc->frame_rate; + } +@@ -1358,32 +1352,29 @@ static void update_layer_contexts (VP8_COMP *cpi) + lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; + + lc->starting_buffer_level = rescale( +- oxcf->starting_buffer_level_in_ms, ++ (int)oxcf->starting_buffer_level_in_ms, + lc->target_bandwidth, 1000); + + if (oxcf->optimal_buffer_level == 0) + lc->optimal_buffer_level = lc->target_bandwidth / 8; + else + lc->optimal_buffer_level = rescale( +- oxcf->optimal_buffer_level_in_ms, ++ (int)oxcf->optimal_buffer_level_in_ms, + lc->target_bandwidth, 1000); + + if (oxcf->maximum_buffer_size == 0) + lc->maximum_buffer_size = lc->target_bandwidth / 8; + else + lc->maximum_buffer_size = rescale( +- oxcf->maximum_buffer_size_in_ms, ++ (int)oxcf->maximum_buffer_size_in_ms, + lc->target_bandwidth, 1000); + +- // Work out the average size of a frame within this layer ++ /* Work out the average size of a frame within this layer */ + if (i > 0) +- lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] - +- oxcf->target_bitrate[i-1]) * 1000 / +- (lc->frame_rate - prev_layer_frame_rate); +- +- lc->active_worst_quality = oxcf->worst_allowed_q; +- lc->active_best_quality = oxcf->best_allowed_q; +- lc->avg_frame_qindex = oxcf->worst_allowed_q; ++ lc->avg_frame_size_for_layer = ++ (int)((oxcf->target_bitrate[i] - ++ oxcf->target_bitrate[i-1]) * 1000 / ++ (lc->frame_rate - prev_layer_frame_rate)); + + prev_layer_frame_rate = lc->frame_rate; + } +@@ -1514,10 +1505,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + cpi->baseline_gf_interval = + cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL; + +- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; ++ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; + +- //cpi->use_golden_frame_only = 0; +- //cpi->use_last_frame_only = 0; + cm->refresh_golden_frame = 0; + cm->refresh_last_frame = 1; + cm->refresh_entropy_probs = 1; +@@ -1539,11 +1528,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; + } + +- // At the moment the first order values may not be > MAXQ ++ /* At the moment the first order values may not be > MAXQ */ + if (cpi->oxcf.fixed_q > MAXQ) + cpi->oxcf.fixed_q = MAXQ; + +- // local file playback mode == really big buffer ++ /* local file playback mode == really big buffer */ + if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) + { + cpi->oxcf.starting_buffer_level = 60000; +@@ -1554,41 +1543,41 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + cpi->oxcf.maximum_buffer_size_in_ms = 240000; + } + +- // Convert target bandwidth from Kbit/s to Bit/s ++ /* Convert target bandwidth from Kbit/s to Bit/s */ + cpi->oxcf.target_bandwidth *= 1000; + + cpi->oxcf.starting_buffer_level = +- rescale(cpi->oxcf.starting_buffer_level, ++ rescale((int)cpi->oxcf.starting_buffer_level, + cpi->oxcf.target_bandwidth, 1000); + +- // Set or reset optimal and maximum buffer levels. ++ /* Set or reset optimal and maximum buffer levels. 
*/ + if (cpi->oxcf.optimal_buffer_level == 0) + cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; + else + cpi->oxcf.optimal_buffer_level = +- rescale(cpi->oxcf.optimal_buffer_level, ++ rescale((int)cpi->oxcf.optimal_buffer_level, + cpi->oxcf.target_bandwidth, 1000); + + if (cpi->oxcf.maximum_buffer_size == 0) + cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; + else + cpi->oxcf.maximum_buffer_size = +- rescale(cpi->oxcf.maximum_buffer_size, ++ rescale((int)cpi->oxcf.maximum_buffer_size, + cpi->oxcf.target_bandwidth, 1000); + +- // Set up frame rate and related parameters rate control values. ++ /* Set up frame rate and related parameters rate control values. */ + vp8_new_frame_rate(cpi, cpi->frame_rate); + +- // Set absolute upper and lower quality limits ++ /* Set absolute upper and lower quality limits */ + cpi->worst_quality = cpi->oxcf.worst_allowed_q; + cpi->best_quality = cpi->oxcf.best_allowed_q; + +- // active values should only be modified if out of new range ++ /* active values should only be modified if out of new range */ + if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) + { + cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; + } +- // less likely ++ /* less likely */ + else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) + { + cpi->active_worst_quality = cpi->oxcf.best_allowed_q; +@@ -1597,7 +1586,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + { + cpi->active_best_quality = cpi->oxcf.best_allowed_q; + } +- // less likely ++ /* less likely */ + else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) + { + cpi->active_best_quality = cpi->oxcf.worst_allowed_q; +@@ -1607,14 +1596,9 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + + cpi->cq_target_quality = cpi->oxcf.cq_level; + +- // Only allow dropped frames in buffered mode ++ /* Only allow dropped frames in buffered mode */ + cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode; + +- if (!cm->use_bilinear_mc_filter) +- cm->mcomp_filter_type = SIXTAP; +- else +- cm->mcomp_filter_type = BILINEAR; +- + cpi->target_bandwidth = cpi->oxcf.target_bandwidth; + + +@@ -1627,7 +1611,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + * correct. 
+ */ + +- // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) ++ /* VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) */ + if (cpi->oxcf.Sharpness > 7) + cpi->oxcf.Sharpness = 7; + +@@ -1641,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + Scale2Ratio(cm->horiz_scale, &hr, &hs); + Scale2Ratio(cm->vert_scale, &vr, &vs); + +- // always go to the next whole number ++ /* always go to the next whole number */ + cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; + cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; + } +@@ -1655,6 +1639,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + cm->yv12_fb[cm->lst_fb_idx].y_height || + cm->yv12_fb[cm->lst_fb_idx].y_width == 0) + { ++ dealloc_raw_frame_buffers(cpi); + alloc_raw_frame_buffers(cpi); + vp8_alloc_compressor_data(cpi); + } +@@ -1667,16 +1652,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + + cpi->Speed = cpi->oxcf.cpu_used; + +- // force to allowlag to 0 if lag_in_frames is 0; ++ /* force to allowlag to 0 if lag_in_frames is 0; */ + if (cpi->oxcf.lag_in_frames == 0) + { + cpi->oxcf.allow_lag = 0; + } +- // Limit on lag buffers as these are not currently dynamically allocated ++ /* Limit on lag buffers as these are not currently dynamically allocated */ + else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) + cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; + +- // YX Temp ++ /* YX Temp */ + cpi->alt_ref_source = NULL; + cpi->is_src_frame_alt_ref = 0; + +@@ -1693,7 +1678,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) + #endif + + #if 0 +- // Experimental RD Code ++ /* Experimental RD Code */ + cpi->frame_distortion = 0; + cpi->last_frame_distortion = 0; + #endif +@@ -1728,7 +1713,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + VP8_COMMON *cm; + + cpi = vpx_memalign(32, sizeof(VP8_COMP)); +- // Check that the CPI instance is valid ++ /* Check that the CPI instance is valid */ + if (!cpi) + return 0; + +@@ -1762,14 +1747,15 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + cpi->prob_gf_coded = 128; + cpi->prob_intra_coded = 63; + +- // Prime the recent reference frame usage counters. +- // Hereafter they will be maintained as a sort of moving average ++ /* Prime the recent reference frame usage counters. ++ * Hereafter they will be maintained as a sort of moving average ++ */ + cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; + cpi->recent_ref_frame_usage[LAST_FRAME] = 1; + cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; + cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; + +- // Set reference frame sign bias for ALTREF frame to 1 (for now) ++ /* Set reference frame sign bias for ALTREF frame to 1 (for now) */ + cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; + + cpi->twopass.gf_decay_rate = 0; +@@ -1779,21 +1765,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + cpi->alt_is_last = 0 ; + cpi->gold_is_alt = 0 ; + +- // allocate memory for storing last frame's MVs for MV prediction. 
+- CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv))); +- CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); +- CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); +- +- // Create the encoder segmentation map and set all entries to 0 +- CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); +- CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); +- vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); + cpi->active_map_enabled = 0; + + #if 0 +- // Experimental code for lagged and one pass +- // Initialise one_pass GF frames stats +- // Update stats used for GF selection ++ /* Experimental code for lagged and one pass */ ++ /* Initialise one_pass GF frames stats */ ++ /* Update stats used for GF selection */ + if (cpi->pass == 0) + { + cpi->one_pass_frame_index = 0; +@@ -1813,10 +1790,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + } + #endif + +- // Should we use the cyclic refresh method. +- // Currently this is tied to error resilliant mode ++ /* Should we use the cyclic refresh method. ++ * Currently this is tied to error resilliant mode ++ */ + cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode; +- cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 40; ++ cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5; + cpi->cyclic_refresh_mode_index = 0; + cpi->cyclic_refresh_q = 32; + +@@ -1827,9 +1805,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + else + cpi->cyclic_refresh_map = (signed char *) NULL; + +- // Test function for segmentation +- //segmentation_test_function( cpi); +- + #ifdef ENTROPY_STATS + init_context_counters(); + #endif +@@ -1837,7 +1812,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + /*Initialize the feed-forward activity masking.*/ + cpi->activity_avg = 90<<12; + +- cpi->frames_since_key = 8; // Give a sensible default for the first frame. ++ /* Give a sensible default for the first frame. 
*/ ++ cpi->frames_since_key = 8; + cpi->key_frame_frequency = cpi->oxcf.key_freq; + cpi->this_key_frame_forced = 0; + cpi->next_key_frame_forced = 0; +@@ -1880,10 +1856,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + + #endif + +-#ifndef LLONG_MAX +-#define LLONG_MAX 9223372036854775807LL +-#endif +- cpi->first_time_stamp_ever = LLONG_MAX; ++ cpi->first_time_stamp_ever = 0x7FFFFFFF; + + cpi->frames_till_gf_update_due = 0; + cpi->key_frame_count = 1; +@@ -1894,22 +1867,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + cpi->total_byte_count = 0; + + cpi->drop_frame = 0; +- cpi->drop_count = 0; +- cpi->max_drop_count = 0; +- cpi->max_consec_dropped_frames = 4; + + cpi->rate_correction_factor = 1.0; + cpi->key_frame_rate_correction_factor = 1.0; + cpi->gf_rate_correction_factor = 1.0; + cpi->twopass.est_max_qcorrection_factor = 1.0; + +- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1]; +- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1]; +- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1]; +- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1]; +- +- cal_mvsadcosts(cpi->mb.mvsadcost); +- + for (i = 0; i < KEY_FRAME_CONTEXT; i++) + { + cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; +@@ -1935,7 +1898,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + else if (cpi->pass == 2) + { + size_t packet_sz = sizeof(FIRSTPASS_STATS); +- int packets = oxcf->two_pass_stats_in.sz / packet_sz; ++ int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); + + cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; + cpi->twopass.stats_in = cpi->twopass.stats_in_start; +@@ -1948,17 +1911,16 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + + if (cpi->compressor_speed == 2) + { +- cpi->cpu_freq = 0; //vp8_get_processor_freq(); + cpi->avg_encode_time = 0; + cpi->avg_pick_mode_time = 0; + } + + vp8_set_speed_features(cpi); + +- // Set starting values of RD threshold multipliers (128 = *1) ++ /* Set starting values of RD threshold multipliers (128 = *1) */ + for (i = 0; i < MAX_MODES; i++) + { +- cpi->rd_thresh_mult[i] = 128; ++ cpi->mb.rd_thresh_mult[i] = 128; + } + + #ifdef ENTROPY_STATS +@@ -1966,7 +1928,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + #endif + + #if CONFIG_MULTITHREAD +- vp8cx_create_encoder_threads(cpi); ++ if(vp8cx_create_encoder_threads(cpi)) ++ { ++ vp8_remove_compressor(&cpi); ++ return 0; ++ } + #endif + + cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16; +@@ -2031,11 +1997,14 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) + cpi->diamond_search_sad = vp8_diamond_search_sad; + cpi->refining_search_sad = vp8_refining_search_sad; + +- // make sure frame 1 is okay +- cpi->error_bins[0] = cpi->common.MBs; ++ /* make sure frame 1 is okay */ ++ cpi->mb.error_bins[0] = cpi->common.MBs; + +- //vp8cx_init_quantizer() is first called here. Add check in vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only called later +- //when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame. ++ /* vp8cx_init_quantizer() is first called here. Add check in ++ * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only ++ * called later when needed. This will avoid unnecessary calls of ++ * vp8cx_init_quantizer() for every frame. 
++ */
+ vp8cx_init_quantizer(cpi);
+
+ vp8_loop_filter_init(cm);
+@@ -2043,13 +2012,33 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
+ cpi->common.error.setjmp = 0;
+
+ #if CONFIG_MULTI_RES_ENCODING
++
+ /* Calculate # of MBs in a row in lower-resolution level image. */
+ if (cpi->oxcf.mr_encoder_id > 0)
+ vp8_cal_low_res_mb_cols(cpi);
++
+ #endif
+
+- return cpi;
++ /* setup RD costs to MACROBLOCK struct */
++
++ cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1];
++ cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1];
++ cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1];
++ cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1];
+
++ cal_mvsadcosts(cpi->mb.mvsadcost);
++
++ cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost;
++ cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost;
++ cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs;
++ cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs;
++ cpi->mb.token_costs = cpi->rd_costs.token_costs;
++
++ /* setup block ptrs & offsets */
++ vp8_setup_block_ptrs(&cpi->mb);
++ vp8_setup_block_dptrs(&cpi->mb.e_mbd);
++
++ return cpi;
+ }
+
+
+@@ -2099,7 +2088,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
+
+ fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
+ "GLPsnrP\tVPXSSIM\t\n");
+- for (i=0; i<cpi->oxcf.number_of_layers; i++)
++ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
+ {
+ double dr = (double)cpi->bytes_in_layer[i] *
+ 8.0 / 1000.0 / time_encoded;
+@@ -2150,7 +2139,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
+
+ fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
+ "Time(us)\n");
+- for (i=0; i<cpi->oxcf.number_of_layers; i++)
++ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
+ {
+ double dr = (double)cpi->bytes_in_layer[i] *
+ 8.0 / 1000.0 / time_encoded;
+@@ -2204,7 +2193,6 @@ void vp8_remove_compressor(VP8_COMP **ptr)
+ fprintf(f, "%5d", frames_at_speed[i]);
+
+ fprintf(f, "\n");
+- //fprintf(f, "%10d PM %10d %10d %10d EF %10d %10d %10d\n", cpi->Speed, cpi->avg_pick_mode_time, (tot_pm/cnt_pm), cnt_pm, cpi->avg_encode_time, 0, 0);
+ fclose(f);
+ }
+
+@@ -2266,7 +2254,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
+ for (i = 0; i < 10; i++)
+ {
+
+- fprintf(fmode, " { //Above Mode : %d\n", i);
++ fprintf(fmode, " { /* Above Mode : %d */\n", i);
+
+ for (j = 0; j < 10; j++)
+ {
+@@ -2281,7 +2269,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
+ fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]);
+ }
+
+- fprintf(fmode, "}, // left_mode %d\n", j);
++ fprintf(fmode, "}, /* left_mode %d */\n", j);
+
+ }
+
+@@ -2459,7 +2447,7 @@ static void generate_psnr_packet(VP8_COMP *cpi)
+
+ for (i = 0; i < 4; i++)
+ pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0,
+- pkt.data.psnr.sse[i]);
++ (double)(pkt.data.psnr.sse[i]));
+
+ vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
+ }
+@@ -2482,28 +2470,28 @@ int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags)
+ cpi->common.refresh_alt_ref_frame = 0;
+ cpi->common.refresh_last_frame = 0;
+
+- if (ref_frame_flags & VP8_LAST_FLAG)
++ if (ref_frame_flags & VP8_LAST_FRAME)
+ cpi->common.refresh_last_frame = 1;
+
+- if (ref_frame_flags & VP8_GOLD_FLAG)
++ if (ref_frame_flags & VP8_GOLD_FRAME)
+ cpi->common.refresh_golden_frame = 1;
+
+- if (ref_frame_flags & VP8_ALT_FLAG)
++ if (ref_frame_flags & VP8_ALTR_FRAME)
+ cpi->common.refresh_alt_ref_frame = 1;
+
+ return 0;
+ }
+
+-int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
++int vp8_get_reference(VP8_COMP *cpi, 
enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) + { + VP8_COMMON *cm = &cpi->common; + int ref_fb_idx; + +- if (ref_frame_flag == VP8_LAST_FLAG) ++ if (ref_frame_flag == VP8_LAST_FRAME) + ref_fb_idx = cm->lst_fb_idx; +- else if (ref_frame_flag == VP8_GOLD_FLAG) ++ else if (ref_frame_flag == VP8_GOLD_FRAME) + ref_fb_idx = cm->gld_fb_idx; +- else if (ref_frame_flag == VP8_ALT_FLAG) ++ else if (ref_frame_flag == VP8_ALTR_FRAME) + ref_fb_idx = cm->alt_fb_idx; + else + return -1; +@@ -2512,17 +2500,17 @@ int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CO + + return 0; + } +-int vp8_set_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) ++int vp8_set_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) + { + VP8_COMMON *cm = &cpi->common; + + int ref_fb_idx; + +- if (ref_frame_flag == VP8_LAST_FLAG) ++ if (ref_frame_flag == VP8_LAST_FRAME) + ref_fb_idx = cm->lst_fb_idx; +- else if (ref_frame_flag == VP8_GOLD_FLAG) ++ else if (ref_frame_flag == VP8_GOLD_FRAME) + ref_fb_idx = cm->gld_fb_idx; +- else if (ref_frame_flag == VP8_ALT_FLAG) ++ else if (ref_frame_flag == VP8_ALTR_FRAME) + ref_fb_idx = cm->alt_fb_idx; + else + return -1; +@@ -2583,7 +2571,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; + +- // are we resizing the image ++ /* are we resizing the image */ + if (cm->horiz_scale != 0 || cm->vert_scale != 0) + { + #if CONFIG_SPATIAL_RESAMPLING +@@ -2611,51 +2599,57 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + } + + +-static void resize_key_frame(VP8_COMP *cpi) ++static int resize_key_frame(VP8_COMP *cpi) + { + #if CONFIG_SPATIAL_RESAMPLING + VP8_COMMON *cm = &cpi->common; + +- // Do we need to apply resampling for one pass cbr. +- // In one pass this is more limited than in two pass cbr +- // The test and any change is only made one per key frame sequence ++ /* Do we need to apply resampling for one pass cbr. ++ * In one pass this is more limited than in two pass cbr ++ * The test and any change is only made one per key frame sequence ++ */ + if (cpi->oxcf.allow_spatial_resampling && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) + { + int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs); + int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs); + int new_width, new_height; + +- // If we are below the resample DOWN watermark then scale down a notch. ++ /* If we are below the resample DOWN watermark then scale down a ++ * notch. ++ */ + if (cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) + { + cm->horiz_scale = (cm->horiz_scale < ONETWO) ? cm->horiz_scale + 1 : ONETWO; + cm->vert_scale = (cm->vert_scale < ONETWO) ? cm->vert_scale + 1 : ONETWO; + } +- // Should we now start scaling back up ++ /* Should we now start scaling back up */ + else if (cpi->buffer_level > (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)) + { + cm->horiz_scale = (cm->horiz_scale > NORMAL) ? cm->horiz_scale - 1 : NORMAL; + cm->vert_scale = (cm->vert_scale > NORMAL) ? 
cm->vert_scale - 1 : NORMAL; + } + +- // Get the new hieght and width ++ /* Get the new hieght and width */ + Scale2Ratio(cm->horiz_scale, &hr, &hs); + Scale2Ratio(cm->vert_scale, &vr, &vs); + new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; + new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; + +- // If the image size has changed we need to reallocate the buffers +- // and resample the source image ++ /* If the image size has changed we need to reallocate the buffers ++ * and resample the source image ++ */ + if ((cm->Width != new_width) || (cm->Height != new_height)) + { + cm->Width = new_width; + cm->Height = new_height; + vp8_alloc_compressor_data(cpi); + scale_and_extend_source(cpi->un_scaled_source, cpi); ++ return 1; + } + } + + #endif ++ return 0; + } + + +@@ -2663,34 +2657,35 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; + +- // Select an interval before next GF or altref ++ /* Select an interval before next GF or altref */ + if (!cpi->auto_gold) +- cpi->frames_till_gf_update_due = cpi->goldfreq; ++ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; + + if ((cpi->pass != 2) && cpi->frames_till_gf_update_due) + { + cpi->current_gf_interval = cpi->frames_till_gf_update_due; + +- // Set the bits per frame that we should try and recover in subsequent inter frames +- // to account for the extra GF spend... note that his does not apply for GF updates +- // that occur coincident with a key frame as the extra cost of key frames is dealt +- // with elsewhere. +- ++ /* Set the bits per frame that we should try and recover in ++ * subsequent inter frames to account for the extra GF spend... ++ * note that his does not apply for GF updates that occur ++ * coincident with a key frame as the extra cost of key frames is ++ * dealt with elsewhere. ++ */ + cpi->gf_overspend_bits += cpi->projected_frame_size; + cpi->non_gf_bitrate_adjustment = cpi->gf_overspend_bits / cpi->frames_till_gf_update_due; + } + +- // Update data structure that monitors level of reference to last GF ++ /* Update data structure that monitors level of reference to last GF */ + vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; + +- // this frame refreshes means next frames don't unless specified by user ++ /* this frame refreshes means next frames don't unless specified by user */ + cpi->common.frames_since_golden = 0; + +- // Clear the alternate reference update pending flag. ++ /* Clear the alternate reference update pending flag. */ + cpi->source_alt_ref_pending = 0; + +- // Set the alternate refernce frame active flag ++ /* Set the alternate refernce frame active flag */ + cpi->source_alt_ref_active = 1; + + +@@ -2699,25 +2694,29 @@ static void update_golden_frame_stats(VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; + +- // Update the Golden frame usage counts. ++ /* Update the Golden frame usage counts. */ + if (cm->refresh_golden_frame) + { +- // Select an interval before next GF ++ /* Select an interval before next GF */ + if (!cpi->auto_gold) +- cpi->frames_till_gf_update_due = cpi->goldfreq; ++ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; + + if ((cpi->pass != 2) && (cpi->frames_till_gf_update_due > 0)) + { + cpi->current_gf_interval = cpi->frames_till_gf_update_due; + +- // Set the bits per frame that we should try and recover in subsequent inter frames +- // to account for the extra GF spend... 
note that his does not apply for GF updates +- // that occur coincident with a key frame as the extra cost of key frames is dealt +- // with elsewhere. ++ /* Set the bits per frame that we should try and recover in ++ * subsequent inter frames to account for the extra GF spend... ++ * note that his does not apply for GF updates that occur ++ * coincident with a key frame as the extra cost of key frames ++ * is dealt with elsewhere. ++ */ + if ((cm->frame_type != KEY_FRAME) && !cpi->source_alt_ref_active) + { +- // Calcluate GF bits to be recovered +- // Projected size - av frame bits available for inter frames for clip as a whole ++ /* Calcluate GF bits to be recovered ++ * Projected size - av frame bits available for inter ++ * frames for clip as a whole ++ */ + cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->inter_frame_target); + } + +@@ -2725,32 +2724,25 @@ static void update_golden_frame_stats(VP8_COMP *cpi) + + } + +- // Update data structure that monitors level of reference to last GF ++ /* Update data structure that monitors level of reference to last GF */ + vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; + +- // this frame refreshes means next frames don't unless specified by user ++ /* this frame refreshes means next frames don't unless specified by ++ * user ++ */ + cm->refresh_golden_frame = 0; + cpi->common.frames_since_golden = 0; + +- //if ( cm->frame_type == KEY_FRAME ) +- //{ + cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; + cpi->recent_ref_frame_usage[LAST_FRAME] = 1; + cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; + cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; +- //} +- //else +- //{ +- // // Carry a potrtion of count over to begining of next gf sequence +- // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5; +- // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5; +- // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5; +- // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5; +- //} +- +- // ******** Fixed Q test code only ************ +- // If we are going to use the ALT reference for the next group of frames set a flag to say so. ++ ++ /* ******** Fixed Q test code only ************ */ ++ /* If we are going to use the ALT reference for the next group of ++ * frames set a flag to say so. 
++ */ + if (cpi->oxcf.fixed_q >= 0 && + cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) + { +@@ -2761,14 +2753,14 @@ static void update_golden_frame_stats(VP8_COMP *cpi) + if (!cpi->source_alt_ref_pending) + cpi->source_alt_ref_active = 0; + +- // Decrement count down till next gf ++ /* Decrement count down till next gf */ + if (cpi->frames_till_gf_update_due > 0) + cpi->frames_till_gf_update_due--; + + } + else if (!cpi->common.refresh_alt_ref_frame) + { +- // Decrement count down till next gf ++ /* Decrement count down till next gf */ + if (cpi->frames_till_gf_update_due > 0) + cpi->frames_till_gf_update_due--; + +@@ -2779,21 +2771,26 @@ static void update_golden_frame_stats(VP8_COMP *cpi) + + if (cpi->common.frames_since_golden > 1) + { +- cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME]; +- cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME]; +- cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]; +- cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; ++ cpi->recent_ref_frame_usage[INTRA_FRAME] += ++ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]; ++ cpi->recent_ref_frame_usage[LAST_FRAME] += ++ cpi->mb.count_mb_ref_frame_usage[LAST_FRAME]; ++ cpi->recent_ref_frame_usage[GOLDEN_FRAME] += ++ cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME]; ++ cpi->recent_ref_frame_usage[ALTREF_FRAME] += ++ cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; + } + } + } + +-// This function updates the reference frame probability estimates that +-// will be used during mode selection ++/* This function updates the reference frame probability estimates that ++ * will be used during mode selection ++ */ + static void update_rd_ref_frame_probs(VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; + +- const int *const rfct = cpi->count_mb_ref_frame_usage; ++ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; + const int rf_intra = rfct[INTRA_FRAME]; + const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; + +@@ -2810,7 +2807,9 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) + cpi->prob_gf_coded = 128; + } + +- // update reference frame costs since we can do better than what we got last frame. ++ /* update reference frame costs since we can do better than what we got ++ * last frame. 
++ */ + if (cpi->oxcf.number_of_layers == 1) + { + if (cpi->common.refresh_alt_ref_frame) +@@ -2841,7 +2840,7 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) + } + + +-// 1 = key, 0 = inter ++/* 1 = key, 0 = inter */ + static int decide_key_frame(VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; +@@ -2853,43 +2852,22 @@ static int decide_key_frame(VP8_COMP *cpi) + if (cpi->Speed > 11) + return 0; + +- // Clear down mmx registers +- vp8_clear_system_state(); //__asm emms; ++ /* Clear down mmx registers */ ++ vp8_clear_system_state(); + + if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) + { +- double change = 1.0 * abs((int)(cpi->intra_error - cpi->last_intra_error)) / (1 + cpi->last_intra_error); +- double change2 = 1.0 * abs((int)(cpi->prediction_error - cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); ++ double change = 1.0 * abs((int)(cpi->mb.intra_error - ++ cpi->last_intra_error)) / (1 + cpi->last_intra_error); ++ double change2 = 1.0 * abs((int)(cpi->mb.prediction_error - ++ cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); + double minerror = cm->MBs * 256; + +-#if 0 +- +- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 +- && cpi->prediction_error > minerror +- && (change > .25 || change2 > .25)) +- { +- FILE *f = fopen("intra_inter.stt", "a"); +- +- if (cpi->prediction_error <= 0) +- cpi->prediction_error = 1; +- +- fprintf(f, "%d %d %d %d %14.4f\n", +- cm->current_video_frame, +- (int) cpi->prediction_error, +- (int) cpi->intra_error, +- (int)((10 * cpi->intra_error) / cpi->prediction_error), +- change); +- +- fclose(f); +- } +- +-#endif +- +- cpi->last_intra_error = cpi->intra_error; +- cpi->last_prediction_error = cpi->prediction_error; ++ cpi->last_intra_error = cpi->mb.intra_error; ++ cpi->last_prediction_error = cpi->mb.prediction_error; + +- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 +- && cpi->prediction_error > minerror ++ if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15 ++ && cpi->mb.prediction_error > minerror + && (change > .25 || change2 > .25)) + { + /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/ +@@ -2900,7 +2878,7 @@ static int decide_key_frame(VP8_COMP *cpi) + + } + +- // If the following are true we might as well code a key frame ++ /* If the following are true we might as well code a key frame */ + if (((cpi->this_frame_percent_intra == 100) && + (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra + 2))) || + ((cpi->this_frame_percent_intra > 95) && +@@ -2908,9 +2886,12 @@ static int decide_key_frame(VP8_COMP *cpi) + { + code_key_frame = 1; + } +- // in addition if the following are true and this is not a golden frame then code a key frame +- // Note that on golden frames there often seems to be a pop in intra useage anyway hence this +- // restriction is designed to prevent spurious key frames. The Intra pop needs to be investigated. ++ /* in addition if the following are true and this is not a golden frame ++ * then code a key frame Note that on golden frames there often seems ++ * to be a pop in intra useage anyway hence this restriction is ++ * designed to prevent spurious key frames. The Intra pop needs to be ++ * investigated. 
++ */ + else if (((cpi->this_frame_percent_intra > 60) && + (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra * 2))) || + ((cpi->this_frame_percent_intra > 75) && +@@ -2942,7 +2923,7 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, + void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) + { + +- // write the frame ++ /* write the frame */ + FILE *yframe; + int i; + char filename[255]; +@@ -2970,10 +2951,11 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) + fclose(yframe); + } + #endif +-// return of 0 means drop frame ++/* return of 0 means drop frame */ + +-// Function to test for conditions that indeicate we should loop +-// back and recode a frame. ++/* Function to test for conditions that indeicate we should loop ++ * back and recode a frame. ++ */ + static int recode_loop_test( VP8_COMP *cpi, + int high_limit, int low_limit, + int q, int maxq, int minq ) +@@ -2981,32 +2963,33 @@ static int recode_loop_test( VP8_COMP *cpi, + int force_recode = 0; + VP8_COMMON *cm = &cpi->common; + +- // Is frame recode allowed at all +- // Yes if either recode mode 1 is selected or mode two is selcted +- // and the frame is a key frame. golden frame or alt_ref_frame ++ /* Is frame recode allowed at all ++ * Yes if either recode mode 1 is selected or mode two is selcted ++ * and the frame is a key frame. golden frame or alt_ref_frame ++ */ + if ( (cpi->sf.recode_loop == 1) || + ( (cpi->sf.recode_loop == 2) && + ( (cm->frame_type == KEY_FRAME) || + cm->refresh_golden_frame || + cm->refresh_alt_ref_frame ) ) ) + { +- // General over and under shoot tests ++ /* General over and under shoot tests */ + if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) || + ((cpi->projected_frame_size < low_limit) && (q > minq)) ) + { + force_recode = 1; + } +- // Special Constrained quality tests ++ /* Special Constrained quality tests */ + else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + { +- // Undershoot and below auto cq level ++ /* Undershoot and below auto cq level */ + if ( (q > cpi->cq_target_quality) && + (cpi->projected_frame_size < + ((cpi->this_frame_target * 7) >> 3))) + { + force_recode = 1; + } +- // Severe undershoot and between auto and user cq level ++ /* Severe undershoot and between auto and user cq level */ + else if ( (q > cpi->oxcf.cq_level) && + (cpi->projected_frame_size < cpi->min_frame_bandwidth) && + (cpi->active_best_quality > cpi->oxcf.cq_level)) +@@ -3020,21 +3003,28 @@ static int recode_loop_test( VP8_COMP *cpi, + return force_recode; + } + +-static void update_reference_frames(VP8_COMMON *cm) ++static void update_reference_frames(VP8_COMP *cpi) + { ++ VP8_COMMON *cm = &cpi->common; + YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb; + +- // At this point the new frame has been encoded. +- // If any buffer copy / swapping is signaled it should be done here. ++ /* At this point the new frame has been encoded. ++ * If any buffer copy / swapping is signaled it should be done here. 
++ */ + + if (cm->frame_type == KEY_FRAME) + { +- yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG | VP8_ALT_FLAG ; ++ yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME | VP8_ALTR_FRAME ; + +- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; +- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; ++ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; ++ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; + + cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; ++ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; ++#endif + } + else /* For non key frames */ + { +@@ -3042,9 +3032,13 @@ static void update_reference_frames(VP8_COMMON *cm) + { + assert(!cm->copy_buffer_to_arf); + +- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALT_FLAG; +- cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; ++ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALTR_FRAME; ++ cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; + cm->alt_fb_idx = cm->new_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; ++#endif + } + else if (cm->copy_buffer_to_arf) + { +@@ -3054,18 +3048,28 @@ static void update_reference_frames(VP8_COMMON *cm) + { + if(cm->alt_fb_idx != cm->lst_fb_idx) + { +- yv12_fb[cm->lst_fb_idx].flags |= VP8_ALT_FLAG; +- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; ++ yv12_fb[cm->lst_fb_idx].flags |= VP8_ALTR_FRAME; ++ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; + cm->alt_fb_idx = cm->lst_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[ALTREF_FRAME] = ++ cpi->current_ref_frames[LAST_FRAME]; ++#endif + } + } + else /* if (cm->copy_buffer_to_arf == 2) */ + { + if(cm->alt_fb_idx != cm->gld_fb_idx) + { +- yv12_fb[cm->gld_fb_idx].flags |= VP8_ALT_FLAG; +- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; ++ yv12_fb[cm->gld_fb_idx].flags |= VP8_ALTR_FRAME; ++ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; + cm->alt_fb_idx = cm->gld_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[ALTREF_FRAME] = ++ cpi->current_ref_frames[GOLDEN_FRAME]; ++#endif + } + } + } +@@ -3074,9 +3078,13 @@ static void update_reference_frames(VP8_COMMON *cm) + { + assert(!cm->copy_buffer_to_gf); + +- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG; +- cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; ++ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME; ++ cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; + cm->gld_fb_idx = cm->new_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; ++#endif + } + else if (cm->copy_buffer_to_gf) + { +@@ -3086,18 +3094,28 @@ static void update_reference_frames(VP8_COMMON *cm) + { + if(cm->gld_fb_idx != cm->lst_fb_idx) + { +- yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FLAG; +- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; ++ yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FRAME; ++ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; + cm->gld_fb_idx = cm->lst_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[GOLDEN_FRAME] = ++ cpi->current_ref_frames[LAST_FRAME]; ++#endif + } + } + else /* if (cm->copy_buffer_to_gf == 2) */ + { + if(cm->alt_fb_idx != cm->gld_fb_idx) + { +- yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FLAG; +- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; ++ yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FRAME; ++ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; + cm->gld_fb_idx = cm->alt_fb_idx; ++ ++#if 
CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[GOLDEN_FRAME] = ++ cpi->current_ref_frames[ALTREF_FRAME]; ++#endif + } + } + } +@@ -3105,14 +3123,71 @@ static void update_reference_frames(VP8_COMMON *cm) + + if (cm->refresh_last_frame) + { +- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FLAG; +- cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FLAG; ++ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FRAME; ++ cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FRAME; + cm->lst_fb_idx = cm->new_fb_idx; ++ ++#if CONFIG_MULTI_RES_ENCODING ++ cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame; ++#endif + } ++ ++#if CONFIG_TEMPORAL_DENOISING ++ if (cpi->oxcf.noise_sensitivity) ++ { ++ /* we shouldn't have to keep multiple copies as we know in advance which ++ * buffer we should start - for now to get something up and running ++ * I've chosen to copy the buffers ++ */ ++ if (cm->frame_type == KEY_FRAME) ++ { ++ int i; ++ vp8_yv12_copy_frame( ++ cpi->Source, ++ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); ++ ++ vp8_yv12_extend_frame_borders( ++ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); ++ ++ for (i = 2; i < MAX_REF_FRAMES - 1; i++) ++ vp8_yv12_copy_frame( ++ &cpi->denoiser.yv12_running_avg[LAST_FRAME], ++ &cpi->denoiser.yv12_running_avg[i]); ++ } ++ else /* For non key frames */ ++ { ++ vp8_yv12_extend_frame_borders( ++ &cpi->denoiser.yv12_running_avg[INTRA_FRAME]); ++ ++ if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf) ++ { ++ vp8_yv12_copy_frame( ++ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], ++ &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]); ++ } ++ if (cm->refresh_golden_frame || cm->copy_buffer_to_gf) ++ { ++ vp8_yv12_copy_frame( ++ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], ++ &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]); ++ } ++ if(cm->refresh_last_frame) ++ { ++ vp8_yv12_copy_frame( ++ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], ++ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); ++ } ++ } ++ ++ } ++#endif ++ + } + + void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) + { ++ const FRAME_TYPE frame_type = cm->frame_type; ++ + if (cm->no_lpf) + { + cm->filter_level = 0; +@@ -3130,6 +3205,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) + else + vp8cx_pick_filter_level(cpi->Source, cpi); + ++ if (cm->filter_level > 0) ++ { ++ vp8cx_set_alt_lf_level(cpi, cm->filter_level); ++ } ++ + vpx_usec_timer_mark(&timer); + cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); + } +@@ -3141,17 +3221,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) + + if (cm->filter_level > 0) + { +- vp8cx_set_alt_lf_level(cpi, cm->filter_level); +- vp8_loop_filter_frame(cm, &cpi->mb.e_mbd); ++ vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type); + } + + vp8_yv12_extend_frame_borders(cm->frame_to_show); +-#if CONFIG_TEMPORAL_DENOISING +- if (cpi->oxcf.noise_sensitivity) +- { +- vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg); +- } +-#endif ++ + } + + static void encode_frame_to_data_rate +@@ -3184,13 +3258,14 @@ static void encode_frame_to_data_rate + int undershoot_seen = 0; + #endif + +- int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100; ++ int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark * ++ cpi->oxcf.optimal_buffer_level / 100); + int drop_mark75 = drop_mark * 2 / 3; + int drop_mark50 = drop_mark / 4; + int drop_mark25 = drop_mark / 8; + + +- // Clear down mmx registers to allow floating point in what follows ++ /* Clear down mmx registers to allow floating point in what follows */ + 
vp8_clear_system_state(); + + #if CONFIG_MULTITHREAD +@@ -3202,108 +3277,125 @@ static void encode_frame_to_data_rate + } + #endif + +- // Test code for segmentation of gf/arf (0,0) +- //segmentation_test_function( cpi); +- + if(cpi->force_next_frame_intra) + { + cm->frame_type = KEY_FRAME; /* delayed intra frame */ + cpi->force_next_frame_intra = 0; + } + +- // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth ++ /* For an alt ref frame in 2 pass we skip the call to the second pass ++ * function that sets the target bandwidth ++ */ + #if !(CONFIG_REALTIME_ONLY) + + if (cpi->pass == 2) + { + if (cpi->common.refresh_alt_ref_frame) + { +- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame +- cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate ++ /* Per frame bit target for the alt ref frame */ ++ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; ++ /* per second target bitrate */ ++ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * ++ cpi->output_frame_rate); + } + } + else + #endif + cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate); + +- // Default turn off buffer to buffer copying ++ /* Default turn off buffer to buffer copying */ + cm->copy_buffer_to_gf = 0; + cm->copy_buffer_to_arf = 0; + +- // Clear zbin over-quant value and mode boost values. +- cpi->zbin_over_quant = 0; +- cpi->zbin_mode_boost = 0; ++ /* Clear zbin over-quant value and mode boost values. */ ++ cpi->mb.zbin_over_quant = 0; ++ cpi->mb.zbin_mode_boost = 0; + +- // Enable or disable mode based tweaking of the zbin +- // For 2 Pass Only used where GF/ARF prediction quality +- // is above a threshold +- cpi->zbin_mode_boost_enabled = 1; ++ /* Enable or disable mode based tweaking of the zbin ++ * For 2 Pass Only used where GF/ARF prediction quality ++ * is above a threshold ++ */ ++ cpi->mb.zbin_mode_boost_enabled = 1; + if (cpi->pass == 2) + { + if ( cpi->gfu_boost <= 400 ) + { +- cpi->zbin_mode_boost_enabled = 0; ++ cpi->mb.zbin_mode_boost_enabled = 0; + } + } + +- // Current default encoder behaviour for the altref sign bias ++ /* Current default encoder behaviour for the altref sign bias */ + if (cpi->source_alt_ref_active) + cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; + else + cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; + +- // Check to see if a key frame is signalled +- // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass. ++ /* Check to see if a key frame is signalled ++ * For two pass with auto key frame enabled cm->frame_type may already ++ * be set, but not for one pass. ++ */ + if ((cm->current_video_frame == 0) || + (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) + { +- // Key frame from VFW/auto-keyframe/first frame ++ /* Key frame from VFW/auto-keyframe/first frame */ + cm->frame_type = KEY_FRAME; + } + +- // Set default state for segment and mode based loop filter update flags +- cpi->mb.e_mbd.update_mb_segmentation_map = 0; +- cpi->mb.e_mbd.update_mb_segmentation_data = 0; +- cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; ++#if CONFIG_MULTI_RES_ENCODING ++ /* In multi-resolution encoding, frame_type is decided by lowest-resolution ++ * encoder. Same frame_type is adopted while encoding at other resolution. 
++ */ ++ if (cpi->oxcf.mr_encoder_id) ++ { ++ LOWER_RES_FRAME_INFO* low_res_frame_info ++ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; ++ ++ cm->frame_type = low_res_frame_info->frame_type; + +- // Set various flags etc to special state if it is a key frame ++ if(cm->frame_type != KEY_FRAME) ++ { ++ cpi->mr_low_res_mv_avail = 1; ++ cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped); ++ ++ if (cpi->ref_frame_flags & VP8_LAST_FRAME) ++ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME] ++ == low_res_frame_info->low_res_ref_frames[LAST_FRAME]); ++ ++ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) ++ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME] ++ == low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]); ++ ++ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) ++ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME] ++ == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); ++ } ++ } ++#endif ++ ++ /* Set various flags etc to special state if it is a key frame */ + if (cm->frame_type == KEY_FRAME) + { + int i; + +- // Reset the loop filter deltas and segmentation map ++ // Set the loop filter deltas and segmentation map update + setup_features(cpi); + +- // If segmentation is enabled force a map update for key frames +- if (cpi->mb.e_mbd.segmentation_enabled) +- { +- cpi->mb.e_mbd.update_mb_segmentation_map = 1; +- cpi->mb.e_mbd.update_mb_segmentation_data = 1; +- } +- +- // The alternate reference frame cannot be active for a key frame ++ /* The alternate reference frame cannot be active for a key frame */ + cpi->source_alt_ref_active = 0; + +- // Reset the RD threshold multipliers to default of * 1 (128) ++ /* Reset the RD threshold multipliers to default of * 1 (128) */ + for (i = 0; i < MAX_MODES; i++) + { +- cpi->rd_thresh_mult[i] = 128; ++ cpi->mb.rd_thresh_mult[i] = 128; + } + } + +- // Test code for segmentation +- //if ( (cm->frame_type == KEY_FRAME) || ((cm->current_video_frame % 2) == 0)) +- //if ( (cm->current_video_frame % 2) == 0 ) +- // enable_segmentation(cpi); +- //else +- // disable_segmentation(cpi); +- + #if 0 +- // Experimental code for lagged compress and one pass +- // Initialise one_pass GF frames stats +- // Update stats used for GF selection +- //if ( cpi->pass == 0 ) ++ /* Experimental code for lagged compress and one pass ++ * Initialise one_pass GF frames stats ++ * Update stats used for GF selection ++ */ + { + cpi->one_pass_frame_index = cm->current_video_frame % MAX_LAG_BUFFERS; + +@@ -3323,8 +3415,9 @@ static void encode_frame_to_data_rate + + if (cpi->drop_frames_allowed) + { +- // The reset to decimation 0 is only done here for one pass. +- // Once it is set two pass leaves decimation on till the next kf. ++ /* The reset to decimation 0 is only done here for one pass. ++ * Once it is set two pass leaves decimation on till the next kf. ++ */ + if ((cpi->buffer_level > drop_mark) && (cpi->decimation_factor > 0)) + cpi->decimation_factor --; + +@@ -3343,14 +3436,17 @@ static void encode_frame_to_data_rate + { + cpi->decimation_factor = 1; + } +- //vpx_log("Encoder: Decimation Factor: %d \n",cpi->decimation_factor); + } + +- // The following decimates the frame rate according to a regular pattern (i.e. to 1/2 or 2/3 frame rate) +- // This can be used to help prevent buffer under-run in CBR mode. Alternatively it might be desirable in +- // some situations to drop frame rate but throw more bits at each frame. 
+- //
+- // Note that dropping a key frame can be problematic if spatial resampling is also active
++ /* The following decimates the frame rate according to a regular
++ * pattern (i.e. to 1/2 or 2/3 frame rate) This can be used to help
++ * prevent buffer under-run in CBR mode. Alternatively it might be
++ * desirable in some situations to drop frame rate but throw more bits
++ * at each frame.
++ *
++ * Note that dropping a key frame can be problematic if spatial
++ * resampling is also active
++ */
+ if (cpi->decimation_factor > 0)
+ {
+ switch (cpi->decimation_factor)
+@@ -3366,8 +3462,10 @@ static void encode_frame_to_data_rate
+ break;
+ }
+
+- // Note that we should not throw out a key frame (especially when spatial resampling is enabled).
+- if ((cm->frame_type == KEY_FRAME)) // && cpi->oxcf.allow_spatial_resampling )
++ /* Note that we should not throw out a key frame (especially when
++ * spatial resampling is enabled).
++ */
++ if ((cm->frame_type == KEY_FRAME))
+ {
+ cpi->decimation_count = cpi->decimation_factor;
+ }
+@@ -3379,6 +3477,10 @@ static void encode_frame_to_data_rate
+ if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
+ cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
+
++#if CONFIG_MULTI_RES_ENCODING
++ vp8_store_drop_frame_info(cpi);
++#endif
++
+ cm->current_video_frame++;
+ cpi->frames_since_key++;
+
+@@ -3392,7 +3494,9 @@ static void encode_frame_to_data_rate
+ {
+ unsigned int i;
+
+- // Propagate bits saved by dropping the frame to higher layers
++ /* Propagate bits saved by dropping the frame to higher
++ * layers
++ */
+ for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
+ {
+ LAYER_CONTEXT *lc = &cpi->layer_context[i];
+@@ -3408,24 +3512,32 @@ static void encode_frame_to_data_rate
+ else
+ cpi->decimation_count = cpi->decimation_factor;
+ }
++ else
++ cpi->decimation_count = 0;
+
+- // Decide how big to make the frame
++ /* Decide how big to make the frame */
+ if (!vp8_pick_frame_size(cpi))
+ {
++ /*TODO: 2 drop_frame and return code could be put together. */
++#if CONFIG_MULTI_RES_ENCODING
++ vp8_store_drop_frame_info(cpi);
++#endif
+ cm->current_video_frame++;
+ cpi->frames_since_key++;
+ return;
+ }
+
+- // Reduce active_worst_allowed_q for CBR if our buffer is getting too full.
+- // This has a knock on effect on active best quality as well.
+- // For CBR if the buffer reaches its maximum level then we can no longer
+- // save up bits for later frames so we might as well use them up
+- // on the current frame.
++ /* Reduce active_worst_allowed_q for CBR if our buffer is getting too full.
++ * This has a knock on effect on active best quality as well.
++ * For CBR if the buffer reaches its maximum level then we can no longer
++ * save up bits for later frames so we might as well use them up
++ * on the current frame. 
++ */ + if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && + (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) + { +- int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4 ++ /* Max adjustment is 1/4 */ ++ int Adjustment = cpi->active_worst_quality / 4; + + if (Adjustment) + { +@@ -3433,10 +3545,16 @@ static void encode_frame_to_data_rate + + if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size) + { +- buff_lvl_step = (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment; ++ buff_lvl_step = (int) ++ ((cpi->oxcf.maximum_buffer_size - ++ cpi->oxcf.optimal_buffer_level) / ++ Adjustment); + + if (buff_lvl_step) +- Adjustment = (cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step; ++ Adjustment = (int) ++ ((cpi->buffer_level - ++ cpi->oxcf.optimal_buffer_level) / ++ buff_lvl_step); + else + Adjustment = 0; + } +@@ -3448,8 +3566,9 @@ static void encode_frame_to_data_rate + } + } + +- // Set an active best quality and if necessary active worst quality +- // There is some odd behavior for one pass here that needs attention. ++ /* Set an active best quality and if necessary active worst quality ++ * There is some odd behavior for one pass here that needs attention. ++ */ + if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) + { + vp8_clear_system_state(); +@@ -3465,9 +3584,10 @@ static void encode_frame_to_data_rate + else + cpi->active_best_quality = kf_high_motion_minq[Q]; + +- // Special case for key frames forced because we have reached +- // the maximum key frame interval. Here force the Q to a range +- // based on the ambient Q to reduce the risk of popping ++ /* Special case for key frames forced because we have reached ++ * the maximum key frame interval. Here force the Q to a range ++ * based on the ambient Q to reduce the risk of popping ++ */ + if ( cpi->this_key_frame_forced ) + { + if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8) +@@ -3476,7 +3596,7 @@ static void encode_frame_to_data_rate + cpi->active_best_quality = cpi->avg_frame_qindex >> 2; + } + } +- // One pass more conservative ++ /* One pass more conservative */ + else + cpi->active_best_quality = kf_high_motion_minq[Q]; + } +@@ -3484,16 +3604,17 @@ static void encode_frame_to_data_rate + else if (cpi->oxcf.number_of_layers==1 && + (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)) + { +- // Use the lower of cpi->active_worst_quality and recent +- // average Q as basis for GF/ARF Q limit unless last frame was +- // a key frame. ++ /* Use the lower of cpi->active_worst_quality and recent ++ * average Q as basis for GF/ARF Q limit unless last frame was ++ * a key frame. ++ */ + if ( (cpi->frames_since_key > 1) && + (cpi->avg_frame_qindex < cpi->active_worst_quality) ) + { + Q = cpi->avg_frame_qindex; + } + +- // For constrained quality dont allow Q less than the cq level ++ /* For constrained quality dont allow Q less than the cq level */ + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < cpi->cq_target_quality) ) + { +@@ -3509,14 +3630,14 @@ static void encode_frame_to_data_rate + else + cpi->active_best_quality = gf_mid_motion_minq[Q]; + +- // Constrained quality use slightly lower active best. ++ /* Constrained quality use slightly lower active best. 
*/ + if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) + { + cpi->active_best_quality = + cpi->active_best_quality * 15/16; + } + } +- // One pass more conservative ++ /* One pass more conservative */ + else + cpi->active_best_quality = gf_high_motion_minq[Q]; + } +@@ -3524,14 +3645,16 @@ static void encode_frame_to_data_rate + { + cpi->active_best_quality = inter_minq[Q]; + +- // For the constant/constrained quality mode we dont want +- // q to fall below the cq level. ++ /* For the constant/constrained quality mode we dont want ++ * q to fall below the cq level. ++ */ + if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (cpi->active_best_quality < cpi->cq_target_quality) ) + { +- // If we are strongly undershooting the target rate in the last +- // frames then use the user passed in cq value not the auto +- // cq value. ++ /* If we are strongly undershooting the target rate in the last ++ * frames then use the user passed in cq value not the auto ++ * cq value. ++ */ + if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth ) + cpi->active_best_quality = cpi->oxcf.cq_level; + else +@@ -3539,26 +3662,33 @@ static void encode_frame_to_data_rate + } + } + +- // If CBR and the buffer is as full then it is reasonable to allow +- // higher quality on the frames to prevent bits just going to waste. ++ /* If CBR and the buffer is as full then it is reasonable to allow ++ * higher quality on the frames to prevent bits just going to waste. ++ */ + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + { +- // Note that the use of >= here elliminates the risk of a devide +- // by 0 error in the else if clause ++ /* Note that the use of >= here elliminates the risk of a devide ++ * by 0 error in the else if clause ++ */ + if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size) + cpi->active_best_quality = cpi->best_quality; + + else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level) + { +- int Fraction = ((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) / (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level); +- int min_qadjustment = ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128; ++ int Fraction = (int) ++ (((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) ++ / (cpi->oxcf.maximum_buffer_size - ++ cpi->oxcf.optimal_buffer_level)); ++ int min_qadjustment = ((cpi->active_best_quality - ++ cpi->best_quality) * Fraction) / 128; + + cpi->active_best_quality -= min_qadjustment; + } + } + } +- // Make sure constrained quality mode limits are adhered to for the first +- // few frames of one pass encodes ++ /* Make sure constrained quality mode limits are adhered to for the first ++ * few frames of one pass encodes ++ */ + else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + { + if ( (cm->frame_type == KEY_FRAME) || +@@ -3572,7 +3702,7 @@ static void encode_frame_to_data_rate + } + } + +- // Clip the active best and worst quality values to limits ++ /* Clip the active best and worst quality values to limits */ + if (cpi->active_worst_quality > cpi->worst_quality) + cpi->active_worst_quality = cpi->worst_quality; + +@@ -3582,14 +3712,14 @@ static void encode_frame_to_data_rate + if ( cpi->active_worst_quality < cpi->active_best_quality ) + cpi->active_worst_quality = cpi->active_best_quality; + +- // Determine initial Q to try ++ /* Determine initial Q to try */ + Q = vp8_regulate_q(cpi, cpi->this_frame_target); + + #if !(CONFIG_REALTIME_ONLY) + +- // Set highest allowed value for Zbin over quant ++ /* Set highest allowed value 
for Zbin over quant */ + if (cm->frame_type == KEY_FRAME) +- zbin_oq_high = 0; //ZBIN_OQ_MAX/16 ++ zbin_oq_high = 0; + else if ((cpi->oxcf.number_of_layers == 1) && ((cm->refresh_alt_ref_frame || + (cm->refresh_golden_frame && !cpi->source_alt_ref_active)))) + { +@@ -3599,15 +3729,21 @@ static void encode_frame_to_data_rate + zbin_oq_high = ZBIN_OQ_MAX; + #endif + +- // Setup background Q adjustment for error resilient mode. +- // For multi-layer encodes only enable this for the base layer. +- if (cpi->cyclic_refresh_mode_enabled && (cpi->current_layer==0)) ++ /* Setup background Q adjustment for error resilient mode. ++ * For multi-layer encodes only enable this for the base layer. ++ */ ++ if (cpi->cyclic_refresh_mode_enabled) ++ { ++ if (cpi->current_layer==0) + cyclic_background_refresh(cpi, Q, 0); ++ else ++ disable_segmentation(cpi); ++ } + + vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); + + #if !(CONFIG_REALTIME_ONLY) +- // Limit Q range for the adaptive loop. ++ /* Limit Q range for the adaptive loop. */ + bottom_index = cpi->active_best_quality; + top_index = cpi->active_worst_quality; + q_low = cpi->active_best_quality; +@@ -3652,11 +3788,11 @@ static void encode_frame_to_data_rate + + if (cm->frame_type == KEY_FRAME) + { +- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); ++ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); + } + else + { +- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); ++ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); + + src = cpi->Source->y_buffer; + +@@ -3675,16 +3811,11 @@ static void encode_frame_to_data_rate + + do + { +- vp8_clear_system_state(); //__asm emms; +- +- /* +- if(cpi->is_src_frame_alt_ref) +- Q = 127; +- */ ++ vp8_clear_system_state(); + + vp8_set_quantizer(cpi, Q); + +- // setup skip prob for costing in mode/mv decision ++ /* setup skip prob for costing in mode/mv decision */ + if (cpi->common.mb_no_coeff_skip) + { + cpi->prob_skip_false = cpi->base_skip_false_prob[Q]; +@@ -3728,7 +3859,9 @@ static void encode_frame_to_data_rate + */ + } + +- //as this is for cost estimate, let's make sure it does not go extreme eitehr way ++ /* as this is for cost estimate, let's make sure it does not ++ * go extreme eitehr way ++ */ + if (cpi->prob_skip_false < 5) + cpi->prob_skip_false = 5; + +@@ -3754,7 +3887,22 @@ static void encode_frame_to_data_rate + + if (cm->frame_type == KEY_FRAME) + { +- resize_key_frame(cpi); ++ if(resize_key_frame(cpi)) ++ { ++ /* If the frame size has changed, need to reset Q, quantizer, ++ * and background refresh. 
++ */ ++ Q = vp8_regulate_q(cpi, cpi->this_frame_target); ++ if (cpi->cyclic_refresh_mode_enabled) ++ { ++ if (cpi->current_layer==0) ++ cyclic_background_refresh(cpi, Q, 0); ++ else ++ disable_segmentation(cpi); ++ } ++ vp8_set_quantizer(cpi, Q); ++ } ++ + vp8_setup_key_frame(cpi); + } + +@@ -3773,7 +3921,7 @@ static void encode_frame_to_data_rate + + if (cm->refresh_entropy_probs == 0) + { +- // save a copy for later refresh ++ /* save a copy for later refresh */ + vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); + } + +@@ -3781,61 +3929,52 @@ static void encode_frame_to_data_rate + + vp8_update_coef_probs(cpi); + +- // transform / motion compensation build reconstruction frame +- // +pack coef partitions ++ /* transform / motion compensation build reconstruction frame ++ * +pack coef partitions ++ */ + vp8_encode_frame(cpi); + + /* cpi->projected_frame_size is not needed for RT mode */ + } + #else +- // transform / motion compensation build reconstruction frame ++ /* transform / motion compensation build reconstruction frame */ + vp8_encode_frame(cpi); + + cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); + cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; + #endif +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + +- // Test to see if the stats generated for this frame indicate that we should have coded a key frame +- // (assuming that we didn't)! +- if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) +- { +- int key_frame_decision = decide_key_frame(cpi); ++ /* Test to see if the stats generated for this frame indicate that ++ * we should have coded a key frame (assuming that we didn't)! ++ */ + +- if (cpi->compressor_speed == 2) +- { +- /* we don't do re-encoding in realtime mode +- * if key frame is decided then we force it on next frame */ +- cpi->force_next_frame_intra = key_frame_decision; +- } ++ if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME ++ && cpi->compressor_speed != 2) ++ { + #if !(CONFIG_REALTIME_ONLY) +- else if (key_frame_decision) ++ if (decide_key_frame(cpi)) + { +- // Reset all our sizing numbers and recode ++ /* Reset all our sizing numbers and recode */ + cm->frame_type = KEY_FRAME; + + vp8_pick_frame_size(cpi); + +- // Clear the Alt reference frame active flag when we have a key frame ++ /* Clear the Alt reference frame active flag when we have ++ * a key frame ++ */ + cpi->source_alt_ref_active = 0; + +- // Reset the loop filter deltas and segmentation map ++ // Set the loop filter deltas and segmentation map update + setup_features(cpi); + +- // If segmentation is enabled force a map update for key frames +- if (cpi->mb.e_mbd.segmentation_enabled) +- { +- cpi->mb.e_mbd.update_mb_segmentation_map = 1; +- cpi->mb.e_mbd.update_mb_segmentation_data = 1; +- } +- + vp8_restore_coding_context(cpi); + + Q = vp8_regulate_q(cpi, cpi->this_frame_target); + + vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); + +- // Limit Q range for the adaptive loop. ++ /* Limit Q range for the adaptive loop. */ + bottom_index = cpi->active_best_quality; + top_index = cpi->active_worst_quality; + q_low = cpi->active_best_quality; +@@ -3854,7 +3993,7 @@ static void encode_frame_to_data_rate + if (frame_over_shoot_limit == 0) + frame_over_shoot_limit = 1; + +- // Are we are overshooting and up against the limit of active max Q. ++ /* Are we are overshooting and up against the limit of active max Q. 
*/ + if (((cpi->pass != 2) || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) && + (Q == cpi->active_worst_quality) && + (cpi->active_worst_quality < cpi->worst_quality) && +@@ -3862,50 +4001,52 @@ static void encode_frame_to_data_rate + { + int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; + +- // If so is there any scope for relaxing it ++ /* If so is there any scope for relaxing it */ + while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) + { + cpi->active_worst_quality++; +- +- over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size. ++ /* Assume 1 qstep = about 4% on frame size. */ ++ over_size_percent = (int)(over_size_percent * 0.96); + } + #if !(CONFIG_REALTIME_ONLY) + top_index = cpi->active_worst_quality; + #endif +- // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop. ++ /* If we have updated the active max Q do not call ++ * vp8_update_rate_correction_factors() this loop. ++ */ + active_worst_qchanged = 1; + } + else + active_worst_qchanged = 0; + + #if !(CONFIG_REALTIME_ONLY) +- // Special case handling for forced key frames ++ /* Special case handling for forced key frames */ + if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) + { + int last_q = Q; + int kf_err = vp8_calc_ss_err(cpi->Source, + &cm->yv12_fb[cm->new_fb_idx]); + +- // The key frame is not good enough ++ /* The key frame is not good enough */ + if ( kf_err > ((cpi->ambient_err * 7) >> 3) ) + { +- // Lower q_high ++ /* Lower q_high */ + q_high = (Q > q_low) ? (Q - 1) : q_low; + +- // Adjust Q ++ /* Adjust Q */ + Q = (q_high + q_low) >> 1; + } +- // The key frame is much better than the previous frame ++ /* The key frame is much better than the previous frame */ + else if ( kf_err < (cpi->ambient_err >> 1) ) + { +- // Raise q_low ++ /* Raise q_low */ + q_low = (Q < q_high) ? (Q + 1) : q_high; + +- // Adjust Q ++ /* Adjust Q */ + Q = (q_high + q_low + 1) >> 1; + } + +- // Clamp Q to upper and lower limits: ++ /* Clamp Q to upper and lower limits: */ + if (Q > q_high) + Q = q_high; + else if (Q < q_low) +@@ -3914,7 +4055,9 @@ static void encode_frame_to_data_rate + Loop = Q != last_q; + } + +- // Is the projected frame size out of range and are we allowed to attempt to recode. ++ /* Is the projected frame size out of range and are we allowed ++ * to attempt to recode. ++ */ + else if ( recode_loop_test( cpi, + frame_over_shoot_limit, frame_under_shoot_limit, + Q, top_index, bottom_index ) ) +@@ -3922,45 +4065,57 @@ static void encode_frame_to_data_rate + int last_q = Q; + int Retries = 0; + +- // Frame size out of permitted range: +- // Update correction factor & compute new Q to try... ++ /* Frame size out of permitted range. Update correction factor ++ * & compute new Q to try... ++ */ + +- // Frame is too large ++ /* Frame is too large */ + if (cpi->projected_frame_size > cpi->this_frame_target) + { +- //if ( cpi->zbin_over_quant == 0 ) +- q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value ++ /* Raise Qlow as to at least the current value */ ++ q_low = (Q < q_high) ? (Q + 1) : q_high; + +- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low +- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? 
(cpi->zbin_over_quant + 1) : zbin_oq_high; ++ /* If we are using over quant do the same for zbin_oq_low */ ++ if (cpi->mb.zbin_over_quant > 0) ++ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? ++ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; + +- //if ( undershoot_seen || (Q == MAXQ) ) + if (undershoot_seen) + { +- // Update rate_correction_factor unless cpi->active_worst_quality has changed. ++ /* Update rate_correction_factor unless ++ * cpi->active_worst_quality has changed. ++ */ + if (!active_worst_qchanged) + vp8_update_rate_correction_factors(cpi, 1); + + Q = (q_high + q_low + 1) / 2; + +- // Adjust cpi->zbin_over_quant (only allowed when Q is max) ++ /* Adjust cpi->zbin_over_quant (only allowed when Q ++ * is max) ++ */ + if (Q < MAXQ) +- cpi->zbin_over_quant = 0; ++ cpi->mb.zbin_over_quant = 0; + else + { +- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; +- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; ++ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? ++ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; ++ cpi->mb.zbin_over_quant = ++ (zbin_oq_high + zbin_oq_low) / 2; + } + } + else + { +- // Update rate_correction_factor unless cpi->active_worst_quality has changed. ++ /* Update rate_correction_factor unless ++ * cpi->active_worst_quality has changed. ++ */ + if (!active_worst_qchanged) + vp8_update_rate_correction_factors(cpi, 0); + + Q = vp8_regulate_q(cpi, cpi->this_frame_target); + +- while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) ++ while (((Q < q_low) || ++ (cpi->mb.zbin_over_quant < zbin_oq_low)) && ++ (Retries < 10)) + { + vp8_update_rate_correction_factors(cpi, 0); + Q = vp8_regulate_q(cpi, cpi->this_frame_target); +@@ -3970,47 +4125,60 @@ static void encode_frame_to_data_rate + + overshoot_seen = 1; + } +- // Frame is too small ++ /* Frame is too small */ + else + { +- if (cpi->zbin_over_quant == 0) +- q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant +- else // else lower zbin_oq_high +- zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low; ++ if (cpi->mb.zbin_over_quant == 0) ++ /* Lower q_high if not using over quant */ ++ q_high = (Q > q_low) ? (Q - 1) : q_low; ++ else ++ /* else lower zbin_oq_high */ ++ zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ? ++ (cpi->mb.zbin_over_quant - 1) : zbin_oq_low; + + if (overshoot_seen) + { +- // Update rate_correction_factor unless cpi->active_worst_quality has changed. ++ /* Update rate_correction_factor unless ++ * cpi->active_worst_quality has changed. ++ */ + if (!active_worst_qchanged) + vp8_update_rate_correction_factors(cpi, 1); + + Q = (q_high + q_low) / 2; + +- // Adjust cpi->zbin_over_quant (only allowed when Q is max) ++ /* Adjust cpi->zbin_over_quant (only allowed when Q ++ * is max) ++ */ + if (Q < MAXQ) +- cpi->zbin_over_quant = 0; ++ cpi->mb.zbin_over_quant = 0; + else +- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; ++ cpi->mb.zbin_over_quant = ++ (zbin_oq_high + zbin_oq_low) / 2; + } + else + { +- // Update rate_correction_factor unless cpi->active_worst_quality has changed. ++ /* Update rate_correction_factor unless ++ * cpi->active_worst_quality has changed. ++ */ + if (!active_worst_qchanged) + vp8_update_rate_correction_factors(cpi, 0); + + Q = vp8_regulate_q(cpi, cpi->this_frame_target); + +- // Special case reset for qlow for constrained quality. 
+- // This should only trigger where there is very substantial +- // undershoot on a frame and the auto cq level is above +- // the user passsed in value. ++ /* Special case reset for qlow for constrained quality. ++ * This should only trigger where there is very substantial ++ * undershoot on a frame and the auto cq level is above ++ * the user passsed in value. ++ */ + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < q_low) ) + { + q_low = Q; + } + +- while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) ++ while (((Q > q_high) || ++ (cpi->mb.zbin_over_quant > zbin_oq_high)) && ++ (Retries < 10)) + { + vp8_update_rate_correction_factors(cpi, 0); + Q = vp8_regulate_q(cpi, cpi->this_frame_target); +@@ -4021,14 +4189,16 @@ static void encode_frame_to_data_rate + undershoot_seen = 1; + } + +- // Clamp Q to upper and lower limits: ++ /* Clamp Q to upper and lower limits: */ + if (Q > q_high) + Q = q_high; + else if (Q < q_low) + Q = q_low; + +- // Clamp cpi->zbin_over_quant +- cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->zbin_over_quant; ++ /* Clamp cpi->zbin_over_quant */ ++ cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ? ++ zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ? ++ zbin_oq_high : cpi->mb.zbin_over_quant; + + Loop = Q != last_q; + } +@@ -4051,30 +4221,20 @@ static void encode_frame_to_data_rate + while (Loop == 1); + + #if 0 +- // Experimental code for lagged and one pass +- // Update stats used for one pass GF selection +- { +- /* +- int frames_so_far; +- double frame_intra_error; +- double frame_coded_error; +- double frame_pcnt_inter; +- double frame_pcnt_motion; +- double frame_mvr; +- double frame_mvr_abs; +- double frame_mvc; +- double frame_mvc_abs; +- */ +- ++ /* Experimental code for lagged and one pass ++ * Update stats used for one pass GF selection ++ */ ++ { + cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_coded_error = (double)cpi->prediction_error; + cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_intra_error = (double)cpi->intra_error; + cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_pcnt_inter = (double)(100 - cpi->this_frame_percent_intra) / 100.0; + } + #endif + +- // Special case code to reduce pulsing when key frames are forced at a +- // fixed interval. Note the reconstruction error if it is the frame before +- // the force key frame ++ /* Special case code to reduce pulsing when key frames are forced at a ++ * fixed interval. Note the reconstruction error if it is the frame before ++ * the force key frame ++ */ + if ( cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0) ) + { + cpi->ambient_err = vp8_calc_ss_err(cpi->Source, +@@ -4113,13 +4273,38 @@ static void encode_frame_to_data_rate + } + } + ++ /* Count last ref frame 0,0 usage on current encoded frame. */ ++ { ++ int mb_row; ++ int mb_col; ++ /* Point to beginning of MODE_INFO arrays. */ ++ MODE_INFO *tmp = cm->mi; ++ ++ cpi->zeromv_count = 0; ++ ++ if(cm->frame_type != KEY_FRAME) ++ { ++ for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) ++ { ++ for (mb_col = 0; mb_col < cm->mb_cols; mb_col ++) ++ { ++ if(tmp->mbmi.mode == ZEROMV) ++ cpi->zeromv_count++; ++ tmp++; ++ } ++ tmp++; ++ } ++ } ++ } ++ + #if CONFIG_MULTI_RES_ENCODING + vp8_cal_dissimilarity(cpi); + #endif + +- // Update the GF useage maps. +- // This is done after completing the compression of a frame when all +- // modes etc. 
are finalized but before loop filter ++ /* Update the GF useage maps. ++ * This is done after completing the compression of a frame when all ++ * modes etc. are finalized but before loop filter ++ */ + if (cpi->oxcf.number_of_layers == 1) + vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); + +@@ -4134,9 +4319,10 @@ static void encode_frame_to_data_rate + } + #endif + +- // For inter frames the current default behavior is that when +- // cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer +- // This is purely an encoder decision at present. ++ /* For inter frames the current default behavior is that when ++ * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer ++ * This is purely an encoder decision at present. ++ */ + if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame) + cm->copy_buffer_to_arf = 2; + else +@@ -4147,7 +4333,8 @@ static void encode_frame_to_data_rate + #if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded) + { +- sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ ++ /* start loopfilter in separate thread */ ++ sem_post(&cpi->h_event_start_lpf); + cpi->b_lpf_running = 1; + } + else +@@ -4156,7 +4343,7 @@ static void encode_frame_to_data_rate + vp8_loopfilter_frame(cpi, cm); + } + +- update_reference_frames(cm); ++ update_reference_frames(cpi); + + #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + if (cpi->oxcf.error_resilient_mode) +@@ -4171,7 +4358,7 @@ static void encode_frame_to_data_rate + sem_wait(&cpi->h_event_end_lpf); + #endif + +- // build the bitstream ++ /* build the bitstream */ + vp8_pack_bitstream(cpi, dest, dest_end, size); + + #if CONFIG_MULTITHREAD +@@ -4187,7 +4374,7 @@ static void encode_frame_to_data_rate + * needed in motion search besides loopfilter */ + cm->last_frame_type = cm->frame_type; + +- // Update rate control heuristics ++ /* Update rate control heuristics */ + cpi->total_byte_count += (*size); + cpi->projected_frame_size = (*size) << 3; + +@@ -4208,18 +4395,21 @@ static void encode_frame_to_data_rate + vp8_adjust_key_frame_context(cpi); + } + +- // Keep a record of ambient average Q. ++ /* Keep a record of ambient average Q. */ + if (cm->frame_type != KEY_FRAME) + cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; + +- // Keep a record from which we can calculate the average Q excluding GF updates and key frames ++ /* Keep a record from which we can calculate the average Q excluding ++ * GF updates and key frames ++ */ + if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || + (!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame))) + { + cpi->ni_frames++; + +- // Calculate the average Q for normal inter frames (not key or GFU +- // frames). ++ /* Calculate the average Q for normal inter frames (not key or GFU ++ * frames). ++ */ + if ( cpi->pass == 2 ) + { + cpi->ni_tot_qi += Q; +@@ -4227,81 +4417,62 @@ static void encode_frame_to_data_rate + } + else + { +- // Damp value for first few frames ++ /* Damp value for first few frames */ + if (cpi->ni_frames > 150 ) + { + cpi->ni_tot_qi += Q; + cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); + } +- // For one pass, early in the clip ... average the current frame Q +- // value with the worstq entered by the user as a dampening measure ++ /* For one pass, early in the clip ... 
average the current frame Q ++ * value with the worstq entered by the user as a dampening measure ++ */ + else + { + cpi->ni_tot_qi += Q; + cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; + } + +- // If the average Q is higher than what was used in the last frame +- // (after going through the recode loop to keep the frame size within range) +- // then use the last frame value - 1. +- // The -1 is designed to stop Q and hence the data rate, from progressively +- // falling away during difficult sections, but at the same time reduce the number of +- // itterations around the recode loop. ++ /* If the average Q is higher than what was used in the last ++ * frame (after going through the recode loop to keep the frame ++ * size within range) then use the last frame value - 1. The -1 ++ * is designed to stop Q and hence the data rate, from ++ * progressively falling away during difficult sections, but at ++ * the same time reduce the number of itterations around the ++ * recode loop. ++ */ + if (Q > cpi->ni_av_qi) + cpi->ni_av_qi = Q - 1; + } + } + +-#if 0 +- +- // If the frame was massively oversize and we are below optimal buffer level drop next frame +- if ((cpi->drop_frames_allowed) && +- (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && +- (cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && +- (cpi->projected_frame_size > (4 * cpi->this_frame_target))) +- { +- cpi->drop_frame = 1; +- } +- +-#endif +- +- // Set the count for maximum consecutive dropped frames based upon the ratio of +- // this frame size to the target average per frame bandwidth. +- // (cpi->av_per_frame_bandwidth > 0) is just a sanity check to prevent / 0. +- if (cpi->drop_frames_allowed && (cpi->av_per_frame_bandwidth > 0)) +- { +- cpi->max_drop_count = cpi->projected_frame_size / cpi->av_per_frame_bandwidth; +- +- if (cpi->max_drop_count > cpi->max_consec_dropped_frames) +- cpi->max_drop_count = cpi->max_consec_dropped_frames; +- } +- +- // Update the buffer level variable. +- // Non-viewable frames are a special case and are treated as pure overhead. ++ /* Update the buffer level variable. */ ++ /* Non-viewable frames are a special case and are treated as pure overhead. */ + if ( !cm->show_frame ) + cpi->bits_off_target -= cpi->projected_frame_size; + else + cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; + +- // Clip the buffer level to the maximum specified buffer size ++ /* Clip the buffer level to the maximum specified buffer size */ + if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) + cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; + +- // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. ++ /* Rolling monitors of whether we are over or underspending used to ++ * help regulate min and Max Q in two pass. 
++ */ + cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; + cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4; + cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32; + cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; + +- // Actual bits spent ++ /* Actual bits spent */ + cpi->total_actual_bits += cpi->projected_frame_size; + +- // Debug stats ++ /* Debug stats */ + cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); + + cpi->buffer_level = cpi->bits_off_target; + +- // Propagate values to higher temporal layers ++ /* Propagate values to higher temporal layers */ + if (cpi->oxcf.number_of_layers > 1) + { + unsigned int i; +@@ -4309,12 +4480,13 @@ static void encode_frame_to_data_rate + for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; +- int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate +- - cpi->projected_frame_size; ++ int bits_off_for_this_layer = ++ (int)(lc->target_bandwidth / lc->frame_rate - ++ cpi->projected_frame_size); + + lc->bits_off_target += bits_off_for_this_layer; + +- // Clip buffer level to maximum buffer size for the layer ++ /* Clip buffer level to maximum buffer size for the layer */ + if (lc->bits_off_target > lc->maximum_buffer_size) + lc->bits_off_target = lc->maximum_buffer_size; + +@@ -4324,7 +4496,9 @@ static void encode_frame_to_data_rate + } + } + +- // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames ++ /* Update bits left to the kf and gf groups to account for overshoot ++ * or undershoot on these frames ++ */ + if (cm->frame_type == KEY_FRAME) + { + cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; +@@ -4357,7 +4531,7 @@ static void encode_frame_to_data_rate + cpi->last_skip_false_probs[0] = cpi->prob_skip_false; + cpi->last_skip_probs_q[0] = cm->base_qindex; + +- //update the baseline ++ /* update the baseline */ + cpi->base_skip_false_prob[cm->base_qindex] = cpi->prob_skip_false; + + } +@@ -4367,7 +4541,7 @@ static void encode_frame_to_data_rate + { + FILE *f = fopen("tmp.stt", "a"); + +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + + if (cpi->twopass.total_left_stats.coded_error != 0.0) + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" +@@ -4383,7 +4557,6 @@ static void encode_frame_to_data_rate + cpi->active_best_quality, cpi->active_worst_quality, + cpi->ni_av_qi, cpi->cq_target_quality, + cpi->zbin_over_quant, +- //cpi->avg_frame_qindex, cpi->zbin_over_quant, + cm->refresh_golden_frame, cm->refresh_alt_ref_frame, + cm->frame_type, cpi->gfu_boost, + cpi->twopass.est_max_qcorrection_factor, +@@ -4406,7 +4579,6 @@ static void encode_frame_to_data_rate + cpi->active_best_quality, cpi->active_worst_quality, + cpi->ni_av_qi, cpi->cq_target_quality, + cpi->zbin_over_quant, +- //cpi->avg_frame_qindex, cpi->zbin_over_quant, + cm->refresh_golden_frame, cm->refresh_alt_ref_frame, + cm->frame_type, cpi->gfu_boost, + cpi->twopass.est_max_qcorrection_factor, +@@ -4436,10 +4608,6 @@ static void encode_frame_to_data_rate + + #endif + +- // If this was a kf or Gf note the Q +- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame) +- cm->last_kf_gf_q = cm->base_qindex; +- + if (cm->refresh_golden_frame == 
1) + cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; + else +@@ -4451,49 +4619,55 @@ static void encode_frame_to_data_rate + cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF; + + +- if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed ++ if (cm->refresh_last_frame & cm->refresh_golden_frame) ++ /* both refreshed */ + cpi->gold_is_last = 1; +- else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other ++ else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) ++ /* 1 refreshed but not the other */ + cpi->gold_is_last = 0; + +- if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed ++ if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) ++ /* both refreshed */ + cpi->alt_is_last = 1; +- else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other ++ else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) ++ /* 1 refreshed but not the other */ + cpi->alt_is_last = 0; + +- if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed ++ if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) ++ /* both refreshed */ + cpi->gold_is_alt = 1; +- else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other ++ else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) ++ /* 1 refreshed but not the other */ + cpi->gold_is_alt = 0; + +- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; ++ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; + + if (cpi->gold_is_last) +- cpi->ref_frame_flags &= ~VP8_GOLD_FLAG; ++ cpi->ref_frame_flags &= ~VP8_GOLD_FRAME; + + if (cpi->alt_is_last) +- cpi->ref_frame_flags &= ~VP8_ALT_FLAG; ++ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; + + if (cpi->gold_is_alt) +- cpi->ref_frame_flags &= ~VP8_ALT_FLAG; ++ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; + + + if (!cpi->oxcf.error_resilient_mode) + { + if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) +- // Update the alternate reference frame stats as appropriate. ++ /* Update the alternate reference frame stats as appropriate. */ + update_alt_ref_frame_stats(cpi); + else +- // Update the Golden frame stats as appropriate. ++ /* Update the Golden frame stats as appropriate. */ + update_golden_frame_stats(cpi); + } + + if (cm->frame_type == KEY_FRAME) + { +- // Tell the caller that the frame was coded as a key frame ++ /* Tell the caller that the frame was coded as a key frame */ + *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; + +- // As this frame is a key frame the next defaults to an inter frame. ++ /* As this frame is a key frame the next defaults to an inter frame. */ + cm->frame_type = INTER_FRAME; + + cpi->last_frame_percent_intra = 100; +@@ -4505,20 +4679,24 @@ static void encode_frame_to_data_rate + cpi->last_frame_percent_intra = cpi->this_frame_percent_intra; + } + +- // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas. ++ /* Clear the one shot update flags for segmentation map and mode/ref ++ * loop filter deltas. 
++ */ + cpi->mb.e_mbd.update_mb_segmentation_map = 0; + cpi->mb.e_mbd.update_mb_segmentation_data = 0; + cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; + + +- // Dont increment frame counters if this was an altref buffer update not a real frame ++ /* Dont increment frame counters if this was an altref buffer update ++ * not a real frame ++ */ + if (cm->show_frame) + { + cm->current_video_frame++; + cpi->frames_since_key++; + } + +- // reset to normal state now that we are done. ++ /* reset to normal state now that we are done. */ + + + +@@ -4534,67 +4712,11 @@ static void encode_frame_to_data_rate + } + #endif + +- // DEBUG +- //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); ++ /* DEBUG */ ++ /* vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); */ + + + } +- +- +-static void check_gf_quality(VP8_COMP *cpi) +-{ +- VP8_COMMON *cm = &cpi->common; +- int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols); +- int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols); +- int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols); +- +- // Gf refresh is not currently being signalled +- if (cpi->gf_update_recommended == 0) +- { +- if (cpi->common.frames_since_golden > 7) +- { +- // Low use of gf +- if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15)) +- { +- // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate +- if (last_ref_zz_useage >= 25) +- { +- cpi->gf_bad_count ++; +- +- if (cpi->gf_bad_count >= 8) // Check that the condition is stable +- { +- cpi->gf_update_recommended = 1; +- cpi->gf_bad_count = 0; +- } +- } +- else +- cpi->gf_bad_count = 0; // Restart count as the background is not stable enough +- } +- else +- cpi->gf_bad_count = 0; // Gf useage has picked up so reset count +- } +- } +- // If the signal is set but has not been read should we cancel it. +- else if (last_ref_zz_useage < 15) +- { +- cpi->gf_update_recommended = 0; +- cpi->gf_bad_count = 0; +- } +- +-#if 0 +- { +- FILE *f = fopen("gfneeded.stt", "a"); +- fprintf(f, "%10d %10d %10d %10d %10ld \n", +- cm->current_video_frame, +- cpi->common.frames_since_golden, +- gf_active_pct, gf_ref_usage_pct, +- cpi->gf_update_recommended); +- fclose(f); +- } +- +-#endif +-} +- + #if !(CONFIG_REALTIME_ONLY) + static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags) + { +@@ -4614,7 +4736,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, + } + #endif + +-//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. ++/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ + #if HAVE_NEON + extern void vp8_push_neon(int64_t *store); + extern void vp8_pop_neon(int64_t *store); +@@ -4721,7 +4843,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + cpi->source = NULL; + + #if !(CONFIG_REALTIME_ONLY) +- // Should we code an alternate reference frame ++ /* Should we code an alternate reference frame */ + if (cpi->oxcf.error_resilient_mode == 0 && + cpi->oxcf.play_alternate && + cpi->source_alt_ref_pending) +@@ -4742,7 +4864,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + cm->refresh_golden_frame = 0; + cm->refresh_last_frame = 0; + cm->show_frame = 0; +- cpi->source_alt_ref_pending = 0; // Clear Pending alt Ref flag. ++ /* Clear Pending alt Ref flag. 
*/ ++ cpi->source_alt_ref_pending = 0; + cpi->is_src_frame_alt_ref = 0; + } + } +@@ -4814,7 +4937,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + cpi->last_end_time_stamp_seen = cpi->source->ts_start; + } + +- // adjust frame rates based on timestamps given ++ /* adjust frame rates based on timestamps given */ + if (cm->show_frame) + { + int64_t this_duration; +@@ -4832,9 +4955,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; + last_duration = cpi->last_end_time_stamp_seen + - cpi->last_time_stamp_seen; +- // do a step update if the duration changes by 10% ++ /* do a step update if the duration changes by 10% */ + if (last_duration) +- step = ((this_duration - last_duration) * 10 / last_duration); ++ step = (int)(((this_duration - last_duration) * ++ 10 / last_duration)); + } + + if (this_duration) +@@ -4849,7 +4973,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + * frame rate. If we haven't seen 1 second yet, then average + * over the whole interval seen. + */ +- interval = cpi->source->ts_end - cpi->first_time_stamp_ever; ++ interval = (double)(cpi->source->ts_end - ++ cpi->first_time_stamp_ever); + if(interval > 10000000.0) + interval = 10000000; + +@@ -4862,9 +4987,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (cpi->oxcf.number_of_layers > 1) + { +- int i; ++ unsigned int i; + +- // Update frame rates for each layer ++ /* Update frame rates for each layer */ + for (i=0; i<cpi->oxcf.number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; +@@ -4886,7 +5011,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + update_layer_contexts (cpi); + +- // Restore layer specific context & set frame rate ++ /* Restore layer specific context & set frame rate */ + layer = cpi->oxcf.layer_id[ + cm->current_video_frame % cpi->oxcf.periodicity]; + restore_layer_context (cpi, layer); +@@ -4895,12 +5020,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (cpi->compressor_speed == 2) + { +- if (cpi->oxcf.number_of_layers == 1) +- check_gf_quality(cpi); + vpx_usec_timer_start(&tsctimer); + vpx_usec_timer_start(&ticktimer); + } + ++ cpi->lf_zeromv_pct = (cpi->zeromv_count * 100)/cm->MBs; ++ + #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + { + int i; +@@ -4924,11 +5049,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + } + #endif + +- // start with a 0 size frame ++ /* start with a 0 size frame */ + *size = 0; + +- // Clear down mmx registers +- vp8_clear_system_state(); //__asm emms; ++ /* Clear down mmx registers */ ++ vp8_clear_system_state(); + + cm->frame_type = INTER_FRAME; + cm->frame_flags = *frame_flags; +@@ -4937,7 +5062,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (cm->refresh_alt_ref_frame) + { +- //cm->refresh_golden_frame = 1; + cm->refresh_golden_frame = 0; + cm->refresh_last_frame = 0; + } +@@ -4982,7 +5106,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + vpx_usec_timer_mark(&tsctimer); + vpx_usec_timer_mark(&ticktimer); + +- duration = vpx_usec_timer_elapsed(&ticktimer); ++ duration = (int)(vpx_usec_timer_elapsed(&ticktimer)); + duration2 = (unsigned int)((double)duration / 2); + + if (cm->frame_type != KEY_FRAME) +@@ -4995,7 +5119,6 @@ int 
vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (duration2) + { +- //if(*frame_flags!=1) + { + + if (cpi->avg_pick_mode_time == 0) +@@ -5012,8 +5135,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); + } + +- // Save the contexts separately for alt ref, gold and last. +- // (TODO jbb -> Optimize this with pointers to avoid extra copies. ) ++ /* Save the contexts separately for alt ref, gold and last. */ ++ /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */ + if(cm->refresh_alt_ref_frame) + vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); + +@@ -5023,12 +5146,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + if(cm->refresh_last_frame) + vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); + +- // if its a dropped frame honor the requests on subsequent frames ++ /* if its a dropped frame honor the requests on subsequent frames */ + if (*size > 0) + { + cpi->droppable = !frame_is_reference(cpi); + +- // return to normal state ++ /* return to normal state */ + cm->refresh_entropy_probs = 1; + cm->refresh_alt_ref_frame = 0; + cm->refresh_golden_frame = 0; +@@ -5037,7 +5160,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + } + +- // Save layer specific state ++ /* Save layer specific state */ + if (cpi->oxcf.number_of_layers > 1) + save_layer_context (cpi); + +@@ -5062,14 +5185,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (cpi->b_calculate_psnr) + { +- double ye,ue,ve; ++ uint64_t ye,ue,ve; + double frame_psnr; + YV12_BUFFER_CONFIG *orig = cpi->Source; + YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; + int y_samples = orig->y_height * orig->y_width ; + int uv_samples = orig->uv_height * orig->uv_width ; + int t_samples = y_samples + 2 * uv_samples; +- int64_t sq_error, sq_error2; ++ double sq_error, sq_error2; + + ye = calc_plane_error(orig->y_buffer, orig->y_stride, + recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height); +@@ -5080,13 +5203,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + ve = calc_plane_error(orig->v_buffer, orig->uv_stride, + recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height); + +- sq_error = ye + ue + ve; ++ sq_error = (double)(ye + ue + ve); + + frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error); + +- cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye); +- cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue); +- cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve); ++ cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye); ++ cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue); ++ cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve); + cpi->total_sq_error += sq_error; + cpi->total += frame_psnr; + #if CONFIG_POSTPROC +@@ -5095,7 +5218,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + double frame_psnr2, frame_ssim2 = 0; + double weight = 0; + +- vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); ++ vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); + vp8_clear_system_state(); + + ye = calc_plane_error(orig->y_buffer, orig->y_stride, +@@ -5107,13 +5230,16 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + ve = calc_plane_error(orig->v_buffer, orig->uv_stride, + 
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height); + +- sq_error2 = ye + ue + ve; ++ sq_error2 = (double)(ye + ue + ve); + + frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2); + +- cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye); +- cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue); +- cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve); ++ cpi->totalp_y += vp8_mse2psnr(y_samples, ++ 255.0, (double)ye); ++ cpi->totalp_u += vp8_mse2psnr(uv_samples, ++ 255.0, (double)ue); ++ cpi->totalp_v += vp8_mse2psnr(uv_samples, ++ 255.0, (double)ve); + cpi->total_sq_error2 += sq_error2; + cpi->totalp += frame_psnr2; + +@@ -5125,7 +5251,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (cpi->oxcf.number_of_layers > 1) + { +- int i; ++ unsigned int i; + + for (i=cpi->current_layer; + i<cpi->oxcf.number_of_layers; i++) +@@ -5153,7 +5279,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l + + if (cpi->oxcf.number_of_layers > 1) + { +- int i; ++ unsigned int i; + + for (i=cpi->current_layer; + i<cpi->oxcf.number_of_layers; i++) +@@ -5251,7 +5377,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla + ret = -1; + } + +-#endif //!CONFIG_POSTPROC ++#endif + vp8_clear_system_state(); + return ret; + } +@@ -5260,29 +5386,53 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla + int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]) + { + signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; ++ int internal_delta_q[MAX_MB_SEGMENTS]; ++ const int range = 63; ++ int i; + ++ // This method is currently incompatible with the cyclic refresh method ++ if ( cpi->cyclic_refresh_mode_enabled ) ++ return -1; ++ ++ // Check number of rows and columns match + if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) + return -1; + ++ // Range check the delta Q values and convert the external Q range values ++ // to internal ones. ++ if ( (abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) || ++ (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range) ) ++ return -1; ++ ++ // Range check the delta lf values ++ if ( (abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) || ++ (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range) ) ++ return -1; ++ + if (!map) + { + disable_segmentation(cpi); + return 0; + } + +- // Set the segmentation Map ++ // Translate the external delta q values to internal values. ++ for ( i = 0; i < MAX_MB_SEGMENTS; i++ ) ++ internal_delta_q[i] = ++ ( delta_q[i] >= 0 ) ? q_trans[delta_q[i]] : -q_trans[-delta_q[i]]; ++ ++ /* Set the segmentation Map */ + set_segmentation_map(cpi, map); + +- // Activate segmentation. ++ /* Activate segmentation. 
*/ + enable_segmentation(cpi); + +- // Set up the quant segment data +- feature_data[MB_LVL_ALT_Q][0] = delta_q[0]; +- feature_data[MB_LVL_ALT_Q][1] = delta_q[1]; +- feature_data[MB_LVL_ALT_Q][2] = delta_q[2]; +- feature_data[MB_LVL_ALT_Q][3] = delta_q[3]; ++ /* Set up the quant segment data */ ++ feature_data[MB_LVL_ALT_Q][0] = internal_delta_q[0]; ++ feature_data[MB_LVL_ALT_Q][1] = internal_delta_q[1]; ++ feature_data[MB_LVL_ALT_Q][2] = internal_delta_q[2]; ++ feature_data[MB_LVL_ALT_Q][3] = internal_delta_q[3]; + +- // Set up the loop segment data s ++ /* Set up the loop segment data s */ + feature_data[MB_LVL_ALT_LF][0] = delta_lf[0]; + feature_data[MB_LVL_ALT_LF][1] = delta_lf[1]; + feature_data[MB_LVL_ALT_LF][2] = delta_lf[2]; +@@ -5293,8 +5443,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne + cpi->segment_encode_breakout[2] = threshold[2]; + cpi->segment_encode_breakout[3] = threshold[3]; + +- // Initialise the feature data structure +- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 ++ /* Initialise the feature data structure */ + set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); + + return 0; +@@ -5316,7 +5465,6 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns + } + else + { +- //cpi->active_map_enabled = 0; + return -1 ; + } + } +@@ -5346,7 +5494,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) + unsigned char *src = source->y_buffer; + unsigned char *dst = dest->y_buffer; + +- // Loop through the Y plane raw and reconstruction data summing (square differences) ++ /* Loop through the Y plane raw and reconstruction data summing ++ * (square differences) ++ */ + for (i = 0; i < source->y_height; i += 16) + { + for (j = 0; j < source->y_width; j += 16) +diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h +index 900141b..fb8ad35 100644 +--- a/vp8/encoder/onyx_int.h ++++ b/vp8/encoder/onyx_int.h +@@ -25,6 +25,7 @@ + #include "vp8/common/threading.h" + #include "vpx_ports/mem.h" + #include "vpx/internal/vpx_codec_internal.h" ++#include "vpx/vp8.h" + #include "mcomp.h" + #include "vp8/common/findnearmv.h" + #include "lookahead.h" +@@ -32,7 +33,6 @@ + #include "vp8/encoder/denoising.h" + #endif + +-//#define SPEEDSTATS 1 + #define MIN_GF_INTERVAL 4 + #define DEFAULT_GF_INTERVAL 7 + +@@ -43,7 +43,7 @@ + #define AF_THRESH 25 + #define AF_THRESH2 100 + #define ARF_DECAY_THRESH 12 +-#define MAX_MODES 20 ++ + + #define MIN_THRESHMULT 32 + #define MAX_THRESHMULT 512 +@@ -73,7 +73,6 @@ typedef struct + int mvcosts[2][MVvals+1]; + + #ifdef MODE_STATS +- // Stats + int y_modes[5]; + int uv_modes[4]; + int b_modes[10]; +@@ -232,22 +231,22 @@ enum + + typedef struct + { +- // Layer configuration ++ /* Layer configuration */ + double frame_rate; + int target_bandwidth; + +- // Layer specific coding parameters +- int starting_buffer_level; +- int optimal_buffer_level; +- int maximum_buffer_size; +- int starting_buffer_level_in_ms; +- int optimal_buffer_level_in_ms; +- int maximum_buffer_size_in_ms; ++ /* Layer specific coding parameters */ ++ int64_t starting_buffer_level; ++ int64_t optimal_buffer_level; ++ int64_t maximum_buffer_size; ++ int64_t starting_buffer_level_in_ms; ++ int64_t optimal_buffer_level_in_ms; ++ int64_t maximum_buffer_size_in_ms; + + int avg_frame_size_for_layer; + +- int buffer_level; +- int bits_off_target; ++ int64_t buffer_level; ++ int64_t bits_off_target; + + int64_t total_actual_bits; + int total_target_vs_actual; +@@ -307,7 +306,7 @@ typedef struct 
VP8_COMP + + MACROBLOCK mb; + VP8_COMMON common; +- vp8_writer bc[9]; // one boolcoder for each partition ++ vp8_writer bc[9]; /* one boolcoder for each partition */ + + VP8_CONFIG oxcf; + +@@ -321,16 +320,20 @@ typedef struct VP8_COMP + YV12_BUFFER_CONFIG scaled_source; + YV12_BUFFER_CONFIG *last_frame_unscaled_source; + +- int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref +- int source_alt_ref_active; // an alt ref frame has been encoded and is usable +- +- int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame ++ /* frame in src_buffers has been identified to be encoded as an alt ref */ ++ int source_alt_ref_pending; ++ /* an alt ref frame has been encoded and is usable */ ++ int source_alt_ref_active; ++ /* source of frame to encode is an exact copy of an alt ref frame */ ++ int is_src_frame_alt_ref; + +- int gold_is_last; // golden frame same as last frame ( short circuit gold searches) +- int alt_is_last; // Alt reference frame same as last ( short circuit altref search) +- int gold_is_alt; // don't do both alt and gold search ( just do gold). ++ /* golden frame same as last frame ( short circuit gold searches) */ ++ int gold_is_last; ++ /* Alt reference frame same as last ( short circuit altref search) */ ++ int alt_is_last; ++ /* don't do both alt and gold search ( just do gold). */ ++ int gold_is_alt; + +- //int refresh_alt_ref_frame; + YV12_BUFFER_CONFIG pick_lf_lvl_frame; + + TOKENEXTRA *tok; +@@ -342,55 +345,62 @@ typedef struct VP8_COMP + unsigned int this_key_frame_forced; + unsigned int next_key_frame_forced; + +- // Ambient reconstruction err target for force key frames ++ /* Ambient reconstruction err target for force key frames */ + int ambient_err; + + unsigned int mode_check_freq[MAX_MODES]; +- unsigned int mode_test_hit_counts[MAX_MODES]; + unsigned int mode_chosen_counts[MAX_MODES]; +- unsigned int mbs_tested_so_far; + +- int rd_thresh_mult[MAX_MODES]; + int rd_baseline_thresh[MAX_MODES]; +- int rd_threshes[MAX_MODES]; + + int RDMULT; + int RDDIV ; + + CODING_CONTEXT coding_context; + +- // Rate targetting variables +- int64_t prediction_error; ++ /* Rate targetting variables */ + int64_t last_prediction_error; +- int64_t intra_error; + int64_t last_intra_error; + + int this_frame_target; + int projected_frame_size; +- int last_q[2]; // Separate values for Intra/Inter ++ int last_q[2]; /* Separate values for Intra/Inter */ + + double rate_correction_factor; + double key_frame_rate_correction_factor; + double gf_rate_correction_factor; + +- int frames_till_gf_update_due; // Count down till next GF +- int current_gf_interval; // GF interval chosen when we coded the last GF ++ /* Count down till next GF */ ++ int frames_till_gf_update_due; ++ ++ /* GF interval chosen when we coded the last GF */ ++ int current_gf_interval; + +- int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative) ++ /* Total bits overspent becasue of GF boost (cumulative) */ ++ int gf_overspend_bits; + +- int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF ++ /* Used in the few frames following a GF to recover the extra bits ++ * spent in that GF ++ */ ++ int non_gf_bitrate_adjustment; + +- int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames +- int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame. 
++ /* Extra bits spent on key frames that need to be recovered */ ++ int kf_overspend_bits; ++ ++ /* Current number of bit s to try and recover on each inter frame. */ ++ int kf_bitrate_adjustment; + int max_gf_interval; + int baseline_gf_interval; +- int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames ++ int active_arnr_frames; + + int64_t key_frame_count; + int prior_key_frame_distance[KEY_FRAME_CONTEXT]; +- int per_frame_bandwidth; // Current section per frame bandwidth target +- int av_per_frame_bandwidth; // Average frame size target for clip +- int min_frame_bandwidth; // Minimum allocation that should be used for any frame ++ /* Current section per frame bandwidth target */ ++ int per_frame_bandwidth; ++ /* Average frame size target for clip */ ++ int av_per_frame_bandwidth; ++ /* Minimum allocation that should be used for any frame */ ++ int min_frame_bandwidth; + int inter_frame_target; + double output_frame_rate; + int64_t last_time_stamp_seen; +@@ -402,12 +412,6 @@ typedef struct VP8_COMP + int ni_frames; + int avg_frame_qindex; + +- int zbin_over_quant; +- int zbin_mode_boost; +- int zbin_mode_boost_enabled; +- int last_zbin_over_quant; +- int last_zbin_mode_boost; +- + int64_t total_byte_count; + + int buffered_mode; +@@ -415,7 +419,7 @@ typedef struct VP8_COMP + double frame_rate; + double ref_frame_rate; + int64_t buffer_level; +- int bits_off_target; ++ int64_t bits_off_target; + + int rolling_target_bits; + int rolling_actual_bits; +@@ -424,7 +428,7 @@ typedef struct VP8_COMP + int long_rolling_actual_bits; + + int64_t total_actual_bits; +- int total_target_vs_actual; // debug stats ++ int total_target_vs_actual; /* debug stats */ + + int worst_quality; + int active_worst_quality; +@@ -433,22 +437,9 @@ typedef struct VP8_COMP + + int cq_target_quality; + +- int drop_frames_allowed; // Are we permitted to drop frames? +- int drop_frame; // Drop this frame? +- int drop_count; // How many frames have we dropped? +- int max_drop_count; // How many frames should we drop? +- int max_consec_dropped_frames; // Limit number of consecutive frames that can be dropped. +- +- +- int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ +- int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ ++ int drop_frames_allowed; /* Are we permitted to drop frames? */ ++ int drop_frame; /* Drop this frame? 
*/ + +- unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ +- +- unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ +- +- //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more +- //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation + vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + +@@ -462,7 +453,7 @@ typedef struct VP8_COMP + struct vpx_codec_pkt_list *output_pkt_list; + + #if 0 +- // Experimental code for lagged and one pass ++ /* Experimental code for lagged and one pass */ + ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; + int one_pass_frame_index; + #endif +@@ -470,17 +461,14 @@ typedef struct VP8_COMP + int decimation_factor; + int decimation_count; + +- // for real time encoding +- int avg_encode_time; //microsecond +- int avg_pick_mode_time; //microsecond ++ /* for real time encoding */ ++ int avg_encode_time; /* microsecond */ ++ int avg_pick_mode_time; /* microsecond */ + int Speed; +- unsigned int cpu_freq; //Mhz + int compressor_speed; + +- int interquantizer; + int auto_gold; + int auto_adjust_gold_quantizer; +- int goldfreq; + int auto_worst_q; + int cpu_used; + int pass; +@@ -494,29 +482,28 @@ typedef struct VP8_COMP + int last_skip_probs_q[3]; + int recent_ref_frame_usage[MAX_REF_FRAMES]; + +- int count_mb_ref_frame_usage[MAX_REF_FRAMES]; + int this_frame_percent_intra; + int last_frame_percent_intra; + + int ref_frame_flags; + + SPEED_FEATURES sf; +- int error_bins[1024]; + +- // Data used for real time conferencing mode to help determine if it would be good to update the gf +- int inter_zz_count; +- int gf_bad_count; +- int gf_update_recommended; +- int skip_true_count; ++ /* Count ZEROMV on all reference frames. */ ++ int zeromv_count; ++ int lf_zeromv_pct; + + unsigned char *segmentation_map; +- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) +- int segment_encode_breakout[MAX_MB_SEGMENTS]; // segment threashold for encode breakout ++ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; ++ int segment_encode_breakout[MAX_MB_SEGMENTS]; + + unsigned char *active_map; + unsigned int active_map_enabled; +- // Video conferencing cyclic refresh mode flags etc +- // This is a mode designed to clean up the background over time in live encoding scenarious. It uses segmentation ++ ++ /* Video conferencing cyclic refresh mode flags. This is a mode ++ * designed to clean up the background over time in live encoding ++ * scenarious. It uses segmentation. 
++ */ + int cyclic_refresh_mode_enabled; + int cyclic_refresh_mode_max_mbs_perframe; + int cyclic_refresh_mode_index; +@@ -524,7 +511,7 @@ typedef struct VP8_COMP + signed char *cyclic_refresh_map; + + #if CONFIG_MULTITHREAD +- // multithread data ++ /* multithread data */ + int * mt_current_mb_col; + int mt_sync_range; + int b_multi_threaded; +@@ -538,7 +525,7 @@ typedef struct VP8_COMP + ENCODETHREAD_DATA *en_thread_data; + LPFTHREAD_DATA lpf_thread_data; + +- //events ++ /* events */ + sem_t *h_event_start_encoding; + sem_t h_event_end_encoding; + sem_t h_event_start_lpf; +@@ -549,7 +536,6 @@ typedef struct VP8_COMP + unsigned int partition_sz[MAX_PARTITIONS]; + unsigned char *partition_d[MAX_PARTITIONS]; + unsigned char *partition_d_end[MAX_PARTITIONS]; +- // end of multithread data + + + fractional_mv_step_fp *find_fractional_mv_step; +@@ -557,10 +543,10 @@ typedef struct VP8_COMP + vp8_refining_search_fn_t refining_search_sad; + vp8_diamond_search_fn_t diamond_search_sad; + vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; +- unsigned int time_receive_data; +- unsigned int time_compress_data; +- unsigned int time_pick_lpf; +- unsigned int time_encode_mb_row; ++ uint64_t time_receive_data; ++ uint64_t time_compress_data; ++ uint64_t time_pick_lpf; ++ uint64_t time_encode_mb_row; + + int base_skip_false_prob[128]; + +@@ -594,16 +580,16 @@ typedef struct VP8_COMP + int gf_decay_rate; + int static_scene_max_gf_interval; + int kf_bits; +- int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only +- +- // Projected total bits available for a key frame group of frames ++ /* Remaining error from uncoded frames in a gf group. */ ++ int gf_group_error_left; ++ /* Projected total bits available for a key frame group of frames */ + int64_t kf_group_bits; +- +- // Error score of frames still to be coded in kf group ++ /* Error score of frames still to be coded in kf group */ + int64_t kf_group_error_left; +- +- int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF +- int gf_bits; // Bits for the golden frame or ARF - 2 pass only ++ /* Projected Bits available for a group including 1 GF or ARF */ ++ int gf_group_bits; ++ /* Bits for the golden frame or ARF */ ++ int gf_bits; + int alt_extra_bits; + double est_max_qcorrection_factor; + } twopass; +@@ -641,24 +627,25 @@ typedef struct VP8_COMP + #endif + int b_calculate_psnr; + +- // Per MB activity measurement ++ /* Per MB activity measurement */ + unsigned int activity_avg; + unsigned int * mb_activity_map; +- int * mb_norm_activity_map; + +- // Record of which MBs still refer to last golden frame either +- // directly or through 0,0 ++ /* Record of which MBs still refer to last golden frame either ++ * directly or through 0,0 ++ */ + unsigned char *gf_active_flags; + int gf_active_count; + + int output_partition; + +- //Store last frame's MV info for next frame MV prediction ++ /* Store last frame's MV info for next frame MV prediction */ + int_mv *lfmv; + int *lf_ref_frame_sign_bias; + int *lf_ref_frame; + +- int force_next_frame_intra; /* force next frame to intra when kf_auto says so */ ++ /* force next frame to intra when kf_auto says so */ ++ int force_next_frame_intra; + + int droppable; + +@@ -666,7 +653,7 @@ typedef struct VP8_COMP + VP8_DENOISER denoiser; + #endif + +- // Coding layer state variables ++ /* Coding layer state variables */ + unsigned int current_layer; + LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; + +@@ -687,17 +674,29 @@ typedef struct 
VP8_COMP + #if CONFIG_MULTI_RES_ENCODING + /* Number of MBs per row at lower-resolution level */ + int mr_low_res_mb_cols; ++ /* Indicate if lower-res mv info is available */ ++ unsigned char mr_low_res_mv_avail; ++ /* The frame number of each reference frames */ ++ unsigned int current_ref_frames[MAX_REF_FRAMES]; + #endif + ++ struct rd_costs_struct ++ { ++ int mvcosts[2][MVvals+1]; ++ int mvsadcosts[2][MVfpvals+1]; ++ int mbmode_cost[2][MB_MODE_COUNT]; ++ int intra_uv_mode_cost[2][MB_MODE_COUNT]; ++ int bmode_costs[10][10][10]; ++ int inter_bmode_costs[B_MODE_COUNT]; ++ int token_costs[BLOCK_TYPES][COEF_BANDS] ++ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; ++ } rd_costs; + } VP8_COMP; + +-void control_data_rate(VP8_COMP *cpi); +- +-void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size); +- +-int rd_cost_intra_mb(MACROBLOCKD *x); ++void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, ++ unsigned char *dest_end, unsigned long *size); + +-void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **); ++void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **); + + void vp8_set_speed_features(VP8_COMP *cpi); + +diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c +index dafb645..673de2b 100644 +--- a/vp8/encoder/pickinter.c ++++ b/vp8/encoder/pickinter.c +@@ -61,7 +61,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, + } + + +-static int get_inter_mbpred_error(MACROBLOCK *mb, ++int vp8_get_inter_mbpred_error(MACROBLOCK *mb, + const vp8_variance_fn_ptr_t *vfp, + unsigned int *sse, + int_mv this_mv) +@@ -132,7 +132,7 @@ static int pick_intra4x4block( + MACROBLOCK *x, + int ib, + B_PREDICTION_MODE *best_mode, +- unsigned int *mode_costs, ++ const int *mode_costs, + + int *bestrate, + int *bestdistortion) +@@ -141,20 +141,24 @@ static int pick_intra4x4block( + BLOCKD *b = &x->e_mbd.block[ib]; + BLOCK *be = &x->block[ib]; + int dst_stride = x->e_mbd.dst.y_stride; +- unsigned char *base_dst = x->e_mbd.dst.y_buffer; ++ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; + B_PREDICTION_MODE mode; +- int best_rd = INT_MAX; // 1<<30 ++ int best_rd = INT_MAX; + int rate; + int distortion; + +- for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++) ++ unsigned char *Above = dst - dst_stride; ++ unsigned char *yleft = dst - 1; ++ unsigned char top_left = Above[-1]; ++ ++ for (mode = B_DC_PRED; mode <= B_HE_PRED; mode++) + { + int this_rd; + + rate = mode_costs[mode]; +- vp8_intra4x4_predict +- (base_dst + b->offset, dst_stride, +- mode, b->predictor, 16); ++ ++ vp8_intra4x4_predict(Above, yleft, dst_stride, mode, ++ b->predictor, 16, top_left); + distortion = get_prediction_error(be, b); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + +@@ -167,7 +171,7 @@ static int pick_intra4x4block( + } + } + +- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); ++ b->bmi.as_mode = *best_mode; + vp8_encode_intra4x4block(x, ib); + return best_rd; + } +@@ -185,7 +189,7 @@ static int pick_intra4x4mby_modes + int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; + int error; + int distortion = 0; +- unsigned int *bmode_costs; ++ const int *bmode_costs; + + intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); + +@@ -214,8 +218,9 @@ static int pick_intra4x4mby_modes + distortion += d; + mic->bmi[i].as_mode = best_mode; + +- // Break out case where we have already exceeded best so far value +- // that was passed in ++ /* Break out case where we have already exceeded best so far value ++ 
* that was passed in ++ */ + if (distortion > *best_dist) + break; + } +@@ -384,15 +389,16 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb) + + } + +-static void update_mvcount(VP8_COMP *cpi, MACROBLOCKD *xd, int_mv *best_ref_mv) ++static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) + { ++ MACROBLOCKD *xd = &x->e_mbd; + /* Split MV modes currently not supported when RD is nopt enabled, + * therefore, only need to modify MVcount in NEWMV mode. */ + if (xd->mode_info_context->mbmi.mode == NEWMV) + { +- cpi->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - ++ x->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - + best_ref_mv->as_mv.row) >> 1)]++; +- cpi->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - ++ x->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - + best_ref_mv->as_mv.col) >> 1)]++; + } + } +@@ -405,10 +411,9 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, + MB_PREDICTION_MODE *parent_mode, + int_mv *parent_ref_mv, int mb_row, int mb_col) + { +- LOWER_RES_INFO* store_mode_info +- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; ++ LOWER_RES_MB_INFO* store_mode_info ++ = ((LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info)->mb_info; + unsigned int parent_mb_index; +- //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col]; + + /* Consider different down_sampling_factor. */ + { +@@ -440,7 +445,6 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, + /* Consider different down_sampling_factor. + * The result can be rounded to be more precise, but it takes more time. + */ +- //int round = cpi->oxcf.mr_down_sampling_factor.den/2; + (*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row + *cpi->oxcf.mr_down_sampling_factor.num + /cpi->oxcf.mr_down_sampling_factor.den; +@@ -455,10 +459,18 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, + + static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) + { +- if (sse < x->encode_breakout) ++ MACROBLOCKD *xd = &x->e_mbd; ++ ++ unsigned int threshold = (xd->block[0].dequant[1] ++ * xd->block[0].dequant[1] >>4); ++ ++ if(threshold < x->encode_breakout) ++ threshold = x->encode_breakout; ++ ++ if (sse < threshold ) + { +- // Check u and v to make sure skip is ok +- int sse2 = 0; ++ /* Check u and v to make sure skip is ok */ ++ unsigned int sse2 = 0; + + sse2 = VP8_UVSSE(x); + +@@ -469,7 +481,8 @@ static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) + } + } + +-static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x) ++static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, ++ VP8_COMP *cpi, MACROBLOCK *x, int rd_adj) + { + MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; + int_mv mv = x->e_mbd.mode_info_context->mbmi.mv; +@@ -486,16 +499,70 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, V + + if((this_mode != NEWMV) || + !(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1) +- *distortion2 = get_inter_mbpred_error(x, ++ *distortion2 = vp8_get_inter_mbpred_error(x, + &cpi->fn_ptr[BLOCK_16X16], + sse, mv); + + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2); + ++ /* Adjust rd to bias to ZEROMV */ ++ if(this_mode == ZEROMV) ++ { ++ /* Bias to ZEROMV on LAST_FRAME reference when it is available. 
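The reworked check_for_encode_breakout() above no longer compares the luma SSE against x->encode_breakout alone: it first derives a floor from the first AC dequantizer value, so the skip test scales with the quantizer. A self-contained sketch of that thresholding (the dequant and breakout values below are illustrative, not taken from the encoder):

    #include <stdio.h>

    static int breakout_hit(unsigned int sse, unsigned int encode_breakout,
                            int ac_dequant)
    {
        /* Quantizer-dependent floor, as in the patch: (dequant[1]^2) >> 4 */
        unsigned int threshold = (unsigned int)(ac_dequant * ac_dequant) >> 4;

        if (threshold < encode_breakout)
            threshold = encode_breakout;

        /* On a hit the encoder still checks the chroma SSE (VP8_UVSSE)
         * before allowing the skip. */
        return sse < threshold;
    }

    int main(void)
    {
        /* ac_dequant = 60 gives a floor of 3600 >> 4 = 225, so an SSE of
         * 200 passes even though encode_breakout is only 100. */
        printf("%d\n", breakout_hit(200, 100, 60));
        return 0;
    }
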
*/ ++ if ((cpi->ref_frame_flags & VP8_LAST_FRAME & ++ cpi->common.refresh_last_frame) ++ && x->e_mbd.mode_info_context->mbmi.ref_frame != LAST_FRAME) ++ rd_adj = 100; ++ ++ // rd_adj <= 100 ++ this_rd = ((int64_t)this_rd) * rd_adj / 100; ++ } ++ + check_for_encode_breakout(*sse, x); + return this_rd; + } + ++static void calculate_zeromv_rd_adjustment(VP8_COMP *cpi, MACROBLOCK *x, ++ int *rd_adjustment) ++{ ++ MODE_INFO *mic = x->e_mbd.mode_info_context; ++ int_mv mv_l, mv_a, mv_al; ++ int local_motion_check = 0; ++ ++ if (cpi->lf_zeromv_pct > 40) ++ { ++ /* left mb */ ++ mic -= 1; ++ mv_l = mic->mbmi.mv; ++ ++ if (mic->mbmi.ref_frame != INTRA_FRAME) ++ if( abs(mv_l.as_mv.row) < 8 && abs(mv_l.as_mv.col) < 8) ++ local_motion_check++; ++ ++ /* above-left mb */ ++ mic -= x->e_mbd.mode_info_stride; ++ mv_al = mic->mbmi.mv; ++ ++ if (mic->mbmi.ref_frame != INTRA_FRAME) ++ if( abs(mv_al.as_mv.row) < 8 && abs(mv_al.as_mv.col) < 8) ++ local_motion_check++; ++ ++ /* above mb */ ++ mic += 1; ++ mv_a = mic->mbmi.mv; ++ ++ if (mic->mbmi.ref_frame != INTRA_FRAME) ++ if( abs(mv_a.as_mv.row) < 8 && abs(mv_a.as_mv.col) < 8) ++ local_motion_check++; ++ ++ if (((!x->e_mbd.mb_to_top_edge || !x->e_mbd.mb_to_left_edge) ++ && local_motion_check >0) || local_motion_check >2 ) ++ *rd_adjustment = 80; ++ else if (local_motion_check > 0) ++ *rd_adjustment = 90; ++ } ++} ++ + void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int recon_uvoffset, int *returnrate, + int *returndistortion, int *returnintra, int mb_row, +@@ -513,7 +580,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + MB_PREDICTION_MODE this_mode; + int num00; + int mdcounts[4]; +- int best_rd = INT_MAX; // 1 << 30; ++ int best_rd = INT_MAX; ++ int rd_adjustment = 100; + int best_intra_rd = INT_MAX; + int mode_index; + int rate; +@@ -523,14 +591,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int best_mode_index = 0; + unsigned int sse = INT_MAX, best_rd_sse = INT_MAX; + #if CONFIG_TEMPORAL_DENOISING +- unsigned int zero_mv_sse = 0, best_sse = INT_MAX; ++ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX; + #endif + + int_mv mvp; + + int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + int saddone=0; +- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) ++ /* search range got from mv_pred(). It uses step_param levels. (0-7) */ ++ int sr=0; + + unsigned char *plane[4][3]; + int ref_frame_map[4]; +@@ -539,12 +608,39 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + #if CONFIG_MULTI_RES_ENCODING + int dissim = INT_MAX; + int parent_ref_frame = 0; ++ int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail; + int_mv parent_ref_mv; + MB_PREDICTION_MODE parent_mode = 0; + +- if (cpi->oxcf.mr_encoder_id) ++ if (parent_ref_valid) ++ { ++ int parent_ref_flag; ++ + get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame, + &parent_mode, &parent_ref_mv, mb_row, mb_col); ++ ++ /* TODO(jkoleszar): The references available (ref_frame_flags) to the ++ * lower res encoder should match those available to this encoder, but ++ * there seems to be a situation where this mismatch can happen in the ++ * case of frame dropping and temporal layers. For example, ++ * GOLD being disallowed in ref_frame_flags, but being returned as ++ * parent_ref_frame. ++ * ++ * In this event, take the conservative approach of disabling the ++ * lower res info for this MB. 
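The two hunks above introduce the ZEROMV bias: calculate_zeromv_rd_adjustment() inspects the left, above and above-left neighbours and, when the frame is largely static (lf_zeromv_pct > 40), shrinks rd_adjustment to 90 or 80; evaluate_inter_mode() then scales a ZEROMV candidate's RD score by that percentage, except that the bias is cancelled (rd_adj forced back to 100) for non-LAST references while a refreshed LAST frame is available. A minimal sketch of the scaling step:

    #include <stdint.h>
    #include <stdio.h>

    /* rd_adj is 100 (no bias), 90 or 80 depending on how static the
     * neighbourhood looks; the 64-bit cast mirrors the patch and avoids
     * overflow for large RD scores. */
    static int bias_zeromv_rd(int this_rd, int rd_adj)
    {
        return (int)(((int64_t)this_rd) * rd_adj / 100);
    }

    int main(void)
    {
        /* A 20% bias turns an RD score of 150000 into 120000, letting
         * ZEROMV win near-ties against small non-zero motion vectors. */
        printf("%d\n", bias_zeromv_rd(150000, 80));
        return 0;
    }
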
++ */ ++ parent_ref_flag = 0; ++ if (parent_ref_frame == LAST_FRAME) ++ parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME); ++ else if (parent_ref_frame == GOLDEN_FRAME) ++ parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME); ++ else if (parent_ref_frame == ALTREF_FRAME) ++ parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME); ++ ++ //assert(!parent_ref_frame || parent_ref_flag); ++ if (parent_ref_frame && !parent_ref_flag) ++ parent_ref_valid = 0; ++ } + #endif + + mode_mv = mode_mv_sb[sign_bias]; +@@ -553,6 +649,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); + + /* Setup search priorities */ ++#if CONFIG_MULTI_RES_ENCODING ++ if (parent_ref_valid && parent_ref_frame && dissim < 8) ++ { ++ ref_frame_map[0] = -1; ++ ref_frame_map[1] = parent_ref_frame; ++ ref_frame_map[2] = -1; ++ ref_frame_map[3] = -1; ++ } else ++#endif + get_reference_search_order(cpi, ref_frame_map); + + /* Check to see if there is at least 1 valid reference frame that we need +@@ -574,22 +679,29 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); + +- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame ++ /* Count of the number of MBs tested so far this frame */ ++ x->mbs_tested_so_far++; + + *returnintra = INT_MAX; + x->skip = 0; + + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + +- // if we encode a new mv this is important +- // find the best new motion vector ++ /* If the frame has big static background and current MB is in low ++ * motion area, its mode decision is biased to ZEROMV mode. ++ */ ++ calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment); ++ ++ /* if we encode a new mv this is important ++ * find the best new motion vector ++ */ + for (mode_index = 0; mode_index < MAX_MODES; mode_index++) + { + int frame_cost; + int this_rd = INT_MAX; + int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; + +- if (best_rd <= cpi->rd_threshes[mode_index]) ++ if (best_rd <= x->rd_threshes[mode_index]) + continue; + + if (this_ref_frame < 0) +@@ -597,23 +709,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; + +-#if CONFIG_MULTI_RES_ENCODING +- if (cpi->oxcf.mr_encoder_id) +- { +- /* If parent MB is intra, child MB is intra. */ +- if (!parent_ref_frame && this_ref_frame) +- continue; +- +- /* If parent MB is inter, and it is unlikely there are multiple +- * objects in parent MB, we use parent ref frame as child MB's +- * ref frame. 
*/ +- if (parent_ref_frame && dissim < 8 +- && parent_ref_frame != this_ref_frame) +- continue; +- } +-#endif +- +- // everything but intra ++ /* everything but intra */ + if (x->e_mbd.mode_info_context->mbmi.ref_frame) + { + x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; +@@ -628,7 +724,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + } + + #if CONFIG_MULTI_RES_ENCODING +- if (cpi->oxcf.mr_encoder_id) ++ if (parent_ref_valid) + { + if (vp8_mode_order[mode_index] == NEARESTMV && + mode_mv[NEARESTMV].as_int ==0) +@@ -638,7 +734,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + continue; + + if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV +- && best_ref_mv.as_int==0) //&& dissim==0 ++ && best_ref_mv.as_int==0) + continue; + else if(vp8_mode_order[mode_index] == NEWMV && dissim==0 + && best_ref_mv.as_int==parent_ref_mv.as_int) +@@ -650,22 +746,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + /* Check to see if the testing frequency for this mode is at its max + * If so then prevent it from being tested and increase the threshold + * for its testing */ +- if (cpi->mode_test_hit_counts[mode_index] && ++ if (x->mode_test_hit_counts[mode_index] && + (cpi->mode_check_freq[mode_index] > 1)) + { +- if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * +- cpi->mode_test_hit_counts[mode_index])) ++ if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * ++ x->mode_test_hit_counts[mode_index])) + { + /* Increase the threshold for coding this mode to make it less + * likely to be chosen */ +- cpi->rd_thresh_mult[mode_index] += 4; ++ x->rd_thresh_mult[mode_index] += 4; + +- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) +- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; ++ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) ++ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + +- cpi->rd_threshes[mode_index] = ++ x->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) * +- cpi->rd_thresh_mult[mode_index]; ++ x->rd_thresh_mult[mode_index]; + continue; + } + } +@@ -673,7 +769,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + /* We have now reached the point where we are going to test the current + * mode so increment the counter for the number of times it has been + * tested */ +- cpi->mode_test_hit_counts[mode_index] ++; ++ x->mode_test_hit_counts[mode_index] ++; + + rate2 = 0; + distortion2 = 0; +@@ -728,7 +824,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + case SPLITMV: + +- // Split MV modes currently not supported when RD is nopt enabled. ++ /* Split MV modes currently not supported when RD is not enabled. */ + break; + + case DC_PRED: +@@ -777,13 +873,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1; + +- // Further step/diamond searches as necessary ++ /* Further step/diamond searches as necessary */ + step_param = cpi->sf.first_step + speed_adjust; + + #if CONFIG_MULTI_RES_ENCODING +- if (cpi->oxcf.mr_encoder_id) ++ /* If lower-res drops this frame, then higher-res encoder does ++ motion search without any previous knowledge. Also, since ++ last frame motion info is not stored, then we can not ++ use improved_mv_pred. 
*/ ++ if (cpi->oxcf.mr_encoder_id && !parent_ref_valid) ++ cpi->sf.improved_mv_pred = 0; ++ ++ if (parent_ref_valid && parent_ref_frame) + { +- // Use parent MV as predictor. Adjust search range accordingly. ++ /* Use parent MV as predictor. Adjust search range ++ * accordingly. ++ */ + mvp.as_int = parent_ref_mv.as_int; + mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3; + mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3; +@@ -808,7 +913,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + &near_sadidx[0]); + + sr += speed_adjust; +- //adjust search range according to sr from mv prediction ++ /* adjust search range according to sr from mv prediction */ + if(sr > step_param) + step_param = sr; + +@@ -823,7 +928,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + } + + #if CONFIG_MULTI_RES_ENCODING +- if (cpi->oxcf.mr_encoder_id && dissim <= 2 && ++ if (parent_ref_valid && parent_ref_frame && dissim <= 2 && + MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), + abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4) + { +@@ -860,7 +965,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + * change the behavior in lowest-resolution encoder. + * Will improve it later. + */ +- if (!cpi->oxcf.mr_encoder_id) ++ /* Set step_param to 0 to ensure large-range motion search ++ when encoder drops this frame at lower-resolution. ++ */ ++ if (!parent_ref_valid) + step_param = 0; + #endif + bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, +@@ -877,10 +985,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + x->mvcost, &best_ref_mv); + mode_mv[NEWMV].as_int = d->bmi.mv.as_int; + +- // Further step/diamond searches as necessary +- n = 0; +- //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; +- ++ /* Further step/diamond searches as necessary */ + n = num00; + num00 = 0; + +@@ -927,7 +1032,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + mode_mv[NEWMV].as_int = d->bmi.mv.as_int; + +- // mv cost; ++ /* mv cost; */ + rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, + cpi->mb.mvcost, 128); + } +@@ -954,7 +1059,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + rate2 += vp8_cost_mv_ref(this_mode, mdcounts); + x->e_mbd.mode_info_context->mbmi.mv.as_int = + mode_mv[this_mode].as_int; +- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); ++ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, ++ rd_adjustment); + + break; + default: +@@ -964,31 +1070,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + #if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { +- // Store for later use by denoiser. +- if (this_mode == ZEROMV && +- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) +- { +- zero_mv_sse = sse; +- } +- +- // Store the best NEWMV in x for later use in the denoiser. +- // We are restricted to the LAST_FRAME since the denoiser only keeps +- // one filter state. +- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && +- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) +- { +- best_sse = sse; +- x->e_mbd.best_sse_inter_mode = NEWMV; +- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; +- x->e_mbd.need_to_clamp_best_mvs = +- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; +- } ++ ++ /* Store for later use by denoiser. 
*/ ++ if (this_mode == ZEROMV && sse < zero_mv_sse ) ++ { ++ zero_mv_sse = sse; ++ x->best_zeromv_reference_frame = ++ x->e_mbd.mode_info_context->mbmi.ref_frame; ++ } ++ ++ /* Store the best NEWMV in x for later use in the denoiser. */ ++ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && ++ sse < best_sse) ++ { ++ best_sse = sse; ++ x->best_sse_inter_mode = NEWMV; ++ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; ++ x->need_to_clamp_best_mvs = ++ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; ++ x->best_reference_frame = ++ x->e_mbd.mode_info_context->mbmi.ref_frame; ++ } + } + #endif + + if (this_rd < best_rd || x->skip) + { +- // Note index of best mode ++ /* Note index of best mode */ + best_mode_index = mode_index; + + *returnrate = rate2; +@@ -1001,12 +1109,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + /* Testing this mode gave rise to an improvement in best error + * score. Lower threshold a bit for next time + */ +- cpi->rd_thresh_mult[mode_index] = +- (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? +- cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; +- cpi->rd_threshes[mode_index] = ++ x->rd_thresh_mult[mode_index] = ++ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? ++ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; ++ x->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) * +- cpi->rd_thresh_mult[mode_index]; ++ x->rd_thresh_mult[mode_index]; + } + + /* If the mode did not help improve the best error case then raise the +@@ -1014,33 +1122,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + */ + else + { +- cpi->rd_thresh_mult[mode_index] += 4; ++ x->rd_thresh_mult[mode_index] += 4; + +- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) +- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; ++ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) ++ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + +- cpi->rd_threshes[mode_index] = ++ x->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) * +- cpi->rd_thresh_mult[mode_index]; ++ x->rd_thresh_mult[mode_index]; + } + + if (x->skip) + break; + } + +- // Reduce the activation RD thresholds for the best choice mode ++ /* Reduce the activation RD thresholds for the best choice mode */ + if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) + { +- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3); ++ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3); + +- cpi->rd_thresh_mult[best_mode_index] = +- (cpi->rd_thresh_mult[best_mode_index] ++ x->rd_thresh_mult[best_mode_index] = ++ (x->rd_thresh_mult[best_mode_index] + >= (MIN_THRESHMULT + best_adjustment)) ? +- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : ++ x->rd_thresh_mult[best_mode_index] - best_adjustment : + MIN_THRESHMULT; +- cpi->rd_threshes[best_mode_index] = ++ x->rd_threshes[best_mode_index] = + (cpi->rd_baseline_thresh[best_mode_index] >> 7) * +- cpi->rd_thresh_mult[best_mode_index]; ++ x->rd_thresh_mult[best_mode_index]; + } + + +@@ -1052,43 +1160,54 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + this_rdbin = 1023; + } + +- cpi->error_bins[this_rdbin] ++; ++ x->error_bins[this_rdbin] ++; + } + + #if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { +- if (x->e_mbd.best_sse_inter_mode == DC_PRED) { +- // No best MV found. 
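The mode-search bookkeeping above (mbs_tested_so_far, mode_test_hit_counts, rd_thresh_mult, rd_threshes, error_bins) moves from VP8_COMP into MACROBLOCK so that each macroblock worker owns its own copy. The adaptation rule itself is unchanged: a mode that improves the best RD score lowers its multiplier by 2 toward the floor, a mode that fails raises it by 4 toward the cap, and the working threshold is rebuilt as (baseline >> 7) * multiplier. A compact sketch, assuming the usual vp8 floor/cap values of 32 and 512:

    #define MIN_THRESHMULT 32   /* assumed, see onyx_int.h */
    #define MAX_THRESHMULT 512  /* assumed, see onyx_int.h */

    static void update_mode_threshold(int improved, int baseline_thresh,
                                      int *thresh_mult, int *rd_thresh)
    {
        if (improved)
            *thresh_mult = (*thresh_mult >= MIN_THRESHMULT + 2)
                           ? *thresh_mult - 2 : MIN_THRESHMULT;
        else
        {
            *thresh_mult += 4;
            if (*thresh_mult > MAX_THRESHMULT)
                *thresh_mult = MAX_THRESHMULT;
        }

        /* Rebuild the working threshold from the per-mode baseline. */
        *rd_thresh = (baseline_thresh >> 7) * (*thresh_mult);
    }
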
+- x->e_mbd.best_sse_inter_mode = best_mbmode.mode; +- x->e_mbd.best_sse_mv = best_mbmode.mv; +- x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; +- best_sse = best_rd_sse; +- } +- vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, +- recon_yoffset, recon_uvoffset); +- +- // Reevaluate ZEROMV after denoising. +- if (best_mbmode.ref_frame == INTRA_FRAME) +- { +- int this_rd = 0; +- rate2 = 0; +- distortion2 = 0; +- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; +- rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; +- this_mode = ZEROMV; +- rate2 += vp8_cost_mv_ref(this_mode, mdcounts); +- x->e_mbd.mode_info_context->mbmi.mode = this_mode; +- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; +- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; +- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); ++ if (x->best_sse_inter_mode == DC_PRED) ++ { ++ /* No best MV found. */ ++ x->best_sse_inter_mode = best_mbmode.mode; ++ x->best_sse_mv = best_mbmode.mv; ++ x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; ++ x->best_reference_frame = best_mbmode.ref_frame; ++ best_sse = best_rd_sse; ++ } ++ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, ++ recon_yoffset, recon_uvoffset); + +- if (this_rd < best_rd || x->skip) ++ ++ /* Reevaluate ZEROMV after denoising. */ ++ if (best_mbmode.ref_frame == INTRA_FRAME && ++ x->best_zeromv_reference_frame != INTRA_FRAME) + { +- vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, +- sizeof(MB_MODE_INFO)); ++ int this_rd = 0; ++ int this_ref_frame = x->best_zeromv_reference_frame; ++ rate2 = x->ref_frame_cost[this_ref_frame] + ++ vp8_cost_mv_ref(ZEROMV, mdcounts); ++ distortion2 = 0; ++ ++ /* set up the proper prediction buffers for the frame */ ++ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; ++ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; ++ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; ++ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; ++ ++ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; ++ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; ++ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; ++ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, ++ rd_adjustment); ++ ++ if (this_rd < best_rd) ++ { ++ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, ++ sizeof(MB_MODE_INFO)); ++ } + } +- } ++ + } + #endif + +@@ -1122,11 +1241,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) + best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; + +- update_mvcount(cpi, &x->e_mbd, &best_ref_mv); ++ update_mvcount(cpi, x, &best_ref_mv); + } + + +-void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) ++void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) + { + int error4x4, error16x16 = INT_MAX; + int rate, best_rate = 0, distortion, best_sse; +diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h +index 3d83782..35011ca 100644 +--- a/vp8/encoder/pickinter.h ++++ b/vp8/encoder/pickinter.h +@@ -18,6 +18,10 @@ extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int recon_uvoffset, int *returnrate, + int *returndistortion, int *returnintra, + int mb_row, int mb_col); +-extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); ++extern void vp8_pick_intra_mode(MACROBLOCK *x, int *rate); + ++extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb, ++ const 
vp8_variance_fn_ptr_t *vfp, ++ unsigned int *sse, ++ int_mv this_mv); + #endif +diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c +index 21af45a..4121349 100644 +--- a/vp8/encoder/picklpf.c ++++ b/vp8/encoder/picklpf.c +@@ -74,7 +74,9 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, + src += srcoffset; + dst += dstoffset; + +- // Loop through the Y plane raw and reconstruction data summing (square differences) ++ /* Loop through the Y plane raw and reconstruction data summing ++ * (square differences) ++ */ + for (i = 0; i < linestocopy; i += 16) + { + for (j = 0; j < source->y_width; j += 16) +@@ -92,7 +94,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, + return Total; + } + +-// Enforce a minimum filter level based upon baseline Q ++/* Enforce a minimum filter level based upon baseline Q */ + static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) + { + int min_filter_level; +@@ -113,14 +115,15 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) + return min_filter_level; + } + +-// Enforce a maximum filter level based upon baseline Q ++/* Enforce a maximum filter level based upon baseline Q */ + static int get_max_filter_level(VP8_COMP *cpi, int base_qindex) + { +- // PGW August 2006: Highest filter values almost always a bad idea ++ /* PGW August 2006: Highest filter values almost always a bad idea */ + +- // jbb chg: 20100118 - not so any more with this overquant stuff allow high values +- // with lots of intra coming in. +- int max_filter_level = MAX_LOOP_FILTER ;//* 3 / 4; ++ /* jbb chg: 20100118 - not so any more with this overquant stuff allow ++ * high values with lots of intra coming in. ++ */ ++ int max_filter_level = MAX_LOOP_FILTER; + (void)base_qindex; + + if (cpi->twopass.section_intra_rating > 8) +@@ -155,7 +158,9 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + cm->last_sharpness_level = cm->sharpness_level; + } + +- // Start the search at the previous frame filter level unless it is now out of range. ++ /* Start the search at the previous frame filter level unless it is ++ * now out of range. ++ */ + if (cm->filter_level < min_filter_level) + cm->filter_level = min_filter_level; + else if (cm->filter_level > max_filter_level) +@@ -164,7 +169,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + filt_val = cm->filter_level; + best_filt_val = filt_val; + +- // Get the err using the previous frame's filter value. ++ /* Get the err using the previous frame's filter value. */ + + /* Copy the unfiltered / processed recon buffer to the new buffer */ + vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); +@@ -174,17 +179,17 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + + filt_val -= 1 + (filt_val > 10); + +- // Search lower filter levels ++ /* Search lower filter levels */ + while (filt_val >= min_filter_level) + { +- // Apply the loop filter ++ /* Apply the loop filter */ + vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); + vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); + +- // Get the err for filtered frame ++ /* Get the err for filtered frame */ + filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); + +- // Update the best case record or exit loop. ++ /* Update the best case record or exit loop. 
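vp8cx_pick_filter_level_fast() above is a simple descent: start from the previous frame's filter level, step downward while the partial-frame SSE keeps improving (step 1, growing to 2 above level 10), then probe upward only if no lower level won, damping the comparison so gains smaller than one part in 4096 do not raise the level. A sketch of the downward half of that search, with calc_err standing in for the copy-frame / loop-filter / partial-SSE sequence:

    /* Sketch only: calc_err() stands in for vp8_yv12_copy_partial_frame,
     * vp8_loop_filter_partial_frame and calc_partial_ssl_err; min_level
     * comes from get_min_filter_level(). */
    static int pick_level_fast(int prev_level, int min_level,
                               int (*calc_err)(int level))
    {
        int best_level = prev_level;
        int best_err = calc_err(prev_level);
        int val = prev_level - (1 + (prev_level > 10));

        /* Search lower filter levels while they keep improving. */
        while (val >= min_level)
        {
            int err = calc_err(val);
            if (err >= best_err)
                break;
            best_err = err;
            best_level = val;
            val -= 1 + (val > 10);   /* bigger steps above level 10 */
        }

        return best_level;
    }
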
*/ + if (filt_err < best_err) + { + best_err = filt_err; +@@ -193,32 +198,34 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + else + break; + +- // Adjust filter level ++ /* Adjust filter level */ + filt_val -= 1 + (filt_val > 10); + } + +- // Search up (note that we have already done filt_val = cm->filter_level) ++ /* Search up (note that we have already done filt_val = cm->filter_level) */ + filt_val = cm->filter_level + 1 + (filt_val > 10); + + if (best_filt_val == cm->filter_level) + { +- // Resist raising filter level for very small gains ++ /* Resist raising filter level for very small gains */ + best_err -= (best_err >> 10); + + while (filt_val < max_filter_level) + { +- // Apply the loop filter ++ /* Apply the loop filter */ + vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); + + vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); + +- // Get the err for filtered frame ++ /* Get the err for filtered frame */ + filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); + +- // Update the best case record or exit loop. ++ /* Update the best case record or exit loop. */ + if (filt_err < best_err) + { +- // Do not raise filter level if improvement is < 1 part in 4096 ++ /* Do not raise filter level if improvement is < 1 part ++ * in 4096 ++ */ + best_err = filt_err - (filt_err >> 10); + + best_filt_val = filt_val; +@@ -226,7 +233,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + else + break; + +- // Adjust filter level ++ /* Adjust filter level */ + filt_val += 1 + (filt_val > 10); + } + } +@@ -243,7 +250,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + cm->frame_to_show = saved_frame; + } + +-// Stub function for now Alt LF not used ++/* Stub function for now Alt LF not used */ + void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val) + { + MACROBLOCKD *mbd = &cpi->mb.e_mbd; +@@ -266,12 +273,14 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + + int filter_step; + int filt_high = 0; +- int filt_mid = cm->filter_level; // Start search at previous frame filter level ++ /* Start search at previous frame filter level */ ++ int filt_mid = cm->filter_level; + int filt_low = 0; + int filt_best; + int filt_direction = 0; + +- int Bias = 0; // Bias against raising loop filter and in favor of lowering it ++ /* Bias against raising loop filter and in favor of lowering it */ ++ int Bias = 0; + + int ss_err[MAX_LOOP_FILTER + 1]; + +@@ -287,7 +296,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + else + cm->sharpness_level = cpi->oxcf.Sharpness; + +- // Start the search at the previous frame filter level unless it is now out of range. ++ /* Start the search at the previous frame filter level unless it is ++ * now out of range. ++ */ + filt_mid = cm->filter_level; + + if (filt_mid < min_filter_level) +@@ -295,10 +306,10 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + else if (filt_mid > max_filter_level) + filt_mid = max_filter_level; + +- // Define the initial step size ++ /* Define the initial step size */ + filter_step = (filt_mid < 16) ? 
4 : filt_mid / 4; + +- // Get baseline error score ++ /* Get baseline error score */ + + /* Copy the unfiltered / processed recon buffer to the new buffer */ + vp8_yv12_copy_y(saved_frame, cm->frame_to_show); +@@ -314,9 +325,8 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + + while (filter_step > 0) + { +- Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; //PGW change 12/12/06 for small images ++ Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; + +- // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value + if (cpi->twopass.section_intra_rating < 20) + Bias = Bias * cpi->twopass.section_intra_rating / 20; + +@@ -327,7 +337,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + { + if(ss_err[filt_low] == 0) + { +- // Get Low filter error score ++ /* Get Low filter error score */ + vp8_yv12_copy_y(saved_frame, cm->frame_to_show); + vp8cx_set_alt_lf_level(cpi, filt_low); + vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); +@@ -338,10 +348,12 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + else + filt_err = ss_err[filt_low]; + +- // If value is close to the best so far then bias towards a lower loop filter value. ++ /* If value is close to the best so far then bias towards a ++ * lower loop filter value. ++ */ + if ((filt_err - Bias) < best_err) + { +- // Was it actually better than the previous best? ++ /* Was it actually better than the previous best? */ + if (filt_err < best_err) + best_err = filt_err; + +@@ -349,7 +361,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + } + } + +- // Now look at filt_high ++ /* Now look at filt_high */ + if ((filt_direction >= 0) && (filt_high != filt_mid)) + { + if(ss_err[filt_high] == 0) +@@ -364,7 +376,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + else + filt_err = ss_err[filt_high]; + +- // Was it better than the previous best? ++ /* Was it better than the previous best? 
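The full search in vp8cx_pick_filter_level() above is a damped ternary probe: each iteration evaluates filt_mid +/- filter_step, biases the comparison against raising the level (and against moving at all when a lot of intra material is coming in), and halves the step whenever the middle value wins. The bias term from the hunk, with a worked example:

    #include <stdio.h>

    /* Bias against raising the loop filter: larger at high filter levels
     * and for big step sizes, then scaled down when the section has a
     * high intra rating. */
    static long long filter_bias(long long best_err, int filt_mid,
                                 int filter_step, int section_intra_rating)
    {
        long long bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;

        if (section_intra_rating < 20)
            bias = bias * section_intra_rating / 20;

        return bias;
    }

    int main(void)
    {
        /* best_err = 1<<20, filt_mid = 32, step = 8, rating = 10:
         * (1048576 >> 11) * 8 = 4096, then * 10 / 20 = 2048. */
        printf("%lld\n", filter_bias(1 << 20, 32, 8, 10));
        return 0;
    }
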
*/ + if (filt_err < (best_err - Bias)) + { + best_err = filt_err; +@@ -372,7 +384,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) + } + } + +- // Half the step distance if the best filter value was the same as last time ++ /* Half the step distance if the best filter value was the same ++ * as last time ++ */ + if (filt_best == filt_mid) + { + filter_step = filter_step / 2; +diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c +index 5119bb8..5bb49ad 100644 +--- a/vp8/encoder/psnr.c ++++ b/vp8/encoder/psnr.c +@@ -22,7 +22,7 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse) + if ((double)Mse > 0.0) + psnr = 10.0 * log10(Peak * Peak * Samples / Mse); + else +- psnr = MAX_PSNR; // Limit to prevent / 0 ++ psnr = MAX_PSNR; /* Limit to prevent / 0 */ + + if (psnr > MAX_PSNR) + psnr = MAX_PSNR; +diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c +index 766d2b2..33c8ef0 100644 +--- a/vp8/encoder/quantize.c ++++ b/vp8/encoder/quantize.c +@@ -44,21 +44,21 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) + z = coeff_ptr[rc]; + zbin = zbin_ptr[rc] ; + +- sz = (z >> 31); // sign of z +- x = (z ^ sz) - sz; // x = abs(z) ++ sz = (z >> 31); /* sign of z */ ++ x = (z ^ sz) - sz; /* x = abs(z) */ + + if (x >= zbin) + { + x += round_ptr[rc]; + y = (((x * quant_ptr[rc]) >> 16) + x) +- >> quant_shift_ptr[rc]; // quantize (x) +- x = (y ^ sz) - sz; // get the sign back +- qcoeff_ptr[rc] = x; // write to destination +- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value ++ >> quant_shift_ptr[rc]; /* quantize (x) */ ++ x = (y ^ sz) - sz; /* get the sign back */ ++ qcoeff_ptr[rc] = x; /* write to destination */ ++ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ + + if (y) + { +- eob = i; // last nonzero coeffs ++ eob = i; /* last nonzero coeffs */ + } + } + } +@@ -84,17 +84,17 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + +- sz = (z >> 31); // sign of z +- x = (z ^ sz) - sz; // x = abs(z) ++ sz = (z >> 31); /* sign of z */ ++ x = (z ^ sz) - sz; /* x = abs(z) */ + +- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) +- x = (y ^ sz) - sz; // get the sign back +- qcoeff_ptr[rc] = x; // write to destination +- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value ++ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ ++ x = (y ^ sz) - sz; /* get the sign back */ ++ qcoeff_ptr[rc] = x; /* write to destination */ ++ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ + + if (y) + { +- eob = i; // last nonzero coeffs ++ eob = i; /* last nonzero coeffs */ + } + } + *d->eob = (char)(eob + 1); +@@ -132,22 +132,22 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) + zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; + + zbin_boost_ptr ++; +- sz = (z >> 31); // sign of z +- x = (z ^ sz) - sz; // x = abs(z) ++ sz = (z >> 31); /* sign of z */ ++ x = (z ^ sz) - sz; /* x = abs(z) */ + + if (x >= zbin) + { + x += round_ptr[rc]; + y = (((x * quant_ptr[rc]) >> 16) + x) +- >> quant_shift_ptr[rc]; // quantize (x) +- x = (y ^ sz) - sz; // get the sign back +- qcoeff_ptr[rc] = x; // write to destination +- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value ++ >> quant_shift_ptr[rc]; /* quantize (x) */ ++ x = (y ^ sz) - sz; /* get the sign back */ ++ qcoeff_ptr[rc] = x; /* write to destination */ ++ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ + + if (y) + { +- eob = i; // last nonzero coeffs +- zbin_boost_ptr = 
b->zrun_zbin_boost; // reset zero runlength ++ eob = i; /* last nonzero coeffs */ ++ zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */ + } + } + } +@@ -240,26 +240,23 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + +- //if ( i == 0 ) +- // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; +- //else + zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; + + zbin_boost_ptr ++; +- sz = (z >> 31); // sign of z +- x = (z ^ sz) - sz; // x = abs(z) ++ sz = (z >> 31); /* sign of z */ ++ x = (z ^ sz) - sz; /* x = abs(z) */ + + if (x >= zbin) + { +- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) +- x = (y ^ sz) - sz; // get the sign back +- qcoeff_ptr[rc] = x; // write to destination +- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value ++ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ ++ x = (y ^ sz) - sz; /* get the sign back */ ++ qcoeff_ptr[rc] = x; /* write to destination */ ++ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ + + if (y) + { +- eob = i; // last nonzero coeffs +- zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength ++ eob = i; /* last nonzero coeffs */ ++ zbin_boost_ptr = &b->zrun_zbin_boost[0]; /* reset zrl */ + } + } + } +@@ -441,7 +438,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) + + for (Q = 0; Q < QINDEX_RANGE; Q++) + { +- // dc values ++ /* dc values */ + quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); + cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val; + invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, +@@ -469,7 +466,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) + cpi->common.UVdequant[Q][0] = quant_val; + cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; + +- // all the ac values = ; ++ /* all the ac values = ; */ + quant_val = vp8_ac_yquant(Q); + cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val; + invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1, +@@ -536,7 +533,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) + + for (Q = 0; Q < QINDEX_RANGE; Q++) + { +- // dc values ++ /* dc values */ + quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); + cpi->Y1quant[Q][0] = (1 << 16) / quant_val; + cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; +@@ -558,7 +555,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) + cpi->common.UVdequant[Q][0] = quant_val; + cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; + +- // all the ac values = ; ++ /* all the ac values = ; */ + for (i = 1; i < 16; i++) + { + int rc = vp8_default_zig_zag1d[i]; +@@ -590,20 +587,20 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) + + #define ZBIN_EXTRA_Y \ + (( cpi->common.Y1dequant[QIndex][1] * \ +- ( cpi->zbin_over_quant + \ +- cpi->zbin_mode_boost + \ ++ ( x->zbin_over_quant + \ ++ x->zbin_mode_boost + \ + x->act_zbin_adj ) ) >> 7) + + #define ZBIN_EXTRA_UV \ + (( cpi->common.UVdequant[QIndex][1] * \ +- ( cpi->zbin_over_quant + \ +- cpi->zbin_mode_boost + \ ++ ( x->zbin_over_quant + \ ++ x->zbin_mode_boost + \ + x->act_zbin_adj ) ) >> 7) + + #define ZBIN_EXTRA_Y2 \ + (( cpi->common.Y2dequant[QIndex][1] * \ +- ( (cpi->zbin_over_quant / 2) + \ +- cpi->zbin_mode_boost + \ ++ ( (x->zbin_over_quant / 2) + \ ++ x->zbin_mode_boost + \ + x->act_zbin_adj ) ) >> 7) + + void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) +@@ -613,18 +610,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) + MACROBLOCKD *xd = &x->e_mbd; + int zbin_extra; + +- // Select 
the baseline MB Q index. ++ /* Select the baseline MB Q index. */ + if (xd->segmentation_enabled) + { +- // Abs Value ++ /* Abs Value */ + if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) +- + QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; +- // Delta Value ++ /* Delta Value */ + else + { + QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; +- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range ++ /* Clamp to valid range */ ++ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; + } + } + else +@@ -657,13 +654,13 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) + * This will also require modifications to the x86 and neon assembly. + * */ + for (i = 0; i < 16; i++) +- x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex]; ++ x->e_mbd.block[i].dequant = xd->dequant_y1; + for (i = 16; i < 24; i++) +- x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex]; +- x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex]; ++ x->e_mbd.block[i].dequant = xd->dequant_uv; ++ x->e_mbd.block[24].dequant = xd->dequant_y2; + #endif + +- // Y ++ /* Y */ + zbin_extra = ZBIN_EXTRA_Y; + + for (i = 0; i < 16; i++) +@@ -677,7 +674,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) + x->block[i].zbin_extra = (short)zbin_extra; + } + +- // UV ++ /* UV */ + zbin_extra = ZBIN_EXTRA_UV; + + for (i = 16; i < 24; i++) +@@ -691,7 +688,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) + x->block[i].zbin_extra = (short)zbin_extra; + } + +- // Y2 ++ /* Y2 */ + zbin_extra = ZBIN_EXTRA_Y2; + + x->block[24].quant_fast = cpi->Y2quant_fast[QIndex]; +@@ -705,35 +702,35 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) + /* save this macroblock QIndex for vp8_update_zbin_extra() */ + x->q_index = QIndex; + +- cpi->last_zbin_over_quant = cpi->zbin_over_quant; +- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; ++ x->last_zbin_over_quant = x->zbin_over_quant; ++ x->last_zbin_mode_boost = x->zbin_mode_boost; + x->last_act_zbin_adj = x->act_zbin_adj; + + + + } +- else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant +- || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost ++ else if(x->last_zbin_over_quant != x->zbin_over_quant ++ || x->last_zbin_mode_boost != x->zbin_mode_boost + || x->last_act_zbin_adj != x->act_zbin_adj) + { +- // Y ++ /* Y */ + zbin_extra = ZBIN_EXTRA_Y; + + for (i = 0; i < 16; i++) + x->block[i].zbin_extra = (short)zbin_extra; + +- // UV ++ /* UV */ + zbin_extra = ZBIN_EXTRA_UV; + + for (i = 16; i < 24; i++) + x->block[i].zbin_extra = (short)zbin_extra; + +- // Y2 ++ /* Y2 */ + zbin_extra = ZBIN_EXTRA_Y2; + x->block[24].zbin_extra = (short)zbin_extra; + +- cpi->last_zbin_over_quant = cpi->zbin_over_quant; +- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; ++ x->last_zbin_over_quant = x->zbin_over_quant; ++ x->last_zbin_mode_boost = x->zbin_mode_boost; + x->last_act_zbin_adj = x->act_zbin_adj; + } + } +@@ -744,19 +741,19 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) + int QIndex = x->q_index; + int zbin_extra; + +- // Y ++ /* Y */ + zbin_extra = ZBIN_EXTRA_Y; + + for (i = 0; i < 16; i++) + x->block[i].zbin_extra = (short)zbin_extra; + +- // UV ++ /* UV */ + zbin_extra = ZBIN_EXTRA_UV; + + for (i = 16; i < 24; i++) + x->block[i].zbin_extra = (short)zbin_extra; + +- 
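The quantizer hunks above share one arithmetic core: take abs(z), compare it against the zero bin, and on a pass compute y = ((x + round) * quant) >> 16 (the regular path adds a second shift by quant_shift and a zero-run zbin boost), restore the sign, and record both the quantized and dequantized values; the ZBIN_EXTRA_* macros meanwhile move the over-quant and mode-boost state from the encoder into the macroblock. A self-contained sketch of the simpler 16-bit path, with illustrative per-coefficient constants:

    #include <stdio.h>

    /* One coefficient of the fast-quantizer arithmetic; the round,
     * quant and dequant values below are illustrative. */
    static void quantize_coeff(short z, short round, unsigned short quant,
                               short dequant, short *qcoeff, short *dqcoeff)
    {
        short sz = z >> 15;           /* sign of z (all ones if negative) */
        short x = (z ^ sz) - sz;      /* x = abs(z) */
        short y = (short)(((x + round) * quant) >> 16);  /* quantize */

        x = (y ^ sz) - sz;            /* get the sign back */
        *qcoeff = x;                  /* write to destination */
        *dqcoeff = (short)(x * dequant);  /* dequantized value */
    }

    int main(void)
    {
        short q, dq;
        /* z = -300, round = 40, quant = 16384 (divide by ~4),
         * dequant = 4: (340 * 16384) >> 16 = 85, re-signed to -85. */
        quantize_coeff(-300, 40, 16384, 4, &q, &dq);
        printf("%d %d\n", q, dq);     /* prints -85 -340 */
        return 0;
    }
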
// Y2 ++ /* Y2 */ + zbin_extra = ZBIN_EXTRA_Y2; + x->block[24].zbin_extra = (short)zbin_extra; + } +@@ -766,10 +763,10 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) + + void vp8cx_frame_init_quantizer(VP8_COMP *cpi) + { +- // Clear Zbin mode boost for default case +- cpi->zbin_mode_boost = 0; ++ /* Clear Zbin mode boost for default case */ ++ cpi->mb.zbin_mode_boost = 0; + +- // MB level quantizer setup ++ /* MB level quantizer setup */ + vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0); + } + +@@ -801,7 +798,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) + cm->y2dc_delta_q = new_delta_q; + + +- // Set Segment specific quatizers ++ /* Set Segment specific quatizers */ + mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0]; + mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1]; + mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2]; +diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c +index 472e85f..a399a38 100644 +--- a/vp8/encoder/ratectrl.c ++++ b/vp8/encoder/ratectrl.c +@@ -41,15 +41,16 @@ extern int inter_uv_modes[4]; + extern int inter_b_modes[10]; + #endif + +-// Bits Per MB at different Q (Multiplied by 512) ++/* Bits Per MB at different Q (Multiplied by 512) */ + #define BPER_MB_NORMBITS 9 + +-// Work in progress recalibration of baseline rate tables based on +-// the assumption that bits per mb is inversely proportional to the +-// quantizer value. ++/* Work in progress recalibration of baseline rate tables based on ++ * the assumption that bits per mb is inversely proportional to the ++ * quantizer value. ++ */ + const int vp8_bits_per_mb[2][QINDEX_RANGE] = + { +- // Intra case 450000/Qintra ++ /* Intra case 450000/Qintra */ + { + 1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000, + 409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705, +@@ -68,7 +69,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] = + 36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088, + 32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662, + }, +- // Inter case 285000/Qinter ++ /* Inter case 285000/Qinter */ + { + 712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090, + 237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000, +@@ -109,7 +110,7 @@ static const int kf_boost_qadjustment[QINDEX_RANGE] = + 220, 220, 220, 220, 220, 220, 220, 220, + }; + +-//#define GFQ_ADJUSTMENT (Q+100) ++/* #define GFQ_ADJUSTMENT (Q+100) */ + #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] + const int vp8_gf_boost_qadjustment[QINDEX_RANGE] = + { +@@ -173,7 +174,7 @@ static const int kf_gf_boost_qlimits[QINDEX_RANGE] = + 600, 600, 600, 600, 600, 600, 600, 600, + }; + +-// % adjustment to target kf size based on seperation from previous frame ++/* % adjustment to target kf size based on seperation from previous frame */ + static const int kf_boost_seperation_adjustment[16] = + { + 30, 40, 50, 55, 60, 65, 70, 75, +@@ -224,10 +225,11 @@ void vp8_save_coding_context(VP8_COMP *cpi) + { + CODING_CONTEXT *const cc = & cpi->coding_context; + +- // Stores a snapshot of key state variables which can subsequently be +- // restored with a call to vp8_restore_coding_context. These functions are +- // intended for use in a re-code loop in vp8_compress_frame where the +- // quantizer value is adjusted between loop iterations. ++ /* Stores a snapshot of key state variables which can subsequently be ++ * restored with a call to vp8_restore_coding_context. 
These functions are ++ * intended for use in a re-code loop in vp8_compress_frame where the ++ * quantizer value is adjusted between loop iterations. ++ */ + + cc->frames_since_key = cpi->frames_since_key; + cc->filter_level = cpi->common.filter_level; +@@ -235,18 +237,16 @@ void vp8_save_coding_context(VP8_COMP *cpi) + cc->frames_since_golden = cpi->common.frames_since_golden; + + vp8_copy(cc->mvc, cpi->common.fc.mvc); +- vp8_copy(cc->mvcosts, cpi->mb.mvcosts); ++ vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); + +- vp8_copy(cc->kf_ymode_prob, cpi->common.kf_ymode_prob); + vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob); +- vp8_copy(cc->kf_uv_mode_prob, cpi->common.kf_uv_mode_prob); + vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob); + +- vp8_copy(cc->ymode_count, cpi->ymode_count); +- vp8_copy(cc->uv_mode_count, cpi->uv_mode_count); ++ vp8_copy(cc->ymode_count, cpi->mb.ymode_count); ++ vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count); + + +- // Stats ++ /* Stats */ + #ifdef MODE_STATS + vp8_copy(cc->y_modes, y_modes); + vp8_copy(cc->uv_modes, uv_modes); +@@ -264,8 +264,9 @@ void vp8_restore_coding_context(VP8_COMP *cpi) + { + CODING_CONTEXT *const cc = & cpi->coding_context; + +- // Restore key state variables to the snapshot state stored in the +- // previous call to vp8_save_coding_context. ++ /* Restore key state variables to the snapshot state stored in the ++ * previous call to vp8_save_coding_context. ++ */ + + cpi->frames_since_key = cc->frames_since_key; + cpi->common.filter_level = cc->filter_level; +@@ -274,17 +275,15 @@ void vp8_restore_coding_context(VP8_COMP *cpi) + + vp8_copy(cpi->common.fc.mvc, cc->mvc); + +- vp8_copy(cpi->mb.mvcosts, cc->mvcosts); ++ vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts); + +- vp8_copy(cpi->common.kf_ymode_prob, cc->kf_ymode_prob); + vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob); +- vp8_copy(cpi->common.kf_uv_mode_prob, cc->kf_uv_mode_prob); + vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob); + +- vp8_copy(cpi->ymode_count, cc->ymode_count); +- vp8_copy(cpi->uv_mode_count, cc->uv_mode_count); ++ vp8_copy(cpi->mb.ymode_count, cc->ymode_count); ++ vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count); + +- // Stats ++ /* Stats */ + #ifdef MODE_STATS + vp8_copy(y_modes, cc->y_modes); + vp8_copy(uv_modes, cc->uv_modes); +@@ -301,36 +300,30 @@ void vp8_restore_coding_context(VP8_COMP *cpi) + + void vp8_setup_key_frame(VP8_COMP *cpi) + { +- // Setup for Key frame: ++ /* Setup for Key frame: */ + + vp8_default_coef_probs(& cpi->common); + +- +- vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob); +- + vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); + { + int flag[2] = {1, 1}; + vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); + } + +- vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc)); //initialize pre_mvc to all zero. +- +- // Make sure we initialize separate contexts for altref,gold, and normal. +- // TODO shouldn't need 3 different copies of structure to do this! ++ /* Make sure we initialize separate contexts for altref,gold, and normal. ++ * TODO shouldn't need 3 different copies of structure to do this! ++ */ + vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); + vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); + vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); + +- //cpi->common.filter_level = 0; // Reset every key frame. 
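vp8_setup_key_frame() seeds the loop-filter level from the baseline quantizer index (the dead reset-to-zero comment is dropped) using a fixed 3/8 ratio. A one-line illustration:

    #include <stdio.h>

    int main(void)
    {
        int base_qindex = 40;
        /* Key-frame seed as in the hunk: filter_level = qindex * 3 / 8 */
        printf("%d\n", base_qindex * 3 / 8);  /* prints 15 */
        return 0;
    }
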
+ cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ; + +- // Provisional interval before next GF ++ /* Provisional interval before next GF */ + if (cpi->auto_gold) +- //cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; + cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; + else +- cpi->frames_till_gf_update_due = cpi->goldfreq; ++ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; + + cpi->common.refresh_golden_frame = 1; + cpi->common.refresh_alt_ref_frame = 1; +@@ -355,12 +348,12 @@ static int estimate_bits_at_q(int frame_kind, int Q, int MBs, + + static void calc_iframe_target_size(VP8_COMP *cpi) + { +- // boost defaults to half second ++ /* boost defaults to half second */ + int kf_boost; +- int target; ++ uint64_t target; + +- // Clear down mmx registers to allow floating point in what follows +- vp8_clear_system_state(); //__asm emms; ++ /* Clear down mmx registers to allow floating point in what follows */ ++ vp8_clear_system_state(); + + if (cpi->oxcf.fixed_q >= 0) + { +@@ -371,10 +364,10 @@ static void calc_iframe_target_size(VP8_COMP *cpi) + } + else if (cpi->pass == 2) + { +- // New Two pass RC ++ /* New Two pass RC */ + target = cpi->per_frame_bandwidth; + } +- // First Frame is a special case ++ /* First Frame is a special case */ + else if (cpi->common.current_video_frame == 0) + { + /* 1 Pass there is no information on which to base size so use +@@ -388,29 +381,29 @@ static void calc_iframe_target_size(VP8_COMP *cpi) + } + else + { +- // if this keyframe was forced, use a more recent Q estimate ++ /* if this keyframe was forced, use a more recent Q estimate */ + int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) + ? cpi->avg_frame_qindex : cpi->ni_av_qi; + +- int initial_boost = 24; // Corresponds to: |2.5 * per_frame_bandwidth| +- // Boost depends somewhat on frame rate: only used for 1 layer case. ++ int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ ++ /* Boost depends somewhat on frame rate: only used for 1 layer case. */ + if (cpi->oxcf.number_of_layers == 1) { + kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16)); + } + else { +- // Initial factor: set target size to: |2.5 * per_frame_bandwidth|. ++ /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ + kf_boost = initial_boost; + } + +- // adjustment up based on q: this factor ranges from ~1.2 to 2.2. ++ /* adjustment up based on q: this factor ranges from ~1.2 to 2.2. */ + kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100; + +- // frame separation adjustment ( down) ++ /* frame separation adjustment ( down) */ + if (cpi->frames_since_key < cpi->output_frame_rate / 2) + kf_boost = (int)(kf_boost + * cpi->frames_since_key / (cpi->output_frame_rate / 2)); + +- // Minimal target size is |2* per_frame_bandwidth|. ++ /* Minimal target size is |2* per_frame_bandwidth|. */ + if (kf_boost < 16) + kf_boost = 16; + +@@ -427,10 +420,11 @@ static void calc_iframe_target_size(VP8_COMP *cpi) + target = max_rate; + } + +- cpi->this_frame_target = target; ++ cpi->this_frame_target = (int)target; + +- // TODO: if we separate rate targeting from Q targetting, move this. +- // Reset the active worst quality to the baseline value for key frames. ++ /* TODO: if we separate rate targeting from Q targetting, move this. ++ * Reset the active worst quality to the baseline value for key frames. 
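For one-pass key frames the hunk above raises the starting boost from 24 to 32 (from |2.5x| to |3.0x| per_frame_bandwidth) and then shapes it three ways: up with the output frame rate for single-layer encodes, up by a Q-dependent factor, and down again when the previous key frame was recent. A sketch of that boost pipeline; qadj_pct stands in for kf_boost_qadjustment[Q] (the table shown above runs from 128 up to 220):

    static int max_int(int a, int b) { return a > b ? a : b; }

    static int keyframe_boost(int number_of_layers, double output_frame_rate,
                              int qadj_pct, int frames_since_key)
    {
        int kf_boost = 32;                    /* |3.0 * per_frame_bandwidth| */

        if (number_of_layers == 1)            /* boost with frame rate */
            kf_boost = max_int(kf_boost, (int)(2 * output_frame_rate - 16));

        kf_boost = kf_boost * qadj_pct / 100; /* adjustment up based on Q */

        /* frame separation adjustment (down) */
        if (frames_since_key < output_frame_rate / 2)
            kf_boost = (int)(kf_boost * frames_since_key
                             / (output_frame_rate / 2));

        if (kf_boost < 16)                    /* floor: |2x| bandwidth */
            kf_boost = 16;

        return kf_boost;
    }
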
++ */ + if (cpi->pass != 2) + cpi->active_worst_quality = cpi->worst_quality; + +@@ -439,9 +433,6 @@ static void calc_iframe_target_size(VP8_COMP *cpi) + FILE *f; + + f = fopen("kf_boost.stt", "a"); +- //fprintf(f, " %8d %10d %10d %10d %10d %10d %10d\n", +- // cpi->common.current_video_frame, cpi->target_bandwidth, cpi->frames_to_key, kf_boost_qadjustment[cpi->ni_av_qi], cpi->kf_boost, (cpi->this_frame_target *100 / cpi->per_frame_bandwidth), cpi->this_frame_target ); +- + fprintf(f, " %8u %10d %10d %10d\n", + cpi->common.current_video_frame, cpi->gfu_boost, cpi->baseline_gf_interval, cpi->source_alt_ref_pending); + +@@ -451,14 +442,15 @@ static void calc_iframe_target_size(VP8_COMP *cpi) + } + + +-// Do the best we can to define the parameters for the next GF based on what +-// information we have available. ++/* Do the best we can to define the parameters for the next GF based on what ++ * information we have available. ++ */ + static void calc_gf_params(VP8_COMP *cpi) + { + int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; + int Boost = 0; + +- int gf_frame_useage = 0; // Golden frame useage since last GF ++ int gf_frame_useage = 0; /* Golden frame useage since last GF */ + int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + + cpi->recent_ref_frame_usage[LAST_FRAME] + + cpi->recent_ref_frame_usage[GOLDEN_FRAME] + +@@ -466,33 +458,30 @@ static void calc_gf_params(VP8_COMP *cpi) + + int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); + +- // Reset the last boost indicator +- //cpi->last_boost = 100; +- + if (tot_mbs) + gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; + + if (pct_gf_active > gf_frame_useage) + gf_frame_useage = pct_gf_active; + +- // Not two pass ++ /* Not two pass */ + if (cpi->pass != 2) + { +- // Single Pass lagged mode: TBD ++ /* Single Pass lagged mode: TBD */ + if (0) + { + } + +- // Single Pass compression: Has to use current and historical data ++ /* Single Pass compression: Has to use current and historical data */ + else + { + #if 0 +- // Experimental code ++ /* Experimental code */ + int index = cpi->one_pass_frame_index; + int frames_to_scan = (cpi->max_gf_interval <= MAX_LAG_BUFFERS) ? cpi->max_gf_interval : MAX_LAG_BUFFERS; + ++ /* ************** Experimental code - incomplete */ + /* +- // *************** Experimental code - incomplete + double decay_val = 1.0; + double IIAccumulator = 0.0; + double last_iiaccumulator = 0.0; +@@ -535,48 +524,51 @@ static void calc_gf_params(VP8_COMP *cpi) + #else + + /*************************************************************/ +- // OLD code ++ /* OLD code */ + +- // Adjust boost based upon ambient Q ++ /* Adjust boost based upon ambient Q */ + Boost = GFQ_ADJUSTMENT; + +- // Adjust based upon most recently measure intra useage ++ /* Adjust based upon most recently measure intra useage */ + Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100; + +- // Adjust gf boost based upon GF usage since last GF ++ /* Adjust gf boost based upon GF usage since last GF */ + Boost = Boost * gf_adjust_table[gf_frame_useage] / 100; + #endif + } + +- // golden frame boost without recode loop often goes awry. be safe by keeping numbers down. ++ /* golden frame boost without recode loop often goes awry. be ++ * safe by keeping numbers down. 
++ */ + if (!cpi->sf.recode_loop) + { + if (cpi->compressor_speed == 2) + Boost = Boost / 2; + } + +- // Apply an upper limit based on Q for 1 pass encodes ++ /* Apply an upper limit based on Q for 1 pass encodes */ + if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0)) + Boost = kf_gf_boost_qlimits[Q]; + +- // Apply lower limits to boost. ++ /* Apply lower limits to boost. */ + else if (Boost < 110) + Boost = 110; + +- // Note the boost used ++ /* Note the boost used */ + cpi->last_boost = Boost; + + } + +- // Estimate next interval +- // This is updated once the real frame size/boost is known. ++ /* Estimate next interval ++ * This is updated once the real frame size/boost is known. ++ */ + if (cpi->oxcf.fixed_q == -1) + { +- if (cpi->pass == 2) // 2 Pass ++ if (cpi->pass == 2) /* 2 Pass */ + { + cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; + } +- else // 1 Pass ++ else /* 1 Pass */ + { + cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; + +@@ -602,10 +594,10 @@ static void calc_gf_params(VP8_COMP *cpi) + else + cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; + +- // ARF on or off ++ /* ARF on or off */ + if (cpi->pass != 2) + { +- // For now Alt ref is not allowed except in 2 pass modes. ++ /* For now Alt ref is not allowed except in 2 pass modes. */ + cpi->source_alt_ref_pending = 0; + + /*if ( cpi->oxcf.fixed_q == -1) +@@ -642,89 +634,34 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + min_frame_target = cpi->per_frame_bandwidth / 4; + + +- // Special alt reference frame case ++ /* Special alt reference frame case */ + if((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1)) + { + if (cpi->pass == 2) + { +- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame ++ /* Per frame bit target for the alt ref frame */ ++ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; + cpi->this_frame_target = cpi->per_frame_bandwidth; + } + + /* One Pass ??? TBD */ +- /*else +- { +- int frames_in_section; +- int allocation_chunks; +- int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; +- int alt_boost; +- int max_arf_rate; +- +- alt_boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); +- alt_boost += (cpi->frames_till_gf_update_due * 50); +- +- // If alt ref is not currently active then we have a pottential double hit with GF and ARF so reduce the boost a bit. +- // A similar thing is done on GFs that preceed a arf update. +- if ( !cpi->source_alt_ref_active ) +- alt_boost = alt_boost * 3 / 4; +- +- frames_in_section = cpi->frames_till_gf_update_due+1; // Standard frames + GF +- allocation_chunks = (frames_in_section * 100) + alt_boost; +- +- // Normalize Altboost and allocations chunck down to prevent overflow +- while ( alt_boost > 1000 ) +- { +- alt_boost /= 2; +- allocation_chunks /= 2; +- } +- +- else +- { +- int bits_in_section; +- +- if ( cpi->kf_overspend_bits > 0 ) +- { +- Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; +- +- if ( Adjustment > (cpi->per_frame_bandwidth - min_frame_target) ) +- Adjustment = (cpi->per_frame_bandwidth - min_frame_target); +- +- cpi->kf_overspend_bits -= Adjustment; +- +- // Calculate an inter frame bandwidth target for the next few frames designed to recover +- // any extra bits spent on the key frame. 
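The one-pass golden-frame boost in calc_gf_params() above is a chain of percentage adjustments: start from the Q-dependent GFQ_ADJUSTMENT, scale by recent intra usage and by golden-frame usage since the last GF, halve it when running fast without a recode loop, then clamp between a Q-dependent ceiling and a floor of 110. A sketch with the table lookups passed in rather than reproduced:

    static int golden_frame_boost(int base_boost,     /* GFQ_ADJUSTMENT */
                                  int intra_adj_pct,  /* gf_intra_usage_adjustment */
                                  int usage_adj_pct,  /* gf_adjust_table */
                                  int fast_no_recode, /* speed 2, no recode loop */
                                  int q_limit)        /* kf_gf_boost_qlimits[Q] */
    {
        int boost = base_boost;

        boost = boost * intra_adj_pct / 100;  /* recent intra usage */
        boost = boost * usage_adj_pct / 100;  /* GF usage since last GF */

        if (fast_no_recode)
            boost /= 2;          /* no recode loop: keep numbers down */

        if (boost > q_limit)
            boost = q_limit;     /* upper limit based on Q (1 pass) */
        else if (boost < 110)
            boost = 110;         /* lower limit */

        return boost;
    }
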
+- cpi->inter_frame_target = cpi->per_frame_bandwidth - Adjustment; +- if ( cpi->inter_frame_target < min_frame_target ) +- cpi->inter_frame_target = min_frame_target; +- } +- else +- cpi->inter_frame_target = cpi->per_frame_bandwidth; +- +- bits_in_section = cpi->inter_frame_target * frames_in_section; +- +- // Avoid loss of precision but avoid overflow +- if ( (bits_in_section>>7) > allocation_chunks ) +- cpi->this_frame_target = alt_boost * (bits_in_section / allocation_chunks); +- else +- cpi->this_frame_target = (alt_boost * bits_in_section) / allocation_chunks; +- } +- } +- */ + } + +- // Normal frames (gf,and inter) ++ /* Normal frames (gf,and inter) */ + else + { +- // 2 pass ++ /* 2 pass */ + if (cpi->pass == 2) + { + cpi->this_frame_target = cpi->per_frame_bandwidth; + } +- // 1 pass ++ /* 1 pass */ + else + { +- // Make rate adjustment to recover bits spent in key frame +- // Test to see if the key frame inter data rate correction should still be in force ++ /* Make rate adjustment to recover bits spent in key frame ++ * Test to see if the key frame inter data rate correction ++ * should still be in force ++ */ + if (cpi->kf_overspend_bits > 0) + { + Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; +@@ -734,8 +671,10 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + + cpi->kf_overspend_bits -= Adjustment; + +- // Calculate an inter frame bandwidth target for the next few frames designed to recover +- // any extra bits spent on the key frame. ++ /* Calculate an inter frame bandwidth target for the next ++ * few frames designed to recover any extra bits spent on ++ * the key frame. ++ */ + cpi->this_frame_target = cpi->per_frame_bandwidth - Adjustment; + + if (cpi->this_frame_target < min_frame_target) +@@ -744,7 +683,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + else + cpi->this_frame_target = cpi->per_frame_bandwidth; + +- // If appropriate make an adjustment to recover bits spent on a recent GF ++ /* If appropriate make an adjustment to recover bits spent on a ++ * recent GF ++ */ + if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) + { + int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; +@@ -756,11 +697,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + cpi->this_frame_target -= Adjustment; + } + +- // Apply small + and - boosts for non gf frames ++ /* Apply small + and - boosts for non gf frames */ + if ((cpi->last_boost > 150) && (cpi->frames_till_gf_update_due > 0) && + (cpi->current_gf_interval >= (MIN_GF_INTERVAL << 1))) + { +- // % Adjustment limited to the range 1% to 10% ++ /* % Adjustment limited to the range 1% to 10% */ + Adjustment = (cpi->last_boost - 100) >> 5; + + if (Adjustment < 1) +@@ -768,7 +709,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + else if (Adjustment > 10) + Adjustment = 10; + +- // Convert to bits ++ /* Convert to bits */ + Adjustment = (cpi->this_frame_target * Adjustment) / 100; + + if (Adjustment > (cpi->this_frame_target - min_frame_target)) +@@ -782,47 +723,53 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + } + } + +- // Sanity check that the total sum of adjustments is not above the maximum allowed +- // That is that having allowed for KF and GF penalties we have not pushed the +- // current interframe target to low. 
If the adjustment we apply here is not capable of recovering +- // all the extra bits we have spent in the KF or GF then the remainder will have to be recovered over +- // a longer time span via other buffer / rate control mechanisms. ++ /* Sanity check that the total sum of adjustments is not above the ++ * maximum allowed That is that having allowed for KF and GF penalties ++ * we have not pushed the current interframe target to low. If the ++ * adjustment we apply here is not capable of recovering all the extra ++ * bits we have spent in the KF or GF then the remainder will have to ++ * be recovered over a longer time span via other buffer / rate control ++ * mechanisms. ++ */ + if (cpi->this_frame_target < min_frame_target) + cpi->this_frame_target = min_frame_target; + + if (!cpi->common.refresh_alt_ref_frame) +- // Note the baseline target data rate for this inter frame. ++ /* Note the baseline target data rate for this inter frame. */ + cpi->inter_frame_target = cpi->this_frame_target; + +- // One Pass specific code ++ /* One Pass specific code */ + if (cpi->pass == 0) + { +- // Adapt target frame size with respect to any buffering constraints: ++ /* Adapt target frame size with respect to any buffering constraints: */ + if (cpi->buffered_mode) + { +- int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100; ++ int one_percent_bits = (int) ++ (1 + cpi->oxcf.optimal_buffer_level / 100); + + if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || + (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level)) + { + int percent_low = 0; + +- // Decide whether or not we need to adjust the frame data rate target. +- // +- // If we are are below the optimal buffer fullness level and adherence +- // to buffering constraints is important to the end usage then adjust +- // the per frame target. ++ /* Decide whether or not we need to adjust the frame data ++ * rate target. ++ * ++ * If we are are below the optimal buffer fullness level ++ * and adherence to buffering constraints is important to ++ * the end usage then adjust the per frame target. ++ */ + if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && + (cpi->buffer_level < cpi->oxcf.optimal_buffer_level)) + { +- percent_low = +- (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / +- one_percent_bits; ++ percent_low = (int) ++ ((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / ++ one_percent_bits); + } +- // Are we overshooting the long term clip data rate... ++ /* Are we overshooting the long term clip data rate... */ + else if (cpi->bits_off_target < 0) + { +- // Adjust per frame data target downwards to compensate. ++ /* Adjust per frame data target downwards to compensate. */ + percent_low = (int)(100 * -cpi->bits_off_target / + (cpi->total_byte_count * 8)); + } +@@ -832,40 +779,46 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + else if (percent_low < 0) + percent_low = 0; + +- // lower the target bandwidth for this frame. ++ /* lower the target bandwidth for this frame. */ + cpi->this_frame_target -= + (cpi->this_frame_target * percent_low) / 200; + +- // Are we using allowing control of active_worst_allowed_q +- // according to buffer level. ++ /* Are we using allowing control of active_worst_allowed_q ++ * according to buffer level. ++ */ + if (cpi->auto_worst_q && cpi->ni_frames > 150) + { +- int critical_buffer_level; +- +- // For streaming applications the most important factor is +- // cpi->buffer_level as this takes into account the +- // specified short term buffering constraints. 
However, +- // hitting the long term clip data rate target is also +- // important. ++ int64_t critical_buffer_level; ++ ++ /* For streaming applications the most important factor is ++ * cpi->buffer_level as this takes into account the ++ * specified short term buffering constraints. However, ++ * hitting the long term clip data rate target is also ++ * important. ++ */ + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + { +- // Take the smaller of cpi->buffer_level and +- // cpi->bits_off_target ++ /* Take the smaller of cpi->buffer_level and ++ * cpi->bits_off_target ++ */ + critical_buffer_level = + (cpi->buffer_level < cpi->bits_off_target) + ? cpi->buffer_level : cpi->bits_off_target; + } +- // For local file playback short term buffering constraints +- // are less of an issue ++ /* For local file playback short term buffering constraints ++ * are less of an issue ++ */ + else + { +- // Consider only how we are doing for the clip as a +- // whole ++ /* Consider only how we are doing for the clip as a ++ * whole ++ */ + critical_buffer_level = cpi->bits_off_target; + } + +- // Set the active worst quality based upon the selected +- // buffer fullness number. ++ /* Set the active worst quality based upon the selected ++ * buffer fullness number. ++ */ + if (critical_buffer_level < cpi->oxcf.optimal_buffer_level) + { + if ( critical_buffer_level > +@@ -877,15 +830,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + (critical_buffer_level - + (cpi->oxcf.optimal_buffer_level >> 2)); + +- // Step active worst quality down from +- // cpi->ni_av_qi when (critical_buffer_level == +- // cpi->optimal_buffer_level) to +- // cpi->worst_quality when +- // (critical_buffer_level == +- // cpi->optimal_buffer_level >> 2) ++ /* Step active worst quality down from ++ * cpi->ni_av_qi when (critical_buffer_level == ++ * cpi->optimal_buffer_level) to ++ * cpi->worst_quality when ++ * (critical_buffer_level == ++ * cpi->optimal_buffer_level >> 2) ++ */ + cpi->active_worst_quality = + cpi->worst_quality - +- ((qadjustment_range * above_base) / ++ (int)((qadjustment_range * above_base) / + (cpi->oxcf.optimal_buffer_level*3>>2)); + } + else +@@ -910,9 +864,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)) + { +- percent_high = (cpi->buffer_level ++ percent_high = (int)((cpi->buffer_level + - cpi->oxcf.optimal_buffer_level) +- / one_percent_bits; ++ / one_percent_bits); + } + else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) + { +@@ -928,11 +882,14 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + cpi->this_frame_target += (cpi->this_frame_target * + percent_high) / 200; + +- // Are we allowing control of active_worst_allowed_q according +- // to buffer level. ++ /* Are we allowing control of active_worst_allowed_q according ++ * to buffer level. 
++ */ + if (cpi->auto_worst_q && cpi->ni_frames > 150) + { +- // When using the relaxed buffer model stick to the user specified value ++ /* When using the relaxed buffer model stick to the ++ * user specified value ++ */ + cpi->active_worst_quality = cpi->ni_av_qi; + } + else +@@ -941,26 +898,27 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + } + } + +- // Set active_best_quality to prevent quality rising too high ++ /* Set active_best_quality to prevent quality rising too high */ + cpi->active_best_quality = cpi->best_quality; + +- // Worst quality obviously must not be better than best quality ++ /* Worst quality obviously must not be better than best quality */ + if (cpi->active_worst_quality <= cpi->active_best_quality) + cpi->active_worst_quality = cpi->active_best_quality + 1; + + if(cpi->active_worst_quality > 127) + cpi->active_worst_quality = 127; + } +- // Unbuffered mode (eg. video conferencing) ++ /* Unbuffered mode (eg. video conferencing) */ + else + { +- // Set the active worst quality ++ /* Set the active worst quality */ + cpi->active_worst_quality = cpi->worst_quality; + } + +- // Special trap for constrained quality mode +- // "active_worst_quality" may never drop below cq level +- // for any frame type. ++ /* Special trap for constrained quality mode ++ * "active_worst_quality" may never drop below cq level ++ * for any frame type. ++ */ + if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && + cpi->active_worst_quality < cpi->cq_target_quality) + { +@@ -968,16 +926,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + } + } + +- // Test to see if we have to drop a frame +- // The auto-drop frame code is only used in buffered mode. +- // In unbufferd mode (eg vide conferencing) the descision to +- // code or drop a frame is made outside the codec in response to real +- // world comms or buffer considerations. +- if (cpi->drop_frames_allowed && cpi->buffered_mode && ++ /* Test to see if we have to drop a frame ++ * The auto-drop frame code is only used in buffered mode. ++ * In unbufferd mode (eg vide conferencing) the descision to ++ * code or drop a frame is made outside the codec in response to real ++ * world comms or buffer considerations. ++ */ ++ if (cpi->drop_frames_allowed && + (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && +- ((cpi->common.frame_type != KEY_FRAME))) //|| !cpi->oxcf.allow_spatial_resampling) ) ++ ((cpi->common.frame_type != KEY_FRAME))) + { +- // Check for a buffer underun-crisis in which case we have to drop a frame ++ /* Check for a buffer underun-crisis in which case we have to drop ++ * a frame ++ */ + if ((cpi->buffer_level < 0)) + { + #if 0 +@@ -988,41 +949,23 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + (cpi->buffer_level * 100) / cpi->oxcf.optimal_buffer_level); + fclose(f); + #endif +- //vpx_log("Decoder: Drop frame due to bandwidth: %d \n",cpi->buffer_level, cpi->av_per_frame_bandwidth); +- +- cpi->drop_frame = 1; +- } +- +-#if 0 +- // Check for other drop frame crtieria (Note 2 pass cbr uses decimation on whole KF sections) +- else if ((cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && +- (cpi->drop_count < cpi->max_drop_count) && (cpi->pass == 0)) +- { + cpi->drop_frame = 1; +- } +- +-#endif + +- if (cpi->drop_frame) +- { +- // Update the buffer level variable. ++ /* Update the buffer level variable. 
*/ + cpi->bits_off_target += cpi->av_per_frame_bandwidth; + if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) +- cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; ++ cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size; + cpi->buffer_level = cpi->bits_off_target; + } +- else +- cpi->drop_count = 0; + } + +- // Adjust target frame size for Golden Frames: ++ /* Adjust target frame size for Golden Frames: */ + if (cpi->oxcf.error_resilient_mode == 0 && + (cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame) + { +- //int Boost = 0; + int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; + +- int gf_frame_useage = 0; // Golden frame useage since last GF ++ int gf_frame_useage = 0; /* Golden frame useage since last GF */ + int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + + cpi->recent_ref_frame_usage[LAST_FRAME] + + cpi->recent_ref_frame_usage[GOLDEN_FRAME] + +@@ -1030,30 +973,29 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + + int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); + +- // Reset the last boost indicator +- //cpi->last_boost = 100; +- + if (tot_mbs) + gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; + + if (pct_gf_active > gf_frame_useage) + gf_frame_useage = pct_gf_active; + +- // Is a fixed manual GF frequency being used ++ /* Is a fixed manual GF frequency being used */ + if (cpi->auto_gold) + { +- // For one pass throw a GF if recent frame intra useage is low or the GF useage is high ++ /* For one pass throw a GF if recent frame intra useage is ++ * low or the GF useage is high ++ */ + if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) + cpi->common.refresh_golden_frame = 1; + +- // Two pass GF descision ++ /* Two pass GF descision */ + else if (cpi->pass == 2) + cpi->common.refresh_golden_frame = 1; + } + + #if 0 + +- // Debug stats ++ /* Debug stats */ + if (0) + { + FILE *f; +@@ -1070,7 +1012,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + { + #if 0 + +- if (0) // p_gw ++ if (0) + { + FILE *f; + +@@ -1086,16 +1028,20 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + calc_gf_params(cpi); + } + +- // If we are using alternate ref instead of gf then do not apply the boost +- // It will instead be applied to the altref update +- // Jims modified boost ++ /* If we are using alternate ref instead of gf then do not apply the ++ * boost It will instead be applied to the altref update Jims ++ * modified boost ++ */ + if (!cpi->source_alt_ref_active) + { + if (cpi->oxcf.fixed_q < 0) + { + if (cpi->pass == 2) + { +- cpi->this_frame_target = cpi->per_frame_bandwidth; // The spend on the GF is defined in the two pass code for two pass encodes ++ /* The spend on the GF is defined in the two pass ++ * code for two pass encodes ++ */ ++ cpi->this_frame_target = cpi->per_frame_bandwidth; + } + else + { +@@ -1104,14 +1050,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + int allocation_chunks = (frames_in_section * 100) + (Boost - 100); + int bits_in_section = cpi->inter_frame_target * frames_in_section; + +- // Normalize Altboost and allocations chunck down to prevent overflow ++ /* Normalize Altboost and allocations chunck down to ++ * prevent overflow ++ */ + while (Boost > 1000) + { + Boost /= 2; + allocation_chunks /= 2; + } + +- // Avoid loss of precision but avoid overflow ++ /* Avoid loss of precision but avoid overflow */ + if ((bits_in_section 
>> 7) > allocation_chunks) + cpi->this_frame_target = Boost * (bits_in_section / allocation_chunks); + else +@@ -1124,10 +1072,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) + * cpi->last_boost) / 100; + + } +- // If there is an active ARF at this location use the minimum +- // bits on this frame even if it is a contructed arf. +- // The active maximum quantizer insures that an appropriate +- // number of bits will be spent if needed for contstructed ARFs. ++ /* If there is an active ARF at this location use the minimum ++ * bits on this frame even if it is a contructed arf. ++ * The active maximum quantizer insures that an appropriate ++ * number of bits will be spent if needed for contstructed ARFs. ++ */ + else + { + cpi->this_frame_target = 0; +@@ -1151,8 +1100,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) + + int projected_size_based_on_q = 0; + +- // Clear down mmx registers to allow floating point in what follows +- vp8_clear_system_state(); //__asm emms; ++ /* Clear down mmx registers to allow floating point in what follows */ ++ vp8_clear_system_state(); + + if (cpi->common.frame_type == KEY_FRAME) + { +@@ -1160,23 +1109,26 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) + } + else + { +- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) ++ if (cpi->oxcf.number_of_layers == 1 && ++ (cpi->common.refresh_alt_ref_frame || ++ cpi->common.refresh_golden_frame)) + rate_correction_factor = cpi->gf_rate_correction_factor; + else + rate_correction_factor = cpi->rate_correction_factor; + } + +- // Work out how big we would have expected the frame to be at this Q given the current correction factor. +- // Stay in double to avoid int overflow when values are large +- //projected_size_based_on_q = ((int)(.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) >> BPER_MB_NORMBITS; ++ /* Work out how big we would have expected the frame to be at this Q ++ * given the current correction factor. Stay in double to avoid int ++ * overflow when values are large ++ */ + projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); + +- // Make some allowance for cpi->zbin_over_quant +- if (cpi->zbin_over_quant > 0) ++ /* Make some allowance for cpi->zbin_over_quant */ ++ if (cpi->mb.zbin_over_quant > 0) + { +- int Z = cpi->zbin_over_quant; ++ int Z = cpi->mb.zbin_over_quant; + double Factor = 0.99; +- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; ++ double factor_adjustment = 0.01 / 256.0; + + while (Z > 0) + { +@@ -1190,13 +1142,13 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) + } + } + +- // Work out a size correction factor. +- //if ( cpi->this_frame_target > 0 ) +- // correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target; ++ /* Work out a size correction factor. 
*/ + if (projected_size_based_on_q > 0) + correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q; + +- // More heavily damped adjustment used if we have been oscillating either side of target ++ /* More heavily damped adjustment used if we have been oscillating ++ * either side of target ++ */ + switch (damp_var) + { + case 0: +@@ -1211,25 +1163,23 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) + break; + } + +- //if ( (correction_factor > 102) && (Q < cpi->active_worst_quality) ) + if (correction_factor > 102) + { +- // We are not already at the worst allowable quality ++ /* We are not already at the worst allowable quality */ + correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit)); + rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); + +- // Keep rate_correction_factor within limits ++ /* Keep rate_correction_factor within limits */ + if (rate_correction_factor > MAX_BPB_FACTOR) + rate_correction_factor = MAX_BPB_FACTOR; + } +- //else if ( (correction_factor < 99) && (Q > cpi->active_best_quality) ) + else if (correction_factor < 99) + { +- // We are not already at the best allowable quality ++ /* We are not already at the best allowable quality */ + correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit)); + rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); + +- // Keep rate_correction_factor within limits ++ /* Keep rate_correction_factor within limits */ + if (rate_correction_factor < MIN_BPB_FACTOR) + rate_correction_factor = MIN_BPB_FACTOR; + } +@@ -1238,7 +1188,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) + cpi->key_frame_rate_correction_factor = rate_correction_factor; + else + { +- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) ++ if (cpi->oxcf.number_of_layers == 1 && ++ (cpi->common.refresh_alt_ref_frame || ++ cpi->common.refresh_golden_frame)) + cpi->gf_rate_correction_factor = rate_correction_factor; + else + cpi->rate_correction_factor = rate_correction_factor; +@@ -1250,8 +1202,8 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) + { + int Q = cpi->active_worst_quality; + +- // Reset Zbin OQ value +- cpi->zbin_over_quant = 0; ++ /* Reset Zbin OQ value */ ++ cpi->mb.zbin_over_quant = 0; + + if (cpi->oxcf.fixed_q >= 0) + { +@@ -1261,11 +1213,13 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) + { + Q = cpi->oxcf.key_q; + } +- else if (cpi->common.refresh_alt_ref_frame) ++ else if (cpi->oxcf.number_of_layers == 1 && ++ cpi->common.refresh_alt_ref_frame) + { + Q = cpi->oxcf.alt_q; + } +- else if (cpi->common.refresh_golden_frame) ++ else if (cpi->oxcf.number_of_layers == 1 && ++ cpi->common.refresh_golden_frame) + { + Q = cpi->oxcf.gold_q; + } +@@ -1279,20 +1233,25 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) + int bits_per_mb_at_this_q; + double correction_factor; + +- // Select the appropriate correction factor based upon type of frame. ++ /* Select the appropriate correction factor based upon type of frame. 
*/ + if (cpi->common.frame_type == KEY_FRAME) + correction_factor = cpi->key_frame_rate_correction_factor; + else + { +- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) ++ if (cpi->oxcf.number_of_layers == 1 && ++ (cpi->common.refresh_alt_ref_frame || ++ cpi->common.refresh_golden_frame)) + correction_factor = cpi->gf_rate_correction_factor; + else + correction_factor = cpi->rate_correction_factor; + } + +- // Calculate required scaling factor based on target frame size and size of frame produced using previous Q ++ /* Calculate required scaling factor based on target frame size and ++ * size of frame produced using previous Q ++ */ + if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) +- target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int ++ /* Case where we would overflow int */ ++ target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; + else + target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; + +@@ -1317,18 +1276,23 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) + while (++i <= cpi->active_worst_quality); + + +- // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like +- // the RD multiplier and zero bin size. ++ /* If we are at MAXQ then enable Q over-run which seeks to claw ++ * back additional bits through things like the RD multiplier ++ * and zero bin size. ++ */ + if (Q >= MAXQ) + { + int zbin_oqmax; + + double Factor = 0.99; +- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; ++ double factor_adjustment = 0.01 / 256.0; + + if (cpi->common.frame_type == KEY_FRAME) +- zbin_oqmax = 0; //ZBIN_OQ_MAX/16 +- else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active)) ++ zbin_oqmax = 0; ++ else if (cpi->oxcf.number_of_layers == 1 && ++ (cpi->common.refresh_alt_ref_frame || ++ (cpi->common.refresh_golden_frame && ++ !cpi->source_alt_ref_active))) + zbin_oqmax = 16; + else + zbin_oqmax = ZBIN_OQ_MAX; +@@ -1347,25 +1311,29 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) + cpi->zbin_over_quant = (int)Oq; + }*/ + +- // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true. +- // The effect will be highly clip dependent and may well have sudden steps. +- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero +- // bin and hence decreasing the number of low magnitude non zero coefficients. +- while (cpi->zbin_over_quant < zbin_oqmax) ++ /* Each incrment in the zbin is assumed to have a fixed effect ++ * on bitrate. This is not of course true. The effect will be ++ * highly clip dependent and may well have sudden steps. The ++ * idea here is to acheive higher effective quantizers than the ++ * normal maximum by expanding the zero bin and hence ++ * decreasing the number of low magnitude non zero coefficients. 
++ */ ++ while (cpi->mb.zbin_over_quant < zbin_oqmax) + { +- cpi->zbin_over_quant ++; ++ cpi->mb.zbin_over_quant ++; + +- if (cpi->zbin_over_quant > zbin_oqmax) +- cpi->zbin_over_quant = zbin_oqmax; ++ if (cpi->mb.zbin_over_quant > zbin_oqmax) ++ cpi->mb.zbin_over_quant = zbin_oqmax; + +- // Adjust bits_per_mb_at_this_q estimate ++ /* Adjust bits_per_mb_at_this_q estimate */ + bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); + Factor += factor_adjustment; + + if (Factor >= 0.999) + Factor = 0.999; + +- if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate ++ /* Break out if we get down to the target rate */ ++ if (bits_per_mb_at_this_q <= target_bits_per_mb) + break; + } + +@@ -1380,7 +1348,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) + { + int i; + +- // Average key frame frequency ++ /* Average key frame frequency */ + int av_key_frame_frequency = 0; + + /* First key frame at start of sequence is a special case. We have no +@@ -1431,11 +1399,11 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) + + void vp8_adjust_key_frame_context(VP8_COMP *cpi) + { +- // Clear down mmx registers to allow floating point in what follows ++ /* Clear down mmx registers to allow floating point in what follows */ + vp8_clear_system_state(); + +- // Do we have any key frame overspend to recover? +- // Two-pass overspend handled elsewhere. ++ /* Do we have any key frame overspend to recover? */ ++ /* Two-pass overspend handled elsewhere. */ + if ((cpi->pass != 2) + && (cpi->projected_frame_size > cpi->per_frame_bandwidth)) + { +@@ -1469,10 +1437,12 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) + + void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit) + { +- // Set-up bounds on acceptable frame size: ++ /* Set-up bounds on acceptable frame size: */ + if (cpi->oxcf.fixed_q >= 0) + { +- // Fixed Q scenario: frame size never outranges target (there is no target!) ++ /* Fixed Q scenario: frame size never outranges target ++ * (there is no target!) 
++ */
+ *frame_under_shoot_limit = 0;
+ *frame_over_shoot_limit = INT_MAX;
+ }
+@@ -1494,18 +1464,22 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
+ }
+ else
+ {
+- // For CBR take buffer fullness into account
++ /* For CBR take buffer fullness into account */
+ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+ {
+ if (cpi->buffer_level >= ((cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1))
+ {
+- // Buffer is too full so relax overshoot and tighten undershoot
++ /* Buffer is too full so relax overshoot and tighten
++ * undershoot
++ */
+ *frame_over_shoot_limit = cpi->this_frame_target * 12 / 8;
+ *frame_under_shoot_limit = cpi->this_frame_target * 6 / 8;
+ }
+ else if (cpi->buffer_level <= (cpi->oxcf.optimal_buffer_level >> 1))
+ {
+- // Buffer is too low so relax undershoot and tighten overshoot
++ /* Buffer is too low so relax undershoot and tighten
++ * overshoot
++ */
+ *frame_over_shoot_limit = cpi->this_frame_target * 10 / 8;
+ *frame_under_shoot_limit = cpi->this_frame_target * 4 / 8;
+ }
+@@ -1515,11 +1489,13 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
+ *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
+ }
+ }
+- // VBR and CQ mode
+- // Note that tighter restrictions here can help quality but hurt encode speed
++ /* VBR and CQ mode */
++ /* Note that tighter restrictions here can help quality
++ * but hurt encode speed
++ */
+ else
+ {
+- // Stron overshoot limit for constrained quality
++ /* Strong overshoot limit for constrained quality */
+ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
+ {
+ *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
+@@ -1534,9 +1510,10 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
+ }
+ }
+
+- // For very small rate targets where the fractional adjustment
+- // (eg * 7/8) may be tiny make sure there is at least a minimum
+- // range.
++ /* For very small rate targets where the fractional adjustment
++ * (eg * 7/8) may be tiny make sure there is at least a minimum
++ * range. 
++ */ + *frame_over_shoot_limit += 200; + *frame_under_shoot_limit -= 200; + if ( *frame_under_shoot_limit < 0 ) +@@ -1546,7 +1523,7 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, + } + + +-// return of 0 means drop frame ++/* return of 0 means drop frame */ + int vp8_pick_frame_size(VP8_COMP *cpi) + { + VP8_COMMON *cm = &cpi->common; +@@ -1557,11 +1534,10 @@ int vp8_pick_frame_size(VP8_COMP *cpi) + { + calc_pframe_target_size(cpi); + +- // Check if we're dropping the frame: ++ /* Check if we're dropping the frame: */ + if (cpi->drop_frame) + { + cpi->drop_frame = 0; +- cpi->drop_count++; + return 0; + } + } +diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h +index d4f7796..c43f08d 100644 +--- a/vp8/encoder/ratectrl.h ++++ b/vp8/encoder/ratectrl.h +@@ -22,7 +22,7 @@ extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame); + extern void vp8_adjust_key_frame_context(VP8_COMP *cpi); + extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit); + +-// return of 0 means drop frame ++/* return of 0 means drop frame */ + extern int vp8_pick_frame_size(VP8_COMP *cpi); + + #endif +diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c +index 2b706ba..ceb817c 100644 +--- a/vp8/encoder/rdopt.c ++++ b/vp8/encoder/rdopt.c +@@ -21,6 +21,7 @@ + #include "onyx_int.h" + #include "modecosts.h" + #include "encodeintra.h" ++#include "pickinter.h" + #include "vp8/common/entropymode.h" + #include "vp8/common/reconinter.h" + #include "vp8/common/reconintra4x4.h" +@@ -36,7 +37,6 @@ + #if CONFIG_TEMPORAL_DENOISING + #include "denoising.h" + #endif +- + extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); + + #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) +@@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] = + }; + + static void fill_token_costs( +- unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], +- const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] ++ int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], ++ const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] + ) + { + int i, j, k; +@@ -159,21 +159,26 @@ static void fill_token_costs( + for (i = 0; i < BLOCK_TYPES; i++) + for (j = 0; j < COEF_BANDS; j++) + for (k = 0; k < PREV_COEF_CONTEXTS; k++) +- // check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1 +- if(k==0 && j>(i==0) ) +- vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2); ++ ++ /* check for pt=0 and band > 1 if block type 0 ++ * and 0 if blocktype 1 ++ */ ++ if (k == 0 && j > (i == 0)) ++ vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2); + else +- vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree); ++ vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree); + } + +-static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0, +- 0, 0, 0, 0, 0, 0, 0, 0, +- 0, 0, 0, 0, 0, 0, 0, 0, +- 0, 0, 0, 0, 0, 0, 0, 0, +- }; ++static const int rd_iifactor[32] = ++{ ++ 4, 4, 3, 2, 1, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0 ++}; + + /* values are now correlated to quantizer */ +-static int sad_per_bit16lut[QINDEX_RANGE] = ++static const int sad_per_bit16lut[QINDEX_RANGE] = + { + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +@@ -192,7 +197,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] = + 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 13, 13, 13, 13, 14, 14 + }; +-static int 
sad_per_bit4lut[QINDEX_RANGE] = ++static const int sad_per_bit4lut[QINDEX_RANGE] = + { + 2, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, +@@ -218,30 +223,30 @@ void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) + cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; + } + +-void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) ++void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) + { + int q; + int i; + double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; + double rdconst = 2.80; + +- vp8_clear_system_state(); //__asm emms; ++ vp8_clear_system_state(); + +- // Further tests required to see if optimum is different +- // for key frames, golden frames and arf frames. +- // if (cpi->common.refresh_golden_frame || +- // cpi->common.refresh_alt_ref_frame) ++ /* Further tests required to see if optimum is different ++ * for key frames, golden frames and arf frames. ++ */ + cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); + +- // Extend rate multiplier along side quantizer zbin increases +- if (cpi->zbin_over_quant > 0) ++ /* Extend rate multiplier along side quantizer zbin increases */ ++ if (cpi->mb.zbin_over_quant > 0) + { + double oq_factor; + double modq; + +- // Experimental code using the same basic equation as used for Q above +- // The units of cpi->zbin_over_quant are 1/128 of Q bin size +- oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant); ++ /* Experimental code using the same basic equation as used for Q above ++ * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size ++ */ ++ oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant); + modq = (int)((double)capped_q * oq_factor); + cpi->RDMULT = (int)(rdconst * (modq * modq)); + } +@@ -260,6 +265,11 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) + + vp8_set_speed_features(cpi); + ++ for (i = 0; i < MAX_MODES; i++) ++ { ++ x->mode_test_hit_counts[i] = 0; ++ } ++ + q = (int)pow(Qvalue, 1.25); + + if (q < 8) +@@ -274,14 +284,14 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) + { + if (cpi->sf.thresh_mult[i] < INT_MAX) + { +- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; ++ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; + } + else + { +- cpi->rd_threshes[i] = INT_MAX; ++ x->rd_threshes[i] = INT_MAX; + } + +- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; ++ cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; + } + } + else +@@ -292,19 +302,19 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) + { + if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) + { +- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; ++ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; + } + else + { +- cpi->rd_threshes[i] = INT_MAX; ++ x->rd_threshes[i] = INT_MAX; + } + +- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; ++ cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; + } + } + + { +- // build token cost array for the type of frame we have now ++ /* build token cost array for the type of frame we have now */ + FRAME_CONTEXT *l = &cpi->lfc_n; + + if(cpi->common.refresh_alt_ref_frame) +@@ -323,12 +333,8 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) + */ + + +- // TODO make these mode costs depend on last,alt or gold too. (jbb) ++ /* TODO make these mode costs depend on last,alt or gold too. 
(jbb) */ + vp8_init_mode_costs(cpi); +- +- // TODO figure onnnnuut why making mv cost frame type dependent didn't help (jbb) +- //vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) l->mvc, flags); +- + } + + } +@@ -353,14 +359,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) + + #endif + +- /* +- // this is done during parameter valid check +- if( cpi->oxcf.cpu_used > 16) +- cpi->oxcf.cpu_used = 16; +- if( cpi->oxcf.cpu_used < -16) +- cpi->oxcf.cpu_used = -16; +- */ +- + if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) + { + if (cpi->avg_pick_mode_time == 0) +@@ -387,10 +385,10 @@ void vp8_auto_select_speed(VP8_COMP *cpi) + cpi->avg_pick_mode_time = 0; + cpi->avg_encode_time = 0; + +- // In real-time mode, cpi->speed is in [4, 16]. +- if (cpi->Speed < 4) //if ( cpi->Speed < 0 ) ++ /* In real-time mode, cpi->speed is in [4, 16]. */ ++ if (cpi->Speed < 4) + { +- cpi->Speed = 4; //cpi->Speed = 0; ++ cpi->Speed = 4; + } + } + } +@@ -546,7 +544,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, + if (c < 16) + cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; + +- pt = (c != !type); // is eob first coefficient; ++ pt = (c != !type); /* is eob first coefficient; */ + *a = *l = pt; + + return cost; +@@ -592,7 +590,7 @@ static void macro_block_yrd( MACROBLOCK *mb, + vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src), + mb->block[0].src_stride, mb->e_mbd.predictor, 16); + +- // Fdct and building the 2nd order block ++ /* Fdct and building the 2nd order block */ + for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) + { + mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32); +@@ -600,25 +598,25 @@ static void macro_block_yrd( MACROBLOCK *mb, + *Y2DCPtr++ = beptr->coeff[16]; + } + +- // 2nd order fdct ++ /* 2nd order fdct */ + mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); + +- // Quantization ++ /* Quantization */ + for (b = 0; b < 16; b++) + { + mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); + } + +- // DC predication and Quantization of 2nd Order block ++ /* DC predication and Quantization of 2nd Order block */ + mb->quantize_b(mb_y2, x_y2); + +- // Distortion ++ /* Distortion */ + d = vp8_mbblock_error(mb, 1) << 2; + d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff); + + *Distortion = (d >> 4); + +- // rate ++ /* rate */ + *Rate = vp8_rdcost_mby(mb); + } + +@@ -632,12 +630,11 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) + d[12] = p[12]; + } + static int rd_pick_intra4x4block( +- VP8_COMP *cpi, + MACROBLOCK *x, + BLOCK *be, + BLOCKD *b, + B_PREDICTION_MODE *best_mode, +- unsigned int *bmode_costs, ++ const int *bmode_costs, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + +@@ -660,7 +657,11 @@ static int rd_pick_intra4x4block( + DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4); + DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); + int dst_stride = x->e_mbd.dst.y_stride; +- unsigned char *base_dst = x->e_mbd.dst.y_buffer; ++ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; ++ ++ unsigned char *Above = dst - dst_stride; ++ unsigned char *yleft = dst - 1; ++ unsigned char top_left = Above[-1]; + + for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) + { +@@ -669,8 +670,8 @@ static int rd_pick_intra4x4block( + + rate = bmode_costs[mode]; + +- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, mode, +- b->predictor, 16); ++ vp8_intra4x4_predict(Above, yleft, 
dst_stride, mode, ++ b->predictor, 16, top_left); + vp8_subtract_b(be, b, 16); + x->short_fdct4x4(be->src_diff, be->coeff, 32); + x->quantize_b(be, b); +@@ -697,15 +698,14 @@ static int rd_pick_intra4x4block( + vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); + } + } +- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); ++ b->bmi.as_mode = *best_mode; + +- vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, base_dst + b->offset, +- dst_stride); ++ vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride); + + return best_rd; + } + +-static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, ++static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, + int *rate_y, int *Distortion, int best_rd) + { + MACROBLOCKD *const xd = &mb->e_mbd; +@@ -717,7 +717,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; +- unsigned int *bmode_costs; ++ const int *bmode_costs; + + vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); +@@ -745,7 +745,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, + } + + total_rd += rd_pick_intra4x4block( +- cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, ++ mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, + ta + vp8_block2above[i], + tl + vp8_block2left[i], &r, &ry, &d); + +@@ -770,8 +770,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, + } + + +-static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, +- MACROBLOCK *x, ++static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, + int *Rate, + int *rate_y, + int *Distortion) +@@ -784,7 +783,7 @@ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, + int this_rd; + MACROBLOCKD *xd = &x->e_mbd; + +- //Y Search for 16x16 intra prediction mode ++ /* Y Search for 16x16 intra prediction mode */ + for (mode = DC_PRED; mode <= TM_PRED; mode++) + { + xd->mode_info_context->mbmi.mode = mode; +@@ -873,7 +872,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); + } + +-static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) ++static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, ++ int *rate_tokenonly, int *distortion) + { + MB_PREDICTION_MODE mode; + MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); +@@ -981,8 +981,9 @@ static int labels2mode( + m = ABOVE4X4; + else + { +- // the only time we should do costing for new motion vector or mode +- // is when we are on a new label (jbb May 08, 2007) ++ /* the only time we should do costing for new motion vector ++ * or mode is when we are on a new label (jbb May 08, 2007) ++ */ + switch (m = this_mode) + { + case NEW4X4 : +@@ -1001,7 +1002,7 @@ static int labels2mode( + break; + } + +- if (m == ABOVE4X4) // replace above with left if same ++ if (m == ABOVE4X4) /* replace above with left if same */ + { + int_mv left_mv; + +@@ -1062,9 +1063,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels + vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict); + vp8_subtract_b(be, bd, 16); + x->short_fdct4x4(be->src_diff, be->coeff, 32); +- +- // set to 0 no way to account for 2nd order DC so discount +- //be->coeff[0] = 0; + x->quantize_b(be, bd); + + 
distortion += vp8_block_error(be->coeff, bd->dqcoeff); +@@ -1095,8 +1093,8 @@ typedef struct + int mvthresh; + int *mdcounts; + +- int_mv sv_mvp[4]; // save 4 mvp from 8x8 +- int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 ++ int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */ ++ int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */ + + } BEST_SEG_INFO; + +@@ -1143,13 +1141,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + labels = vp8_mbsplits[segmentation]; + label_count = vp8_mbsplit_count[segmentation]; + +- // 64 makes this threshold really big effectively +- // making it so that we very rarely check mvs on +- // segments. setting this to 1 would make mv thresh +- // roughly equal to what it is for macroblocks ++ /* 64 makes this threshold really big effectively making it so that we ++ * very rarely check mvs on segments. setting this to 1 would make mv ++ * thresh roughly equal to what it is for macroblocks ++ */ + label_mv_thresh = 1 * bsi->mvthresh / label_count ; + +- // Segmentation method overheads ++ /* Segmentation method overheads */ + rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); + rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); + this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); +@@ -1162,7 +1160,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + B_PREDICTION_MODE mode_selected = ZERO4X4; + int bestlabelyrate = 0; + +- // search for the best motion vector on this segment ++ /* search for the best motion vector on this segment */ + for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++) + { + int this_rd; +@@ -1191,7 +1189,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + BLOCK *c; + BLOCKD *e; + +- // Is the best so far sufficiently good that we cant justify doing and new motion search. ++ /* Is the best so far sufficiently good that we cant justify ++ * doing a new motion search. ++ */ + if (best_label_rd < label_mv_thresh) + break; + +@@ -1206,7 +1206,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + step_param = bsi->sv_istep[i]; + } + +- // use previous block's result as next block's MV predictor. ++ /* use previous block's result as next block's MV ++ * predictor. ++ */ + if (segmentation == BLOCK_4X4 && i>0) + { + bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int; +@@ -1225,7 +1227,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3; + mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3; + +- // find first label ++ /* find first label */ + n = vp8_mbsplit_offset[segmentation][i]; + + c = &x->block[n]; +@@ -1265,7 +1267,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + + sseshift = segmentation_to_sseshift[segmentation]; + +- // Should we do a full search (best quality only) ++ /* Should we do a full search (best quality only) */ + if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) + { + /* Check if mvp_full is within the range. 
*/ +@@ -1282,7 +1284,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + } + else + { +- // The full search result is actually worse so re-instate the previous best vector ++ /* The full search result is actually worse so ++ * re-instate the previous best vector ++ */ + e->bmi.mv.as_int = mode_mv[NEW4X4].as_int; + } + } +@@ -1302,7 +1306,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], + bsi->ref_mv, x->mvcost); + +- // Trap vectors that reach beyond the UMV borders ++ /* Trap vectors that reach beyond the UMV borders */ + if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) + { +@@ -1354,7 +1358,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, + bsi->segment_rd = this_segment_rd; + bsi->segment_num = segmentation; + +- // store everything needed to come back to this!! ++ /* store everything needed to come back to this!! */ + for (i = 0; i < 16; i++) + { + bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; +@@ -1516,7 +1520,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, + return bsi.segment_rd; + } + +-//The improved MV prediction ++/* The improved MV prediction */ + void vp8_mv_pred + ( + VP8_COMP *cpi, +@@ -1550,7 +1554,9 @@ void vp8_mv_pred + near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; + near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; + +- // read in 3 nearby block's MVs from current frame as prediction candidates. ++ /* read in 3 nearby block's MVs from current frame as prediction ++ * candidates. ++ */ + if (above->mbmi.ref_frame != INTRA_FRAME) + { + near_mvs[vcnt].as_int = above->mbmi.mv.as_int; +@@ -1573,12 +1579,12 @@ void vp8_mv_pred + } + vcnt++; + +- // read in 5 nearby block's MVs from last frame. ++ /* read in 5 nearby block's MVs from last frame. 
*/ + if(cpi->common.last_frame_type != KEY_FRAME) + { + mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ; + +- // current in last frame ++ /* current in last frame */ + if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; +@@ -1587,7 +1593,7 @@ void vp8_mv_pred + } + vcnt++; + +- // above in last frame ++ /* above in last frame */ + if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int; +@@ -1596,7 +1602,7 @@ void vp8_mv_pred + } + vcnt++; + +- // left in last frame ++ /* left in last frame */ + if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int; +@@ -1605,7 +1611,7 @@ void vp8_mv_pred + } + vcnt++; + +- // right in last frame ++ /* right in last frame */ + if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int; +@@ -1614,7 +1620,7 @@ void vp8_mv_pred + } + vcnt++; + +- // below in last frame ++ /* below in last frame */ + if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int; +@@ -1655,7 +1661,9 @@ void vp8_mv_pred + mv.as_mv.col = mvy[vcnt/2]; + + find = 1; +- //sr is set to 0 to allow calling function to decide the search range. ++ /* sr is set to 0 to allow calling function to decide the search ++ * range. ++ */ + *sr = 0; + } + } +@@ -1667,33 +1675,36 @@ void vp8_mv_pred + + void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]) + { +- +- int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below ++ /* near_sad indexes: ++ * 0-cf above, 1-cf left, 2-cf aboveleft, ++ * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below ++ */ ++ int near_sad[8] = {0}; + BLOCK *b = &x->block[0]; + unsigned char *src_y_ptr = *(b->base_src); + +- //calculate sad for current frame 3 nearby MBs. ++ /* calculate sad for current frame 3 nearby MBs. */ + if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0) + { + near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; + }else if(xd->mb_to_top_edge==0) +- { //only has left MB for sad calculation. ++ { /* only has left MB for sad calculation. */ + near_sad[0] = near_sad[2] = INT_MAX; +- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); ++ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); + }else if(xd->mb_to_left_edge ==0) +- { //only has left MB for sad calculation. ++ { /* only has left MB for sad calculation. 
*/ + near_sad[1] = near_sad[2] = INT_MAX; +- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); ++ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); + }else + { +- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); +- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); +- near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff); ++ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); ++ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); ++ near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX); + } + + if(cpi->common.last_frame_type != KEY_FRAME) + { +- //calculate sad for last frame 5 nearby MBs. ++ /* calculate sad for last frame 5 nearby MBs. */ + unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; + int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; + +@@ -1703,14 +1714,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse + if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX; + + if(near_sad[4] != INT_MAX) +- near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff); ++ near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX); + if(near_sad[5] != INT_MAX) +- near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff); +- near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, 0x7fffffff); ++ near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX); ++ near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX); + if(near_sad[6] != INT_MAX) +- near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff); ++ near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX); + if(near_sad[7] != INT_MAX) +- near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff); ++ near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX); + } + + if(cpi->common.last_frame_type != KEY_FRAME) +@@ -1732,18 +1743,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) + { + if (x->partition_info->bmi[i].mode == NEW4X4) + { +- cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row ++ x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row + - best_ref_mv->as_mv.row) >> 1)]++; +- cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col ++ x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col + - best_ref_mv->as_mv.col) >> 1)]++; + } + } 
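
The near_sad[] hunks above swap the literal 0x7fffffff for UINT_MAX because the block SAD functions take their early-exit bound as an unsigned int, so "no limit" should be spelled in the parameter's own type rather than as a signed INT_MAX literal. A minimal standalone sketch of such a bounded SAD; sad16x16() here is a hypothetical stand-in for cpi->fn_ptr[BLOCK_16X16].sdf, not the library's implementation:

#include <limits.h>
#include <stdio.h>

static unsigned int sad16x16(const unsigned char *src, int src_stride,
                             const unsigned char *ref, int ref_stride,
                             unsigned int max_sad)
{
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
        {
            int d = src[c] - ref[c];
            sad += (unsigned int)(d < 0 ? -d : d);
        }

        /* Early exit once the partial sum is already worse than the
         * caller's best candidate; UINT_MAX therefore means "no bound".
         */
        if (sad > max_sad)
            return UINT_MAX;

        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}

int main(void)
{
    unsigned char src[16 * 16] = {0}, ref[16 * 16];
    int i;

    for (i = 0; i < 16 * 16; i++)
        ref[i] = 255;

    /* Unbounded call, mirroring the near_sad[] computations above. */
    printf("sad = %u\n", sad16x16(src, 16, ref, 16, UINT_MAX));
    return 0;
}

Passing UINT_MAX, as the patched calls do, disables the early exit entirely; any smaller bound lets the neighbour-SAD scan abandon a clearly worse candidate after a few rows.
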
+ } + else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) + { +- cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row ++ x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row + - best_ref_mv->as_mv.row) >> 1)]++; +- cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col ++ x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col + - best_ref_mv->as_mv.col) >> 1)]++; + } + } +@@ -1766,7 +1777,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4], + { + unsigned int sse; + unsigned int var; +- int threshold = (xd->block[0].dequant[1] ++ unsigned int threshold = (xd->block[0].dequant[1] + * xd->block[0].dequant[1] >>4); + + if(threshold < x->encode_breakout) +@@ -1784,8 +1795,8 @@ static int evaluate_inter_mode_rd(int mdcounts[4], + if ((sse - var < q2dc * q2dc >>4) || + (sse /2 > var && sse-var < 64)) + { +- // Check u and v to make sure skip is ok +- int sse2= VP8_UVSSE(x); ++ /* Check u and v to make sure skip is ok */ ++ unsigned int sse2 = VP8_UVSSE(x); + if (sse2 * 2 < threshold) + { + x->skip = 1; +@@ -1805,17 +1816,15 @@ static int evaluate_inter_mode_rd(int mdcounts[4], + } + + +- //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code +- +- // Add in the Mv/mode cost ++ /* Add in the Mv/mode cost */ + rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); + +- // Y cost and distortion ++ /* Y cost and distortion */ + macro_block_yrd(x, &rd->rate_y, &distortion); + rd->rate2 += rd->rate_y; + rd->distortion2 += distortion; + +- // UV cost and distortion ++ /* UV cost and distortion */ + rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, + cpi->common.full_pixel); + rd->rate2 += rd->rate_uv; +@@ -1832,9 +1841,11 @@ static int calculate_final_rd_costs(int this_rd, + VP8_COMP *cpi, MACROBLOCK *x) + { + MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; +- // Where skip is allowable add in the default per mb cost for the no skip case. +- // where we then decide to skip we have to delete this and replace it with the +- // cost of signallying a skip ++ ++ /* Where skip is allowable add in the default per mb cost for the no ++ * skip case. 
where we then decide to skip we have to delete this and ++ * replace it with the cost of signalling a skip ++ */ + if (cpi->common.mb_no_coeff_skip) + { + *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); +@@ -1849,7 +1860,10 @@ static int calculate_final_rd_costs(int this_rd, + + if (!disable_skip) + { +- // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate ++ /* Test for the condition where skip block will be activated ++ * because there are no non zero coefficients and make any ++ * necessary adjustment for rate ++ */ + if (cpi->common.mb_no_coeff_skip) + { + int i; +@@ -1874,10 +1888,10 @@ static int calculate_final_rd_costs(int this_rd, + if (tteob == 0) + { + rd->rate2 -= (rd->rate_y + rd->rate_uv); +- //for best_yrd calculation ++ /* for best_yrd calculation */ + rd->rate_uv = 0; + +- // Back out no skip flag costing and add in skip flag costing ++ /* Back out no skip flag costing and add in skip flag costing */ + if (cpi->prob_skip_false) + { + int prob_skip_cost; +@@ -1889,7 +1903,7 @@ static int calculate_final_rd_costs(int this_rd, + } + } + } +- // Calculate the final RD estimate for this mode ++ /* Calculate the final RD estimate for this mode */ + this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); + if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame + == INTRA_FRAME) +@@ -1953,7 +1967,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int_mv mvp; + int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + int saddone=0; +- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) ++ /* search range got from mv_pred(). It uses step_param levels. (0-7) */ ++ int sr=0; + + unsigned char *plane[4][3]; + int ref_frame_map[4]; +@@ -1962,6 +1977,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex, + cpi->common.y1dc_delta_q); + ++#if CONFIG_TEMPORAL_DENOISING ++ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX, ++ best_rd_sse = INT_MAX; ++#endif ++ + mode_mv = mode_mv_sb[sign_bias]; + best_ref_mv.as_int = 0; + best_mode.rd = INT_MAX; +@@ -1994,7 +2014,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); + + *returnintra = INT_MAX; +- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame ++ /* Count of the number of MBs tested so far this frame */ ++ x->mbs_tested_so_far++; + + x->skip = 0; + +@@ -2005,14 +2026,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int other_cost = 0; + int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; + +- // Test best rd so far against threshold for trying this mode. +- if (best_mode.rd <= cpi->rd_threshes[mode_index]) ++ /* Test best rd so far against threshold for trying this mode. 
*/ ++ if (best_mode.rd <= x->rd_threshes[mode_index]) + continue; + + if (this_ref_frame < 0) + continue; + +- // These variables hold are rolling total cost and distortion for this mode ++ /* These variables hold are rolling total cost and distortion for ++ * this mode ++ */ + rd.rate2 = 0; + rd.distortion2 = 0; + +@@ -2021,9 +2044,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + x->e_mbd.mode_info_context->mbmi.mode = this_mode; + x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; + +- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, +- // unless ARNR filtering is enabled in which case we want +- // an unfiltered alternative ++ /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame, ++ * unless ARNR filtering is enabled in which case we want ++ * an unfiltered alternative ++ */ + if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) + { + if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) +@@ -2045,45 +2069,56 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + } + } + +- // Check to see if the testing frequency for this mode is at its max +- // If so then prevent it from being tested and increase the threshold for its testing +- if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) ++ /* Check to see if the testing frequency for this mode is at its ++ * max If so then prevent it from being tested and increase the ++ * threshold for its testing ++ */ ++ if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) + { +- if (cpi->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index]) ++ if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index]) + { +- // Increase the threshold for coding this mode to make it less likely to be chosen +- cpi->rd_thresh_mult[mode_index] += 4; ++ /* Increase the threshold for coding this mode to make it ++ * less likely to be chosen ++ */ ++ x->rd_thresh_mult[mode_index] += 4; + +- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) +- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; ++ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) ++ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + +- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; ++ x->rd_threshes[mode_index] = ++ (cpi->rd_baseline_thresh[mode_index] >> 7) * ++ x->rd_thresh_mult[mode_index]; + + continue; + } + } + +- // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested +- cpi->mode_test_hit_counts[mode_index] ++; ++ /* We have now reached the point where we are going to test the ++ * current mode so increment the counter for the number of times ++ * it has been tested ++ */ ++ x->mode_test_hit_counts[mode_index] ++; + +- // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise +- if (cpi->zbin_mode_boost_enabled) ++ /* Experimental code. Special case for gf and arf zeromv modes. 
++ * Increase zbin size to supress noise ++ */ ++ if (x->zbin_mode_boost_enabled) + { + if ( this_ref_frame == INTRA_FRAME ) +- cpi->zbin_mode_boost = 0; ++ x->zbin_mode_boost = 0; + else + { + if (vp8_mode_order[mode_index] == ZEROMV) + { + if (this_ref_frame != LAST_FRAME) +- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; ++ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else +- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; ++ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } + else if (vp8_mode_order[mode_index] == SPLITMV) +- cpi->zbin_mode_boost = 0; ++ x->zbin_mode_boost = 0; + else +- cpi->zbin_mode_boost = MV_ZBIN_BOOST; ++ x->zbin_mode_boost = MV_ZBIN_BOOST; + } + + vp8_update_zbin_extra(cpi, x); +@@ -2091,7 +2126,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + if(!uv_intra_done && this_ref_frame == INTRA_FRAME) + { +- rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, ++ rd_pick_intra_mbuv_mode(x, &uv_intra_rate, + &uv_intra_rate_tokenonly, + &uv_intra_distortion); + uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; +@@ -2113,9 +2148,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + { + int tmp_rd; + +- // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; ++ /* Note the rate value returned here includes the cost of ++ * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED] ++ */ + int distortion; +- tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd); ++ tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd); + rd.rate2 += rate; + rd.distortion2 += distortion; + +@@ -2140,8 +2177,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int this_rd_thresh; + int distortion; + +- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3]; +- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh; ++ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? ++ x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3]; ++ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? ++ x->rd_threshes[THR_NEW2] : this_rd_thresh; + + tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, + best_mode.yrd, mdcounts, +@@ -2150,10 +2189,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + rd.rate2 += rate; + rd.distortion2 += distortion; + +- // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV ++ /* If even the 'Y' rd value of split is higher than best so far ++ * then dont bother looking at UV ++ */ + if (tmp_rd < best_mode.yrd) + { +- // Now work out UV cost and add it in ++ /* Now work out UV cost and add it in */ + rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel); + rd.rate2 += rd.rate_uv; + rd.distortion2 += rd.distortion_uv; +@@ -2225,7 +2266,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + mvp_full.as_mv.col = mvp.as_mv.col>>3; + mvp_full.as_mv.row = mvp.as_mv.row>>3; + +- // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. ++ /* Get intersection of UMV window and valid MV window to ++ * reduce # of checks in diamond search. 
++ */ + if (x->mv_col_min < col_min ) + x->mv_col_min = col_min; + if (x->mv_col_max > col_max ) +@@ -2235,11 +2278,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + if (x->mv_row_max > row_max ) + x->mv_row_max = row_max; + +- //adjust search range according to sr from mv prediction ++ /* adjust search range according to sr from mv prediction */ + if(sr > step_param) + step_param = sr; + +- // Initial step/diamond search ++ /* Initial step/diamond search */ + { + bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv, + step_param, sadpb, &num00, +@@ -2247,7 +2290,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + x->mvcost, &best_ref_mv); + mode_mv[NEWMV].as_int = d->bmi.mv.as_int; + +- // Further step/diamond searches as necessary ++ /* Further step/diamond searches as necessary */ + n = 0; + further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + +@@ -2293,11 +2336,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + { + int search_range; + +- //It seems not a good way to set search_range. Need further investigation. +- //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); + search_range = 8; + +- //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); + thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb, + search_range, &cpi->fn_ptr[BLOCK_16X16], + x->mvcost, &best_ref_mv); +@@ -2330,24 +2370,31 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + + mode_mv[NEWMV].as_int = d->bmi.mv.as_int; + +- // Add the new motion vector cost to our rolling cost variable ++ /* Add the new motion vector cost to our rolling cost variable */ + rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); + } + + case NEARESTMV: + case NEARMV: +- // Clip "next_nearest" so that it does not extend to far out of image ++ /* Clip "next_nearest" so that it does not extend to far out ++ * of image ++ */ + vp8_clamp_mv2(&mode_mv[this_mode], xd); + +- // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode. ++ /* Do not bother proceeding if the vector (from newmv, nearest ++ * or near) is 0,0 as this should then be coded using the zeromv ++ * mode. ++ */ + if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) + continue; + + case ZEROMV: + +- // Trap vectors that reach beyond the UMV borders +- // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point +- // because of the lack of break statements in the previous two cases. ++ /* Trap vectors that reach beyond the UMV borders ++ * Note that ALL New MV, Nearest MV Near MV and Zero MV code ++ * drops through to this point because of the lack of break ++ * statements in the previous two cases. 
++ */ + if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) + continue; +@@ -2365,35 +2412,52 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + disable_skip, uv_intra_tteob, + intra_rd_penalty, cpi, x); + +- // Keep record of best intra distortion ++ /* Keep record of best intra distortion */ + if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && + (this_rd < best_mode.intra_rd) ) + { + best_mode.intra_rd = this_rd; + *returnintra = rd.distortion2 ; + } +- + #if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { +- // Store the best NEWMV in x for later use in the denoiser. +- // We are restricted to the LAST_FRAME since the denoiser only keeps +- // one filter state. +- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && +- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) +- { +- x->e_mbd.best_sse_inter_mode = NEWMV; +- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; +- x->e_mbd.need_to_clamp_best_mvs = +- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; +- } ++ unsigned int sse; ++ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse, ++ mode_mv[this_mode]); ++ ++ if (sse < best_rd_sse) ++ best_rd_sse = sse; ++ ++ /* Store for later use by denoiser. */ ++ if (this_mode == ZEROMV && sse < zero_mv_sse ) ++ { ++ zero_mv_sse = sse; ++ x->best_zeromv_reference_frame = ++ x->e_mbd.mode_info_context->mbmi.ref_frame; ++ } ++ ++ /* Store the best NEWMV in x for later use in the denoiser. */ ++ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && ++ sse < best_sse) ++ { ++ best_sse = sse; ++ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse, ++ mode_mv[this_mode]); ++ x->best_sse_inter_mode = NEWMV; ++ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; ++ x->need_to_clamp_best_mvs = ++ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; ++ x->best_reference_frame = ++ x->e_mbd.mode_info_context->mbmi.ref_frame; ++ } + } + #endif + +- // Did this mode help.. i.i is it the new best mode ++ /* Did this mode help.. i.i is it the new best mode */ + if (this_rd < best_mode.rd || x->skip) + { +- // Note index of best mode so far ++ /* Note index of best mode so far */ + best_mode_index = mode_index; + *returnrate = rd.rate2; + *returndistortion = rd.distortion2; +@@ -2406,95 +2470,103 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + update_best_mode(&best_mode, this_rd, &rd, other_cost, x); + + +- // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time +- cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; +- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; ++ /* Testing this mode gave rise to an improvement in best error ++ * score. Lower threshold a bit for next time ++ */ ++ x->rd_thresh_mult[mode_index] = ++ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? ++ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; + } + +- // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around. ++ /* If the mode did not help improve the best error case then raise ++ * the threshold for testing that mode next time around. 
++ */ + else + { +- cpi->rd_thresh_mult[mode_index] += 4; +- +- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) +- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; ++ x->rd_thresh_mult[mode_index] += 4; + +- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; ++ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) ++ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + } ++ x->rd_threshes[mode_index] = ++ (cpi->rd_baseline_thresh[mode_index] >> 7) * ++ x->rd_thresh_mult[mode_index]; + + if (x->skip) + break; + + } + +- // Reduce the activation RD thresholds for the best choice mode ++ /* Reduce the activation RD thresholds for the best choice mode */ + if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) + { +- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); +- +- cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; +- cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; +- +- // If we chose a split mode then reset the new MV thresholds as well +- /*if ( vp8_mode_order[best_mode_index] == SPLITMV ) +- { +- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4); +- cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT; +- cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV]; +- +- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4); +- cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT; +- cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG]; +- +- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4); +- cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT; +- cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA]; +- }*/ +- ++ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2); ++ ++ x->rd_thresh_mult[best_mode_index] = ++ (x->rd_thresh_mult[best_mode_index] >= ++ (MIN_THRESHMULT + best_adjustment)) ? ++ x->rd_thresh_mult[best_mode_index] - best_adjustment : ++ MIN_THRESHMULT; ++ x->rd_threshes[best_mode_index] = ++ (cpi->rd_baseline_thresh[best_mode_index] >> 7) * ++ x->rd_thresh_mult[best_mode_index]; + } + +- // Note how often each mode chosen as best ++ /* Note how often each mode chosen as best */ + cpi->mode_chosen_counts[best_mode_index] ++; + + #if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { +- if (x->e_mbd.best_sse_inter_mode == DC_PRED) { +- // No best MV found. +- x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode; +- x->e_mbd.best_sse_mv = best_mode.mbmode.mv; +- x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; +- } +- +- // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used? +- vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0, +- recon_yoffset, recon_uvoffset); +- // Reevalute ZEROMV if the current mode is INTRA. 
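
The two branches above implement a simple feedback loop on x->rd_thresh_mult: minus 2 when a mode wins, plus 4 when it loses, with the threshold recomputed as (baseline >> 7) * mult either way. A worked example with invented numbers (only the shift and the step sizes come from the patch):

/* Sketch of the per-mode threshold update above; the sample values
 * are made up, the >>7 scaling mirrors the patch. */
static int rd_threshold(int baseline, int mult)
{
    return (baseline >> 7) * mult;
}

/* rd_threshold(2500, 128) == 19 * 128 == 2432.  After a miss the
 * multiplier becomes 132, giving 19 * 132 == 2508, so the mode must
 * look better before it is tried again; a win lowers the multiplier
 * to 126 and the bar drops to 19 * 126 == 2394. */
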
+- if (best_mode.mbmode.ref_frame == INTRA_FRAME) +- { +- int this_rd = INT_MAX; +- int disable_skip = 0; +- int other_cost = 0; +- vpx_memset(&rd, 0, sizeof(rd)); +- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; +- rd.rate2 += x->ref_frame_cost[LAST_FRAME]; +- rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts); +- x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; +- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; +- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; +- this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); +- this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, +- disable_skip, uv_intra_tteob, +- intra_rd_penalty, cpi, x); +- if (this_rd < best_mode.rd || x->skip) ++ if (x->best_sse_inter_mode == DC_PRED) + { +- // Note index of best mode so far +- best_mode_index = mode_index; +- *returnrate = rd.rate2; +- *returndistortion = rd.distortion2; +- update_best_mode(&best_mode, this_rd, &rd, other_cost, x); ++ /* No best MV found. */ ++ x->best_sse_inter_mode = best_mode.mbmode.mode; ++ x->best_sse_mv = best_mode.mbmode.mv; ++ x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; ++ x->best_reference_frame = best_mode.mbmode.ref_frame; ++ best_sse = best_rd_sse; ++ } ++ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, ++ recon_yoffset, recon_uvoffset); ++ ++ ++ /* Reevaluate ZEROMV after denoising. */ ++ if (best_mode.mbmode.ref_frame == INTRA_FRAME && ++ x->best_zeromv_reference_frame != INTRA_FRAME) ++ { ++ int this_rd = INT_MAX; ++ int disable_skip = 0; ++ int other_cost = 0; ++ int this_ref_frame = x->best_zeromv_reference_frame; ++ rd.rate2 = x->ref_frame_cost[this_ref_frame] + ++ vp8_cost_mv_ref(ZEROMV, mdcounts); ++ rd.distortion2 = 0; ++ ++ /* set up the proper prediction buffers for the frame */ ++ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; ++ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; ++ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; ++ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; ++ ++ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; ++ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; ++ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; ++ ++ this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); ++ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, ++ disable_skip, uv_intra_tteob, ++ intra_rd_penalty, cpi, x); ++ if (this_rd < best_mode.rd || x->skip) ++ { ++ /* Note index of best mode so far */ ++ best_mode_index = mode_index; ++ *returnrate = rd.rate2; ++ *returndistortion = rd.distortion2; ++ update_best_mode(&best_mode, this_rd, &rd, other_cost, x); ++ } + } +- } ++ + } + #endif + +@@ -2512,7 +2584,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + } + + +- // macroblock modes ++ /* macroblock modes */ + vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); + + if (best_mode.mbmode.mode == B_PRED) +@@ -2539,7 +2611,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + rd_update_mvcount(cpi, x, &best_ref_mv); + } + +-void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) ++void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) + { + int error4x4, error16x16; + int rate4x4, rate16x16 = 0, rateuv; +@@ -2551,15 +2623,13 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) + + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + +- rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); ++ 
rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); + rate = rateuv; + +- error16x16 = rd_pick_intra16x16mby_mode(cpi, x, +- &rate16x16, &rate16x16_tokenonly, ++ error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, + &dist16x16); + +- error4x4 = rd_pick_intra4x4mby_modes(cpi, x, +- &rate4x4, &rate4x4_tokenonly, ++ error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly, + &dist4x4, error16x16); + + if (error4x4 < error16x16) +diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h +index db939f9..1e11fa7 100644 +--- a/vp8/encoder/rdopt.h ++++ b/vp8/encoder/rdopt.h +@@ -65,9 +65,9 @@ static void insertsortsad(int arr[],int idx[], int len) + } + } + +-extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); ++extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); + extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); +-extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); ++extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); + + + static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, +@@ -86,15 +86,15 @@ static void get_predictor_pointers(const VP8_COMP *cpi, + unsigned int recon_yoffset, + unsigned int recon_uvoffset) + { +- if (cpi->ref_frame_flags & VP8_LAST_FLAG) ++ if (cpi->ref_frame_flags & VP8_LAST_FRAME) + get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx], + plane[LAST_FRAME], recon_yoffset, recon_uvoffset); + +- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) ++ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) + get_plane_pointers(&cpi->common.yv12_fb[cpi->common.gld_fb_idx], + plane[GOLDEN_FRAME], recon_yoffset, recon_uvoffset); + +- if (cpi->ref_frame_flags & VP8_ALT_FLAG) ++ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) + get_plane_pointers(&cpi->common.yv12_fb[cpi->common.alt_fb_idx], + plane[ALTREF_FRAME], recon_yoffset, recon_uvoffset); + } +@@ -106,11 +106,11 @@ static void get_reference_search_order(const VP8_COMP *cpi, + int i=0; + + ref_frame_map[i++] = INTRA_FRAME; +- if (cpi->ref_frame_flags & VP8_LAST_FLAG) ++ if (cpi->ref_frame_flags & VP8_LAST_FRAME) + ref_frame_map[i++] = LAST_FRAME; +- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) ++ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) + ref_frame_map[i++] = GOLDEN_FRAME; +- if (cpi->ref_frame_flags & VP8_ALT_FLAG) ++ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) + ref_frame_map[i++] = ALTREF_FRAME; + for(; i<4; i++) + ref_frame_map[i] = -1; +diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c +index fc0967d..37972e2 100644 +--- a/vp8/encoder/segmentation.c ++++ b/vp8/encoder/segmentation.c +@@ -22,22 +22,24 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) + + if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) + { +- // Reset Gf useage monitors ++ /* Reset Gf useage monitors */ + vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; + } + else + { +- // for each macroblock row in image ++ /* for each macroblock row in image */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { +- // for each macroblock col in image ++ /* for each macroblock col in image */ + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + +- // If using golden then set GF active flag if not already set. 
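
get_reference_search_order() above packs the enabled reference frames into a fixed-size map, INTRA first and -1 in unused slots. A self-contained mock with neutral names (the enum values and flag constants below are stand-ins, not the vp8 definitions):

enum { INTRA = 0, LAST = 1, GOLDEN = 2, ALTREF = 3 };  /* stand-in ids */
#define FLAG_LAST 1
#define FLAG_GOLD 2
#define FLAG_ALT  4

static void search_order(int flags, int map[4])
{
    int i = 0;
    map[i++] = INTRA;                       /* intra is always tried */
    if (flags & FLAG_LAST) map[i++] = LAST;
    if (flags & FLAG_GOLD) map[i++] = GOLDEN;
    if (flags & FLAG_ALT)  map[i++] = ALTREF;
    while (i < 4) map[i++] = -1;            /* mark unused slots */
}

/* search_order(FLAG_LAST | FLAG_ALT, map) yields {INTRA, LAST, ALTREF, -1},
 * so modes whose reference frame is disabled resolve to -1 and the mode
 * loop skips them. */
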
+- // If using last frame 0,0 mode then leave flag as it is
+- // else if using non 0,0 motion or intra modes then clear flag if it is currently set
++ /* If using golden then set GF active flag if not already set.
++ * If using last frame 0,0 mode then leave flag as it is
++ * else if using non 0,0 motion or intra modes then clear
++ * flag if it is currently set
++ */
+ if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
+ {
+ if (*(x->gf_active_ptr) == 0)
+@@ -52,12 +54,12 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
+ cpi->gf_active_count--;
+ }
+
+- x->gf_active_ptr++; // Step onto next entry
+- this_mb_mode_info++; // skip to next mb
++ x->gf_active_ptr++; /* Step onto next entry */
++ this_mb_mode_info++; /* skip to next mb */
+
+ }
+
+- // this is to account for the border
++ /* this is to account for the border */
+ this_mb_mode_info++;
+ }
+ }
+diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
+index 6c61b36..b83ae89 100644
+--- a/vp8/encoder/temporal_filter.c
++++ b/vp8/encoder/temporal_filter.c
+@@ -30,8 +30,8 @@
+ #include <math.h>
+ #include <limits.h>
+
+-#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
+-#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
++#define ALT_REF_MC_ENABLED 1 /* dis/enable MC in AltRef filtering */
++#define ALT_REF_SUBPEL_ENABLED 1 /* dis/enable subpel in MC AltRef filtering */
+
+ #if VP8_TEMPORAL_ALT_REF
+
+@@ -50,7 +50,7 @@ static void vp8_temporal_filter_predictors_mb_c
+ int offset;
+ unsigned char *yptr, *uptr, *vptr;
+
+- // Y
++ /* Y */
+ yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+
+ if ((mv_row | mv_col) & 7)
+@@ -63,7 +63,7 @@ static void vp8_temporal_filter_predictors_mb_c
+ vp8_copy_mem16x16(yptr, stride, &pred[0], 16);
+ }
+
+- // U & V
++ /* U & V */
+ mv_row >>= 1;
+ mv_col >>= 1;
+ stride = (stride + 1) >> 1;
+@@ -109,9 +109,10 @@ void vp8_temporal_filter_apply_c
+ int pixel_value = *frame2++;
+
+ modifier = src_byte - pixel_value;
+- // This is an integer approximation of:
+- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
+- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
++ /* This is an integer approximation of:
++ * float coeff = (3.0 * modifer * modifier) / pow(2, strength);
++ * modifier = (int)roundf(coeff > 16 ?
0 : 16-coeff); ++ */ + modifier *= modifier; + modifier *= 3; + modifier += 1 << (strength - 1); +@@ -134,7 +135,6 @@ void vp8_temporal_filter_apply_c + } + + #if ALT_REF_MC_ENABLED +-static int dummy_cost[2*mv_max+1]; + + static int vp8_temporal_filter_find_matching_mb_c + ( +@@ -155,10 +155,7 @@ static int vp8_temporal_filter_find_matching_mb_c + int_mv best_ref_mv1; + int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ + +- int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; +- int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; +- +- // Save input state ++ /* Save input state */ + unsigned char **base_src = b->base_src; + int src = b->src; + int src_stride = b->src_stride; +@@ -170,7 +167,7 @@ static int vp8_temporal_filter_find_matching_mb_c + best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3; + best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3; + +- // Setup frame pointers ++ /* Setup frame pointers */ + b->base_src = &arf_frame->y_buffer; + b->src_stride = arf_frame->y_stride; + b->src = mb_offset; +@@ -179,7 +176,7 @@ static int vp8_temporal_filter_find_matching_mb_c + x->e_mbd.pre.y_stride = frame_ptr->y_stride; + d->offset = mb_offset; + +- // Further step/diamond searches as necessary ++ /* Further step/diamond searches as necessary */ + if (cpi->Speed < 8) + { + step_param = cpi->sf.first_step + (cpi->Speed > 5); +@@ -189,29 +186,29 @@ static int vp8_temporal_filter_find_matching_mb_c + step_param = cpi->sf.first_step + 2; + } + +- /*cpi->sf.search_method == HEX*/ +- // TODO Check that the 16x16 vf & sdf are selected here +- bestsme = vp8_hex_search(x, b, d, +- &best_ref_mv1_full, &d->bmi.mv, +- step_param, +- sadpb, +- &cpi->fn_ptr[BLOCK_16X16], +- mvsadcost, mvcost, &best_ref_mv1); ++ /* TODO Check that the 16x16 vf & sdf are selected here */ ++ /* Ignore mv costing by sending NULL cost arrays */ ++ bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv, ++ step_param, sadpb, ++ &cpi->fn_ptr[BLOCK_16X16], ++ NULL, NULL, &best_ref_mv1); + + #if ALT_REF_SUBPEL_ENABLED +- // Try sub-pixel MC? +- //if (bestsme > error_thresh && bestsme < INT_MAX) ++ /* Try sub-pixel MC? */ + { + int distortion; + unsigned int sse; ++ /* Ignore mv costing by sending NULL cost array */ + bestsme = cpi->find_fractional_mv_step(x, b, d, +- &d->bmi.mv, &best_ref_mv1, +- x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], +- mvcost, &distortion, &sse); ++ &d->bmi.mv, ++ &best_ref_mv1, ++ x->errorperbit, ++ &cpi->fn_ptr[BLOCK_16X16], ++ NULL, &distortion, &sse); + } + #endif + +- // Save input state ++ /* Save input state */ + b->base_src = base_src; + b->src = src; + b->src_stride = src_stride; +@@ -246,7 +243,7 @@ static void vp8_temporal_filter_iterate_c + unsigned char *dst1, *dst2; + DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); + +- // Save input state ++ /* Save input state */ + unsigned char *y_buffer = mbd->pre.y_buffer; + unsigned char *u_buffer = mbd->pre.u_buffer; + unsigned char *v_buffer = mbd->pre.v_buffer; +@@ -254,16 +251,17 @@ static void vp8_temporal_filter_iterate_c + for (mb_row = 0; mb_row < mb_rows; mb_row++) + { + #if ALT_REF_MC_ENABLED +- // Source frames are extended to 16 pixels. This is different than +- // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) +- // A 6 tap filter is used for motion search. This requires 2 pixels +- // before and 3 pixels after. So the largest Y mv on a border would +- // then be 16 - 3. 
The UV blocks are half the size of the Y and +- // therefore only extended by 8. The largest mv that a UV block +- // can support is 8 - 3. A UV mv is half of a Y mv. +- // (16 - 3) >> 1 == 6 which is greater than 8 - 3. +- // To keep the mv in play for both Y and UV planes the max that it +- // can be on a border is therefore 16 - 5. ++ /* Source frames are extended to 16 pixels. This is different than ++ * L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) ++ * A 6 tap filter is used for motion search. This requires 2 pixels ++ * before and 3 pixels after. So the largest Y mv on a border would ++ * then be 16 - 3. The UV blocks are half the size of the Y and ++ * therefore only extended by 8. The largest mv that a UV block ++ * can support is 8 - 3. A UV mv is half of a Y mv. ++ * (16 - 3) >> 1 == 6 which is greater than 8 - 3. ++ * To keep the mv in play for both Y and UV planes the max that it ++ * can be on a border is therefore 16 - 5. ++ */ + cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5)); + cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) + + (16 - 5); +@@ -285,36 +283,41 @@ static void vp8_temporal_filter_iterate_c + + for (frame = 0; frame < frame_count; frame++) + { +- int err = 0; +- + if (cpi->frames[frame] == NULL) + continue; + + mbd->block[0].bmi.mv.as_mv.row = 0; + mbd->block[0].bmi.mv.as_mv.col = 0; + ++ if (frame == alt_ref_index) ++ { ++ filter_weight = 2; ++ } ++ else ++ { ++ int err = 0; + #if ALT_REF_MC_ENABLED + #define THRESH_LOW 10000 + #define THRESH_HIGH 20000 +- +- // Find best match in this frame by MC +- err = vp8_temporal_filter_find_matching_mb_c +- (cpi, +- cpi->frames[alt_ref_index], +- cpi->frames[frame], +- mb_y_offset, +- THRESH_LOW); +- ++ /* Find best match in this frame by MC */ ++ err = vp8_temporal_filter_find_matching_mb_c ++ (cpi, ++ cpi->frames[alt_ref_index], ++ cpi->frames[frame], ++ mb_y_offset, ++ THRESH_LOW); + #endif +- // Assign higher weight to matching MB if it's error +- // score is lower. If not applying MC default behavior +- // is to weight all MBs equal. 
+- filter_weight = err<THRESH_LOW
+- ? 2 : err<THRESH_HIGH ? 1 : 0;
++ /* Assign higher weight to matching MB if it's error
++ * score is lower. If not applying MC default behavior
++ * is to weight all MBs equal.
++ */
++ filter_weight = err<THRESH_LOW
++ ? 2 : err<THRESH_HIGH ? 1 : 0;
++ }
+
+ if (filter_weight != 0)
+ {
+- // Construct the predictors
++ /* Construct the predictors */
+ vp8_temporal_filter_predictors_mb_c
+ (mbd,
+ cpi->frames[frame]->y_buffer + mb_y_offset,
+@@ -325,7 +328,7 @@ static void vp8_temporal_filter_iterate_c
+ mbd->block[0].bmi.mv.as_mv.col,
+ predictor);
+
+- // Apply the filter (YUV)
++ /* Apply the filter (YUV) */
+ vp8_temporal_filter_apply
+ (f->y_buffer + mb_y_offset,
+ f->y_stride,
+@@ -358,7 +361,7 @@ static void vp8_temporal_filter_iterate_c
+ }
+ }
+
+- // Normalize filter output to produce AltRef frame
++ /* Normalize filter output to produce AltRef frame */
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.y_stride;
+ byte = mb_y_offset;
+@@ -372,7 +375,7 @@ static void vp8_temporal_filter_iterate_c
+
+ dst1[byte] = (unsigned char)pval;
+
+- // move to next pixel
++ /* move to next pixel */
+ byte++;
+ }
+
+@@ -389,19 +392,19 @@ static void vp8_temporal_filter_iterate_c
+ {
+ int m=k+64;
+
+- // U
++ /* U */
+ unsigned int pval = accumulator[k] + (count[k] >> 1);
+ pval *= cpi->fixed_divide[count[k]];
+ pval >>= 19;
+ dst1[byte] = (unsigned char)pval;
+
+- // V
++ /* V */
+ pval = accumulator[m] + (count[m] >> 1);
+ pval *= cpi->fixed_divide[count[m]];
+ pval >>= 19;
+ dst2[byte] = (unsigned char)pval;
+
+- // move to next pixel
++ /* move to next pixel */
+ byte++;
+ }
+
+@@ -416,7 +419,7 @@ static void vp8_temporal_filter_iterate_c
+ mb_uv_offset += 8*(f->uv_stride-mb_cols);
+ }
+
+- // Restore input state
++ /* Restore input state */
+ mbd->pre.y_buffer = y_buffer;
+ mbd->pre.u_buffer = u_buffer;
+ mbd->pre.v_buffer = v_buffer;
+@@ -450,8 +453,7 @@ void vp8_temporal_filter_prepare_c
+ switch (blur_type)
+ {
+ case 1:
+- /////////////////////////////////////////
+- // Backward Blur
++ /* Backward Blur */
+
+ frames_to_blur_backward = num_frames_backward;
+
+@@ -462,8 +464,7 @@ void vp8_temporal_filter_prepare_c
+ break;
+
+ case 2:
+- /////////////////////////////////////////
+- // Forward Blur
++ /* Forward Blur */
+
+ frames_to_blur_forward = num_frames_forward;
+
+@@ -475,8 +476,7 @@ void vp8_temporal_filter_prepare_c
+
+ case 3:
+ default:
+- /////////////////////////////////////////
+- // Center Blur
++ /* Center Blur */
+ frames_to_blur_forward = num_frames_forward;
+ frames_to_blur_backward = num_frames_backward;
+
+@@ -486,7 +486,7 @@ void vp8_temporal_filter_prepare_c
+ if (frames_to_blur_backward > frames_to_blur_forward)
+ frames_to_blur_backward = frames_to_blur_forward;
+
+- // When max_frames is even we have 1 more frame backward than forward
++ /* When max_frames is even we have 1 more frame backward than forward */
+ if (frames_to_blur_forward > (max_frames - 1) / 2)
+ frames_to_blur_forward = ((max_frames - 1) / 2);
+
+@@ -499,21 +499,7 @@ void vp8_temporal_filter_prepare_c
+
+ start_frame = distance + frames_to_blur_forward;
+
+-#ifdef DEBUGFWG
+- // DEBUG FWG
+- printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
+- , max_frames
+- , num_frames_backward
+- , num_frames_forward
+- , frames_to_blur
+- , frames_to_blur_backward
+- , frames_to_blur_forward
+- , cpi->source_encode_index
+- , cpi->last_alt_ref_sei
+- , start_frame);
+-#endif
+-
+- // Setup frame pointers, NULL indicates frame not included in filter
++ /* Setup frame pointers, NULL indicates frame not included in filter */
+ vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
+ for (frame = 0; frame < frames_to_blur; frame++)
+ {
+diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
+index ef41fa8..3b5268b 100644
+--- a/vp8/encoder/tokenize.c
++++ b/vp8/encoder/tokenize.c
+@@ -23,7 +23,7 @@
+ #ifdef
ENTROPY_STATS + _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + #endif +-void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; ++void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; + void vp8_fix_contexts(MACROBLOCKD *x); + + #include "dct_value_tokens.h" +@@ -102,11 +102,12 @@ static void fill_value_tokens() + + static void tokenize2nd_order_b + ( +- MACROBLOCKD *x, ++ MACROBLOCK *x, + TOKENEXTRA **tp, + VP8_COMP *cpi + ) + { ++ MACROBLOCKD *xd = &x->e_mbd; + int pt; /* near block/prev token context index */ + int c; /* start at DC */ + TOKENEXTRA *t = *tp;/* store tokens starting here */ +@@ -117,11 +118,11 @@ static void tokenize2nd_order_b + int band, rc, v, token; + int eob; + +- b = x->block + 24; ++ b = xd->block + 24; + qcoeff_ptr = b->qcoeff; +- a = (ENTROPY_CONTEXT *)x->above_context + 8; +- l = (ENTROPY_CONTEXT *)x->left_context + 8; +- eob = x->eobs[24]; ++ a = (ENTROPY_CONTEXT *)xd->above_context + 8; ++ l = (ENTROPY_CONTEXT *)xd->left_context + 8; ++ eob = xd->eobs[24]; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + if(!eob) +@@ -131,7 +132,7 @@ static void tokenize2nd_order_b + t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; + t->skip_eob_node = 0; + +- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; + t++; + *tp = t; + *a = *l = 0; +@@ -145,7 +146,7 @@ static void tokenize2nd_order_b + + t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; + t->skip_eob_node = 0; +- ++cpi->coef_counts [1] [0] [pt] [token]; ++ ++x->coef_counts [1] [0] [pt] [token]; + pt = vp8_prev_token_class[token]; + t++; + c = 1; +@@ -164,7 +165,7 @@ static void tokenize2nd_order_b + + t->skip_eob_node = ((pt == 0)); + +- ++cpi->coef_counts [1] [band] [pt] [token]; ++ ++x->coef_counts [1] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; +@@ -177,7 +178,7 @@ static void tokenize2nd_order_b + + t->skip_eob_node = 0; + +- ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; + + t++; + } +@@ -189,12 +190,13 @@ static void tokenize2nd_order_b + + static void tokenize1st_order_b + ( +- MACROBLOCKD *x, ++ MACROBLOCK *x, + TOKENEXTRA **tp, + int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + VP8_COMP *cpi + ) + { ++ MACROBLOCKD *xd = &x->e_mbd; + unsigned int block; + const BLOCKD *b; + int pt; /* near block/prev token context index */ +@@ -207,15 +209,15 @@ static void tokenize1st_order_b + int band, rc, v; + int tmp1, tmp2; + +- b = x->block; ++ b = xd->block; + /* Luma */ + for (block = 0; block < 16; block++, b++) + { + tmp1 = vp8_block2above[block]; + tmp2 = vp8_block2left[block]; + qcoeff_ptr = b->qcoeff; +- a = (ENTROPY_CONTEXT *)x->above_context + tmp1; +- l = (ENTROPY_CONTEXT *)x->left_context + tmp2; ++ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; ++ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; + + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + +@@ -228,7 +230,7 @@ static void tokenize1st_order_b + t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; + t->skip_eob_node = 0; + +- ++cpi->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; + t++; + *tp = t; + *a = *l = 0; +@@ -243,7 +245,7 @@ static void tokenize1st_order_b + + t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; + t->skip_eob_node = 0; +- ++cpi->coef_counts [type] [c] [pt] [token]; ++ ++x->coef_counts [type] [c] [pt] [token]; + pt = 
vp8_prev_token_class[token]; + t++; + c++; +@@ -261,7 +263,7 @@ static void tokenize1st_order_b + t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; + + t->skip_eob_node = (pt == 0); +- ++cpi->coef_counts [type] [band] [pt] [token]; ++ ++x->coef_counts [type] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; +@@ -273,7 +275,7 @@ static void tokenize1st_order_b + t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; + + t->skip_eob_node = 0; +- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; + + t++; + } +@@ -287,8 +289,8 @@ static void tokenize1st_order_b + tmp1 = vp8_block2above[block]; + tmp2 = vp8_block2left[block]; + qcoeff_ptr = b->qcoeff; +- a = (ENTROPY_CONTEXT *)x->above_context + tmp1; +- l = (ENTROPY_CONTEXT *)x->left_context + tmp2; ++ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; ++ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; + + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + +@@ -299,7 +301,7 @@ static void tokenize1st_order_b + t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; + t->skip_eob_node = 0; + +- ++cpi->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; + t++; + *tp = t; + *a = *l = 0; +@@ -314,7 +316,7 @@ static void tokenize1st_order_b + + t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; + t->skip_eob_node = 0; +- ++cpi->coef_counts [2] [0] [pt] [token]; ++ ++x->coef_counts [2] [0] [pt] [token]; + pt = vp8_prev_token_class[token]; + t++; + c = 1; +@@ -333,7 +335,7 @@ static void tokenize1st_order_b + + t->skip_eob_node = (pt == 0); + +- ++cpi->coef_counts [2] [band] [pt] [token]; ++ ++x->coef_counts [2] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; +@@ -346,7 +348,7 @@ static void tokenize1st_order_b + + t->skip_eob_node = 0; + +- ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; + + t++; + } +@@ -374,16 +376,18 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) + } + + +-void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ++void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) + { ++ MACROBLOCKD *xd = &x->e_mbd; + int plane_type; + int has_y2_block; + +- has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED +- && x->mode_info_context->mbmi.mode != SPLITMV); ++ has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED ++ && xd->mode_info_context->mbmi.mode != SPLITMV); + +- x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); +- if (x->mode_info_context->mbmi.mb_skip_coeff) ++ xd->mode_info_context->mbmi.mb_skip_coeff = ++ mb_is_skippable(xd, has_y2_block); ++ if (xd->mode_info_context->mbmi.mb_skip_coeff) + { + if (!cpi->common.mb_no_coeff_skip) + { +@@ -391,8 +395,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) + } + else + { +- vp8_fix_contexts(x); +- cpi->skip_true_count++; ++ vp8_fix_contexts(xd); ++ x->skip_true_count++; + } + + return; +@@ -488,7 +492,8 @@ static void stuff2nd_order_b + TOKENEXTRA **tp, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, +- VP8_COMP *cpi ++ VP8_COMP *cpi, ++ MACROBLOCK *x + ) + { + int pt; /* near block/prev token context index */ +@@ -498,13 +503,12 @@ static void stuff2nd_order_b + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; + t->skip_eob_node = 0; +- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + + *tp 
= t; + pt = 0; + *a = *l = pt; +- + } + + static void stuff1st_order_b +@@ -513,7 +517,8 @@ static void stuff1st_order_b + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + int type, +- VP8_COMP *cpi ++ VP8_COMP *cpi, ++ MACROBLOCK *x + ) + { + int pt; /* near block/prev token context index */ +@@ -524,20 +529,21 @@ static void stuff1st_order_b + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; + t->skip_eob_node = 0; +- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; +- + } ++ + static + void stuff1st_order_buv + ( + TOKENEXTRA **tp, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, +- VP8_COMP *cpi ++ VP8_COMP *cpi, ++ MACROBLOCK *x + ) + { + int pt; /* near block/prev token context index */ +@@ -547,38 +553,38 @@ void stuff1st_order_buv + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; + t->skip_eob_node = 0; +- ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; ++ ++x->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; +- + } + +-void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ++void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) + { +- ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; +- ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; ++ MACROBLOCKD *xd = &x->e_mbd; ++ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context; ++ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context; + int plane_type; + int b; + plane_type = 3; +- if((x->mode_info_context->mbmi.mode != B_PRED +- && x->mode_info_context->mbmi.mode != SPLITMV)) ++ if((xd->mode_info_context->mbmi.mode != B_PRED ++ && xd->mode_info_context->mbmi.mode != SPLITMV)) + { + stuff2nd_order_b(t, +- A + vp8_block2above[24], L + vp8_block2left[24], cpi); ++ A + vp8_block2above[24], L + vp8_block2left[24], cpi, x); + plane_type = 0; + } + + for (b = 0; b < 16; b++) + stuff1st_order_b(t, + A + vp8_block2above[b], +- L + vp8_block2left[b], plane_type, cpi); ++ L + vp8_block2left[b], plane_type, cpi, x); + + for (b = 16; b < 24; b++) + stuff1st_order_buv(t, + A + vp8_block2above[b], +- L + vp8_block2left[b], cpi); ++ L + vp8_block2left[b], cpi, x); + + } + void vp8_fix_contexts(MACROBLOCKD *x) +diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm +index f07b030..6f188cb 100644 +--- a/vp8/encoder/x86/dct_mmx.asm ++++ b/vp8/encoder/x86/dct_mmx.asm +@@ -12,7 +12,7 @@ + %include "vpx_ports/x86_abi_support.asm" + + ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) +-global sym(vp8_short_fdct4x4_mmx) ++global sym(vp8_short_fdct4x4_mmx) PRIVATE + sym(vp8_short_fdct4x4_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm +index 3d52a5d..d880ce0 100644 +--- a/vp8/encoder/x86/dct_sse2.asm ++++ b/vp8/encoder/x86/dct_sse2.asm +@@ -61,7 +61,7 @@ + %endmacro + + ;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) +-global sym(vp8_short_fdct4x4_sse2) ++global sym(vp8_short_fdct4x4_sse2) PRIVATE + sym(vp8_short_fdct4x4_sse2): + + STACK_FRAME_CREATE +@@ -166,7 +166,7 @@ sym(vp8_short_fdct4x4_sse2): + STACK_FRAME_DESTROY + + ;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) +-global sym(vp8_short_fdct8x4_sse2) ++global sym(vp8_short_fdct8x4_sse2) PRIVATE + sym(vp8_short_fdct8x4_sse2): + + 
STACK_FRAME_CREATE
+diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
+new file mode 100644
+index 0000000..c1ac6c1
+--- /dev/null
++++ b/vp8/encoder/x86/denoising_sse2.c
+@@ -0,0 +1,119 @@
++/*
++ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ * Use of this source code is governed by a BSD-style license
++ * that can be found in the LICENSE file in the root of the source
++ * tree. An additional intellectual property rights grant can be found
++ * in the file PATENTS. All contributing project authors may
++ * be found in the AUTHORS file in the root of the source tree.
++ */
++
++#include "vp8/encoder/denoising.h"
++#include "vp8/common/reconinter.h"
++#include "vpx/vpx_integer.h"
++#include "vpx_mem/vpx_mem.h"
++#include "vpx_rtcd.h"
++
++#include <emmintrin.h>
++
++union sum_union {
++ __m128i v;
++ signed char e[16];
++};
++
++int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
++ YV12_BUFFER_CONFIG *running_avg,
++ MACROBLOCK *signal, unsigned int motion_magnitude,
++ int y_offset, int uv_offset)
++{
++ unsigned char *sig = signal->thismb;
++ int sig_stride = 16;
++ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
++ int mc_avg_y_stride = mc_running_avg->y_stride;
++ unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
++ int avg_y_stride = running_avg->y_stride;
++ int r;
++ __m128i acc_diff = _mm_setzero_si128();
++ const __m128i k_0 = _mm_setzero_si128();
++ const __m128i k_4 = _mm_set1_epi8(4);
++ const __m128i k_8 = _mm_set1_epi8(8);
++ const __m128i k_16 = _mm_set1_epi8(16);
++ /* Modify each level's adjustment according to motion_magnitude. */
++ const __m128i l3 = _mm_set1_epi8(
++ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6);
++ /* Difference between level 3 and level 2 is 2. */
++ const __m128i l32 = _mm_set1_epi8(2);
++ /* Difference between level 2 and level 1 is 1. */
++ const __m128i l21 = _mm_set1_epi8(1);
++
++ for (r = 0; r < 16; ++r)
++ {
++ /* Calculate differences */
++ const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
++ const __m128i v_mc_running_avg_y = _mm_loadu_si128(
++ (__m128i *)(&mc_running_avg_y[0]));
++ __m128i v_running_avg_y;
++ const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
++ const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
++ /* Obtain the sign. FF if diff is negative. */
++ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
++ /* Clamp absolute difference to 16 to be used to get mask. Doing this
++ * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
++ const __m128i clamped_absdiff = _mm_min_epu8(
++ _mm_or_si128(pdiff, ndiff), k_16);
++ /* Get masks for l2 l1 and l0 adjustments */
++ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
++ const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
++ const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
++ /* Get adjustments for l2, l1, and l0 */
++ __m128i adj2 = _mm_and_si128(mask2, l32);
++ const __m128i adj1 = _mm_and_si128(mask1, l21);
++ const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
++ __m128i adj, padj, nadj;
++
++ /* Combine the adjustments and get absolute adjustments. */
++ adj2 = _mm_add_epi8(adj2, adj1);
++ adj = _mm_sub_epi8(l3, adj2);
++ adj = _mm_andnot_si128(mask0, adj);
++ adj = _mm_or_si128(adj, adj0);
++
++ /* Restore the sign and get positive and negative adjustments.
*/ ++ padj = _mm_andnot_si128(diff_sign, adj); ++ nadj = _mm_and_si128(diff_sign, adj); ++ ++ /* Calculate filtered value. */ ++ v_running_avg_y = _mm_adds_epu8(v_sig, padj); ++ v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); ++ _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); ++ ++ /* Adjustments <=7, and each element in acc_diff can fit in signed ++ * char. ++ */ ++ acc_diff = _mm_adds_epi8(acc_diff, padj); ++ acc_diff = _mm_subs_epi8(acc_diff, nadj); ++ ++ /* Update pointers for next iteration. */ ++ sig += sig_stride; ++ mc_running_avg_y += mc_avg_y_stride; ++ running_avg_y += avg_y_stride; ++ } ++ ++ { ++ /* Compute the sum of all pixel differences of this MB. */ ++ union sum_union s; ++ int sum_diff = 0; ++ s.v = acc_diff; ++ sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5] ++ + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11] ++ + s.e[12] + s.e[13] + s.e[14] + s.e[15]; ++ ++ if (abs(sum_diff) > SUM_DIFF_THRESHOLD) ++ { ++ return COPY_BLOCK; ++ } ++ } ++ ++ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, ++ signal->thismb, sig_stride); ++ return FILTER_BLOCK; ++} +diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm +index 7ec7d60..fe26b18 100644 +--- a/vp8/encoder/x86/encodeopt.asm ++++ b/vp8/encoder/x86/encodeopt.asm +@@ -12,7 +12,7 @@ + %include "vpx_ports/x86_abi_support.asm" + + ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) +-global sym(vp8_block_error_xmm) ++global sym(vp8_block_error_xmm) PRIVATE + sym(vp8_block_error_xmm): + push rbp + mov rbp, rsp +@@ -60,7 +60,7 @@ sym(vp8_block_error_xmm): + ret + + ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) +-global sym(vp8_block_error_mmx) ++global sym(vp8_block_error_mmx) PRIVATE + sym(vp8_block_error_mmx): + push rbp + mov rbp, rsp +@@ -126,7 +126,7 @@ sym(vp8_block_error_mmx): + + + ;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); +-global sym(vp8_mbblock_error_mmx_impl) ++global sym(vp8_mbblock_error_mmx_impl) PRIVATE + sym(vp8_mbblock_error_mmx_impl): + push rbp + mov rbp, rsp +@@ -203,7 +203,7 @@ sym(vp8_mbblock_error_mmx_impl): + + + ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); +-global sym(vp8_mbblock_error_xmm_impl) ++global sym(vp8_mbblock_error_xmm_impl) PRIVATE + sym(vp8_mbblock_error_xmm_impl): + push rbp + mov rbp, rsp +@@ -273,7 +273,7 @@ sym(vp8_mbblock_error_xmm_impl): + + + ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); +-global sym(vp8_mbuverror_mmx_impl) ++global sym(vp8_mbuverror_mmx_impl) PRIVATE + sym(vp8_mbuverror_mmx_impl): + push rbp + mov rbp, rsp +@@ -330,7 +330,7 @@ sym(vp8_mbuverror_mmx_impl): + + + ;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); +-global sym(vp8_mbuverror_xmm_impl) ++global sym(vp8_mbuverror_xmm_impl) PRIVATE + sym(vp8_mbuverror_xmm_impl): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm +index 71efd56..f498927 100644 +--- a/vp8/encoder/x86/fwalsh_sse2.asm ++++ b/vp8/encoder/x86/fwalsh_sse2.asm +@@ -12,7 +12,7 @@ + %include "vpx_ports/x86_abi_support.asm" + + ;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch) +-global sym(vp8_short_walsh4x4_sse2) ++global sym(vp8_short_walsh4x4_sse2) PRIVATE + sym(vp8_short_walsh4x4_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm +index f29a54e..2864ce1 100644 +--- a/vp8/encoder/x86/quantize_mmx.asm ++++ 
b/vp8/encoder/x86/quantize_mmx.asm +@@ -15,7 +15,7 @@ + ; short *qcoeff_ptr,short *dequant_ptr, + ; short *scan_mask, short *round_ptr, + ; short *quant_ptr, short *dqcoeff_ptr); +-global sym(vp8_fast_quantize_b_impl_mmx) ++global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE + sym(vp8_fast_quantize_b_impl_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm +index 7c249ff..724e54c 100644 +--- a/vp8/encoder/x86/quantize_sse2.asm ++++ b/vp8/encoder/x86/quantize_sse2.asm +@@ -16,7 +16,7 @@ + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + +-global sym(vp8_regular_quantize_b_sse2) ++global sym(vp8_regular_quantize_b_sse2) PRIVATE + sym(vp8_regular_quantize_b_sse2): + push rbp + mov rbp, rsp +@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15 + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + +-global sym(vp8_fast_quantize_b_sse2) ++global sym(vp8_fast_quantize_b_sse2) PRIVATE + sym(vp8_fast_quantize_b_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm +index 70eac0c..f0e5d40 100644 +--- a/vp8/encoder/x86/quantize_sse4.asm ++++ b/vp8/encoder/x86/quantize_sse4.asm +@@ -16,7 +16,7 @@ + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + +-global sym(vp8_regular_quantize_b_sse4) ++global sym(vp8_regular_quantize_b_sse4) PRIVATE + sym(vp8_regular_quantize_b_sse4): + + %if ABI_IS_32BIT +diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm +index e698e90..dd526f4 100644 +--- a/vp8/encoder/x86/quantize_ssse3.asm ++++ b/vp8/encoder/x86/quantize_ssse3.asm +@@ -17,7 +17,7 @@ + ; BLOCKD *d) | 1 + ; + +-global sym(vp8_fast_quantize_b_ssse3) ++global sym(vp8_fast_quantize_b_ssse3) PRIVATE + sym(vp8_fast_quantize_b_ssse3): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm +index c6db3d1..5964a85 100644 +--- a/vp8/encoder/x86/ssim_opt.asm ++++ b/vp8/encoder/x86/ssim_opt.asm +@@ -61,7 +61,7 @@ + ; or pavgb At this point this is just meant to be first pass for calculating + ; all the parms needed for 16x16 ssim so we can play with dssim as distortion + ; in mode selection code. +-global sym(vp8_ssim_parms_16x16_sse2) ++global sym(vp8_ssim_parms_16x16_sse2) PRIVATE + sym(vp8_ssim_parms_16x16_sse2): + push rbp + mov rbp, rsp +@@ -151,7 +151,7 @@ sym(vp8_ssim_parms_16x16_sse2): + ; or pavgb At this point this is just meant to be first pass for calculating + ; all the parms needed for 16x16 ssim so we can play with dssim as distortion + ; in mode selection code. 
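
Every `global sym(...)` in these assembly files gains a PRIVATE qualifier so the helpers stop appearing in the shared object's dynamic symbol table. For comparison, the C-level equivalent is the standard GCC/clang visibility attribute (the function below is hypothetical, not from the patch):

/* Hypothetical helper: hidden visibility keeps it callable inside
 * the library but invisible to programs linking against it, which is
 * what PRIVATE achieves for the asm globals above. */
__attribute__((visibility("hidden")))
unsigned int vp8_internal_helper(unsigned int v)
{
    return v + 1;   /* body is irrelevant to the example */
}
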
+-global sym(vp8_ssim_parms_8x8_sse2) ++global sym(vp8_ssim_parms_8x8_sse2) PRIVATE + sym(vp8_ssim_parms_8x8_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm +index 75e8aa3..794dd22 100644 +--- a/vp8/encoder/x86/subtract_mmx.asm ++++ b/vp8/encoder/x86/subtract_mmx.asm +@@ -14,7 +14,7 @@ + ;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, + ; short *diff, unsigned char *Predictor, + ; int pitch); +-global sym(vp8_subtract_b_mmx_impl) ++global sym(vp8_subtract_b_mmx_impl) PRIVATE + sym(vp8_subtract_b_mmx_impl): + push rbp + mov rbp, rsp +@@ -75,7 +75,7 @@ sym(vp8_subtract_b_mmx_impl): + + ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, + ;unsigned char *pred, int pred_stride) +-global sym(vp8_subtract_mby_mmx) ++global sym(vp8_subtract_mby_mmx) PRIVATE + sym(vp8_subtract_mby_mmx): + push rbp + mov rbp, rsp +@@ -150,7 +150,7 @@ sym(vp8_subtract_mby_mmx): + ; int src_stride, unsigned char *upred, + ; unsigned char *vpred, int pred_stride) + +-global sym(vp8_subtract_mbuv_mmx) ++global sym(vp8_subtract_mbuv_mmx) PRIVATE + sym(vp8_subtract_mbuv_mmx): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm +index 008e9c7..a5d17f5 100644 +--- a/vp8/encoder/x86/subtract_sse2.asm ++++ b/vp8/encoder/x86/subtract_sse2.asm +@@ -14,7 +14,7 @@ + ;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, + ; short *diff, unsigned char *Predictor, + ; int pitch); +-global sym(vp8_subtract_b_sse2_impl) ++global sym(vp8_subtract_b_sse2_impl) PRIVATE + sym(vp8_subtract_b_sse2_impl): + push rbp + mov rbp, rsp +@@ -73,7 +73,7 @@ sym(vp8_subtract_b_sse2_impl): + + ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, + ;unsigned char *pred, int pred_stride) +-global sym(vp8_subtract_mby_sse2) ++global sym(vp8_subtract_mby_sse2) PRIVATE + sym(vp8_subtract_mby_sse2): + push rbp + mov rbp, rsp +@@ -146,7 +146,7 @@ sym(vp8_subtract_mby_sse2): + ;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, + ; int src_stride, unsigned char *upred, + ; unsigned char *vpred, int pred_stride) +-global sym(vp8_subtract_mbuv_sse2) ++global sym(vp8_subtract_mbuv_sse2) PRIVATE + sym(vp8_subtract_mbuv_sse2): + push rbp + mov rbp, rsp +diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm +index b97c694..ce9d983 100644 +--- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm ++++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm +@@ -20,7 +20,7 @@ + ; int filter_weight, | 5 + ; unsigned int *accumulator, | 6 + ; unsigned short *count) | 7 +-global sym(vp8_temporal_filter_apply_sse2) ++global sym(vp8_temporal_filter_apply_sse2) PRIVATE + sym(vp8_temporal_filter_apply_sse2): + + push rbp +diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk +index 3a7b146..a328f46 100644 +--- a/vp8/vp8_common.mk ++++ b/vp8/vp8_common.mk +@@ -30,7 +30,6 @@ VP8_COMMON_SRCS-yes += common/findnearmv.c + VP8_COMMON_SRCS-yes += common/generic/systemdependent.c + VP8_COMMON_SRCS-yes += common/idct_blk.c + VP8_COMMON_SRCS-yes += common/idctllm.c +-VP8_COMMON_SRCS-yes += common/idctllm_test.cc + VP8_COMMON_SRCS-yes += common/alloccommon.h + VP8_COMMON_SRCS-yes += common/blockd.h + VP8_COMMON_SRCS-yes += common/common.h +@@ -85,7 +84,6 @@ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += 
common/x86/idct_blk_mmx.c + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm +-VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx_test.cc + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm +@@ -122,6 +120,14 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm + endif + + # common (c) ++VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c ++VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c ++VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/loopfilter_filters_dspr2.c ++VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c ++VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c ++VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c ++ ++# common (c) + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c +diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c +index 5fb74c4..eeac3a8 100644 +--- a/vp8/vp8_cx_iface.c ++++ b/vp8/vp8_cx_iface.c +@@ -9,6 +9,7 @@ + */ + + ++#include "vpx_rtcd.h" + #include "vpx/vpx_codec.h" + #include "vpx/internal/vpx_codec_internal.h" + #include "vpx_version.h" +@@ -22,7 +23,6 @@ + struct vp8_extracfg + { + struct vpx_codec_pkt_list *pkt_list; +- vp8e_encoding_mode encoding_mode; /** best, good, realtime */ + int cpu_used; /** available cpu percentage in 1/16*/ + unsigned int enable_auto_alt_ref; /** if encoder decides to uses alternate reference frame */ + unsigned int noise_sensitivity; +@@ -51,10 +51,8 @@ static const struct extraconfig_map extracfg_map[] = + { + NULL, + #if !(CONFIG_REALTIME_ONLY) +- VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */ + 0, /* cpu_used */ + #else +- VP8_REAL_TIME_ENCODING, /* Encoding Mode */ + 4, /* cpu_used */ + #endif + 0, /* enable_auto_alt_ref */ +@@ -88,7 +86,8 @@ struct vpx_codec_alg_priv + vpx_image_t preview_img; + unsigned int next_frame_flag; + vp8_postproc_cfg_t preview_ppcfg; +- vpx_codec_pkt_list_decl(64) pkt_list; // changed to accomendate the maximum number of lagged frames allowed ++ /* pkt_list size depends on the maximum number of lagged frames allowed. 
*/
++ vpx_codec_pkt_list_decl(64) pkt_list;
+ unsigned int fixed_kf_cntr;
+ };
+
+@@ -146,25 +145,39 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+ RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
+ RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
+ RANGE_CHECK_HI(cfg, g_threads, 64);
+-#if !(CONFIG_REALTIME_ONLY)
+- RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
+-#else
++#if CONFIG_REALTIME_ONLY
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
++#elif CONFIG_MULTI_RES_ENCODING
++ if (ctx->base.enc.total_encoders > 1)
++ RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
++#else
++ RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
+ #endif
+ RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
+ RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
+ RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
+ RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
+ RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
+- //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile);
+- RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
++
++/* TODO: add spatial re-sampling support and frame dropping in
++ * multi-res-encoder.*/
++#if CONFIG_MULTI_RES_ENCODING
++ if (ctx->base.enc.total_encoders > 1)
++ RANGE_CHECK_HI(cfg, rc_resize_allowed, 0);
++#else
++ RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
++#endif
+ RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
+-#if !(CONFIG_REALTIME_ONLY)
+- RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
+-#else
++
++#if CONFIG_REALTIME_ONLY
+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS);
++#elif CONFIG_MULTI_RES_ENCODING
++ if (ctx->base.enc.total_encoders > 1)
++ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS);
++#else
++ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
+ #endif
+
+ /* VP8 does not support a lower bound on the keyframe interval in
+@@ -177,11 +190,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+
+ RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref);
+ RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
+-#if !(CONFIG_REALTIME_ONLY)
+- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING);
+-#else
+- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING);
+-#endif
+
+ #if CONFIG_REALTIME_ONLY && !CONFIG_TEMPORAL_DENOISING
+ RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0);
+@@ -189,7 +197,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+ RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
+ #endif
+
+- RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
++ RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION,
++ VP8_EIGHT_TOKENPARTITION);
+ RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
+ RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
+ RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
+@@ -203,7 +212,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+ if (cfg->g_pass == VPX_RC_LAST_PASS)
+ {
+ size_t packet_sz = sizeof(FIRSTPASS_STATS);
+- int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
++ int n_packets = (int)(cfg->rc_twopass_stats_in.sz /
++ packet_sz);
+ FIRSTPASS_STATS *stats;
+
+ if (!cfg->rc_twopass_stats_in.buf)
+@@ -227,7 +237,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+
+ if (cfg->ts_number_layers > 1)
+ {
+- int i;
++ unsigned int i;
+ RANGE_CHECK_HI(cfg, ts_periodicity, 16);
+
+ for (i=1; i<cfg->ts_number_layers; i++)
+@@ -299,7 +309,7 @@ static vpx_codec_err_t
set_vp8e_config(VP8_CONFIG *oxcf, + break; + } + +- if (cfg.g_pass == VPX_RC_FIRST_PASS) ++ if (cfg.g_pass == VPX_RC_FIRST_PASS || cfg.g_pass == VPX_RC_ONE_PASS) + { + oxcf->allow_lag = 0; + oxcf->lag_in_frames = 0; +@@ -355,7 +365,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, + + oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO + && cfg.kf_min_dist != cfg.kf_max_dist; +- //oxcf->kf_min_dist = cfg.kf_min_dis; + oxcf->key_freq = cfg.kf_max_dist; + + oxcf->number_of_layers = cfg.ts_number_layers; +@@ -385,9 +394,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, + } + #endif + +- //oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile; +- //strcpy(oxcf->first_pass_file, cfg.g_firstpass_file); +- + oxcf->cpu_used = vp8_cfg.cpu_used; + oxcf->encode_breakout = vp8_cfg.static_thresh; + oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; +@@ -447,7 +453,7 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, + vpx_codec_err_t res; + + if (((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) +- && cfg->g_lag_in_frames > 1) ++ && (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS)) + ERROR("Cannot change width or height after initialization"); + + /* Prevent increasing lag_in_frames. This check is stricter than it needs +@@ -542,19 +548,27 @@ static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, + vpx_codec_err_t res = 0; + + #if CONFIG_MULTI_RES_ENCODING ++ LOWER_RES_FRAME_INFO *shared_mem_loc; + int mb_rows = ((cfg->g_w + 15) >>4); + int mb_cols = ((cfg->g_h + 15) >>4); + +- *mem_loc = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_INFO)); +- if(!(*mem_loc)) ++ shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO)); ++ if(!shared_mem_loc) ++ { ++ res = VPX_CODEC_MEM_ERROR; ++ } ++ ++ shared_mem_loc->mb_info = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_MB_INFO)); ++ if(!(shared_mem_loc->mb_info)) + { +- free(*mem_loc); + res = VPX_CODEC_MEM_ERROR; + } + else ++ { ++ *mem_loc = (void *)shared_mem_loc; + res = VPX_CODEC_OK; ++ } + #endif +- + return res; + } + +@@ -568,6 +582,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, + + struct VP8_COMP *optr; + ++ vpx_rtcd(); ++ + if (!ctx->priv) + { + priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); +@@ -616,15 +632,15 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, + return VPX_CODEC_MEM_ERROR; + } + ++ if(mr_cfg) ++ ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; ++ else ++ ctx->priv->enc.total_encoders = 1; ++ + res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); + + if (!res) + { +- if(mr_cfg) +- ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; +- else +- ctx->priv->enc.total_encoders = 1; +- + set_vp8e_config(&ctx->priv->alg_priv->oxcf, + ctx->priv->alg_priv->cfg, + ctx->priv->alg_priv->vp8_cfg, +@@ -647,7 +663,11 @@ static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) + #if CONFIG_MULTI_RES_ENCODING + /* Free multi-encoder shared memory */ + if (ctx->oxcf.mr_total_resolutions > 0 && (ctx->oxcf.mr_encoder_id == ctx->oxcf.mr_total_resolutions-1)) ++ { ++ LOWER_RES_FRAME_INFO *shared_mem_loc = (LOWER_RES_FRAME_INFO *)ctx->oxcf.mr_low_res_mode_info; ++ free(shared_mem_loc->mb_info); + free(ctx->oxcf.mr_low_res_mode_info); ++ } + #endif + + free(ctx->cx_data); +@@ -673,7 +693,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + yv12->uv_stride = img->stride[VPX_PLANE_U]; + + yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; +- yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == 
VPX_IMG_FMT_VPXYV12); //REG_YUV = 0 ++ yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); + return res; + } + +@@ -733,6 +753,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, + if (!ctx->cfg.rc_target_bitrate) + return res; + ++ if (!ctx->cfg.rc_target_bitrate) ++ return res; ++ + if (img) + res = validate_img(ctx, img); + +@@ -756,13 +779,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, + int ref = 7; + + if (flags & VP8_EFLAG_NO_REF_LAST) +- ref ^= VP8_LAST_FLAG; ++ ref ^= VP8_LAST_FRAME; + + if (flags & VP8_EFLAG_NO_REF_GF) +- ref ^= VP8_GOLD_FLAG; ++ ref ^= VP8_GOLD_FRAME; + + if (flags & VP8_EFLAG_NO_REF_ARF) +- ref ^= VP8_ALT_FLAG; ++ ref ^= VP8_ALTR_FRAME; + + vp8_use_as_reference(ctx->cpi, ref); + } +@@ -774,13 +797,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, + int upd = 7; + + if (flags & VP8_EFLAG_NO_UPD_LAST) +- upd ^= VP8_LAST_FLAG; ++ upd ^= VP8_LAST_FRAME; + + if (flags & VP8_EFLAG_NO_UPD_GF) +- upd ^= VP8_GOLD_FLAG; ++ upd ^= VP8_GOLD_FRAME; + + if (flags & VP8_EFLAG_NO_UPD_ARF) +- upd ^= VP8_ALT_FLAG; ++ upd ^= VP8_ALTR_FRAME; + + vp8_update_reference(ctx->cpi, upd); + } +@@ -869,15 +892,16 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, + VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; + + /* Add the frame packet to the list of returned packets. */ +- round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1; ++ round = (vpx_codec_pts_t)1000000 ++ * ctx->cfg.g_timebase.num / 2 - 1; + delta = (dst_end_time_stamp - dst_time_stamp); + pkt.kind = VPX_CODEC_CX_FRAME_PKT; + pkt.data.frame.pts = + (dst_time_stamp * ctx->cfg.g_timebase.den + round) + / ctx->cfg.g_timebase.num / 10000000; +- pkt.data.frame.duration = +- (delta * ctx->cfg.g_timebase.den + round) +- / ctx->cfg.g_timebase.num / 10000000; ++ pkt.data.frame.duration = (unsigned long) ++ ((delta * ctx->cfg.g_timebase.den + round) ++ / ctx->cfg.g_timebase.num / 10000000); + pkt.data.frame.flags = lib_flags << 16; + + if (lib_flags & FRAMEFLAGS_KEY) +@@ -887,10 +911,11 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, + { + pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE; + +- // This timestamp should be as close as possible to the +- // prior PTS so that if a decoder uses pts to schedule when +- // to do this, we start right after last frame was decoded. +- // Invisible frames have no duration. ++ /* This timestamp should be as close as possible to the ++ * prior PTS so that if a decoder uses pts to schedule when ++ * to do this, we start right after last frame was decoded. ++ * Invisible frames have no duration. 
++ */
+ pkt.data.frame.pts = ((cpi->last_time_stamp_seen
+ * ctx->cfg.g_timebase.den + round)
+ / ctx->cfg.g_timebase.num / 10000000) + 1;
+@@ -942,8 +967,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
+ cx_data += size;
+ cx_data_sz -= size;
+ }
+-
+- //printf("timestamp: %lld, duration: %d\n", pkt->data.frame.pts, pkt->data.frame.duration);
+ }
+ }
+ }
+diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
+index 37773db..c13d697 100644
+--- a/vp8/vp8_dx_iface.c
++++ b/vp8/vp8_dx_iface.c
+@@ -11,12 +11,19 @@
+
+ #include <stdlib.h>
+ #include <string.h>
++#include "vpx_rtcd.h"
+ #include "vpx/vpx_decoder.h"
+ #include "vpx/vp8dx.h"
+ #include "vpx/internal/vpx_codec_internal.h"
+ #include "vpx_version.h"
+ #include "common/onyxd.h"
+ #include "decoder/onyxd_int.h"
++#include "common/alloccommon.h"
++#include "vpx_mem/vpx_mem.h"
++#if CONFIG_ERROR_CONCEALMENT
++#include "decoder/error_concealment.h"
++#endif
++#include "decoder/decoderthreading.h"
+
+ #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
+ #define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \
+@@ -69,7 +76,7 @@ struct vpx_codec_alg_priv
+ #endif
+ vpx_image_t img;
+ int img_setup;
+- int img_avail;
++ void *user_priv;
+ };
+
+ static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags)
+@@ -187,6 +194,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
+ vpx_codec_err_t res = VPX_CODEC_OK;
+ (void) data;
+
++ vpx_rtcd();
++
+ /* This function only allocates space for the vpx_codec_alg_priv_t
+ * structure. More memory may be required at the time the stream
+ * information becomes known.
+@@ -341,16 +350,30 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
+ long deadline)
+ {
+ vpx_codec_err_t res = VPX_CODEC_OK;
+-
+- ctx->img_avail = 0;
++ unsigned int resolution_change = 0;
++ unsigned int w, h;
+
+ /* Determine the stream parameters. Note that we rely on peek_si to
+ * validate that we have a buffer that does not wrap around the top
+ * of the heap.
+ */ +- if (!ctx->si.h) +- res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); ++ w = ctx->si.w; ++ h = ctx->si.h; ++ ++ res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); ++ ++ if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) ++ { ++ /* the peek function returns an error for non keyframes, however for ++ * this case, it is not an error */ ++ res = VPX_CODEC_OK; ++ } ++ ++ if(!ctx->decoder_init && !ctx->si.is_kf) ++ res = VPX_CODEC_UNSUP_BITSTREAM; + ++ if ((ctx->si.h != h) || (ctx->si.w != w)) ++ resolution_change = 1; + + /* Perform deferred allocations, if required */ + if (!res && ctx->defer_alloc) +@@ -426,6 +449,122 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, + + if (!res && ctx->pbi) + { ++ if(resolution_change) ++ { ++ VP8D_COMP *pbi = ctx->pbi; ++ VP8_COMMON *const pc = & pbi->common; ++ MACROBLOCKD *const xd = & pbi->mb; ++#if CONFIG_MULTITHREAD ++ int i; ++#endif ++ pc->Width = ctx->si.w; ++ pc->Height = ctx->si.h; ++ { ++ int prev_mb_rows = pc->mb_rows; ++ ++ if (setjmp(pbi->common.error.jmp)) ++ { ++ pbi->common.error.setjmp = 0; ++ /* same return value as used in vp8dx_receive_compressed_data */ ++ return -1; ++ } ++ ++ pbi->common.error.setjmp = 1; ++ ++ if (pc->Width <= 0) ++ { ++ pc->Width = w; ++ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, ++ "Invalid frame width"); ++ } ++ ++ if (pc->Height <= 0) ++ { ++ pc->Height = h; ++ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, ++ "Invalid frame height"); ++ } ++ ++ if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) ++ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, ++ "Failed to allocate frame buffers"); ++ ++ xd->pre = pc->yv12_fb[pc->lst_fb_idx]; ++ xd->dst = pc->yv12_fb[pc->new_fb_idx]; ++ ++#if CONFIG_MULTITHREAD ++ for (i = 0; i < pbi->allocated_decoding_thread_count; i++) ++ { ++ pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; ++ vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); ++ } ++#endif ++ vp8_build_block_doffsets(&pbi->mb); ++ ++ /* allocate memory for last frame MODE_INFO array */ ++#if CONFIG_ERROR_CONCEALMENT ++ ++ if (pbi->ec_enabled) ++ { ++ /* old prev_mip was released by vp8_de_alloc_frame_buffers() ++ * called in vp8_alloc_frame_buffers() */ ++ pc->prev_mip = vpx_calloc( ++ (pc->mb_cols + 1) * (pc->mb_rows + 1), ++ sizeof(MODE_INFO)); ++ ++ if (!pc->prev_mip) ++ { ++ vp8_de_alloc_frame_buffers(pc); ++ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, ++ "Failed to allocate" ++ "last frame MODE_INFO array"); ++ } ++ ++ pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; ++ ++ if (vp8_alloc_overlap_lists(pbi)) ++ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, ++ "Failed to allocate overlap lists " ++ "for error concealment"); ++ } ++ ++#endif ++ ++#if CONFIG_MULTITHREAD ++ if (pbi->b_multithreaded_rd) ++ vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); ++#else ++ (void)prev_mb_rows; ++#endif ++ } ++ ++ pbi->common.error.setjmp = 0; ++ ++ /* required to get past the first get_free_fb() call */ ++ ctx->pbi->common.fb_idx_ref_cnt[0] = 0; ++ } ++ ++ ctx->user_priv = user_priv; ++ if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) ++ { ++ VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; ++ res = update_error_state(ctx, &pbi->common.error); ++ } ++ } ++ ++ return res; ++} ++ ++static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, ++ vpx_codec_iter_t *iter) ++{ ++ vpx_image_t *img = NULL; ++ ++ /* iter acts as a flip flop, so an image is only returned on the first ++ * call to get_frame. 
++ */ ++ if (!(*iter)) ++ { + YV12_BUFFER_CONFIG sd; + int64_t time_stamp = 0, time_end_stamp = 0; + vp8_ppflags_t flags = {0}; +@@ -451,34 +590,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, + #endif + } + +- if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) +- { +- VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; +- res = update_error_state(ctx, &pbi->common.error); +- } +- +- if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) ++ if (0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) + { +- yuvconfig2image(&ctx->img, &sd, user_priv); +- ctx->img_avail = 1; +- } +- } ++ yuvconfig2image(&ctx->img, &sd, ctx->user_priv); + +- return res; +-} +- +-static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, +- vpx_codec_iter_t *iter) +-{ +- vpx_image_t *img = NULL; +- +- if (ctx->img_avail) +- { +- /* iter acts as a flip flop, so an image is only returned on the first +- * call to get_frame. +- */ +- if (!(*iter)) +- { + img = &ctx->img; + *iter = img; + } +diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk +index 019edbd..0ae2f10 100644 +--- a/vp8/vp8cx.mk ++++ b/vp8/vp8cx.mk +@@ -22,16 +22,9 @@ ifeq ($(ARCH_ARM),yes) + include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk + endif + +-VP8_CX_SRCS-yes += vp8_cx_iface.c ++VP8_CX_SRCS-yes += vp8cx.mk + +-# encoder +-#INCLUDES += algo/vpx_common/vpx_mem/include +-#INCLUDES += common +-#INCLUDES += common +-#INCLUDES += common +-#INCLUDES += algo/vpx_ref/cpu_id/include +-#INCLUDES += common +-#INCLUDES += encoder ++VP8_CX_SRCS-yes += vp8_cx_iface.c + + VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c + VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h +@@ -99,6 +92,14 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm ++ ++ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) ++VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c ++ifeq ($(HAVE_SSE2),yes) ++vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 ++endif ++endif ++ + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c +diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk +index b16615d..b030ee5 100644 +--- a/vp8/vp8cx_arm.mk ++++ b/vp8/vp8cx_arm.mk +@@ -9,7 +9,7 @@ + ## + + +-#VP8_CX_SRCS list is modified according to different platforms. 
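For context on the vp8_get_frame() refactor in the vp8_dx_iface.c hunks above: the decoded image is no longer surfaced from the decode call itself; the iterator acts as a flip-flop so the image is handed out exactly once per decoded frame. A minimal sketch of the pattern, with hypothetical stand-in types rather than the library's own:

    #include <stddef.h>

    struct toy_ctx {
        int have_img;
        int img;        /* stands in for the decoded vpx_image_t */
    };

    /* *iter starts out NULL; it flips to non-NULL on the first call, so
     * repeat calls return NULL until the caller resets the iterator. */
    static int *toy_get_frame(struct toy_ctx *ctx, const void **iter)
    {
        int *img = NULL;
        if (!(*iter) && ctx->have_img) {
            img = &ctx->img;
            *iter = img;
        }
        return img;
    }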
++VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk + + #File list for arm + # encoder +diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk +index 2cfd280..dd39190 100644 +--- a/vp8/vp8dx.mk ++++ b/vp8/vp8dx.mk +@@ -18,6 +18,8 @@ VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) + VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) + VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) + ++VP8_DX_SRCS-yes += vp8dx.mk ++ + VP8_DX_SRCS-yes += vp8_dx_iface.c + + # common +diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c +index 78f50c2..eae36a4 100644 +--- a/vp8_multi_resolution_encoder.c ++++ b/vp8_multi_resolution_encoder.c +@@ -164,7 +164,7 @@ static void write_ivf_file_header(FILE *outfile, + mem_put_le32(header+24, frame_cnt); /* length */ + mem_put_le32(header+28, 0); /* unused */ + +- if(fwrite(header, 1, 32, outfile)); ++ (void) fwrite(header, 1, 32, outfile); + } + + static void write_ivf_frame_header(FILE *outfile, +@@ -181,7 +181,7 @@ static void write_ivf_frame_header(FILE *outfile, + mem_put_le32(header+4, pts&0xFFFFFFFF); + mem_put_le32(header+8, pts >> 32); + +- if(fwrite(header, 1, 12, outfile)); ++ (void) fwrite(header, 1, 12, outfile); + } + + int main(int argc, char **argv) +@@ -273,7 +273,7 @@ int main(int argc, char **argv) + cfg[0].g_w = width; + cfg[0].g_h = height; + cfg[0].g_threads = 1; /* number of threads used */ +- cfg[0].rc_dropframe_thresh = 0; ++ cfg[0].rc_dropframe_thresh = 30; + cfg[0].rc_end_usage = VPX_CBR; + cfg[0].rc_resize_allowed = 0; + cfg[0].rc_min_quantizer = 4; +@@ -283,13 +283,17 @@ int main(int argc, char **argv) + cfg[0].rc_buf_initial_sz = 500; + cfg[0].rc_buf_optimal_sz = 600; + cfg[0].rc_buf_sz = 1000; +- //cfg[0].rc_dropframe_thresh = 10; + cfg[0].g_error_resilient = 1; /* Enable error resilient mode */ + cfg[0].g_lag_in_frames = 0; + + /* Disable automatic keyframe placement */ ++ /* Note: These 3 settings are copied to all levels. But, except the lowest ++ * resolution level, all other levels are set to VPX_KF_DISABLED internally. 
++ */
+ //cfg[0].kf_mode = VPX_KF_DISABLED;
+- cfg[0].kf_min_dist = cfg[0].kf_max_dist = 1000;
++ cfg[0].kf_mode = VPX_KF_AUTO;
++ cfg[0].kf_min_dist = 3000;
++ cfg[0].kf_max_dist = 3000;
+
+ cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */
+ cfg[0].g_timebase.num = 1; /* Set fps */
+@@ -361,6 +365,12 @@ int main(int argc, char **argv)
+ if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, static_thresh))
+ die_codec(&codec[i], "Failed to set static threshold");
+ }
++ /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
++ for ( i=0; i< NUM_ENCODERS; i++)
++ {
++ if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
++ die_codec(&codec[i], "Failed to set noise_sensitivity");
++ }
+
+ frame_avail = 1;
+ got_data = 0;
+@@ -405,8 +415,8 @@ int main(int argc, char **argv)
+ switch(pkt[i]->kind) {
+ case VPX_CODEC_CX_FRAME_PKT:
+ write_ivf_frame_header(outfile[i], pkt[i]);
+- if(fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz,
+- outfile[i]));
++ (void) fwrite(pkt[i]->data.frame.buf, 1,
++ pkt[i]->data.frame.sz, outfile[i]);
+ break;
+ case VPX_CODEC_PSNR_PKT:
+ if (show_psnr)
+diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c
+index 4311b1a..06270fe 100644
+--- a/vp8_scalable_patterns.c
++++ b/vp8_scalable_patterns.c
+@@ -93,7 +93,7 @@ static void write_ivf_file_header(FILE *outfile,
+ mem_put_le32(header+24, frame_cnt); /* length */
+ mem_put_le32(header+28, 0); /* unused */
+
+- if(fwrite(header, 1, 32, outfile));
++ (void) fwrite(header, 1, 32, outfile);
+ }
+
+
+@@ -111,10 +111,10 @@ static void write_ivf_frame_header(FILE *outfile,
+ mem_put_le32(header+4, pts&0xFFFFFFFF);
+ mem_put_le32(header+8, pts >> 32);
+
+- if(fwrite(header, 1, 12, outfile));
++ (void) fwrite(header, 1, 12, outfile);
+ }
+
+-static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3};
++static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
+
+ int main(int argc, char **argv) {
+ FILE *infile, *outfile[VPX_TS_MAX_LAYERS];
+@@ -129,8 +129,8 @@ int main(int argc, char **argv) {
+ int got_data;
+ int flags = 0;
+ int i;
+- int pts = 0; // PTS starts at 0
+- int frame_duration = 1; // 1 timebase tick per frame
++ int pts = 0; /* PTS starts at 0 */
++ int frame_duration = 1; /* 1 timebase tick per frame */
+
+ int layering_mode = 0;
+ int frames_in_layer[VPX_TS_MAX_LAYERS] = {0};
+@@ -138,7 +138,7 @@ int main(int argc, char **argv) {
+ int flag_periodicity;
+ int max_intra_size_pct;
+
+- // Check usage and arguments
++ /* Check usage and arguments */
+ if (argc < 9)
+ die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
+ " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1> 
\n", argv[0]); +@@ -150,43 +150,43 @@ int main(int argc, char **argv) { + + if (!sscanf(argv[7], "%d", &layering_mode)) + die ("Invalid mode %s", argv[7]); +- if (layering_mode<0 || layering_mode>8) +- die ("Invalid mode (0..8) %s", argv[7]); ++ if (layering_mode<0 || layering_mode>11) ++ die ("Invalid mode (0..11) %s", argv[7]); + + if (argc != 8+mode_to_num_layers[layering_mode]) + die ("Invalid number of arguments"); + +- if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) ++ if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32)) + die ("Failed to allocate image", width, height); + + printf("Using %s\n",vpx_codec_iface_name(interface)); + +- // Populate encoder configuration ++ /* Populate encoder configuration */ + res = vpx_codec_enc_config_default(interface, &cfg, 0); + if(res) { + printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); + return EXIT_FAILURE; + } + +- // Update the default configuration with our settings ++ /* Update the default configuration with our settings */ + cfg.g_w = width; + cfg.g_h = height; + +- // Timebase format e.g. 30fps: numerator=1, demoninator=30 ++ /* Timebase format e.g. 30fps: numerator=1, demoninator=30 */ + if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) + die ("Invalid timebase numerator %s", argv[5]); + if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) + die ("Invalid timebase denominator %s", argv[6]); + + for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) +- if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) ++ if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8])) + die ("Invalid data rate %s", argv[i]); + +- // Real time parameters +- cfg.rc_dropframe_thresh = 0; // 30 ++ /* Real time parameters */ ++ cfg.rc_dropframe_thresh = 0; + cfg.rc_end_usage = VPX_CBR; + cfg.rc_resize_allowed = 0; +- cfg.rc_min_quantizer = 8; ++ cfg.rc_min_quantizer = 2; + cfg.rc_max_quantizer = 56; + cfg.rc_undershoot_pct = 100; + cfg.rc_overshoot_pct = 15; +@@ -194,25 +194,44 @@ int main(int argc, char **argv) { + cfg.rc_buf_optimal_sz = 600; + cfg.rc_buf_sz = 1000; + +- // Enable error resilient mode ++ /* Enable error resilient mode */ + cfg.g_error_resilient = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = VPX_KF_DISABLED; + +- // Disable automatic keyframe placement +- cfg.kf_min_dist = cfg.kf_max_dist = 1000; ++ /* Disable automatic keyframe placement */ ++ cfg.kf_min_dist = cfg.kf_max_dist = 3000; + +- // Temporal scaling parameters: +- // NOTE: The 3 prediction frames cannot be used interchangeably due to +- // differences in the way they are handled throughout the code. The +- // frames should be allocated to layers in the order LAST, GF, ARF. +- // Other combinations work, but may produce slightly inferior results. ++ /* Default setting for bitrate: used in special case of 1 layer (case 0). */ ++ cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; ++ ++ /* Temporal scaling parameters: */ ++ /* NOTE: The 3 prediction frames cannot be used interchangeably due to ++ * differences in the way they are handled throughout the code. The ++ * frames should be allocated to layers in the order LAST, GF, ARF. ++ * Other combinations work, but may produce slightly inferior results. ++ */ + switch (layering_mode) + { +- + case 0: + { +- // 2-layers, 2-frame period ++ /* 1-layer */ ++ int ids[1] = {0}; ++ cfg.ts_number_layers = 1; ++ cfg.ts_periodicity = 1; ++ cfg.ts_rate_decimator[0] = 1; ++ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); ++ ++ flag_periodicity = cfg.ts_periodicity; ++ ++ // Update L only. 
++ layer_flags[0] = VPX_EFLAG_FORCE_KF | ++ VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; ++ break; ++ } ++ case 1: ++ { ++ /* 2-layers, 2-frame period */ + int ids[2] = {0,1}; + cfg.ts_number_layers = 2; + cfg.ts_periodicity = 2; +@@ -222,14 +241,14 @@ int main(int argc, char **argv) { + + flag_periodicity = cfg.ts_periodicity; + #if 1 +- // 0=L, 1=GF, Intra-layer prediction enabled ++ /* 0=L, 1=GF, Intra-layer prediction enabled */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_REF_ARF; + #else +- // 0=L, 1=GF, Intra-layer prediction disabled ++ /* 0=L, 1=GF, Intra-layer prediction disabled */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; +@@ -239,9 +258,9 @@ int main(int argc, char **argv) { + break; + } + +- case 1: ++ case 2: + { +- // 2-layers, 3-frame period ++ /* 2-layers, 3-frame period */ + int ids[3] = {0,1,1}; + cfg.ts_number_layers = 2; + cfg.ts_periodicity = 3; +@@ -251,7 +270,7 @@ int main(int argc, char **argv) { + + flag_periodicity = cfg.ts_periodicity; + +- // 0=L, 1=GF, Intra-layer prediction enabled ++ /* 0=L, 1=GF, Intra-layer prediction enabled */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +@@ -262,9 +281,9 @@ int main(int argc, char **argv) { + break; + } + +- case 2: ++ case 3: + { +- // 3-layers, 6-frame period ++ /* 3-layers, 6-frame period */ + int ids[6] = {0,2,2,1,2,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 6; +@@ -275,7 +294,7 @@ int main(int argc, char **argv) { + + flag_periodicity = cfg.ts_periodicity; + +- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled ++ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +@@ -288,9 +307,9 @@ int main(int argc, char **argv) { + break; + } + +- case 3: ++ case 4: + { +- // 3-layers, 4-frame period ++ /* 3-layers, 4-frame period */ + int ids[4] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; +@@ -301,7 +320,7 @@ int main(int argc, char **argv) { + + flag_periodicity = cfg.ts_periodicity; + +- // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled ++ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +@@ -315,9 +334,9 @@ int main(int argc, char **argv) { + break; + } + +- case 4: ++ case 5: + { +- // 3-layers, 4-frame period ++ /* 3-layers, 4-frame period */ + int ids[4] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; +@@ -328,8 +347,9 @@ int main(int argc, char **argv) { + + flag_periodicity = cfg.ts_periodicity; + +- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, +- // disabled in layer 2 ++ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, ++ * disabled in layer 2 ++ */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +@@ -342,9 +362,9 @@ int main(int argc, char **argv) { + break; + } + +- case 5: ++ case 6: + { +- // 3-layers, 4-frame period ++ /* 3-layers, 4-frame period */ + int ids[4] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 
4; +@@ -355,7 +375,7 @@ int main(int argc, char **argv) { + + flag_periodicity = cfg.ts_periodicity; + +- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled ++ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +@@ -366,11 +386,11 @@ int main(int argc, char **argv) { + break; + } + +- case 6: ++ case 7: + { +- // NOTE: Probably of academic interest only ++ /* NOTE: Probably of academic interest only */ + +- // 5-layers, 16-frame period ++ /* 5-layers, 16-frame period */ + int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; + cfg.ts_number_layers = 5; + cfg.ts_periodicity = 16; +@@ -405,9 +425,9 @@ int main(int argc, char **argv) { + break; + } + +- case 7: ++ case 8: + { +- // 2-layers ++ /* 2-layers, with sync point at first frame of layer 1. */ + int ids[2] = {0,1}; + cfg.ts_number_layers = 2; + cfg.ts_periodicity = 2; +@@ -417,30 +437,49 @@ int main(int argc, char **argv) { + + flag_periodicity = 8; + +- // 0=L, 1=GF ++ /* 0=L, 1=GF */ ++ // ARF is used as predictor for all frames, and is only updated on ++ // key frame. Sync point every 8 frames. ++ ++ // Layer 0: predict from L and ARF, update L and G. + layer_flags[0] = VPX_EFLAG_FORCE_KF | +- VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | +- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +- layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | +- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; +- layer_flags[2] = +- layer_flags[4] = +- layer_flags[6] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | +- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +- layer_flags[3] = +- layer_flags[5] = VP8_EFLAG_NO_REF_ARF | +- VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; +- layer_flags[7] = VP8_EFLAG_NO_REF_ARF | +- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | +- VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_REF_GF | ++ VP8_EFLAG_NO_UPD_ARF; ++ ++ // Layer 1: sync point: predict from L and ARF, and update G. ++ layer_flags[1] = VP8_EFLAG_NO_REF_GF | ++ VP8_EFLAG_NO_UPD_LAST | ++ VP8_EFLAG_NO_UPD_ARF; ++ ++ // Layer 0, predict from L and ARF, update L. ++ layer_flags[2] = VP8_EFLAG_NO_REF_GF | ++ VP8_EFLAG_NO_UPD_GF | ++ VP8_EFLAG_NO_UPD_ARF; ++ ++ // Layer 1: predict from L, G and ARF, and update G. ++ layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; ++ ++ // Layer 0 ++ layer_flags[4] = layer_flags[2]; ++ ++ // Layer 1 ++ layer_flags[5] = layer_flags[3]; ++ ++ // Layer 0 ++ layer_flags[6] = layer_flags[4]; ++ ++ // Layer 1 ++ layer_flags[7] = layer_flags[5]; + break; + } + +- case 8: +- default: ++ case 9: + { +- // 3-layers ++ /* 3-layers */ ++ // Sync points for layer 1 and 2 every 8 frames. ++ + int ids[4] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; +@@ -451,7 +490,7 @@ int main(int argc, char **argv) { + + flag_periodicity = 8; + +- // 0=L, 1=GF, 2=ARF ++ /* 0=L, 1=GF, 2=ARF */ + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; +@@ -470,13 +509,109 @@ int main(int argc, char **argv) { + VP8_EFLAG_NO_UPD_ENTROPY; + break; + } ++ case 10: ++ { ++ // 3-layers structure where ARF is used as predictor for all frames, ++ // and is only updated on key frame. ++ // Sync points for layer 1 and 2 every 8 frames. 
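For context on the layering modes in this switch: every case fills a layer_flags[] table that repeats with period flag_periodicity, and the encode loop simply indexes that table by frame number. A minimal sketch of the selection step, reusing the 2-layer/2-frame masks from case 1 above (the helper name is illustrative):

    #include <vpx/vpx_encoder.h>
    #include <vpx/vp8cx.h>

    /* 0=L, 1=GF: even frames refresh LAST only, odd frames GOLDEN only. */
    static const int two_layer_flags[2] = {
        VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF,
        VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF
    };

    /* The per-frame flags passed to vpx_codec_encode(). */
    static int ts_flags_for_frame(const int *layer_flags,
                                  int flag_periodicity, int frame_cnt)
    {
        return layer_flags[frame_cnt % flag_periodicity];
    }

As in the sample's main loop, VPX_EFLAG_FORCE_KF is cleared from layer_flags[0] after the first frame, so only frame 0 is forced to be a key frame.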
++ ++ int ids[4] = {0,2,1,2}; ++ cfg.ts_number_layers = 3; ++ cfg.ts_periodicity = 4; ++ cfg.ts_rate_decimator[0] = 4; ++ cfg.ts_rate_decimator[1] = 2; ++ cfg.ts_rate_decimator[2] = 1; ++ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); ++ ++ flag_periodicity = 8; ++ ++ /* 0=L, 1=GF, 2=ARF */ ++ ++ // Layer 0: predict from L and ARF; update L and G. ++ layer_flags[0] = VPX_EFLAG_FORCE_KF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_REF_GF; ++ ++ // Layer 2: sync point: predict from L and ARF; update none. ++ layer_flags[1] = VP8_EFLAG_NO_REF_GF | ++ VP8_EFLAG_NO_UPD_GF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST | ++ VP8_EFLAG_NO_UPD_ENTROPY; ++ ++ // Layer 1: sync point: predict from L and ARF; update G. ++ layer_flags[2] = VP8_EFLAG_NO_REF_GF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST; ++ ++ // Layer 2: predict from L, G, ARF; update none. ++ layer_flags[3] = VP8_EFLAG_NO_UPD_GF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST | ++ VP8_EFLAG_NO_UPD_ENTROPY; ++ ++ // Layer 0: predict from L and ARF; update L. ++ layer_flags[4] = VP8_EFLAG_NO_UPD_GF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_REF_GF; ++ ++ // Layer 2: predict from L, G, ARF; update none. ++ layer_flags[5] = layer_flags[3]; ++ ++ // Layer 1: predict from L, G, ARF; update G. ++ layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST; ++ ++ // Layer 2: predict from L, G, ARF; update none. ++ layer_flags[7] = layer_flags[3]; ++ break; ++ } ++ case 11: ++ default: ++ { ++ // 3-layers structure as in case 10, but no sync/refresh points for ++ // layer 1 and 2. ++ ++ int ids[4] = {0,2,1,2}; ++ cfg.ts_number_layers = 3; ++ cfg.ts_periodicity = 4; ++ cfg.ts_rate_decimator[0] = 4; ++ cfg.ts_rate_decimator[1] = 2; ++ cfg.ts_rate_decimator[2] = 1; ++ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); ++ ++ flag_periodicity = 8; ++ ++ /* 0=L, 1=GF, 2=ARF */ ++ ++ // Layer 0: predict from L and ARF; update L. ++ layer_flags[0] = VP8_EFLAG_NO_UPD_GF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_REF_GF; ++ layer_flags[4] = layer_flags[0]; ++ ++ // Layer 1: predict from L, G, ARF; update G. ++ layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST; ++ layer_flags[6] = layer_flags[2]; ++ ++ // Layer 2: predict from L, G, ARF; update none. ++ layer_flags[1] = VP8_EFLAG_NO_UPD_GF | ++ VP8_EFLAG_NO_UPD_ARF | ++ VP8_EFLAG_NO_UPD_LAST | ++ VP8_EFLAG_NO_UPD_ENTROPY; ++ layer_flags[3] = layer_flags[1]; ++ layer_flags[5] = layer_flags[1]; ++ layer_flags[7] = layer_flags[1]; ++ break; ++ } + } + +- // Open input file ++ /* Open input file */ + if(!(infile = fopen(argv[1], "rb"))) + die("Failed to open %s for reading", argv[1]); + +- // Open an output file for each stream ++ /* Open an output file for each stream */ + for (i=0; i(_tokenPartitions)); + + frame_avail = 1; + while (frame_avail || got_data) { +@@ -517,8 +651,8 @@ int main(int argc, char **argv) { + 1, flags, VPX_DL_REALTIME)) + die_codec(&codec, "Failed to encode frame"); + +- // Reset KF flag +- if (layering_mode != 6) ++ /* Reset KF flag */ ++ if (layering_mode != 7) + layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; + + got_data = 0; +@@ -530,29 +664,25 @@ int main(int argc, char **argv) { + idata.frame.buf, 1, pkt->data.frame.sz, +- outfile[i])); ++ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, ++ outfile[i]); + frames_in_layer[i]++; + } + break; + default: + break; + } +- printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT +- && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? 
"K":"."); +- fflush (stdout); + } + frame_cnt++; + pts += frame_duration; + } +- printf ("\n"); + fclose (infile); + + printf ("Processed %d frames.\n",frame_cnt-1); + if (vpx_codec_destroy(&codec)) + die_codec (&codec, "Failed to destroy codec"); + +- // Try to rewrite the output file headers with the actual frame count ++ /* Try to rewrite the output file headers with the actual frame count */ + for (i=0; inum; + mr_cfg.mr_down_sampling_factor.den = dsf->den; + ++ /* Force Key-frame synchronization. Namely, encoder at higher ++ * resolution always use the same frame_type chosen by the ++ * lowest-resolution encoder. ++ */ ++ if(mr_cfg.mr_encoder_id) ++ cfg->kf_mode = VPX_KF_DISABLED; ++ + ctx->iface = iface; + ctx->name = iface->name; + ctx->priv = NULL; +@@ -126,8 +133,20 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, + + if (res) + { +- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; ++ const char *error_detail = ++ ctx->priv ? ctx->priv->err_detail : NULL; ++ /* Destroy current ctx */ ++ ctx->err_detail = error_detail; + vpx_codec_destroy(ctx); ++ ++ /* Destroy already allocated high-level ctx */ ++ while (i) ++ { ++ ctx--; ++ ctx->err_detail = error_detail; ++ vpx_codec_destroy(ctx); ++ i--; ++ } + } + + if (ctx->priv) +diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h +index 0af631c..a3c95d2 100644 +--- a/vpx/vp8cx.h ++++ b/vpx/vp8cx.h +@@ -204,8 +204,8 @@ typedef struct vpx_roi_map + unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */ + unsigned int rows; /**< number of rows */ + unsigned int cols; /**< number of cols */ +- int delta_q[4]; /**< quantizer delta [-64, 64] off baseline for regions with id between 0 and 3*/ +- int delta_lf[4]; /**< loop filter strength delta [-32, 32] for regions with id between 0 and 3 */ ++ int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/ ++ int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */ + unsigned int static_threshold[4];/**< threshold for region to be treated as static */ + } vpx_roi_map_t; + +@@ -234,18 +234,6 @@ typedef struct vpx_scaling_mode + VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */ + } vpx_scaling_mode_t; + +-/*!\brief VP8 encoding mode +- * +- * This defines VP8 encoding mode +- * +- */ +-typedef enum +-{ +- VP8_BEST_QUALITY_ENCODING, +- VP8_GOOD_QUALITY_ENCODING, +- VP8_REAL_TIME_ENCODING +-} vp8e_encoding_mode; +- + /*!\brief VP8 token partition mode + * + * This defines VP8 partitioning mode for compressed data, i.e., the number of +@@ -298,12 +286,12 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) + VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) + VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int) + VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int) +-VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions) ++VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */ + + VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) + VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int) + VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int) +-VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning) ++VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ + VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int) + + VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) +diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h +index d92e165..243b7a5 100644 +--- a/vpx/vpx_codec.h ++++ b/vpx/vpx_codec.h +@@ 
-49,15 +49,22 @@ extern "C" { + #ifndef DEPRECATED + #if defined(__GNUC__) && __GNUC__ + #define DEPRECATED __attribute__ ((deprecated)) +-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ + #elif defined(_MSC_VER) + #define DEPRECATED +-#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ + #else + #define DEPRECATED +-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ + #endif ++#endif /* DEPRECATED */ ++ ++#ifndef DECLSPEC_DEPRECATED ++#if defined(__GNUC__) && __GNUC__ ++#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ ++#elif defined(_MSC_VER) ++#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ ++#else ++#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ + #endif ++#endif /* DECLSPEC_DEPRECATED */ + + /*!\brief Decorator indicating a function is potentially unused */ + #ifdef UNUSED +diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk +index 427fd0f..ffa123f 100644 +--- a/vpx/vpx_codec.mk ++++ b/vpx/vpx_codec.mk +@@ -11,6 +11,21 @@ + + API_EXPORTS += exports + ++API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h ++API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h ++API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h ++API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h ++ ++API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h ++API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h ++API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h ++API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h ++ ++API_DOC_SRCS-yes += vpx_codec.h ++API_DOC_SRCS-yes += vpx_decoder.h ++API_DOC_SRCS-yes += vpx_encoder.h ++API_DOC_SRCS-yes += vpx_image.h ++ + API_SRCS-yes += src/vpx_decoder.c + API_SRCS-yes += vpx_decoder.h + API_SRCS-yes += src/vpx_encoder.c +@@ -23,3 +38,4 @@ API_SRCS-yes += vpx_codec.mk + API_SRCS-yes += vpx_codec_impl_bottom.h + API_SRCS-yes += vpx_codec_impl_top.h + API_SRCS-yes += vpx_image.h ++API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h +diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h +index 7992cc4..1ccf1c5 100644 +--- a/vpx/vpx_decoder.h ++++ b/vpx/vpx_decoder.h +@@ -113,6 +113,10 @@ extern "C" { + * function directly, to ensure that the ABI version number parameter + * is properly initialized. + * ++ * If the library was configured with --disable-multithread, this call ++ * is not thread safe and should be guarded with a lock if being used ++ * in a multithreaded context. ++ * + * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags + * parameter), the storage pointed to by the cfg parameter must be + * kept readable and stable until all memory maps have been set. +diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h +index 239036e..67d9033 100644 +--- a/vpx/vpx_encoder.h ++++ b/vpx/vpx_encoder.h +@@ -655,6 +655,10 @@ extern "C" { + * function directly, to ensure that the ABI version number parameter + * is properly initialized. + * ++ * If the library was configured with --disable-multithread, this call ++ * is not thread safe and should be guarded with a lock if being used ++ * in a multithreaded context. ++ * + * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags + * parameter), the storage pointed to by the cfg parameter must be + * kept readable and stable until all memory maps have been set. +diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c +index ebe428d..8ff95a1 100644 +--- a/vpx_ports/arm_cpudetect.c ++++ b/vpx_ports/arm_cpudetect.c +@@ -32,8 +32,33 @@ static int arm_cpu_env_mask(void) + return env && *env ? 
(int)strtol(env, NULL, 0) : ~0;
+ }
+
++#if !CONFIG_RUNTIME_CPU_DETECT
+
+-#if defined(_MSC_VER)
++int arm_cpu_caps(void)
++{
++ /* This function should actually be a no-op. There is no way to adjust any of
++ * these because the RTCD tables do not exist: the functions are called
++ * statically */
++ int flags;
++ int mask;
++ if (!arm_cpu_env_flags(&flags))
++ {
++ return flags;
++ }
++ mask = arm_cpu_env_mask();
++#if HAVE_EDSP
++ flags |= HAS_EDSP;
++#endif /* HAVE_EDSP */
++#if HAVE_MEDIA
++ flags |= HAS_MEDIA;
++#endif /* HAVE_MEDIA */
++#if HAVE_NEON
++ flags |= HAS_NEON;
++#endif /* HAVE_NEON */
++ return flags & mask;
++}
++
++#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
+ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+ #define WIN32_LEAN_AND_MEAN
+ #define WIN32_EXTRA_LEAN
+@@ -52,7 +77,7 @@ int arm_cpu_caps(void)
+ * instructions via their assembled hex code.
+ * All of these instructions should be essentially nops.
+ */
+-#if defined(HAVE_EDSP)
++#if HAVE_EDSP
+ if (mask & HAS_EDSP)
+ {
+ __try
+@@ -66,7 +91,7 @@ int arm_cpu_caps(void)
+ /*Ignore exception.*/
+ }
+ }
+-#if defined(HAVE_MEDIA)
++#if HAVE_MEDIA
+ if (mask & HAS_MEDIA)
+ __try
+ {
+@@ -79,7 +104,7 @@ int arm_cpu_caps(void)
+ /*Ignore exception.*/
+ }
+ }
+-#if defined(HAVE_NEON)
++#if HAVE_NEON
+ if (mask & HAS_NEON)
+ {
+ __try
+@@ -93,14 +118,13 @@ int arm_cpu_caps(void)
+ /*Ignore exception.*/
+ }
+ }
+-#endif
+-#endif
+-#endif
++#endif /* HAVE_NEON */
++#endif /* HAVE_MEDIA */
++#endif /* HAVE_EDSP */
+ return flags & mask;
+ }
+
+-#elif defined(__linux__)
+-#if defined(__ANDROID__)
++#elif defined(__ANDROID__) /* end _MSC_VER */
+ #include <cpu-features.h>
+
+ int arm_cpu_caps(void)
+@@ -115,19 +139,20 @@ int arm_cpu_caps(void)
+ mask = arm_cpu_env_mask();
+ features = android_getCpuFeatures();
+
+-#if defined(HAVE_EDSP)
++#if HAVE_EDSP
+ flags |= HAS_EDSP;
+-#endif
++#endif /* HAVE_EDSP */
+-#if defined(HAVE_MEDIA)
++#if HAVE_MEDIA
+ flags |= HAS_MEDIA;
+-#endif
++#endif /* HAVE_MEDIA */
+-#if defined(HAVE_NEON)
++#if HAVE_NEON
+ if (features & ANDROID_CPU_ARM_FEATURE_NEON)
+ flags |= HAS_NEON;
+-#endif
++#endif /* HAVE_NEON */
+ return flags & mask;
+ }
+-#else // !defined(__ANDROID__)
++
++#elif defined(__linux__) /* end __ANDROID__ */
+ #include <stdio.h>
+
+ int arm_cpu_caps(void)
+@@ -153,27 +178,27 @@ int arm_cpu_caps(void)
+ char buf[512];
+ while (fgets(buf, 511, fin) != NULL)
+ {
+-#if defined(HAVE_EDSP) || defined(HAVE_NEON)
++#if HAVE_EDSP || HAVE_NEON
+ if (memcmp(buf, "Features", 8) == 0)
+ {
+ char *p;
+-#if defined(HAVE_EDSP)
++#if HAVE_EDSP
+ p=strstr(buf, " edsp");
+ if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+ {
+ flags |= HAS_EDSP;
+ }
+-#if defined(HAVE_NEON)
++#if HAVE_NEON
+ p = strstr(buf, " neon");
+ if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+ {
+ flags |= HAS_NEON;
+ }
+-#endif
+-#endif
++#endif /* HAVE_NEON */
++#endif /* HAVE_EDSP */
+ }
+-#endif
+-#if defined(HAVE_MEDIA)
+ if (memcmp(buf, "CPU architecture:",17) == 0){
+ int version;
+ version = atoi(buf+17);
+@@ -182,37 +207,13 @@ int arm_cpu_caps(void)
+ flags |= HAS_MEDIA;
+ }
+ }
+-#endif
++#endif /* HAVE_MEDIA */
+ }
+ fclose(fin);
+ }
+ return flags & mask;
+ }
+-#endif // defined(__linux__)
+-#elif !CONFIG_RUNTIME_CPU_DETECT
+-
+-int arm_cpu_caps(void)
+-{
+- int flags;
+- int mask;
+- if (!arm_cpu_env_flags(&flags))
+- {
+- return flags;
+- }
+- mask = arm_cpu_env_mask();
+-#if defined(HAVE_EDSP)
+- flags |= HAS_EDSP;
+-#endif
+-#if defined(HAVE_MEDIA)
+- flags |= HAS_MEDIA; +-#endif +-#if defined(HAVE_NEON) +- flags |= HAS_NEON; +-#endif +- return flags & mask; +-} +- +-#else ++#else /* end __linux__ */ + #error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ +- "available for your platform. Reconfigure without --enable-runtime-cpu-detect." ++ "available for your platform. Reconfigure with --disable-runtime-cpu-detect." + #endif +diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h +index d3b4fc7..7b6ae4a 100644 +--- a/vpx_ports/asm_offsets.h ++++ b/vpx_ports/asm_offsets.h +@@ -19,11 +19,11 @@ + static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} + + #if INLINE_ASM +-#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)); ++#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)) + #define BEGIN int main(void) { + #define END return 0; } + #else +-#define DEFINE(sym, val) int sym = val; ++#define DEFINE(sym, val) const int sym = val + #define BEGIN + #define END + #endif +diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm +index 306e235..efad1a5 100644 +--- a/vpx_ports/emms.asm ++++ b/vpx_ports/emms.asm +@@ -12,14 +12,14 @@ + %include "vpx_ports/x86_abi_support.asm" + + section .text +- global sym(vpx_reset_mmx_state) ++global sym(vpx_reset_mmx_state) PRIVATE + sym(vpx_reset_mmx_state): + emms + ret + + + %ifidn __OUTPUT_FORMAT__,x64 +-global sym(vpx_winx64_fldcw) ++global sym(vpx_winx64_fldcw) PRIVATE + sym(vpx_winx64_fldcw): + sub rsp, 8 + mov [rsp], rcx ; win x64 specific +@@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw): + ret + + +-global sym(vpx_winx64_fstcw) ++global sym(vpx_winx64_fstcw) PRIVATE + sym(vpx_winx64_fstcw): + sub rsp, 8 + fstcw [rsp] +diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h +index 0e52368..dec28d5 100644 +--- a/vpx_ports/mem_ops.h ++++ b/vpx_ports/mem_ops.h +@@ -145,27 +145,27 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) + + #undef mem_get_sbe16 + #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) +-mem_get_s_generic(be, 16); ++mem_get_s_generic(be, 16) + + #undef mem_get_sbe24 + #define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) +-mem_get_s_generic(be, 24); ++mem_get_s_generic(be, 24) + + #undef mem_get_sbe32 + #define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) +-mem_get_s_generic(be, 32); ++mem_get_s_generic(be, 32) + + #undef mem_get_sle16 + #define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) +-mem_get_s_generic(le, 16); ++mem_get_s_generic(le, 16) + + #undef mem_get_sle24 + #define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) +-mem_get_s_generic(le, 24); ++mem_get_s_generic(le, 24) + + #undef mem_get_sle32 + #define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) +-mem_get_s_generic(le, 32); ++mem_get_s_generic(le, 32) + + #undef mem_put_be16 + #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) +diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h +index 0fbba65..fca653a 100644 +--- a/vpx_ports/mem_ops_aligned.h ++++ b/vpx_ports/mem_ops_aligned.h +@@ -99,51 +99,51 @@ + + #undef mem_get_be16_aligned + #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) +-mem_get_be_aligned_generic(16); ++mem_get_be_aligned_generic(16) + + #undef mem_get_be32_aligned + #define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) +-mem_get_be_aligned_generic(32); ++mem_get_be_aligned_generic(32) + + #undef mem_get_le16_aligned + #define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) +-mem_get_le_aligned_generic(16); ++mem_get_le_aligned_generic(16) + 
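For context on the mem_ops.h and mem_ops_aligned.h hunks around this point: the mem_get_*/mem_put_* macros each expand to a complete function definition, so the trailing semicolons after their instantiations were empty declarations that pedantic compilers warn about; the change simply drops them. Roughly the shape of one such generated accessor, as an illustrative sketch rather than the exact macro output:

    /* Big-endian 16-bit load from possibly unaligned memory, assembled
     * byte by byte so it is safe on any architecture. */
    static unsigned int toy_get_be16(const void *vmem)
    {
        const unsigned char *mem = (const unsigned char *)vmem;
        return ((unsigned int)mem[0] << 8) | (unsigned int)mem[1];
    }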
+ #undef mem_get_le32_aligned + #define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) +-mem_get_le_aligned_generic(32); ++mem_get_le_aligned_generic(32) + + #undef mem_get_sbe16_aligned + #define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) +-mem_get_sbe_aligned_generic(16); ++mem_get_sbe_aligned_generic(16) + + #undef mem_get_sbe32_aligned + #define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) +-mem_get_sbe_aligned_generic(32); ++mem_get_sbe_aligned_generic(32) + + #undef mem_get_sle16_aligned + #define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) +-mem_get_sle_aligned_generic(16); ++mem_get_sle_aligned_generic(16) + + #undef mem_get_sle32_aligned + #define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) +-mem_get_sle_aligned_generic(32); ++mem_get_sle_aligned_generic(32) + + #undef mem_put_be16_aligned + #define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) +-mem_put_be_aligned_generic(16); ++mem_put_be_aligned_generic(16) + + #undef mem_put_be32_aligned + #define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) +-mem_put_be_aligned_generic(32); ++mem_put_be_aligned_generic(32) + + #undef mem_put_le16_aligned + #define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) +-mem_put_le_aligned_generic(16); ++mem_put_le_aligned_generic(16) + + #undef mem_put_le32_aligned + #define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) +-mem_put_le_aligned_generic(32); ++mem_put_le_aligned_generic(32) + + #undef mem_get_ne_aligned_generic + #undef mem_get_se_aligned_generic +diff --git a/vpx_ports/vpx_ports.mk b/vpx_ports/vpx_ports.mk +new file mode 100644 +index 0000000..e6cb52f +--- /dev/null ++++ b/vpx_ports/vpx_ports.mk +@@ -0,0 +1,26 @@ ++## ++## Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++## ++## Use of this source code is governed by a BSD-style license ++## that can be found in the LICENSE file in the root of the source ++## tree. An additional intellectual property rights grant can be found ++## in the file PATENTS. All contributing project authors may ++## be found in the AUTHORS file in the root of the source tree. ++## ++ ++ ++PORTS_SRCS-yes += vpx_ports.mk ++ ++PORTS_SRCS-$(BUILD_LIBVPX) += asm_offsets.h ++PORTS_SRCS-$(BUILD_LIBVPX) += mem.h ++PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h ++ ++ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) ++PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm ++PORTS_SRCS-$(BUILD_LIBVPX) += x86.h ++PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm ++PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c ++endif ++ ++PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c ++PORTS_SRCS-$(ARCH_ARM) += arm.h +diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h +index 1341c7f..9dd8c4b 100644 +--- a/vpx_ports/x86.h ++++ b/vpx_ports/x86.h +@@ -162,7 +162,7 @@ x86_readtsc(void) + return tsc; + #else + #if ARCH_X86_64 +- return __rdtsc(); ++ return (unsigned int)__rdtsc(); + #else + __asm rdtsc; + #endif +diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm +index cef6a0b..0c9fe37 100644 +--- a/vpx_ports/x86_abi_support.asm ++++ b/vpx_ports/x86_abi_support.asm +@@ -88,12 +88,41 @@ + %define sym(x) x + %elifidn __OUTPUT_FORMAT__,elf64 + %define sym(x) x ++%elifidn __OUTPUT_FORMAT__,elfx32 ++%define sym(x) x + %elifidn __OUTPUT_FORMAT__,x64 + %define sym(x) x + %else + %define sym(x) _ %+ x + %endif + ++; PRIVATE ++; Macro for the attribute to hide a global symbol for the target ABI. 
++; This is only active if CHROMIUM is defined. ++; ++; Chromium doesn't like exported global symbols due to symbol clashing with ++; plugins among other things. ++; ++; Requires Chromium's patched copy of yasm: ++; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 ++; http://www.tortall.net/projects/yasm/ticket/236 ++; ++%ifdef CHROMIUM ++ %ifidn __OUTPUT_FORMAT__,elf32 ++ %define PRIVATE :hidden ++ %elifidn __OUTPUT_FORMAT__,elf64 ++ %define PRIVATE :hidden ++ %elifidn __OUTPUT_FORMAT__,elfx32 ++ %define PRIVATE :hidden ++ %elifidn __OUTPUT_FORMAT__,x64 ++ %define PRIVATE ++ %else ++ %define PRIVATE :private_extern ++ %endif ++%else ++ %define PRIVATE ++%endif ++ + ; arg() + ; Return the address specification of the given argument + ; +@@ -181,7 +210,16 @@ + %endmacro + %endif + %endif +- %define HIDDEN_DATA(x) x ++ ++ %ifdef CHROMIUM ++ %ifidn __OUTPUT_FORMAT__,macho32 ++ %define HIDDEN_DATA(x) x:private_extern ++ %else ++ %define HIDDEN_DATA(x) x ++ %endif ++ %else ++ %define HIDDEN_DATA(x) x ++ %endif + %else + %macro GET_GOT 1 + %endmacro +@@ -189,6 +227,9 @@ + %ifidn __OUTPUT_FORMAT__,elf64 + %define WRT_PLT wrt ..plt + %define HIDDEN_DATA(x) x:data hidden ++ %elifidn __OUTPUT_FORMAT__,elfx32 ++ %define WRT_PLT wrt ..plt ++ %define HIDDEN_DATA(x) x:data hidden + %else + %define HIDDEN_DATA(x) x + %endif +@@ -330,5 +371,8 @@ section .text + %elifidn __OUTPUT_FORMAT__,elf64 + section .note.GNU-stack noalloc noexec nowrite progbits + section .text ++%elifidn __OUTPUT_FORMAT__,elfx32 ++section .note.GNU-stack noalloc noexec nowrite progbits ++section .text + %endif + +diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c +index 7529fc6..eabd495 100644 +--- a/vpx_scale/arm/neon/yv12extend_arm.c ++++ b/vpx_scale/arm/neon/yv12extend_arm.c +@@ -8,18 +8,14 @@ + * be found in the AUTHORS file in the root of the source tree. 
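
For readers without the yasm manual at hand: PRIVATE and HIDDEN_DATA map a symbol to the ELF "hidden" visibility class (or Mach-O private_extern), so it still resolves inside libvpx but is invisible to other modules, which is exactly the plugin-clash problem the comment describes. The C-level analogue, as an illustrative sketch (the attribute and the names below are not part of the patch):

    #if defined(__GNUC__)
    #define VPX_PRIVATE __attribute__((visibility("hidden")))
    #else
    #define VPX_PRIVATE
    #endif

    VPX_PRIVATE int vpx_internal_counter;                 /* hypothetical */
    VPX_PRIVATE void vpx_internal_tick(void) { ++vpx_internal_counter; }
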
+ */ + ++#include "./vpx_rtcd.h" + +-#include "vpx_scale/yv12config.h" +-#include "vpx_mem/vpx_mem.h" +-#include "vpx_scale/vpxscale.h" ++extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc, ++ struct yv12_buffer_config *dst_ybc); + +-extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, +- YV12_BUFFER_CONFIG *dst_ybc); ++void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc, ++ struct yv12_buffer_config *dst_ybc) { ++ vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); + +-void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, +- YV12_BUFFER_CONFIG *dst_ybc) +-{ +- vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); +- +- vp8_yv12_extend_frame_borders_neon(dst_ybc); ++ vp8_yv12_extend_frame_borders_neon(dst_ybc); + } +diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c +index 4468e9d..c116740 100644 +--- a/vpx_scale/generic/bicubic_scaler.c ++++ b/vpx_scale/generic/bicubic_scaler.c +@@ -46,245 +46,229 @@ static float a = -0.6; + // 3 2 + // C0 = a*t - a*t + // +-static short c0_fixed(unsigned int t) +-{ +- // put t in Q16 notation +- unsigned short v1, v2; +- +- // Q16 +- v1 = (a_i * t) >> 16; +- v1 = (v1 * t) >> 16; +- +- // Q16 +- v2 = (a_i * t) >> 16; +- v2 = (v2 * t) >> 16; +- v2 = (v2 * t) >> 16; +- +- // Q12 +- return -((v1 - v2) >> 4); ++static short c0_fixed(unsigned int t) { ++ // put t in Q16 notation ++ unsigned short v1, v2; ++ ++ // Q16 ++ v1 = (a_i * t) >> 16; ++ v1 = (v1 * t) >> 16; ++ ++ // Q16 ++ v2 = (a_i * t) >> 16; ++ v2 = (v2 * t) >> 16; ++ v2 = (v2 * t) >> 16; ++ ++ // Q12 ++ return -((v1 - v2) >> 4); + } + + // 2 3 + // C1 = a*t + (3-2*a)*t - (2-a)*t + // +-static short c1_fixed(unsigned int t) +-{ +- unsigned short v1, v2, v3; +- unsigned short two, three; +- +- // Q16 +- v1 = (a_i * t) >> 16; +- +- // Q13 +- two = 2 << 13; +- v2 = two - (a_i >> 3); +- v2 = (v2 * t) >> 16; +- v2 = (v2 * t) >> 16; +- v2 = (v2 * t) >> 16; +- +- // Q13 +- three = 3 << 13; +- v3 = three - (2 * (a_i >> 3)); +- v3 = (v3 * t) >> 16; +- v3 = (v3 * t) >> 16; +- +- // Q12 +- return (((v1 >> 3) - v2 + v3) >> 1); ++static short c1_fixed(unsigned int t) { ++ unsigned short v1, v2, v3; ++ unsigned short two, three; ++ ++ // Q16 ++ v1 = (a_i * t) >> 16; ++ ++ // Q13 ++ two = 2 << 13; ++ v2 = two - (a_i >> 3); ++ v2 = (v2 * t) >> 16; ++ v2 = (v2 * t) >> 16; ++ v2 = (v2 * t) >> 16; ++ ++ // Q13 ++ three = 3 << 13; ++ v3 = three - (2 * (a_i >> 3)); ++ v3 = (v3 * t) >> 16; ++ v3 = (v3 * t) >> 16; ++ ++ // Q12 ++ return (((v1 >> 3) - v2 + v3) >> 1); + + } + + // 2 3 + // C2 = 1 - (3-a)*t + (2-a)*t + // +-static short c2_fixed(unsigned int t) +-{ +- unsigned short v1, v2, v3; +- unsigned short two, three; +- +- // Q13 +- v1 = 1 << 13; +- +- // Q13 +- three = 3 << 13; +- v2 = three - (a_i >> 3); +- v2 = (v2 * t) >> 16; +- v2 = (v2 * t) >> 16; +- +- // Q13 +- two = 2 << 13; +- v3 = two - (a_i >> 3); +- v3 = (v3 * t) >> 16; +- v3 = (v3 * t) >> 16; +- v3 = (v3 * t) >> 16; +- +- // Q12 +- return (v1 - v2 + v3) >> 1; ++static short c2_fixed(unsigned int t) { ++ unsigned short v1, v2, v3; ++ unsigned short two, three; ++ ++ // Q13 ++ v1 = 1 << 13; ++ ++ // Q13 ++ three = 3 << 13; ++ v2 = three - (a_i >> 3); ++ v2 = (v2 * t) >> 16; ++ v2 = (v2 * t) >> 16; ++ ++ // Q13 ++ two = 2 << 13; ++ v3 = two - (a_i >> 3); ++ v3 = (v3 * t) >> 16; ++ v3 = (v3 * t) >> 16; ++ v3 = (v3 * t) >> 16; ++ ++ // Q12 ++ return (v1 - v2 + v3) >> 1; + } + + // 2 3 + // C3 = a*t - 2*a*t + a*t + // +-static short c3_fixed(unsigned int t) +-{ +- int v1, 
v2, v3; ++static short c3_fixed(unsigned int t) { ++ int v1, v2, v3; + +- // Q16 +- v1 = (a_i * t) >> 16; ++ // Q16 ++ v1 = (a_i * t) >> 16; + +- // Q15 +- v2 = 2 * (a_i >> 1); +- v2 = (v2 * t) >> 16; +- v2 = (v2 * t) >> 16; ++ // Q15 ++ v2 = 2 * (a_i >> 1); ++ v2 = (v2 * t) >> 16; ++ v2 = (v2 * t) >> 16; + +- // Q16 +- v3 = (a_i * t) >> 16; +- v3 = (v3 * t) >> 16; +- v3 = (v3 * t) >> 16; ++ // Q16 ++ v3 = (a_i * t) >> 16; ++ v3 = (v3 * t) >> 16; ++ v3 = (v3 * t) >> 16; + +- // Q12 +- return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); ++ // Q12 ++ return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); + } + #else + // 3 2 + // C0 = -a*t + a*t + // +-float C0(float t) +-{ +- return -a * t * t * t + a * t * t; ++float C0(float t) { ++ return -a * t * t * t + a * t * t; + } + + // 2 3 + // C1 = -a*t + (2*a+3)*t - (a+2)*t + // +-float C1(float t) +-{ +- return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; ++float C1(float t) { ++ return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; + } + + // 2 3 + // C2 = 1 - (a+3)*t + (a+2)*t + // +-float C2(float t) +-{ +- return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; ++float C2(float t) { ++ return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; + } + + // 2 3 + // C3 = a*t - 2*a*t + a*t + // +-float C3(float t) +-{ +- return a * t * t * t - 2.0f * a * t * t + a * t; ++float C3(float t) { ++ return a * t * t * t - 2.0f * a * t * t + a * t; + } + #endif + + #if 0 +-int compare_real_fixed() +-{ +- int i, errors = 0; +- float mult = 1.0 / 10000.0; +- unsigned int fixed_mult = mult * 4294967296;//65536; +- unsigned int phase_offset_int; +- float phase_offset_real; +- +- for (i = 0; i < 10000; i++) +- { +- int fixed0, fixed1, fixed2, fixed3, fixed_total; +- int real0, real1, real2, real3, real_total; +- +- phase_offset_real = (float)i * mult; +- phase_offset_int = (fixed_mult * i) >> 16; ++int compare_real_fixed() { ++ int i, errors = 0; ++ float mult = 1.0 / 10000.0; ++ unsigned int fixed_mult = mult * 4294967296;// 65536; ++ unsigned int phase_offset_int; ++ float phase_offset_real; ++ ++ for (i = 0; i < 10000; i++) { ++ int fixed0, fixed1, fixed2, fixed3, fixed_total; ++ int real0, real1, real2, real3, real_total; ++ ++ phase_offset_real = (float)i * mult; ++ phase_offset_int = (fixed_mult * i) >> 16; + // phase_offset_int = phase_offset_real * 65536; + +- fixed0 = c0_fixed(phase_offset_int); +- real0 = C0(phase_offset_real) * 4096.0; ++ fixed0 = c0_fixed(phase_offset_int); ++ real0 = C0(phase_offset_real) * 4096.0; + +- if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) +- errors++; ++ if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) ++ errors++; + +- fixed1 = c1_fixed(phase_offset_int); +- real1 = C1(phase_offset_real) * 4096.0; ++ fixed1 = c1_fixed(phase_offset_int); ++ real1 = C1(phase_offset_real) * 4096.0; + +- if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) +- errors++; ++ if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) ++ errors++; + +- fixed2 = c2_fixed(phase_offset_int); +- real2 = C2(phase_offset_real) * 4096.0; ++ fixed2 = c2_fixed(phase_offset_int); ++ real2 = C2(phase_offset_real) * 4096.0; + +- if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) +- errors++; ++ if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) ++ errors++; + +- fixed3 = c3_fixed(phase_offset_int); +- real3 = C3(phase_offset_real) * 4096.0; ++ fixed3 = c3_fixed(phase_offset_int); ++ real3 = 
C3(phase_offset_real) * 4096.0; + +- if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) +- errors++; ++ if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) ++ errors++; + +- fixed_total = fixed0 + fixed1 + fixed2 + fixed3; +- real_total = real0 + real1 + real2 + real3; ++ fixed_total = fixed0 + fixed1 + fixed2 + fixed3; ++ real_total = real0 + real1 + real2 + real3; + +- if ((fixed_total > 4097) || (fixed_total < 4094)) +- errors ++; ++ if ((fixed_total > 4097) || (fixed_total < 4094)) ++ errors++; + +- if ((real_total > 4097) || (real_total < 4095)) +- errors ++; +- } ++ if ((real_total > 4097) || (real_total < 4095)) ++ errors++; ++ } + +- return errors; ++ return errors; + } + #endif + + // Find greatest common denominator between two integers. Method used here is + // slow compared to Euclid's algorithm, but does not require any division. +-int gcd(int a, int b) +-{ +- // Problem with this algorithm is that if a or b = 0 this function +- // will never exit. Don't want to return 0 because any computation +- // that was based on a common denoninator and tried to reduce by +- // dividing by 0 would fail. Best solution that could be thought of +- // would to be fail by returing a 1; +- if (a <= 0 || b <= 0) +- return 1; +- +- while (a != b) +- { +- if (b > a) +- b = b - a; +- else +- { +- int tmp = a;//swap large and +- a = b; //small +- b = tmp; +- } ++int gcd(int a, int b) { ++ // Problem with this algorithm is that if a or b = 0 this function ++ // will never exit. Don't want to return 0 because any computation ++ // that was based on a common denoninator and tried to reduce by ++ // dividing by 0 would fail. Best solution that could be thought of ++ // would to be fail by returing a 1; ++ if (a <= 0 || b <= 0) ++ return 1; ++ ++ while (a != b) { ++ if (b > a) ++ b = b - a; ++ else { ++ int tmp = a;// swap large and ++ a = b; // small ++ b = tmp; + } ++ } + +- return b; ++ return b; + } + +-void bicubic_coefficient_init() +-{ +- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); +- g_first_time = 0; ++void bicubic_coefficient_init() { ++ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); ++ g_first_time = 0; + } + +-void bicubic_coefficient_destroy() +-{ +- if (!g_first_time) +- { +- vpx_free(g_b_scaler.l_w); ++void bicubic_coefficient_destroy() { ++ if (!g_first_time) { ++ vpx_free(g_b_scaler.l_w); + +- vpx_free(g_b_scaler.l_h); ++ vpx_free(g_b_scaler.l_h); + +- vpx_free(g_b_scaler.l_h_uv); ++ vpx_free(g_b_scaler.l_h_uv); + +- vpx_free(g_b_scaler.c_w); ++ vpx_free(g_b_scaler.c_w); + +- vpx_free(g_b_scaler.c_h); ++ vpx_free(g_b_scaler.c_h); + +- vpx_free(g_b_scaler.c_h_uv); ++ vpx_free(g_b_scaler.c_h_uv); + +- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); +- } ++ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); ++ } + } + + // Create the coeffients that will be used for the cubic interpolation. +@@ -292,311 +276,294 @@ void bicubic_coefficient_destroy() + // regimes the phase offsets will be different. There are 4 coefficents + // for each point, two on each side. The layout is that there are the + // 4 coefficents for each phase in the array and then the next phase. 
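
The reformatted gcd above is worth a second look: it deliberately avoids division (repeated subtraction plus a swap), since the surrounding scaler targets hardware where a divide is a function call, and it returns 1 for non-positive input so the ratio reductions that follow can never divide by zero. A standalone copy of the idea:

    #include <assert.h>

    static int gcd_sub(int a, int b) {
      if (a <= 0 || b <= 0)
        return 1;              /* safe fallback, as the comment explains */
      while (a != b) {
        if (b > a)
          b -= a;              /* shrink the larger operand */
        else {
          int tmp = a;         /* swap so b is the one that shrinks */
          a = b;
          b = tmp;
        }
      }
      return b;
    }

    int main(void) {
      assert(gcd_sub(640, 480) == 160);
      assert(gcd_sub(0, 7) == 1);
      return 0;
    }
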
+-int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) +-{ +- int i; ++int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) { ++ int i; + #ifdef FIXED_POINT +- int phase_offset_int; +- unsigned int fixed_mult; +- int product_val = 0; ++ int phase_offset_int; ++ unsigned int fixed_mult; ++ int product_val = 0; + #else +- float phase_offset; ++ float phase_offset; + #endif +- int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; ++ int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; + +- if (g_first_time) +- bicubic_coefficient_init(); ++ if (g_first_time) ++ bicubic_coefficient_init(); + + +- // check to see if the coefficents have already been set up correctly +- if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) +- && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) +- return 0; ++ // check to see if the coefficents have already been set up correctly ++ if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) ++ && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) ++ return 0; + +- g_b_scaler.in_width = in_width; +- g_b_scaler.in_height = in_height; +- g_b_scaler.out_width = out_width; +- g_b_scaler.out_height = out_height; ++ g_b_scaler.in_width = in_width; ++ g_b_scaler.in_height = in_height; ++ g_b_scaler.out_width = out_width; ++ g_b_scaler.out_height = out_height; + +- // Don't want to allow crazy scaling, just try and prevent a catastrophic +- // failure here. Want to fail after setting the member functions so if +- // if the scaler is called the member functions will not scale. +- if (out_width <= 0 || out_height <= 0) +- return -1; ++ // Don't want to allow crazy scaling, just try and prevent a catastrophic ++ // failure here. Want to fail after setting the member functions so if ++ // if the scaler is called the member functions will not scale. ++ if (out_width <= 0 || out_height <= 0) ++ return -1; + +- // reduce in/out width and height ratios using the gcd +- gcd_w = gcd(out_width, in_width); +- gcd_h = gcd(out_height, in_height); +- gcd_h_uv = gcd(out_height, in_height / 2); ++ // reduce in/out width and height ratios using the gcd ++ gcd_w = gcd(out_width, in_width); ++ gcd_h = gcd(out_height, in_height); ++ gcd_h_uv = gcd(out_height, in_height / 2); + +- // the numerator width and height are to be saved in +- // globals so they can be used during the scaling process +- // without having to be recalculated. +- g_b_scaler.nw = out_width / gcd_w; +- d_w = in_width / gcd_w; ++ // the numerator width and height are to be saved in ++ // globals so they can be used during the scaling process ++ // without having to be recalculated. 
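
Concretely, that gcd reduction turns the frame sizes into a small repeating phase pattern. A worked example under assumed sizes (640-wide source scaled down to 480), reusing gcd_sub from the sketch above:

    #include <stdio.h>

    int main(void) {
      int in_width = 640, out_width = 480;   /* assumed, not from the patch */
      int g = gcd_sub(out_width, in_width);  /* 160 */
      printf("nw=%d d_w=%d\n", out_width / g, in_width / g); /* nw=3 d_w=4 */
      return 0;
    }

So the scaler emits 3 output pixels for every 4 source pixels and can cycle through just nw = 3 coefficient sets for the entire line.
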
++ g_b_scaler.nw = out_width / gcd_w; ++ d_w = in_width / gcd_w; + +- g_b_scaler.nh = out_height / gcd_h; +- d_h = in_height / gcd_h; ++ g_b_scaler.nh = out_height / gcd_h; ++ d_h = in_height / gcd_h; + +- g_b_scaler.nh_uv = out_height / gcd_h_uv; +- d_h_uv = (in_height / 2) / gcd_h_uv; ++ g_b_scaler.nh_uv = out_height / gcd_h_uv; ++ d_h_uv = (in_height / 2) / gcd_h_uv; + +- // allocate memory for the coefficents +- vpx_free(g_b_scaler.l_w); ++ // allocate memory for the coefficents ++ vpx_free(g_b_scaler.l_w); + +- vpx_free(g_b_scaler.l_h); ++ vpx_free(g_b_scaler.l_h); + +- vpx_free(g_b_scaler.l_h_uv); ++ vpx_free(g_b_scaler.l_h_uv); + +- g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); +- g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); +- g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); ++ g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); ++ g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); ++ g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); + +- vpx_free(g_b_scaler.c_w); ++ vpx_free(g_b_scaler.c_w); + +- vpx_free(g_b_scaler.c_h); ++ vpx_free(g_b_scaler.c_h); + +- vpx_free(g_b_scaler.c_h_uv); ++ vpx_free(g_b_scaler.c_h_uv); + +- g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); +- g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); +- g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); ++ g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); ++ g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); ++ g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); + +- g_b_scaler.hbuf = g_hbuf; +- g_b_scaler.hbuf_uv = g_hbuf_uv; ++ g_b_scaler.hbuf = g_hbuf; ++ g_b_scaler.hbuf_uv = g_hbuf_uv; + +- // Set up polyphase filter taps. This needs to be done before +- // the scaling because of the floating point math required. The +- // coefficients are multiplied by 2^12 so that fixed point math +- // can be used in the main scaling loop. ++ // Set up polyphase filter taps. This needs to be done before ++ // the scaling because of the floating point math required. The ++ // coefficients are multiplied by 2^12 so that fixed point math ++ // can be used in the main scaling loop. 
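
The "2^12" in that comment is the entire fixed-point contract: every phase stores four Q12 taps summing to 4096, so a flat input passes through the final '>> 12' unchanged. Illustrative tap values (not taken from the tables this code computes):

    #include <stdio.h>

    int main(void) {
      const short taps[4] = { -205, 3686, 717, -102 };  /* sums to 4096 */
      const unsigned char px[4] = { 80, 80, 80, 80 };   /* flat input */
      int sum = taps[0] * px[0] + taps[1] * px[1]
              + taps[2] * px[2] + taps[3] * px[3];
      printf("%d\n", sum >> 12);                        /* prints 80 */
      return 0;
    }
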
+ #ifdef FIXED_POINT +- fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; ++ fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; + +- product_val = 0; ++ product_val = 0; + +- for (i = 0; i < g_b_scaler.nw; i++) +- { +- if (product_val > g_b_scaler.nw) +- product_val -= g_b_scaler.nw; ++ for (i = 0; i < g_b_scaler.nw; i++) { ++ if (product_val > g_b_scaler.nw) ++ product_val -= g_b_scaler.nw; + +- phase_offset_int = (fixed_mult * product_val) >> 16; ++ phase_offset_int = (fixed_mult * product_val) >> 16; + +- g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int); +- g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int); +- g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int); +- g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int); ++ g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int); ++ g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int); ++ g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int); ++ g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int); + +- product_val += d_w; +- } ++ product_val += d_w; ++ } + + +- fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; ++ fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; + +- product_val = 0; ++ product_val = 0; + +- for (i = 0; i < g_b_scaler.nh; i++) +- { +- if (product_val > g_b_scaler.nh) +- product_val -= g_b_scaler.nh; ++ for (i = 0; i < g_b_scaler.nh; i++) { ++ if (product_val > g_b_scaler.nh) ++ product_val -= g_b_scaler.nh; + +- phase_offset_int = (fixed_mult * product_val) >> 16; ++ phase_offset_int = (fixed_mult * product_val) >> 16; + +- g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int); +- g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int); +- g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int); +- g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int); ++ g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int); ++ g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int); ++ g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int); ++ g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int); + +- product_val += d_h; +- } ++ product_val += d_h; ++ } + +- fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; ++ fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; + +- product_val = 0; ++ product_val = 0; + +- for (i = 0; i < g_b_scaler.nh_uv; i++) +- { +- if (product_val > g_b_scaler.nh_uv) +- product_val -= g_b_scaler.nh_uv; ++ for (i = 0; i < g_b_scaler.nh_uv; i++) { ++ if (product_val > g_b_scaler.nh_uv) ++ product_val -= g_b_scaler.nh_uv; + +- phase_offset_int = (fixed_mult * product_val) >> 16; ++ phase_offset_int = (fixed_mult * product_val) >> 16; + +- g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int); +- g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int); +- g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int); +- g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int); ++ g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int); ++ g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int); ++ g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int); ++ g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int); + +- product_val += d_h_uv; +- } ++ product_val += d_h_uv; ++ } + + #else + +- for (i = 0; i < g_nw; i++) +- { +- phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; +- g_c_w[i*4] = (C3(phase_offset) * 4096.0); +- g_c_w[i*4+1] = (C2(phase_offset) * 4096.0); +- g_c_w[i*4+2] = (C1(phase_offset) * 4096.0); +- g_c_w[i*4+3] = (C0(phase_offset) * 4096.0); +- } +- +- for (i = 0; i < g_nh; i++) +- { +- phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; +- g_c_h[i*4] = (C0(phase_offset) * 4096.0); +- 
g_c_h[i*4+1] = (C1(phase_offset) * 4096.0); +- g_c_h[i*4+2] = (C2(phase_offset) * 4096.0); +- g_c_h[i*4+3] = (C3(phase_offset) * 4096.0); +- } +- +- for (i = 0; i < g_nh_uv; i++) +- { +- phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; +- g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0); +- g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0); +- g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0); +- g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0); +- } ++ for (i = 0; i < g_nw; i++) { ++ phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; ++ g_c_w[i * 4] = (C3(phase_offset) * 4096.0); ++ g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0); ++ g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0); ++ g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0); ++ } ++ ++ for (i = 0; i < g_nh; i++) { ++ phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; ++ g_c_h[i * 4] = (C0(phase_offset) * 4096.0); ++ g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0); ++ g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0); ++ g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0); ++ } ++ ++ for (i = 0; i < g_nh_uv; i++) { ++ phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; ++ g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0); ++ g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0); ++ g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0); ++ g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0); ++ } + + #endif + +- // Create an array that corresponds input lines to output lines. +- // This doesn't require floating point math, but it does require +- // a division and because hardware division is not present that +- // is a call. +- for (i = 0; i < out_width; i++) +- { +- g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; ++ // Create an array that corresponds input lines to output lines. ++ // This doesn't require floating point math, but it does require ++ // a division and because hardware division is not present that ++ // is a call. ++ for (i = 0; i < out_width; i++) { ++ g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; + +- if ((g_b_scaler.l_w[i] + 2) <= in_width) +- g_b_scaler.max_usable_out_width = i; ++ if ((g_b_scaler.l_w[i] + 2) <= in_width) ++ g_b_scaler.max_usable_out_width = i; + +- } ++ } + +- for (i = 0; i < out_height + 1; i++) +- { +- g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; +- g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; +- } ++ for (i = 0; i < out_height + 1; i++) { ++ g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; ++ g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; ++ } + +- return 0; ++ return 0; + } + + int bicubic_scale(int in_width, int in_height, int in_stride, + int out_width, int out_height, int out_stride, +- unsigned char *input_image, unsigned char *output_image) +-{ +- short *RESTRICT l_w, * RESTRICT l_h; +- short *RESTRICT c_w, * RESTRICT c_h; +- unsigned char *RESTRICT ip, * RESTRICT op; +- unsigned char *RESTRICT hbuf; +- int h, w, lw, lh; +- int temp_sum; +- int phase_offset_w, phase_offset_h; +- +- c_w = g_b_scaler.c_w; +- c_h = g_b_scaler.c_h; +- +- op = output_image; +- +- l_w = g_b_scaler.l_w; +- l_h = g_b_scaler.l_h; +- +- phase_offset_h = 0; +- +- for (h = 0; h < out_height; h++) +- { +- // select the row to work on +- lh = l_h[h]; +- ip = input_image + (in_stride * lh); +- +- // vp8_filter the row vertically into an temporary buffer. +- // If the phase offset == 0 then all the multiplication +- // is going to result in the output equalling the input. +- // So instead point the temporary buffer to the input. 
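
The shortcut that comment describes is a standard polyphase trick: at phase offset 0 the vertical taps are C0(0) = 0, C1(0) = 0, C2(0) = 1, C3(0) = 0, i.e. the identity, so filtering would only copy the row. Sketched with hypothetical names:

    static const unsigned char *pick_row(int phase, int lh, int in_height,
                                         const unsigned char *src_row,
                                         const unsigned char *filtered_row) {
      if (phase != 0 && lh < in_height - 2)
        return filtered_row;   /* genuinely filtered copy */
      return src_row;          /* phase 0 or bottom edge: output == input */
    }
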
+- // Also handle the boundry condition of not being able to +- // filter that last lines. +- if (phase_offset_h && (lh < in_height - 2)) +- { +- hbuf = g_b_scaler.hbuf; +- +- for (w = 0; w < in_width; w++) +- { +- temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride]; +- temp_sum += c_h[phase_offset_h*4+2] * ip[w]; +- temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride]; +- temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride]; +- +- hbuf[w] = temp_sum >> 12; +- } +- } +- else +- hbuf = ip; +- +- // increase the phase offset for the next time around. +- if (++phase_offset_h >= g_b_scaler.nh) +- phase_offset_h = 0; +- +- // now filter and expand it horizontally into the final +- // output buffer ++ unsigned char *input_image, unsigned char *output_image) { ++ short *RESTRICT l_w, * RESTRICT l_h; ++ short *RESTRICT c_w, * RESTRICT c_h; ++ unsigned char *RESTRICT ip, * RESTRICT op; ++ unsigned char *RESTRICT hbuf; ++ int h, w, lw, lh; ++ int temp_sum; ++ int phase_offset_w, phase_offset_h; ++ ++ c_w = g_b_scaler.c_w; ++ c_h = g_b_scaler.c_h; ++ ++ op = output_image; ++ ++ l_w = g_b_scaler.l_w; ++ l_h = g_b_scaler.l_h; ++ ++ phase_offset_h = 0; ++ ++ for (h = 0; h < out_height; h++) { ++ // select the row to work on ++ lh = l_h[h]; ++ ip = input_image + (in_stride * lh); ++ ++ // vp8_filter the row vertically into an temporary buffer. ++ // If the phase offset == 0 then all the multiplication ++ // is going to result in the output equalling the input. ++ // So instead point the temporary buffer to the input. ++ // Also handle the boundry condition of not being able to ++ // filter that last lines. ++ if (phase_offset_h && (lh < in_height - 2)) { ++ hbuf = g_b_scaler.hbuf; ++ ++ for (w = 0; w < in_width; w++) { ++ temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride]; ++ temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w]; ++ temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride]; ++ temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride]; ++ ++ hbuf[w] = temp_sum >> 12; ++ } ++ } else ++ hbuf = ip; ++ ++ // increase the phase offset for the next time around. 
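
The increment right after is the other half of the bookkeeping: the phase offset walks 0..n-1 and wraps once per output row. Equivalent sketch:

    static int next_phase(int phase, int n) {
      return (phase + 1 >= n) ? 0 : phase + 1;   /* mirrors '++x >= n' */
    }

With nw = 3 from the earlier worked example, the horizontal pass repeats phases 0, 1, 2, 0, 1, 2, ... across the line.
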
++ if (++phase_offset_h >= g_b_scaler.nh) ++ phase_offset_h = 0; ++ ++ // now filter and expand it horizontally into the final ++ // output buffer ++ phase_offset_w = 0; ++ ++ for (w = 0; w < out_width; w++) { ++ // get the index to use to expand the image ++ lw = l_w[w]; ++ ++ temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1]; ++ temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw]; ++ temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1]; ++ temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2]; ++ temp_sum = temp_sum >> 12; ++ ++ if (++phase_offset_w >= g_b_scaler.nw) + phase_offset_w = 0; + +- for (w = 0; w < out_width; w++) +- { +- // get the index to use to expand the image +- lw = l_w[w]; +- +- temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1]; +- temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw]; +- temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1]; +- temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2]; +- temp_sum = temp_sum >> 12; ++ // boundry conditions ++ if ((lw + 2) >= in_width) ++ temp_sum = hbuf[lw]; + +- if (++phase_offset_w >= g_b_scaler.nw) +- phase_offset_w = 0; ++ if (lw == 0) ++ temp_sum = hbuf[0]; + +- // boundry conditions +- if ((lw + 2) >= in_width) +- temp_sum = hbuf[lw]; +- +- if (lw == 0) +- temp_sum = hbuf[0]; +- +- op[w] = temp_sum; +- } +- +- op += out_stride; ++ op[w] = temp_sum; + } + +- return 0; ++ op += out_stride; ++ } ++ ++ return 0; + } + +-void bicubic_scale_frame_reset() +-{ +- g_b_scaler.out_width = 0; +- g_b_scaler.out_height = 0; ++void bicubic_scale_frame_reset() { ++ g_b_scaler.out_width = 0; ++ g_b_scaler.out_height = 0; + } + + void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, +- int new_width, int new_height) +-{ ++ int new_width, int new_height) { + +- dst->y_width = new_width; +- dst->y_height = new_height; +- dst->uv_width = new_width / 2; +- dst->uv_height = new_height / 2; ++ dst->y_width = new_width; ++ dst->y_height = new_height; ++ dst->uv_width = new_width / 2; ++ dst->uv_height = new_height / 2; + +- dst->y_stride = dst->y_width; +- dst->uv_stride = dst->uv_width; ++ dst->y_stride = dst->y_width; ++ dst->uv_stride = dst->uv_width; + +- bicubic_scale(src->y_width, src->y_height, src->y_stride, +- new_width, new_height, dst->y_stride, +- src->y_buffer, dst->y_buffer); ++ bicubic_scale(src->y_width, src->y_height, src->y_stride, ++ new_width, new_height, dst->y_stride, ++ src->y_buffer, dst->y_buffer); + +- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, +- new_width / 2, new_height / 2, dst->uv_stride, +- src->u_buffer, dst->u_buffer); ++ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, ++ new_width / 2, new_height / 2, dst->uv_stride, ++ src->u_buffer, dst->u_buffer); + +- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, +- new_width / 2, new_height / 2, dst->uv_stride, +- src->v_buffer, dst->v_buffer); ++ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, ++ new_width / 2, new_height / 2, dst->uv_stride, ++ src->v_buffer, dst->v_buffer); + } +diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c +index 9beb162..60c21fb 100644 +--- a/vpx_scale/generic/gen_scalers.c ++++ b/vpx_scale/generic/gen_scalers.c +@@ -34,47 +34,42 @@ + * SPECIAL NOTES : None. 
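
Note what the "boundry conditions" branch in the horizontal loop actually does: when the four-tap window (lw - 1 .. lw + 2) would step outside the row, the filtered value is thrown away and the nearest source sample is used instead. As a standalone sketch:

    static int clamp_edges(int filtered, int lw, int in_width,
                           const unsigned char *hbuf) {
      if (lw + 2 >= in_width)
        return hbuf[lw];   /* right edge: window ran past the row */
      if (lw == 0)
        return hbuf[0];    /* left edge: hbuf[lw - 1] was out of range */
      return filtered;
    }
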
+ * + ****************************************************************************/ +-void vp8_horizontal_line_4_5_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width - 4; i += 4) +- { +- a = src[0]; +- b = src[1]; +- des [0] = (unsigned char) a; +- des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); +- c = src[2] * 154; +- a = src[3]; +- des [2] = (unsigned char)((b * 102 + c + 128) >> 8); +- des [3] = (unsigned char)((c + 102 * a + 128) >> 8); +- b = src[4]; +- des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); +- +- src += 4; +- des += 5; +- } +- ++void vp8_horizontal_line_4_5_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width - 4; i += 4) { + a = src[0]; + b = src[1]; +- des [0] = (unsigned char)(a); ++ des [0] = (unsigned char) a; + des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + c = src[2] * 154; + a = src[3]; + des [2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [3] = (unsigned char)((c + 102 * a + 128) >> 8); +- des [4] = (unsigned char)(a); ++ b = src[4]; ++ des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); ++ ++ src += 4; ++ des += 5; ++ } ++ ++ a = src[0]; ++ b = src[1]; ++ des [0] = (unsigned char)(a); ++ des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); ++ c = src[2] * 154; ++ a = src[3]; ++ des [2] = (unsigned char)((b * 102 + c + 128) >> 8); ++ des [3] = (unsigned char)((c + 102 * a + 128) >> 8); ++ des [4] = (unsigned char)(a); + + } + +@@ -97,31 +92,31 @@ void vp8_horizontal_line_4_5_scale_c + * the current band. 
+ * + ****************************************************************************/ +-void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c, d; +- unsigned char *des = dest; ++void vp8_vertical_band_4_5_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c, d; ++ unsigned char *des = dest; + +- for (i = 0; i < dest_width; i++) +- { +- a = des [0]; +- b = des [dest_pitch]; ++ for (i = 0; i < dest_width; i++) { ++ a = des [0]; ++ b = des [dest_pitch]; + +- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); ++ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + +- c = des[dest_pitch*2] * 154; +- d = des[dest_pitch*3]; ++ c = des[dest_pitch * 2] * 154; ++ d = des[dest_pitch * 3]; + +- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); +- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); ++ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); ++ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); + +- /* First line in next band */ +- a = des [dest_pitch * 5]; +- des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); ++ /* First line in next band */ ++ a = des [dest_pitch * 5]; ++ des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); + +- des ++; +- } ++ des++; ++ } + } + + /**************************************************************************** +@@ -144,30 +139,30 @@ void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, + * last band. + * + ****************************************************************************/ +-void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c, d; +- unsigned char *des = dest; ++void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c, d; ++ unsigned char *des = dest; + +- for (i = 0; i < dest_width; ++i) +- { +- a = des[0]; +- b = des[dest_pitch]; ++ for (i = 0; i < dest_width; ++i) { ++ a = des[0]; ++ b = des[dest_pitch]; + +- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); ++ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + +- c = des[dest_pitch*2] * 154; +- d = des[dest_pitch*3]; ++ c = des[dest_pitch * 2] * 154; ++ d = des[dest_pitch * 3]; + +- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); +- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); ++ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); ++ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); + +- /* No other line for interplation of this line, so .. */ +- des[dest_pitch*4] = (unsigned char) d; ++ /* No other line for interplation of this line, so .. 
*/ ++ des[dest_pitch * 4] = (unsigned char) d; + +- des++; +- } ++ des++; ++ } + } + + /**************************************************************************** +@@ -190,40 +185,35 @@ void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_p + * + * + ****************************************************************************/ +-void vp8_horizontal_line_2_3_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width - 2; i += 2) +- { +- a = src[0]; +- b = src[1]; +- c = src[2]; +- +- des [0] = (unsigned char)(a); +- des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); +- des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); +- +- src += 2; +- des += 3; +- } +- ++void vp8_horizontal_line_2_3_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width - 2; i += 2) { + a = src[0]; + b = src[1]; ++ c = src[2]; ++ + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); +- des [2] = (unsigned char)(b); ++ des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); ++ ++ src += 2; ++ des += 3; ++ } ++ ++ a = src[0]; ++ b = src[1]; ++ des [0] = (unsigned char)(a); ++ des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); ++ des [2] = (unsigned char)(b); + } + + +@@ -246,22 +236,22 @@ void vp8_horizontal_line_2_3_scale_c + * the current band. + * + ****************************************************************************/ +-void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- +- for (i = 0; i < dest_width; i++) +- { +- a = des [0]; +- b = des [dest_pitch]; +- c = des[dest_pitch*3]; +- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); +- des [dest_pitch*2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); +- +- des++; +- } ++void vp8_vertical_band_2_3_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ ++ for (i = 0; i < dest_width; i++) { ++ a = des [0]; ++ b = des [dest_pitch]; ++ c = des[dest_pitch * 3]; ++ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); ++ des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); ++ ++ des++; ++ } + } + + /**************************************************************************** +@@ -284,21 +274,21 @@ void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, + * last band. 
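
These function pairs show the in-place band scheme used throughout the file: a regular vertical band borrows the first line of the next band for its bottom interpolation, while the '_last_' variant has no next band and replicates its final line instead. Both behaviours in one sketch, using the 4-to-5 weights:

    static unsigned char bottom_sample(const unsigned char *col, int pitch,
                                       int is_last_band) {
      unsigned int d = col[3 * pitch];     /* last line of this band */
      if (is_last_band)
        return (unsigned char)d;           /* nothing below: replicate */
      return (unsigned char)((d * 205 + 51 * col[5 * pitch] + 128) >> 8);
    }
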
+ * + ****************************************************************************/ +-void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b; +- unsigned char *des = dest; +- +- for (i = 0; i < dest_width; ++i) +- { +- a = des [0]; +- b = des [dest_pitch]; +- +- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); +- des [dest_pitch*2] = (unsigned char)(b); +- des++; +- } ++void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b; ++ unsigned char *des = dest; ++ ++ for (i = 0; i < dest_width; ++i) { ++ a = des [0]; ++ b = des [dest_pitch]; ++ ++ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); ++ des [dest_pitch * 2] = (unsigned char)(b); ++ des++; ++ } + } + + /**************************************************************************** +@@ -321,49 +311,44 @@ void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_p + * + * + ****************************************************************************/ +-void vp8_horizontal_line_3_5_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width - 3; i += 3) +- { +- a = src[0]; +- b = src[1]; +- des [0] = (unsigned char)(a); +- des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); +- +- c = src[2] ; +- des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); +- des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); +- +- a = src[3]; +- des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); +- +- src += 3; +- des += 5; +- } +- ++void vp8_horizontal_line_3_5_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width - 3; i += 3) { + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); +- + des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); +- c = src[2] ; ++ ++ c = src[2]; + des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + +- des [4] = (unsigned char)(c); ++ a = src[3]; ++ des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); ++ ++ src += 3; ++ des += 5; ++ } ++ ++ a = src[0]; ++ b = src[1]; ++ des [0] = (unsigned char)(a); ++ ++ des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); ++ c = src[2]; ++ des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); ++ des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); ++ ++ des [4] = (unsigned char)(c); + } + + /**************************************************************************** +@@ -385,28 +370,28 @@ void vp8_horizontal_line_3_5_scale_c + * the current band. 
+ * + ****************************************************************************/ +-void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- +- for (i = 0; i < dest_width; i++) +- { +- a = des [0]; +- b = des [dest_pitch]; +- des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); +- +- c = des[dest_pitch*2]; +- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); +- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); +- +- /* First line in next band... */ +- a = des [dest_pitch * 5]; +- des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); +- +- des++; +- } ++void vp8_vertical_band_3_5_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ ++ for (i = 0; i < dest_width; i++) { ++ a = des [0]; ++ b = des [dest_pitch]; ++ des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); ++ ++ c = des[dest_pitch * 2]; ++ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); ++ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); ++ ++ /* First line in next band... */ ++ a = des [dest_pitch * 5]; ++ des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); ++ ++ des++; ++ } + } + + /**************************************************************************** +@@ -429,28 +414,28 @@ void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, + * last band. + * + ****************************************************************************/ +-void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; ++void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; + +- for (i = 0; i < dest_width; ++i) +- { +- a = des [0]; +- b = des [dest_pitch]; ++ for (i = 0; i < dest_width; ++i) { ++ a = des [0]; ++ b = des [dest_pitch]; + +- des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); ++ des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + +- c = des[dest_pitch*2]; +- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); +- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); ++ c = des[dest_pitch * 2]; ++ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); ++ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + +- /* No other line for interplation of this line, so .. */ +- des [ dest_pitch * 4 ] = (unsigned char)(c) ; ++ /* No other line for interplation of this line, so .. 
*/ ++ des [ dest_pitch * 4 ] = (unsigned char)(c); + +- des++; +- } ++ des++; ++ } + } + + /**************************************************************************** +@@ -473,46 +458,41 @@ void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_p + * + * + ****************************************************************************/ +-void vp8_horizontal_line_3_4_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width - 3; i += 3) +- { +- a = src[0]; +- b = src[1]; +- des [0] = (unsigned char)(a); +- des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); +- +- c = src[2]; +- des [2] = (unsigned char)((b + c + 1) >> 1); +- +- a = src[3]; +- des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); +- +- src += 3; +- des += 4; +- } +- ++void vp8_horizontal_line_3_4_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width - 3; i += 3) { + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + +- c = src[2] ; ++ c = src[2]; + des [2] = (unsigned char)((b + c + 1) >> 1); +- des [3] = (unsigned char)(c); ++ ++ a = src[3]; ++ des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); ++ ++ src += 3; ++ des += 4; ++ } ++ ++ a = src[0]; ++ b = src[1]; ++ des [0] = (unsigned char)(a); ++ des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); ++ ++ c = src[2]; ++ des [2] = (unsigned char)((b + c + 1) >> 1); ++ des [3] = (unsigned char)(c); + } + + /**************************************************************************** +@@ -534,27 +514,27 @@ void vp8_horizontal_line_3_4_scale_c + * the current band. + * + ****************************************************************************/ +-void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; +- +- for (i = 0; i < dest_width; i++) +- { +- a = des [0]; +- b = des [dest_pitch]; +- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); +- +- c = des[dest_pitch*2]; +- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); +- +- /* First line in next band... */ +- a = des [dest_pitch*4]; +- des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); +- +- des++; +- } ++void vp8_vertical_band_3_4_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; ++ ++ for (i = 0; i < dest_width; i++) { ++ a = des [0]; ++ b = des [dest_pitch]; ++ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); ++ ++ c = des[dest_pitch * 2]; ++ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); ++ ++ /* First line in next band... 
*/ ++ a = des [dest_pitch * 4]; ++ des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); ++ ++ des++; ++ } + } + + /**************************************************************************** +@@ -577,27 +557,27 @@ void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, + * last band. + * + ****************************************************************************/ +-void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c; +- unsigned char *des = dest; ++void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c; ++ unsigned char *des = dest; + +- for (i = 0; i < dest_width; ++i) +- { +- a = des [0]; +- b = des [dest_pitch]; ++ for (i = 0; i < dest_width; ++i) { ++ a = des [0]; ++ b = des [dest_pitch]; + +- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); ++ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + +- c = des[dest_pitch*2]; +- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); ++ c = des[dest_pitch * 2]; ++ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); + +- /* No other line for interplation of this line, so .. */ +- des [dest_pitch*3] = (unsigned char)(c); ++ /* No other line for interplation of this line, so .. */ ++ des [dest_pitch * 3] = (unsigned char)(c); + +- des++; +- } ++ des++; ++ } + } + + /**************************************************************************** +@@ -619,34 +599,29 @@ void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_p + * SPECIAL NOTES : None. + * + ****************************************************************************/ +-void vp8_horizontal_line_1_2_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned int i; +- unsigned int a, b; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width - 1; i += 1) +- { +- a = src[0]; +- b = src[1]; +- des [0] = (unsigned char)(a); +- des [1] = (unsigned char)((a + b + 1) >> 1); +- src += 1; +- des += 2; +- } +- ++void vp8_horizontal_line_1_2_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width - 1; i += 1) { + a = src[0]; ++ b = src[1]; + des [0] = (unsigned char)(a); +- des [1] = (unsigned char)(a); ++ des [1] = (unsigned char)((a + b + 1) >> 1); ++ src += 1; ++ des += 2; ++ } ++ ++ a = src[0]; ++ des [0] = (unsigned char)(a); ++ des [1] = (unsigned char)(a); + } + + /**************************************************************************** +@@ -668,21 +643,21 @@ void vp8_horizontal_line_1_2_scale_c + * the current band. 
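
The 1-to-2 path above is the simplest of the family: copy each pixel, average it with its right neighbour for the in-between sample, and replicate at the tail where no neighbour exists. The same logic as a standalone routine:

    static void scale_line_1_2(const unsigned char *src, int n,
                               unsigned char *dst) {  /* assumes n >= 1 */
      int i;
      for (i = 0; i < n - 1; i++) {
        dst[2 * i] = src[i];
        dst[2 * i + 1] = (unsigned char)((src[i] + src[i + 1] + 1) >> 1);
      }
      dst[2 * i] = src[i];       /* tail pixel: no right neighbour */
      dst[2 * i + 1] = src[i];
    }
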
+ * + ****************************************************************************/ +-void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b; +- unsigned char *des = dest; ++void vp8_vertical_band_1_2_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b; ++ unsigned char *des = dest; + +- for (i = 0; i < dest_width; i++) +- { +- a = des [0]; +- b = des [dest_pitch * 2]; ++ for (i = 0; i < dest_width; i++) { ++ a = des [0]; ++ b = des [dest_pitch * 2]; + +- des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); ++ des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); + +- des++; +- } ++ des++; ++ } + } + + /**************************************************************************** +@@ -705,16 +680,16 @@ void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, + * last band. + * + ****************************************************************************/ +-void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned char *des = dest; +- +- for (i = 0; i < dest_width; ++i) +- { +- des[dest_pitch] = des[0]; +- des++; +- } ++void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned char *des = dest; ++ ++ for (i = 0; i < dest_width; ++i) { ++ des[dest_pitch] = des[0]; ++ des++; ++ } + } + + +@@ -740,67 +715,64 @@ void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_p + * SPECIAL NOTES : None. + * + ****************************************************************************/ +-void vp8_horizontal_line_5_4_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned i; +- unsigned int a, b, c, d, e; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width; i += 5) +- { +- a = src[0]; +- b = src[1]; +- c = src[2]; +- d = src[3]; +- e = src[4]; +- +- des[0] = (unsigned char) a; +- des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); +- des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); +- des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); +- +- src += 5; +- des += 4; +- } ++void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned i; ++ unsigned int a, b, c, d, e; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width; i += 5) { ++ a = src[0]; ++ b = src[1]; ++ c = src[2]; ++ d = src[3]; ++ e = src[4]; ++ ++ des[0] = (unsigned char) a; ++ des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); ++ des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); ++ des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); ++ ++ src += 5; ++ des += 4; ++ } + } + + + + +-void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c, d, e; +- unsigned char *des = dest; +- unsigned char *src = source; ++void vp8_vertical_band_5_4_scale_c(unsigned char *source, ++ unsigned int src_pitch, ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ 
unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c, d, e; ++ unsigned char *des = dest; ++ unsigned char *src = source; + +- for (i = 0; i < dest_width; i++) +- { ++ for (i = 0; i < dest_width; i++) { + +- a = src[0 * src_pitch]; +- b = src[1 * src_pitch]; +- c = src[2 * src_pitch]; +- d = src[3 * src_pitch]; +- e = src[4 * src_pitch]; ++ a = src[0 * src_pitch]; ++ b = src[1 * src_pitch]; ++ c = src[2 * src_pitch]; ++ d = src[3 * src_pitch]; ++ e = src[4 * src_pitch]; + +- des[0 * dest_pitch] = (unsigned char) a; +- des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); +- des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); +- des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); ++ des[0 * dest_pitch] = (unsigned char) a; ++ des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); ++ des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); ++ des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); + +- src ++; +- des ++; ++ src++; ++ des++; + +- } ++ } + } + + +@@ -824,63 +796,60 @@ void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch + * + * + ****************************************************************************/ +-void vp8_horizontal_line_5_3_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned int i; +- unsigned int a, b, c, d , e; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width; i += 5) +- { +- a = src[0]; +- b = src[1]; +- c = src[2]; +- d = src[3]; +- e = src[4]; +- +- des[0] = (unsigned char) a; +- des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); +- des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); +- +- src += 5; +- des += 3; +- } ++void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c, d, e; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width; i += 5) { ++ a = src[0]; ++ b = src[1]; ++ c = src[2]; ++ d = src[3]; ++ e = src[4]; ++ ++ des[0] = (unsigned char) a; ++ des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); ++ des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); ++ ++ src += 5; ++ des += 3; ++ } + + } + +-void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- unsigned int i; +- unsigned int a, b, c, d, e; +- unsigned char *des = dest; +- unsigned char *src = source; ++void vp8_vertical_band_5_3_scale_c(unsigned char *source, ++ unsigned int src_pitch, ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a, b, c, d, e; ++ unsigned char *des = dest; ++ unsigned char *src = source; + +- for (i = 0; i < dest_width; i++) +- { ++ for (i = 0; i < dest_width; i++) { + +- a = src[0 * src_pitch]; +- b = src[1 * src_pitch]; +- c = src[2 * src_pitch]; +- d = src[3 * src_pitch]; +- e = src[4 * src_pitch]; ++ a = src[0 * src_pitch]; ++ b = src[1 * src_pitch]; ++ c = src[2 * src_pitch]; ++ d = src[3 * src_pitch]; ++ e = src[4 * src_pitch]; + +- des[0 * dest_pitch] = (unsigned char) a; +- des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); +- des[2 * 
dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); ++ des[0 * dest_pitch] = (unsigned char) a; ++ des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); ++ des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); + +- src ++; +- des ++; ++ src++; ++ des++; + +- } ++ } + } + + /**************************************************************************** +@@ -902,55 +871,52 @@ void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch + * SPECIAL NOTES : None. + * + ****************************************************************************/ +-void vp8_horizontal_line_2_1_scale_c +-( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- unsigned int i; +- unsigned int a; +- unsigned char *des = dest; +- const unsigned char *src = source; +- +- (void) dest_width; +- +- for (i = 0; i < source_width; i += 2) +- { +- a = src[0]; +- des [0] = (unsigned char)(a); +- src += 2; +- des += 1; +- } +- +- +- +-} +-void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- (void) dest_pitch; +- (void) src_pitch; +- vpx_memcpy(dest, source, dest_width); ++void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width) { ++ unsigned int i; ++ unsigned int a; ++ unsigned char *des = dest; ++ const unsigned char *src = source; ++ ++ (void) dest_width; ++ ++ for (i = 0; i < source_width; i += 2) { ++ a = src[0]; ++ des [0] = (unsigned char)(a); ++ src += 2; ++ des += 1; ++ } + } + +-void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- int i; +- int temp; +- int width = dest_width; +- +- (void) dest_pitch; +- +- for (i = 0; i < width; i++) +- { +- temp = 8; +- temp += source[i-(int)src_pitch] * 3; +- temp += source[i] * 10; +- temp += source[i+src_pitch] * 3; +- temp >>= 4 ; +- dest[i] = (unsigned char)(temp); +- } ++void vp8_vertical_band_2_1_scale_c(unsigned char *source, ++ unsigned int src_pitch, ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ (void) dest_pitch; ++ (void) src_pitch; ++ vpx_memcpy(dest, source, dest_width); ++} + ++void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, ++ unsigned int src_pitch, ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width) { ++ int i; ++ int temp; ++ int width = dest_width; ++ ++ (void) dest_pitch; ++ ++ for (i = 0; i < width; i++) { ++ temp = 8; ++ temp += source[i - (int)src_pitch] * 3; ++ temp += source[i] * 10; ++ temp += source[i + src_pitch] * 3; ++ temp >>= 4; ++ dest[i] = (unsigned char)(temp); ++ } + } +diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c +index c02e4ff..7de85ca 100644 +--- a/vpx_scale/generic/vpxscale.c ++++ b/vpx_scale/generic/vpxscale.c +@@ -20,23 +20,22 @@ + /**************************************************************************** + * Header Files + ****************************************************************************/ +-#include "vpx_rtcd.h" ++#include "./vpx_rtcd.h" + #include "vpx_mem/vpx_mem.h" + #include "vpx_scale/yv12config.h" + #include "vpx_scale/scale_mode.h" + +-typedef struct +-{ +- int expanded_frame_width; +- int expanded_frame_height; ++typedef struct { ++ int expanded_frame_width; ++ int 
expanded_frame_height; + +- int HScale; +- int HRatio; +- int VScale; +- int VRatio; ++ int HScale; ++ int HRatio; ++ int VScale; ++ int VRatio; + +- YV12_BUFFER_CONFIG *src_yuv_config; +- YV12_BUFFER_CONFIG *dst_yuv_config; ++ YV12_BUFFER_CONFIG *src_yuv_config; ++ YV12_BUFFER_CONFIG *dst_yuv_config; + + } SCALE_VARS; + +@@ -60,15 +59,14 @@ typedef struct + ****************************************************************************/ + static + void horizontal_line_copy( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- (void) dest_width; +- +- duck_memcpy(dest, source, source_width); ++ const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width ++) { ++ (void) dest_width; ++ ++ duck_memcpy(dest, source, source_width); + } + /**************************************************************************** + * +@@ -90,16 +88,15 @@ void horizontal_line_copy( + ****************************************************************************/ + static + void null_scale( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- (void) dest; +- (void) dest_pitch; +- (void) dest_width; +- +- return; ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ (void) dest; ++ (void) dest_pitch; ++ (void) dest_width; ++ ++ return; + } + + /**************************************************************************** +@@ -127,35 +124,33 @@ void null_scale( + static + void scale1d_2t1_i + ( +- const unsigned char *source, +- int source_step, +- unsigned int source_scale, +- unsigned int source_length, +- unsigned char *dest, +- int dest_step, +- unsigned int dest_scale, +- unsigned int dest_length +-) +-{ +- unsigned int i, j; +- unsigned int temp; +- int source_pitch = source_step; +- (void) source_length; +- (void) source_scale; +- (void) dest_scale; +- +- source_step *= 2; +- dest[0] = source[0]; +- +- for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) +- { +- temp = 8; +- temp += 3 * source[j-source_pitch]; +- temp += 10 * source[j]; +- temp += 3 * source[j+source_pitch]; +- temp >>= 4; +- dest[i] = (char)(temp); +- } ++ const unsigned char *source, ++ int source_step, ++ unsigned int source_scale, ++ unsigned int source_length, ++ unsigned char *dest, ++ int dest_step, ++ unsigned int dest_scale, ++ unsigned int dest_length ++) { ++ unsigned int i, j; ++ unsigned int temp; ++ int source_pitch = source_step; ++ (void) source_length; ++ (void) source_scale; ++ (void) dest_scale; ++ ++ source_step *= 2; ++ dest[0] = source[0]; ++ ++ for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) { ++ temp = 8; ++ temp += 3 * source[j - source_pitch]; ++ temp += 10 * source[j]; ++ temp += 3 * source[j + source_pitch]; ++ temp >>= 4; ++ dest[i] = (char)(temp); ++ } + } + + /**************************************************************************** +@@ -183,27 +178,26 @@ void scale1d_2t1_i + static + void scale1d_2t1_ps + ( +- const unsigned char *source, +- int source_step, +- unsigned int source_scale, +- unsigned int source_length, +- unsigned char *dest, +- int dest_step, +- unsigned int dest_scale, +- unsigned int dest_length +-) +-{ +- unsigned int i, j; +- +- (void) source_length; +- (void) source_scale; +- (void) dest_scale; +- +- source_step *= 2; +- j = 0; +- +- for (i = 0; i < dest_length * dest_step; i += dest_step, j += 
source_step) +- dest[i] = source[j]; ++ const unsigned char *source, ++ int source_step, ++ unsigned int source_scale, ++ unsigned int source_length, ++ unsigned char *dest, ++ int dest_step, ++ unsigned int dest_scale, ++ unsigned int dest_length ++) { ++ unsigned int i, j; ++ ++ (void) source_length; ++ (void) source_scale; ++ (void) dest_scale; ++ ++ source_step *= 2; ++ j = 0; ++ ++ for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) ++ dest[i] = source[j]; + } + /**************************************************************************** + * +@@ -230,45 +224,42 @@ void scale1d_2t1_ps + static + void scale1d_c + ( +- const unsigned char *source, +- int source_step, +- unsigned int source_scale, +- unsigned int source_length, +- unsigned char *dest, +- int dest_step, +- unsigned int dest_scale, +- unsigned int dest_length +-) +-{ +- unsigned int i; +- unsigned int round_value = dest_scale / 2; +- unsigned int left_modifier = dest_scale; +- unsigned int right_modifier = 0; +- unsigned char left_pixel = *source; +- unsigned char right_pixel = *(source + source_step); +- +- (void) source_length; +- +- /* These asserts are needed if there are boundary issues... */ +- /*assert ( dest_scale > source_scale );*/ +- /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ +- +- for (i = 0; i < dest_length * dest_step; i += dest_step) +- { +- dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); +- +- right_modifier += source_scale; +- +- while (right_modifier > dest_scale) +- { +- right_modifier -= dest_scale; +- source += source_step; +- left_pixel = *source; +- right_pixel = *(source + source_step); +- } +- +- left_modifier = dest_scale - right_modifier; ++ const unsigned char *source, ++ int source_step, ++ unsigned int source_scale, ++ unsigned int source_length, ++ unsigned char *dest, ++ int dest_step, ++ unsigned int dest_scale, ++ unsigned int dest_length ++) { ++ unsigned int i; ++ unsigned int round_value = dest_scale / 2; ++ unsigned int left_modifier = dest_scale; ++ unsigned int right_modifier = 0; ++ unsigned char left_pixel = *source; ++ unsigned char right_pixel = *(source + source_step); ++ ++ (void) source_length; ++ ++ /* These asserts are needed if there are boundary issues... 
*/ ++ /*assert ( dest_scale > source_scale );*/ ++ /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ ++ ++ for (i = 0; i < dest_length * dest_step; i += dest_step) { ++ dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); ++ ++ right_modifier += source_scale; ++ ++ while (right_modifier > dest_scale) { ++ right_modifier -= dest_scale; ++ source += source_step; ++ left_pixel = *source; ++ right_pixel = *(source + source_step); + } ++ ++ left_modifier = dest_scale - right_modifier; ++ } + } + + /**************************************************************************** +@@ -304,246 +295,221 @@ void scale1d_c + static + void Scale2D + ( +- /*const*/ +- unsigned char *source, +- int source_pitch, +- unsigned int source_width, +- unsigned int source_height, +- unsigned char *dest, +- int dest_pitch, +- unsigned int dest_width, +- unsigned int dest_height, +- unsigned char *temp_area, +- unsigned char temp_area_height, +- unsigned int hscale, +- unsigned int hratio, +- unsigned int vscale, +- unsigned int vratio, +- unsigned int interlaced +-) +-{ +- /*unsigned*/ +- int i, j, k; +- int bands; +- int dest_band_height; +- int source_band_height; +- +- typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, +- unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); +- +- Scale1D Scale1Dv = scale1d_c; +- Scale1D Scale1Dh = scale1d_c; +- +- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; +- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; +- +- int ratio_scalable = 1; +- int interpolation = 0; +- +- unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */ +- unsigned char *line_src; +- +- +- source_base = (unsigned char *)source; +- +- if (source_pitch < 0) +- { +- int offset; +- +- offset = (source_height - 1); +- offset *= source_pitch; +- +- source_base += offset; +- } +- +- /* find out the ratio for each direction */ +- switch (hratio * 10 / hscale) +- { ++ /*const*/ ++ unsigned char *source, ++ int source_pitch, ++ unsigned int source_width, ++ unsigned int source_height, ++ unsigned char *dest, ++ int dest_pitch, ++ unsigned int dest_width, ++ unsigned int dest_height, ++ unsigned char *temp_area, ++ unsigned char temp_area_height, ++ unsigned int hscale, ++ unsigned int hratio, ++ unsigned int vscale, ++ unsigned int vratio, ++ unsigned int interlaced ++) { ++ /*unsigned*/ ++ int i, j, k; ++ int bands; ++ int dest_band_height; ++ int source_band_height; ++ ++ typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, ++ unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); ++ ++ Scale1D Scale1Dv = scale1d_c; ++ Scale1D Scale1Dh = scale1d_c; ++ ++ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; ++ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; ++ ++ int ratio_scalable = 1; ++ int interpolation = 0; ++ ++ unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? 
source : (source + ((source_height-1) * source_pitch))); */ ++ unsigned char *line_src; ++ ++ ++ source_base = (unsigned char *)source; ++ ++ if (source_pitch < 0) { ++ int offset; ++ ++ offset = (source_height - 1); ++ offset *= source_pitch; ++ ++ source_base += offset; ++ } ++ ++ /* find out the ratio for each direction */ ++ switch (hratio * 10 / hscale) { + case 8: +- /* 4-5 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_5_4_scale; +- break; ++ /* 4-5 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_5_4_scale; ++ break; + case 6: +- /* 3-5 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_5_3_scale; +- break; ++ /* 3-5 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_5_3_scale; ++ break; + case 5: +- /* 1-2 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_2_1_scale; +- break; ++ /* 1-2 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_2_1_scale; ++ break; + default: +- /* The ratio is not acceptable now */ +- /* throw("The ratio is not acceptable for now!"); */ +- ratio_scalable = 0; +- break; +- } ++ /* The ratio is not acceptable now */ ++ /* throw("The ratio is not acceptable for now!"); */ ++ ratio_scalable = 0; ++ break; ++ } + +- switch (vratio * 10 / vscale) +- { ++ switch (vratio * 10 / vscale) { + case 8: +- /* 4-5 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_5_4_scale; +- source_band_height = 5; +- dest_band_height = 4; +- break; ++ /* 4-5 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_5_4_scale; ++ source_band_height = 5; ++ dest_band_height = 4; ++ break; + case 6: +- /* 3-5 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_5_3_scale; +- source_band_height = 5; +- dest_band_height = 3; +- break; ++ /* 3-5 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_5_3_scale; ++ source_band_height = 5; ++ dest_band_height = 3; ++ break; + case 5: +- /* 1-2 Scale in vertical direction */ ++ /* 1-2 Scale in vertical direction */ + +- if (interlaced) +- { +- /* if the content is interlaced, point sampling is used */ +- vert_band_scale = vp8_vertical_band_2_1_scale; +- } +- else +- { ++ if (interlaced) { ++ /* if the content is interlaced, point sampling is used */ ++ vert_band_scale = vp8_vertical_band_2_1_scale; ++ } else { + +- interpolation = 1; +- /* if the content is progressive, interplo */ +- vert_band_scale = vp8_vertical_band_2_1_scale_i; ++ interpolation = 1; ++ /* if the content is progressive, interplo */ ++ vert_band_scale = vp8_vertical_band_2_1_scale_i; + +- } ++ } + +- source_band_height = 2; +- dest_band_height = 1; +- break; ++ source_band_height = 2; ++ dest_band_height = 1; ++ break; + default: +- /* The ratio is not acceptable now */ +- /* throw("The ratio is not acceptable for now!"); */ +- ratio_scalable = 0; +- break; ++ /* The ratio is not acceptable now */ ++ /* throw("The ratio is not acceptable for now!"); */ ++ ratio_scalable = 0; ++ break; ++ } ++ ++ if (ratio_scalable) { ++ if (source_height == dest_height) { ++ /* for each band of the image */ ++ for (k = 0; k < (int)dest_height; k++) { ++ horiz_line_scale(source, source_width, dest, dest_width); ++ source += source_pitch; ++ dest += dest_pitch; ++ } ++ ++ return; + } + +- if (ratio_scalable) +- { +- if (source_height == dest_height) +- { +- /* for each band of the image */ +- for (k = 0; k < (int)dest_height; k++) +- { +- horiz_line_scale(source, source_width, dest, dest_width); +- source += 
source_pitch; +- dest += dest_pitch; +- } +- +- return; +- } +- +- if (interpolation) +- { +- if (source < source_base) +- source = source_base; +- +- horiz_line_scale(source, source_width, temp_area, dest_width); +- } +- +- for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) +- { +- /* scale one band horizontally */ +- for (i = 0; i < source_band_height; i++) +- { +- /* Trap case where we could read off the base of the source buffer */ +- +- line_src = (unsigned char *)source + i * source_pitch; +- +- if (line_src < source_base) +- line_src = source_base; +- +- horiz_line_scale(line_src, source_width, +- temp_area + (i + 1)*dest_pitch, dest_width); +- } +- +- /* Vertical scaling is in place */ +- vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); +- +- if (interpolation) +- vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); +- +- /* Next band... */ +- source += (unsigned long) source_band_height * source_pitch; +- dest += (unsigned long) dest_band_height * dest_pitch; +- } +- +- return; ++ if (interpolation) { ++ if (source < source_base) ++ source = source_base; ++ ++ horiz_line_scale(source, source_width, temp_area, dest_width); + } + +- if (hscale == 2 && hratio == 1) +- Scale1Dh = scale1d_2t1_ps; ++ for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) { ++ /* scale one band horizontally */ ++ for (i = 0; i < source_band_height; i++) { ++ /* Trap case where we could read off the base of the source buffer */ + +- if (vscale == 2 && vratio == 1) +- { +- if (interlaced) +- Scale1Dv = scale1d_2t1_ps; +- else +- Scale1Dv = scale1d_2t1_i; +- } ++ line_src = (unsigned char *)source + i * source_pitch; + +- if (source_height == dest_height) +- { +- /* for each band of the image */ +- for (k = 0; k < (int)dest_height; k++) +- { +- Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); +- source += source_pitch; +- dest += dest_pitch; +- } +- +- return; +- } ++ if (line_src < source_base) ++ line_src = source_base; ++ ++ horiz_line_scale(line_src, source_width, ++ temp_area + (i + 1)*dest_pitch, dest_width); ++ } ++ ++ /* Vertical scaling is in place */ ++ vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); + +- if (dest_height > source_height) +- { +- dest_band_height = temp_area_height - 1; +- source_band_height = dest_band_height * source_height / dest_height; ++ if (interpolation) ++ vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); ++ ++ /* Next band... 
*/ ++ source += (unsigned long) source_band_height * source_pitch; ++ dest += (unsigned long) dest_band_height * dest_pitch; + } ++ ++ return; ++ } ++ ++ if (hscale == 2 && hratio == 1) ++ Scale1Dh = scale1d_2t1_ps; ++ ++ if (vscale == 2 && vratio == 1) { ++ if (interlaced) ++ Scale1Dv = scale1d_2t1_ps; + else +- { +- source_band_height = temp_area_height - 1; +- dest_band_height = source_band_height * vratio / vscale; ++ Scale1Dv = scale1d_2t1_i; ++ } ++ ++ if (source_height == dest_height) { ++ /* for each band of the image */ ++ for (k = 0; k < (int)dest_height; k++) { ++ Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); ++ source += source_pitch; ++ dest += dest_pitch; + } + +- /* first row needs to be done so that we can stay one row ahead for vertical zoom */ +- Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); ++ return; ++ } ++ ++ if (dest_height > source_height) { ++ dest_band_height = temp_area_height - 1; ++ source_band_height = dest_band_height * source_height / dest_height; ++ } else { ++ source_band_height = temp_area_height - 1; ++ dest_band_height = source_band_height * vratio / vscale; ++ } + +- /* for each band of the image */ +- bands = (dest_height + dest_band_height - 1) / dest_band_height; +- +- for (k = 0; k < bands; k++) +- { +- /* scale one band horizontally */ +- for (i = 1; i < source_band_height + 1; i++) +- { +- if (k * source_band_height + i < (int) source_height) +- { +- Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, +- temp_area + i * dest_pitch, 1, hratio, dest_width); +- } +- else /* Duplicate the last row */ +- { +- /* copy temp_area row 0 over from last row in the past */ +- duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); +- } +- } +- +- /* scale one band vertically */ +- for (j = 0; j < (int)dest_width; j++) +- { +- Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, +- &dest[j], dest_pitch, vratio, dest_band_height); +- } ++ /* first row needs to be done so that we can stay one row ahead for vertical zoom */ ++ Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); + ++ /* for each band of the image */ ++ bands = (dest_height + dest_band_height - 1) / dest_band_height; ++ ++ for (k = 0; k < bands; k++) { ++ /* scale one band horizontally */ ++ for (i = 1; i < source_band_height + 1; i++) { ++ if (k * source_band_height + i < (int) source_height) { ++ Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, ++ temp_area + i * dest_pitch, 1, hratio, dest_width); ++ } else { /* Duplicate the last row */ + /* copy temp_area row 0 over from last row in the past */ +- duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); ++ duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); ++ } ++ } + +- /* move to the next band */ +- source += source_band_height * source_pitch; +- dest += dest_band_height * dest_pitch; ++ /* scale one band vertically */ ++ for (j = 0; j < (int)dest_width; j++) { ++ Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, ++ &dest[j], dest_pitch, vratio, dest_band_height); + } ++ ++ /* copy temp_area row 0 over from last row in the past */ ++ duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); ++ ++ /* move to the next band */ ++ source += source_band_height * source_pitch; ++ dest += dest_band_height * dest_pitch; ++ } + } + + 
/**************************************************************************** +@@ -572,57 +538,56 @@ void Scale2D + ****************************************************************************/ + void vp8_scale_frame + ( +- YV12_BUFFER_CONFIG *src, +- YV12_BUFFER_CONFIG *dst, +- unsigned char *temp_area, +- unsigned char temp_height, +- unsigned int hscale, +- unsigned int hratio, +- unsigned int vscale, +- unsigned int vratio, +- unsigned int interlaced +-) +-{ +- int i; +- int dw = (hscale - 1 + src->y_width * hratio) / hscale; +- int dh = (vscale - 1 + src->y_height * vratio) / vscale; +- +- /* call our internal scaling routines!! */ +- Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, +- (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, +- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); +- +- if (dw < (int)dst->y_width) +- for (i = 0; i < dh; i++) +- duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1); +- +- if (dh < (int)dst->y_height) +- for (i = dh - 1; i < (int)dst->y_height; i++) +- duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); +- +- Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, +- (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, +- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); +- +- if (dw / 2 < (int)dst->uv_width) +- for (i = 0; i < dst->uv_height; i++) +- duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); +- +- if (dh / 2 < (int)dst->uv_height) +- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) +- duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); +- +- Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, +- (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, +- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); +- +- if (dw / 2 < (int)dst->uv_width) +- for (i = 0; i < dst->uv_height; i++) +- duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); +- +- if (dh / 2 < (int) dst->uv_height) +- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) +- duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); ++ YV12_BUFFER_CONFIG *src, ++ YV12_BUFFER_CONFIG *dst, ++ unsigned char *temp_area, ++ unsigned char temp_height, ++ unsigned int hscale, ++ unsigned int hratio, ++ unsigned int vscale, ++ unsigned int vratio, ++ unsigned int interlaced ++) { ++ int i; ++ int dw = (hscale - 1 + src->y_width * hratio) / hscale; ++ int dh = (vscale - 1 + src->y_height * vratio) / vscale; ++ ++ /* call our internal scaling routines!! 
*/ ++ Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, ++ (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, ++ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); ++ ++ if (dw < (int)dst->y_width) ++ for (i = 0; i < dh; i++) ++ duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); ++ ++ if (dh < (int)dst->y_height) ++ for (i = dh - 1; i < (int)dst->y_height; i++) ++ duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); ++ ++ Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, ++ (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, ++ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); ++ ++ if (dw / 2 < (int)dst->uv_width) ++ for (i = 0; i < dst->uv_height; i++) ++ duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); ++ ++ if (dh / 2 < (int)dst->uv_height) ++ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) ++ duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); ++ ++ Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, ++ (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, ++ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); ++ ++ if (dw / 2 < (int)dst->uv_width) ++ for (i = 0; i < dst->uv_height; i++) ++ duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); ++ ++ if (dh / 2 < (int) dst->uv_height) ++ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) ++ duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + } + /**************************************************************************** + * +@@ -651,183 +616,177 @@ void vp8_scale_frame + static + int any_ratio_2d_scale + ( +- SCALE_VARS *si, +- const unsigned char *source, +- int source_pitch, +- unsigned int source_width, +- unsigned int source_height, +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width, +- unsigned int dest_height +-) +-{ +- unsigned int i, k; +- unsigned int src_band_height = 0; +- unsigned int dest_band_height = 0; +- +- /* suggested scale factors */ +- int hs = si->HScale; +- int hr = si->HRatio; +- int vs = si->VScale; +- int vr = si->VRatio; +- +- /* assume the ratios are scalable instead of should be centered */ +- int ratio_scalable = 1; +- +- const unsigned char *source_base = ((source_pitch >= 0) ? 
source : (source + ((source_height - 1) * source_pitch))); +- const unsigned char *line_src; +- +- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; +- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; +- void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; +- +- (void) si; +- +- /* find out the ratio for each direction */ +- switch (hr * 30 / hs) +- { ++ SCALE_VARS *si, ++ const unsigned char *source, ++ int source_pitch, ++ unsigned int source_width, ++ unsigned int source_height, ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width, ++ unsigned int dest_height ++) { ++ unsigned int i, k; ++ unsigned int src_band_height = 0; ++ unsigned int dest_band_height = 0; ++ ++ /* suggested scale factors */ ++ int hs = si->HScale; ++ int hr = si->HRatio; ++ int vs = si->VScale; ++ int vr = si->VRatio; ++ ++ /* assume the ratios are scalable instead of should be centered */ ++ int ratio_scalable = 1; ++ ++ const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); ++ const unsigned char *line_src; ++ ++ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; ++ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; ++ void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; ++ ++ (void) si; ++ ++ /* find out the ratio for each direction */ ++ switch (hr * 30 / hs) { + case 24: +- /* 4-5 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_4_5_scale; +- break; ++ /* 4-5 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_4_5_scale; ++ break; + case 22: +- /* 3-4 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_3_4_scale; +- break; ++ /* 3-4 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_3_4_scale; ++ break; + + case 20: +- /* 4-5 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_2_3_scale; +- break; ++ /* 4-5 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_2_3_scale; ++ break; + case 18: +- /* 3-5 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_3_5_scale; +- break; ++ /* 3-5 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_3_5_scale; ++ break; + case 15: +- /* 1-2 Scale in Width direction */ +- horiz_line_scale = vp8_horizontal_line_1_2_scale; +- break; ++ /* 1-2 Scale in Width direction */ ++ horiz_line_scale = vp8_horizontal_line_1_2_scale; ++ break; + case 30: +- /* no scale in Width direction */ +- horiz_line_scale = horizontal_line_copy; +- break; ++ /* no scale in Width direction */ ++ horiz_line_scale = horizontal_line_copy; ++ break; + default: +- /* The ratio is not acceptable now */ +- /* throw("The ratio is not acceptable for now!"); */ +- ratio_scalable = 0; +- break; +- } ++ /* The ratio is not acceptable now */ ++ /* throw("The ratio is not acceptable for now!"); */ ++ ratio_scalable = 0; ++ break; ++ } + +- switch (vr * 30 / vs) +- { ++ switch (vr * 30 / vs) { + case 24: +- /* 4-5 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_4_5_scale; +- last_vert_band_scale = vp8_last_vertical_band_4_5_scale; +- src_band_height = 4; +- dest_band_height = 5; +- break; ++ /* 4-5 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_4_5_scale; ++ last_vert_band_scale = vp8_last_vertical_band_4_5_scale; ++ src_band_height = 
4; ++ dest_band_height = 5; ++ break; + case 22: +- /* 3-4 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_3_4_scale; +- last_vert_band_scale = vp8_last_vertical_band_3_4_scale; +- src_band_height = 3; +- dest_band_height = 4; +- break; ++ /* 3-4 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_3_4_scale; ++ last_vert_band_scale = vp8_last_vertical_band_3_4_scale; ++ src_band_height = 3; ++ dest_band_height = 4; ++ break; + case 20: +- /* 2-3 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_2_3_scale; +- last_vert_band_scale = vp8_last_vertical_band_2_3_scale; +- src_band_height = 2; +- dest_band_height = 3; +- break; ++ /* 2-3 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_2_3_scale; ++ last_vert_band_scale = vp8_last_vertical_band_2_3_scale; ++ src_band_height = 2; ++ dest_band_height = 3; ++ break; + case 18: +- /* 3-5 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_3_5_scale; +- last_vert_band_scale = vp8_last_vertical_band_3_5_scale; +- src_band_height = 3; +- dest_band_height = 5; +- break; ++ /* 3-5 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_3_5_scale; ++ last_vert_band_scale = vp8_last_vertical_band_3_5_scale; ++ src_band_height = 3; ++ dest_band_height = 5; ++ break; + case 15: +- /* 1-2 Scale in vertical direction */ +- vert_band_scale = vp8_vertical_band_1_2_scale; +- last_vert_band_scale = vp8_last_vertical_band_1_2_scale; +- src_band_height = 1; +- dest_band_height = 2; +- break; ++ /* 1-2 Scale in vertical direction */ ++ vert_band_scale = vp8_vertical_band_1_2_scale; ++ last_vert_band_scale = vp8_last_vertical_band_1_2_scale; ++ src_band_height = 1; ++ dest_band_height = 2; ++ break; + case 30: +- /* no scale in Width direction */ +- vert_band_scale = null_scale; +- last_vert_band_scale = null_scale; +- src_band_height = 4; +- dest_band_height = 4; +- break; ++ /* no scale in Width direction */ ++ vert_band_scale = null_scale; ++ last_vert_band_scale = null_scale; ++ src_band_height = 4; ++ dest_band_height = 4; ++ break; + default: +- /* The ratio is not acceptable now */ +- /* throw("The ratio is not acceptable for now!"); */ +- ratio_scalable = 0; +- break; +- } ++ /* The ratio is not acceptable now */ ++ /* throw("The ratio is not acceptable for now!"); */ ++ ratio_scalable = 0; ++ break; ++ } + +- if (ratio_scalable == 0) +- return ratio_scalable; ++ if (ratio_scalable == 0) ++ return ratio_scalable; + +- horiz_line_scale(source, source_width, dest, dest_width); ++ horiz_line_scale(source, source_width, dest, dest_width); + +- /* except last band */ +- for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) +- { +- /* scale one band horizontally */ +- for (i = 1; i < src_band_height; i++) +- { +- /* Trap case where we could read off the base of the source buffer */ +- line_src = source + i * source_pitch; ++ /* except last band */ ++ for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) { ++ /* scale one band horizontally */ ++ for (i = 1; i < src_band_height; i++) { ++ /* Trap case where we could read off the base of the source buffer */ ++ line_src = source + i * source_pitch; + +- if (line_src < source_base) +- line_src = source_base; ++ if (line_src < source_base) ++ line_src = source_base; + +- horiz_line_scale(line_src, source_width, +- dest + i * dest_pitch, dest_width); +- } ++ horiz_line_scale(line_src, source_width, ++ dest + i * dest_pitch, dest_width); ++ } + +- /* first 
line of next band */ +- /* Trap case where we could read off the base of the source buffer */ +- line_src = source + src_band_height * source_pitch; ++ /* first line of next band */ ++ /* Trap case where we could read off the base of the source buffer */ ++ line_src = source + src_band_height * source_pitch; + +- if (line_src < source_base) +- line_src = source_base; ++ if (line_src < source_base) ++ line_src = source_base; + +- horiz_line_scale(line_src, source_width, +- dest + dest_band_height * dest_pitch, +- dest_width); ++ horiz_line_scale(line_src, source_width, ++ dest + dest_band_height * dest_pitch, ++ dest_width); + +- /* Vertical scaling is in place */ +- vert_band_scale(dest, dest_pitch, dest_width); ++ /* Vertical scaling is in place */ ++ vert_band_scale(dest, dest_pitch, dest_width); + +- /* Next band... */ +- source += src_band_height * source_pitch; +- dest += dest_band_height * dest_pitch; +- } ++ /* Next band... */ ++ source += src_band_height * source_pitch; ++ dest += dest_band_height * dest_pitch; ++ } + +- /* scale one band horizontally */ +- for (i = 1; i < src_band_height; i++) +- { +- /* Trap case where we could read off the base of the source buffer */ +- line_src = source + i * source_pitch; ++ /* scale one band horizontally */ ++ for (i = 1; i < src_band_height; i++) { ++ /* Trap case where we could read off the base of the source buffer */ ++ line_src = source + i * source_pitch; + +- if (line_src < source_base) +- line_src = source_base; ++ if (line_src < source_base) ++ line_src = source_base; + +- horiz_line_scale(line_src, source_width, +- dest + i * dest_pitch, +- dest_width); +- } ++ horiz_line_scale(line_src, source_width, ++ dest + i * dest_pitch, ++ dest_width); ++ } + +- /* Vertical scaling is in place */ +- last_vert_band_scale(dest, dest_pitch, dest_width); ++ /* Vertical scaling is in place */ ++ last_vert_band_scale(dest, dest_pitch, dest_width); + +- return ratio_scalable; ++ return ratio_scalable; + } + + /**************************************************************************** +@@ -849,70 +808,69 @@ int any_ratio_2d_scale + * + ****************************************************************************/ + static +-int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) +-{ +- int i; +- int ew; +- int eh; +- +- /* suggested scale factors */ +- int hs = scale_vars->HScale; +- int hr = scale_vars->HRatio; +- int vs = scale_vars->VScale; +- int vr = scale_vars->VRatio; +- +- int ratio_scalable = 1; +- +- int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; +- int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; +- int dw = scale_vars->expanded_frame_width; +- int dh = scale_vars->expanded_frame_height; +- YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; +- YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; +- +- if (hr == 3) +- ew = (sw + 2) / 3 * 3 * hs / hr; +- else +- ew = (sw + 7) / 8 * 8 * hs / hr; +- +- if (vr == 3) +- eh = (sh + 2) / 3 * 3 * vs / vr; +- else +- eh = (sh + 7) / 8 * 8 * vs / vr; +- +- ratio_scalable = any_ratio_2d_scale(scale_vars, +- (const unsigned char *)src_yuv_config->y_buffer, +- src_yuv_config->y_stride, sw, sh, +- (unsigned char *) dst_yuv_config->y_buffer + YOffset, +- dst_yuv_config->y_stride, dw, dh); +- +- for (i = 0; i < eh; i++) +- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); +- +- for (i = dh; i < eh; i++) +- duck_memset(dst_yuv_config->y_buffer + YOffset + i * 
dst_yuv_config->y_stride, 0, ew); +- +- if (ratio_scalable == 0) +- return ratio_scalable; ++int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) { ++ int i; ++ int ew; ++ int eh; ++ ++ /* suggested scale factors */ ++ int hs = scale_vars->HScale; ++ int hr = scale_vars->HRatio; ++ int vs = scale_vars->VScale; ++ int vr = scale_vars->VRatio; ++ ++ int ratio_scalable = 1; ++ ++ int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; ++ int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; ++ int dw = scale_vars->expanded_frame_width; ++ int dh = scale_vars->expanded_frame_height; ++ YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; ++ YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; ++ ++ if (hr == 3) ++ ew = (sw + 2) / 3 * 3 * hs / hr; ++ else ++ ew = (sw + 7) / 8 * 8 * hs / hr; ++ ++ if (vr == 3) ++ eh = (sh + 2) / 3 * 3 * vs / vr; ++ else ++ eh = (sh + 7) / 8 * 8 * vs / vr; ++ ++ ratio_scalable = any_ratio_2d_scale(scale_vars, ++ (const unsigned char *)src_yuv_config->y_buffer, ++ src_yuv_config->y_stride, sw, sh, ++ (unsigned char *) dst_yuv_config->y_buffer + YOffset, ++ dst_yuv_config->y_stride, dw, dh); ++ ++ for (i = 0; i < eh; i++) ++ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); ++ ++ for (i = dh; i < eh; i++) ++ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); ++ ++ if (ratio_scalable == 0) ++ return ratio_scalable; + +- sw = (sw + 1) >> 1; +- sh = (sh + 1) >> 1; +- dw = (dw + 1) >> 1; +- dh = (dh + 1) >> 1; ++ sw = (sw + 1) >> 1; ++ sh = (sh + 1) >> 1; ++ dw = (dw + 1) >> 1; ++ dh = (dh + 1) >> 1; + +- any_ratio_2d_scale(scale_vars, +- (const unsigned char *)src_yuv_config->u_buffer, +- src_yuv_config->y_stride / 2, sw, sh, +- (unsigned char *)dst_yuv_config->u_buffer + UVOffset, +- dst_yuv_config->uv_stride, dw, dh); ++ any_ratio_2d_scale(scale_vars, ++ (const unsigned char *)src_yuv_config->u_buffer, ++ src_yuv_config->y_stride / 2, sw, sh, ++ (unsigned char *)dst_yuv_config->u_buffer + UVOffset, ++ dst_yuv_config->uv_stride, dw, dh); + +- any_ratio_2d_scale(scale_vars, +- (const unsigned char *)src_yuv_config->v_buffer, +- src_yuv_config->y_stride / 2, sw, sh, +- (unsigned char *)dst_yuv_config->v_buffer + UVOffset, +- dst_yuv_config->uv_stride, dw, dh); ++ any_ratio_2d_scale(scale_vars, ++ (const unsigned char *)src_yuv_config->v_buffer, ++ src_yuv_config->y_stride / 2, sw, sh, ++ (unsigned char *)dst_yuv_config->v_buffer + UVOffset, ++ dst_yuv_config->uv_stride, dw, dh); + +- return ratio_scalable; ++ return ratio_scalable; + } + + /**************************************************************************** +@@ -931,52 +889,48 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) + * + ****************************************************************************/ + static void +-center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) +-{ +- int i; +- int row_offset, col_offset; +- unsigned char *src_data_pointer; +- unsigned char *dst_data_pointer; +- +- /* center values */ +- row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; +- col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; +- +- /* Y's */ +- src_data_pointer = src_yuv_config->y_buffer; +- dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; +- +- for (i = 0; i < src_yuv_config->y_height; i++) +- { +- 
duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); +- dst_data_pointer += dst_yuv_config->y_stride; +- src_data_pointer += src_yuv_config->y_stride; +- } +- +- row_offset /= 2; +- col_offset /= 2; +- +- /* U's */ +- src_data_pointer = src_yuv_config->u_buffer; +- dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; +- +- for (i = 0; i < src_yuv_config->uv_height; i++) +- { +- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); +- dst_data_pointer += dst_yuv_config->uv_stride; +- src_data_pointer += src_yuv_config->uv_stride; +- } +- +- /* V's */ +- src_data_pointer = src_yuv_config->v_buffer; +- dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; +- +- for (i = 0; i < src_yuv_config->uv_height; i++) +- { +- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); +- dst_data_pointer += dst_yuv_config->uv_stride; +- src_data_pointer += src_yuv_config->uv_stride; +- } ++center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) { ++ int i; ++ int row_offset, col_offset; ++ unsigned char *src_data_pointer; ++ unsigned char *dst_data_pointer; ++ ++ /* center values */ ++ row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; ++ col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; ++ ++ /* Y's */ ++ src_data_pointer = src_yuv_config->y_buffer; ++ dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; ++ ++ for (i = 0; i < src_yuv_config->y_height; i++) { ++ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); ++ dst_data_pointer += dst_yuv_config->y_stride; ++ src_data_pointer += src_yuv_config->y_stride; ++ } ++ ++ row_offset /= 2; ++ col_offset /= 2; ++ ++ /* U's */ ++ src_data_pointer = src_yuv_config->u_buffer; ++ dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; ++ ++ for (i = 0; i < src_yuv_config->uv_height; i++) { ++ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); ++ dst_data_pointer += dst_yuv_config->uv_stride; ++ src_data_pointer += src_yuv_config->uv_stride; ++ } ++ ++ /* V's */ ++ src_data_pointer = src_yuv_config->v_buffer; ++ dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; ++ ++ for (i = 0; i < src_yuv_config->uv_height; i++) { ++ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); ++ dst_data_pointer += dst_yuv_config->uv_stride; ++ src_data_pointer += src_yuv_config->uv_stride; ++ } + } + + /**************************************************************************** +@@ -999,61 +953,58 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con + void + vp8_yv12_scale_or_center + ( +- YV12_BUFFER_CONFIG *src_yuv_config, +- YV12_BUFFER_CONFIG *dst_yuv_config, +- int expanded_frame_width, +- int expanded_frame_height, +- int scaling_mode, +- int HScale, +- int HRatio, +- int VScale, +- int VRatio +-) +-{ +- /*if ( ppi->post_processing_level ) +- update_umvborder ( ppi, frame_buffer );*/ +- +- +- switch (scaling_mode) +- { ++ YV12_BUFFER_CONFIG *src_yuv_config, ++ YV12_BUFFER_CONFIG *dst_yuv_config, ++ int expanded_frame_width, ++ int expanded_frame_height, ++ int scaling_mode, ++ int HScale, ++ int HRatio, ++ int VScale, ++ int VRatio ++) 
{ ++ /*if ( ppi->post_processing_level ) ++ update_umvborder ( ppi, frame_buffer );*/ ++ ++ ++ switch (scaling_mode) { + case SCALE_TO_FIT: +- case MAINTAIN_ASPECT_RATIO: +- { +- SCALE_VARS scale_vars; +- /* center values */ ++ case MAINTAIN_ASPECT_RATIO: { ++ SCALE_VARS scale_vars; ++ /* center values */ + #if 1 +- int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; +- int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; +- /*int YOffset = row * dst_yuv_config->y_width + col; +- int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ +- int YOffset = row * dst_yuv_config->y_stride + col; +- int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); ++ int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; ++ int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; ++ /*int YOffset = row * dst_yuv_config->y_width + col; ++ int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ ++ int YOffset = row * dst_yuv_config->y_stride + col; ++ int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); + #else +- int row = (src_yuv_config->y_height - expanded_frame_height) / 2; +- int col = (src_yuv_config->y_width - expanded_frame_width) / 2; +- int YOffset = row * src_yuv_config->y_width + col; +- int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); ++ int row = (src_yuv_config->y_height - expanded_frame_height) / 2; ++ int col = (src_yuv_config->y_width - expanded_frame_width) / 2; ++ int YOffset = row * src_yuv_config->y_width + col; ++ int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); + #endif + +- scale_vars.dst_yuv_config = dst_yuv_config; +- scale_vars.src_yuv_config = src_yuv_config; +- scale_vars.HScale = HScale; +- scale_vars.HRatio = HRatio; +- scale_vars.VScale = VScale; +- scale_vars.VRatio = VRatio; +- scale_vars.expanded_frame_width = expanded_frame_width; +- scale_vars.expanded_frame_height = expanded_frame_height; ++ scale_vars.dst_yuv_config = dst_yuv_config; ++ scale_vars.src_yuv_config = src_yuv_config; ++ scale_vars.HScale = HScale; ++ scale_vars.HRatio = HRatio; ++ scale_vars.VScale = VScale; ++ scale_vars.VRatio = VRatio; ++ scale_vars.expanded_frame_width = expanded_frame_width; ++ scale_vars.expanded_frame_height = expanded_frame_height; + +- /* perform center and scale */ +- any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); ++ /* perform center and scale */ ++ any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); + +- break; ++ break; + } + case CENTER: +- center_image(src_yuv_config, dst_yuv_config); +- break; ++ center_image(src_yuv_config, dst_yuv_config); ++ break; + + default: +- break; +- } ++ break; ++ } + } +diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c +index eff594e..4cb2a41 100644 +--- a/vpx_scale/generic/yv12config.c ++++ b/vpx_scale/generic/yv12config.c +@@ -20,81 +20,73 @@ + * + ****************************************************************************/ + int +-vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) +-{ +- if (ybf) +- { +- vpx_free(ybf->buffer_alloc); +- +- /* buffer_alloc isn't accessed by most functions. Rather y_buffer, +- u_buffer and v_buffer point to buffer_alloc and are used. 
Clear out +- all of this so that a freed pointer isn't inadvertently used */ +- vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG)); +- } +- else +- { +- return -1; +- } +- +- return 0; ++vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { ++ if (ybf) { ++ vpx_free(ybf->buffer_alloc); ++ ++ /* buffer_alloc isn't accessed by most functions. Rather y_buffer, ++ u_buffer and v_buffer point to buffer_alloc and are used. Clear out ++ all of this so that a freed pointer isn't inadvertently used */ ++ vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); ++ } else { ++ return -1; ++ } ++ ++ return 0; + } + + /**************************************************************************** + * + ****************************************************************************/ + int +-vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) +-{ +-/*NOTE:*/ +- +- if (ybf) +- { +- int y_stride = ((width + 2 * border) + 31) & ~31; +- int yplane_size = (height + 2 * border) * y_stride; +- int uv_width = width >> 1; +- int uv_height = height >> 1; +- /** There is currently a bunch of code which assumes +- * uv_stride == y_stride/2, so enforce this here. */ +- int uv_stride = y_stride >> 1; +- int uvplane_size = (uv_height + border) * uv_stride; +- +- vp8_yv12_de_alloc_frame_buffer(ybf); +- +- /** Only support allocating buffers that have a height and width that +- * are multiples of 16, and a border that's a multiple of 32. +- * The border restriction is required to get 16-byte alignment of the +- * start of the chroma rows without intoducing an arbitrary gap +- * between planes, which would break the semantics of things like +- * vpx_img_set_rect(). */ +- if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) +- return -3; +- +- ybf->y_width = width; +- ybf->y_height = height; +- ybf->y_stride = y_stride; +- +- ybf->uv_width = uv_width; +- ybf->uv_height = uv_height; +- ybf->uv_stride = uv_stride; +- +- ybf->border = border; +- ybf->frame_size = yplane_size + 2 * uvplane_size; +- +- ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); +- +- if (ybf->buffer_alloc == NULL) +- return -1; +- +- ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; +- ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; +- ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; +- +- ybf->corrupted = 0; /* assume not currupted by errors */ +- } +- else +- { +- return -2; +- } +- +- return 0; ++vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { ++ /*NOTE:*/ ++ ++ if (ybf) { ++ int y_stride = ((width + 2 * border) + 31) & ~31; ++ int yplane_size = (height + 2 * border) * y_stride; ++ int uv_width = width >> 1; ++ int uv_height = height >> 1; ++ /** There is currently a bunch of code which assumes ++ * uv_stride == y_stride/2, so enforce this here. */ ++ int uv_stride = y_stride >> 1; ++ int uvplane_size = (uv_height + border) * uv_stride; ++ ++ vp8_yv12_de_alloc_frame_buffer(ybf); ++ ++ /** Only support allocating buffers that have a height and width that ++ * are multiples of 16, and a border that's a multiple of 32. ++ * The border restriction is required to get 16-byte alignment of the ++ * start of the chroma rows without intoducing an arbitrary gap ++ * between planes, which would break the semantics of things like ++ * vpx_img_set_rect(). 
*/ ++ if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) ++ return -3; ++ ++ ybf->y_width = width; ++ ybf->y_height = height; ++ ybf->y_stride = y_stride; ++ ++ ybf->uv_width = uv_width; ++ ybf->uv_height = uv_height; ++ ybf->uv_stride = uv_stride; ++ ++ ybf->border = border; ++ ybf->frame_size = yplane_size + 2 * uvplane_size; ++ ++ ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); ++ ++ if (ybf->buffer_alloc == NULL) ++ return -1; ++ ++ ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; ++ ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; ++ ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; ++ ++ ybf->corrupted = 0; /* assume not currupted by errors */ ++ } else { ++ return -2; ++ } ++ ++ return 0; + } +diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c +index 638633b..247078c 100644 +--- a/vpx_scale/generic/yv12extend.c ++++ b/vpx_scale/generic/yv12extend.c +@@ -21,184 +21,174 @@ + * + ****************************************************************************/ + void +-vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) +-{ +- int i; +- unsigned char *src_ptr1, *src_ptr2; +- unsigned char *dest_ptr1, *dest_ptr2; +- +- unsigned int Border; +- int plane_stride; +- int plane_height; +- int plane_width; +- +- /***********/ +- /* Y Plane */ +- /***********/ +- Border = ybf->border; +- plane_stride = ybf->y_stride; +- plane_height = ybf->y_height; +- plane_width = ybf->y_width; +- +- /* copy the left and right most columns out */ +- src_ptr1 = ybf->y_buffer; +- src_ptr2 = src_ptr1 + plane_width - 1; +- dest_ptr1 = src_ptr1 - Border; +- dest_ptr2 = src_ptr2 + 1; +- +- for (i = 0; i < plane_height; i++) +- { +- vpx_memset(dest_ptr1, src_ptr1[0], Border); +- vpx_memset(dest_ptr2, src_ptr2[0], Border); +- src_ptr1 += plane_stride; +- src_ptr2 += plane_stride; +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- /* Now copy the top and bottom source lines into each line of the respective borders */ +- src_ptr1 = ybf->y_buffer - Border; +- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; +- dest_ptr1 = src_ptr1 - (Border * plane_stride); +- dest_ptr2 = src_ptr2 + plane_stride; +- +- for (i = 0; i < (int)Border; i++) +- { +- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); +- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- +- /***********/ +- /* U Plane */ +- /***********/ +- plane_stride = ybf->uv_stride; +- plane_height = ybf->uv_height; +- plane_width = ybf->uv_width; +- Border /= 2; +- +- /* copy the left and right most columns out */ +- src_ptr1 = ybf->u_buffer; +- src_ptr2 = src_ptr1 + plane_width - 1; +- dest_ptr1 = src_ptr1 - Border; +- dest_ptr2 = src_ptr2 + 1; +- +- for (i = 0; i < plane_height; i++) +- { +- vpx_memset(dest_ptr1, src_ptr1[0], Border); +- vpx_memset(dest_ptr2, src_ptr2[0], Border); +- src_ptr1 += plane_stride; +- src_ptr2 += plane_stride; +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- /* Now copy the top and bottom source lines into each line of the respective borders */ +- src_ptr1 = ybf->u_buffer - Border; +- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; +- dest_ptr1 = src_ptr1 - (Border * plane_stride); +- dest_ptr2 = src_ptr2 + plane_stride; +- +- for (i = 0; i < (int)(Border); i++) +- { +- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); +- 
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- /***********/ +- /* V Plane */ +- /***********/ +- +- /* copy the left and right most columns out */ +- src_ptr1 = ybf->v_buffer; +- src_ptr2 = src_ptr1 + plane_width - 1; +- dest_ptr1 = src_ptr1 - Border; +- dest_ptr2 = src_ptr2 + 1; +- +- for (i = 0; i < plane_height; i++) +- { +- vpx_memset(dest_ptr1, src_ptr1[0], Border); +- vpx_memset(dest_ptr2, src_ptr2[0], Border); +- src_ptr1 += plane_stride; +- src_ptr2 += plane_stride; +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- /* Now copy the top and bottom source lines into each line of the respective borders */ +- src_ptr1 = ybf->v_buffer - Border; +- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; +- dest_ptr1 = src_ptr1 - (Border * plane_stride); +- dest_ptr2 = src_ptr2 + plane_stride; +- +- for (i = 0; i < (int)(Border); i++) +- { +- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); +- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } ++vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { ++ int i; ++ unsigned char *src_ptr1, *src_ptr2; ++ unsigned char *dest_ptr1, *dest_ptr2; ++ ++ unsigned int Border; ++ int plane_stride; ++ int plane_height; ++ int plane_width; ++ ++ /***********/ ++ /* Y Plane */ ++ /***********/ ++ Border = ybf->border; ++ plane_stride = ybf->y_stride; ++ plane_height = ybf->y_height; ++ plane_width = ybf->y_width; ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = ybf->y_buffer; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /* Now copy the top and bottom source lines into each line of the respective borders */ ++ src_ptr1 = ybf->y_buffer - Border; ++ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)Border; i++) { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ ++ /***********/ ++ /* U Plane */ ++ /***********/ ++ plane_stride = ybf->uv_stride; ++ plane_height = ybf->uv_height; ++ plane_width = ybf->uv_width; ++ Border /= 2; ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = ybf->u_buffer; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /* Now copy the top and bottom source lines into each line of the respective borders */ ++ src_ptr1 = ybf->u_buffer - Border; ++ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)(Border); i++) { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ 
dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /***********/ ++ /* V Plane */ ++ /***********/ ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = ybf->v_buffer; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /* Now copy the top and bottom source lines into each line of the respective borders */ ++ src_ptr1 = ybf->v_buffer - Border; ++ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)(Border); i++) { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } + } + + + static void +-extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) +-{ +- int i; +- unsigned char *src_ptr1, *src_ptr2; +- unsigned char *dest_ptr1, *dest_ptr2; +- +- unsigned int Border; +- int plane_stride; +- int plane_height; +- int plane_width; +- +- /***********/ +- /* Y Plane */ +- /***********/ +- Border = ybf->border; +- plane_stride = ybf->y_stride; +- plane_height = ybf->y_height; +- plane_width = ybf->y_width; +- +- /* copy the left and right most columns out */ +- src_ptr1 = ybf->y_buffer; +- src_ptr2 = src_ptr1 + plane_width - 1; +- dest_ptr1 = src_ptr1 - Border; +- dest_ptr2 = src_ptr2 + 1; +- +- for (i = 0; i < plane_height; i++) +- { +- vpx_memset(dest_ptr1, src_ptr1[0], Border); +- vpx_memset(dest_ptr2, src_ptr2[0], Border); +- src_ptr1 += plane_stride; +- src_ptr2 += plane_stride; +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- /* Now copy the top and bottom source lines into each line of the respective borders */ +- src_ptr1 = ybf->y_buffer - Border; +- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; +- dest_ptr1 = src_ptr1 - (Border * plane_stride); +- dest_ptr2 = src_ptr2 + plane_stride; +- +- for (i = 0; i < (int)Border; i++) +- { +- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); +- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); +- dest_ptr1 += plane_stride; +- dest_ptr2 += plane_stride; +- } +- +- plane_stride /= 2; +- plane_height /= 2; +- plane_width /= 2; +- Border /= 2; ++extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) { ++ int i; ++ unsigned char *src_ptr1, *src_ptr2; ++ unsigned char *dest_ptr1, *dest_ptr2; ++ ++ unsigned int Border; ++ int plane_stride; ++ int plane_height; ++ int plane_width; ++ ++ /***********/ ++ /* Y Plane */ ++ /***********/ ++ Border = ybf->border; ++ plane_stride = ybf->y_stride; ++ plane_height = ybf->y_height; ++ plane_width = ybf->y_width; ++ ++ /* copy the left and right most columns out */ ++ src_ptr1 = ybf->y_buffer; ++ src_ptr2 = src_ptr1 + plane_width - 1; ++ dest_ptr1 = src_ptr1 - Border; ++ dest_ptr2 = src_ptr2 + 1; ++ ++ for (i = 0; i < plane_height; i++) { ++ vpx_memset(dest_ptr1, src_ptr1[0], Border); ++ vpx_memset(dest_ptr2, src_ptr2[0], Border); ++ src_ptr1 += plane_stride; ++ src_ptr2 += plane_stride; ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ /* Now copy the top and bottom source lines into each line of the respective borders */ ++ src_ptr1 = ybf->y_buffer - Border; ++ src_ptr2 = 
src_ptr1 + (plane_height * plane_stride) - plane_stride; ++ dest_ptr1 = src_ptr1 - (Border * plane_stride); ++ dest_ptr2 = src_ptr2 + plane_stride; ++ ++ for (i = 0; i < (int)Border; i++) { ++ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); ++ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); ++ dest_ptr1 += plane_stride; ++ dest_ptr2 += plane_stride; ++ } ++ ++ plane_stride /= 2; ++ plane_height /= 2; ++ plane_width /= 2; ++ Border /= 2; + + } + +@@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) + * + ****************************************************************************/ + void +-vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) +-{ +- int row; +- unsigned char *source, *dest; +- +- source = src_ybc->y_buffer; +- dest = dst_ybc->y_buffer; +- +- for (row = 0; row < src_ybc->y_height; row++) +- { +- vpx_memcpy(dest, source, src_ybc->y_width); +- source += src_ybc->y_stride; +- dest += dst_ybc->y_stride; +- } +- +- source = src_ybc->u_buffer; +- dest = dst_ybc->u_buffer; +- +- for (row = 0; row < src_ybc->uv_height; row++) +- { +- vpx_memcpy(dest, source, src_ybc->uv_width); +- source += src_ybc->uv_stride; +- dest += dst_ybc->uv_stride; +- } +- +- source = src_ybc->v_buffer; +- dest = dst_ybc->v_buffer; +- +- for (row = 0; row < src_ybc->uv_height; row++) +- { +- vpx_memcpy(dest, source, src_ybc->uv_width); +- source += src_ybc->uv_stride; +- dest += dst_ybc->uv_stride; +- } +- +- vp8_yv12_extend_frame_borders_c(dst_ybc); ++vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, ++ YV12_BUFFER_CONFIG *dst_ybc) { ++ int row; ++ unsigned char *source, *dest; ++ ++ source = src_ybc->y_buffer; ++ dest = dst_ybc->y_buffer; ++ ++ for (row = 0; row < src_ybc->y_height; row++) { ++ vpx_memcpy(dest, source, src_ybc->y_width); ++ source += src_ybc->y_stride; ++ dest += dst_ybc->y_stride; ++ } ++ ++ source = src_ybc->u_buffer; ++ dest = dst_ybc->u_buffer; ++ ++ for (row = 0; row < src_ybc->uv_height; row++) { ++ vpx_memcpy(dest, source, src_ybc->uv_width); ++ source += src_ybc->uv_stride; ++ dest += dst_ybc->uv_stride; ++ } ++ ++ source = src_ybc->v_buffer; ++ dest = dst_ybc->v_buffer; ++ ++ for (row = 0; row < src_ybc->uv_height; row++) { ++ vpx_memcpy(dest, source, src_ybc->uv_width); ++ source += src_ybc->uv_stride; ++ dest += dst_ybc->uv_stride; ++ } ++ ++ vp8_yv12_extend_frame_borders_c(dst_ybc); + } + +-void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) +-{ +- int row; +- unsigned char *source, *dest; ++void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, ++ YV12_BUFFER_CONFIG *dst_ybc) { ++ int row; ++ unsigned char *source, *dest; + + +- source = src_ybc->y_buffer; +- dest = dst_ybc->y_buffer; ++ source = src_ybc->y_buffer; ++ dest = dst_ybc->y_buffer; + +- for (row = 0; row < src_ybc->y_height; row++) +- { +- vpx_memcpy(dest, source, src_ybc->y_width); +- source += src_ybc->y_stride; +- dest += dst_ybc->y_stride; +- } ++ for (row = 0; row < src_ybc->y_height; row++) { ++ vpx_memcpy(dest, source, src_ybc->y_width); ++ source += src_ybc->y_stride; ++ dest += dst_ybc->y_stride; ++ } + } +diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h +index 39de181..c535252 100644 +--- a/vpx_scale/include/generic/vpxscale_arbitrary.h ++++ b/vpx_scale/include/generic/vpxscale_arbitrary.h +@@ -14,33 +14,32 @@ + + #include "vpx_scale/yv12config.h" + +-typedef struct +-{ +- int in_width; +- int in_height; +- +- int out_width; +- int out_height; +- int max_usable_out_width; 
+- +- // numerator for the width and height +- int nw; +- int nh; +- int nh_uv; +- +- // output to input correspondance array +- short *l_w; +- short *l_h; +- short *l_h_uv; +- +- // polyphase coefficients +- short *c_w; +- short *c_h; +- short *c_h_uv; +- +- // buffer for horizontal filtering. +- unsigned char *hbuf; +- unsigned char *hbuf_uv; ++typedef struct { ++ int in_width; ++ int in_height; ++ ++ int out_width; ++ int out_height; ++ int max_usable_out_width; ++ ++ // numerator for the width and height ++ int nw; ++ int nh; ++ int nh_uv; ++ ++ // output to input correspondance array ++ short *l_w; ++ short *l_h; ++ short *l_h_uv; ++ ++ // polyphase coefficients ++ short *c_w; ++ short *c_h; ++ short *c_h_uv; ++ ++ // buffer for horizontal filtering. ++ unsigned char *hbuf; ++ unsigned char *hbuf_uv; + } BICUBIC_SCALER_STRUCT; + + int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); +diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h +index 1476e64..5581385 100644 +--- a/vpx_scale/scale_mode.h ++++ b/vpx_scale/scale_mode.h +@@ -17,12 +17,11 @@ + #ifndef SCALE_MODE_H + #define SCALE_MODE_H + +-typedef enum +-{ +- MAINTAIN_ASPECT_RATIO = 0x0, +- SCALE_TO_FIT = 0x1, +- CENTER = 0x2, +- OTHER = 0x3 ++typedef enum { ++ MAINTAIN_ASPECT_RATIO = 0x0, ++ SCALE_TO_FIT = 0x1, ++ CENTER = 0x2, ++ OTHER = 0x3 + } SCALE_MODE; + + +diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h +index 8919a24..3c2194d 100644 +--- a/vpx_scale/vpxscale.h ++++ b/vpx_scale/vpxscale.h +@@ -14,29 +14,24 @@ + + #include "vpx_scale/yv12config.h" + +-extern void vp8_yv12_scale_or_center +-( +- YV12_BUFFER_CONFIG *src_yuv_config, +- YV12_BUFFER_CONFIG *dst_yuv_config, +- int expanded_frame_width, +- int expanded_frame_height, +- int scaling_mode, +- int HScale, +- int HRatio, +- int VScale, +- int VRatio +-); +-extern void vp8_scale_frame +-( +- YV12_BUFFER_CONFIG *src, +- YV12_BUFFER_CONFIG *dst, +- unsigned char *temp_area, +- unsigned char temp_height, +- unsigned int hscale, +- unsigned int hratio, +- unsigned int vscale, +- unsigned int vratio, +- unsigned int interlaced +-); ++extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config, ++ YV12_BUFFER_CONFIG *dst_yuv_config, ++ int expanded_frame_width, ++ int expanded_frame_height, ++ int scaling_mode, ++ int HScale, ++ int HRatio, ++ int VScale, ++ int VRatio); ++ ++extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src, ++ YV12_BUFFER_CONFIG *dst, ++ unsigned char *temp_area, ++ unsigned char temp_height, ++ unsigned int hscale, ++ unsigned int hratio, ++ unsigned int vscale, ++ unsigned int vratio, ++ unsigned int interlaced); + + #endif +diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c +index 3711fe5..2d96cc7 100644 +--- a/vpx_scale/win32/scaleopt.c ++++ b/vpx_scale/win32/scaleopt.c +@@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, + static + void horizontal_line_3_5_scale_mmx + ( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- (void) dest_width; ++ const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width ++) { ++ (void) dest_width; + +- __asm +- { ++ __asm { + +- push ebx ++ push ebx + +- mov esi, source +- mov edi, dest ++ mov esi, source ++ mov edi, dest + +- mov ecx, source_width +- lea edx, [esi+ecx-3]; ++ mov ecx, source_width ++ lea edx, [esi+ecx-3]; + +- movq mm5, const35_1 // mm5 = 66 xx cd 
xx 33 xx 9a xx +- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx ++ movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx ++ movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx + +- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx +- pxor mm7, mm7 // clear mm7 ++ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx ++ pxor mm7, mm7 // clear mm7 + +- horiz_line_3_5_loop: ++ horiz_line_3_5_loop: + +- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 +- mov ebx, eax ++ mov eax, DWORD PTR [esi] // eax = 00 01 02 03 ++ mov ebx, eax + +- and ebx, 0xffff00 // ebx = xx 01 02 xx +- mov ecx, eax // ecx = 00 01 02 03 ++ and ebx, 0xffff00 // ebx = xx 01 02 xx ++ mov ecx, eax // ecx = 00 01 02 03 + +- and eax, 0xffff0000 // eax = xx xx 02 03 +- xor ecx, eax // ecx = 00 01 xx xx ++ and eax, 0xffff0000 // eax = xx xx 02 03 ++ xor ecx, eax // ecx = 00 01 xx xx + +- shr ebx, 8 // ebx = 01 02 xx xx +- or eax, ebx // eax = 01 02 02 03 ++ shr ebx, 8 // ebx = 01 02 xx xx ++ or eax, ebx // eax = 01 02 02 03 + +- shl ebx, 16 // ebx = xx xx 01 02 +- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx ++ shl ebx, 16 // ebx = xx xx 01 02 ++ movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx + +- or ebx, ecx // ebx = 00 01 01 02 +- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx ++ or ebx, ecx // ebx = 00 01 01 02 ++ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx + +- movd mm0, ebx // mm0 = 00 01 01 02 +- pmullw mm1, mm6 // ++ movd mm0, ebx // mm0 = 00 01 01 02 ++ pmullw mm1, mm6 // + +- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx +- pmullw mm0, mm5 // ++ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx ++ pmullw mm0, mm5 // + +- mov [edi], ebx // writeoutput 00 xx xx xx +- add esi, 3 ++ mov [edi], ebx // writeoutput 00 xx xx xx ++ add esi, 3 + +- add edi, 5 +- paddw mm0, mm1 ++ add edi, 5 ++ paddw mm0, mm1 + +- paddw mm0, mm4 +- psrlw mm0, 8 ++ paddw mm0, mm4 ++ psrlw mm0, 8 + +- cmp esi, edx +- packuswb mm0, mm7 ++ cmp esi, edx ++ packuswb mm0, mm7 + +- movd DWORD Ptr [edi-4], mm0 +- jl horiz_line_3_5_loop ++ movd DWORD Ptr [edi-4], mm0 ++ jl horiz_line_3_5_loop + +-//Exit: +- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 +- mov ebx, eax ++// Exit: ++ mov eax, DWORD PTR [esi] // eax = 00 01 02 03 ++ mov ebx, eax + +- and ebx, 0xffff00 // ebx = xx 01 02 xx +- mov ecx, eax // ecx = 00 01 02 03 ++ and ebx, 0xffff00 // ebx = xx 01 02 xx ++ mov ecx, eax // ecx = 00 01 02 03 + +- and eax, 0xffff0000 // eax = xx xx 02 03 +- xor ecx, eax // ecx = 00 01 xx xx ++ and eax, 0xffff0000 // eax = xx xx 02 03 ++ xor ecx, eax // ecx = 00 01 xx xx + +- shr ebx, 8 // ebx = 01 02 xx xx +- or eax, ebx // eax = 01 02 02 03 ++ shr ebx, 8 // ebx = 01 02 xx xx ++ or eax, ebx // eax = 01 02 02 03 + +- shl eax, 8 // eax = xx 01 02 02 +- and eax, 0xffff0000 // eax = xx xx 02 02 ++ shl eax, 8 // eax = xx 01 02 02 ++ and eax, 0xffff0000 // eax = xx xx 02 02 + +- or eax, ebx // eax = 01 02 02 02 ++ or eax, ebx // eax = 01 02 02 02 + +- shl ebx, 16 // ebx = xx xx 01 02 +- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx ++ shl ebx, 16 // ebx = xx xx 01 02 ++ movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx + +- or ebx, ecx // ebx = 00 01 01 02 +- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx ++ or ebx, ecx // ebx = 00 01 01 02 ++ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx + +- movd mm0, ebx // mm0 = 00 01 01 02 +- pmullw mm1, mm6 // ++ movd mm0, ebx // mm0 = 00 01 01 02 ++ pmullw mm1, mm6 // + +- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx +- pmullw mm0, mm5 // ++ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 
01 xx 02 xx ++ pmullw mm0, mm5 // + +- mov [edi], ebx // writeoutput 00 xx xx xx +- paddw mm0, mm1 ++ mov [edi], ebx // writeoutput 00 xx xx xx ++ paddw mm0, mm1 + +- paddw mm0, mm4 +- psrlw mm0, 8 ++ paddw mm0, mm4 ++ psrlw mm0, 8 + +- packuswb mm0, mm7 +- movd DWORD Ptr [edi+1], mm0 ++ packuswb mm0, mm7 ++ movd DWORD Ptr [edi+1], mm0 + +- pop ebx ++ pop ebx + +- } ++ } + + } + +@@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx + static + void horizontal_line_4_5_scale_mmx + ( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- (void)dest_width; ++ const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width ++) { ++ (void)dest_width; + +- __asm +- { ++ __asm { + +- mov esi, source +- mov edi, dest ++ mov esi, source ++ mov edi, dest + +- mov ecx, source_width +- lea edx, [esi+ecx-8]; ++ mov ecx, source_width ++ lea edx, [esi+ecx-8]; + +- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx +- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx ++ movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx ++ movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx + +- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx +- pxor mm7, mm7 // clear mm7 ++ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx ++ pxor mm7, mm7 // clear mm7 + +- horiz_line_4_5_loop: ++ horiz_line_4_5_loop: + +- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 +- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 ++ movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 ++ movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 + +- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 +- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 ++ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 ++ movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 + +- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx +- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx ++ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx ++ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + +- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx +- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 ++ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx ++ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + +- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 +- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx ++ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 ++ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + +- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx +- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 ++ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx ++ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + +- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx +- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 ++ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx ++ pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 + +- paddw mm0, mm1 // added round values +- paddw mm0, mm4 ++ paddw mm0, mm1 // added round values ++ paddw mm0, mm4 + +- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx +- packuswb mm0, mm7 ++ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx ++ packuswb mm0, mm7 + +- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 +- add edi, 10 ++ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 ++ add edi, 10 + +- add esi, 8 +- paddw mm2, mm3 // ++ add esi, 8 ++ paddw mm2, mm3 // + +- paddw mm2, mm4 // added round values +- cmp esi, edx ++ paddw mm2, mm4 // added round values 
++ cmp esi, edx + +- psrlw mm2, 8 +- packuswb mm2, mm7 ++ psrlw mm2, 8 ++ packuswb mm2, mm7 + +- movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 +- jl horiz_line_4_5_loop ++ movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 ++ jl horiz_line_4_5_loop + +-//Exit: +- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 +- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 ++// Exit: ++ movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 ++ movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 + +- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 +- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 ++ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 ++ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 + +- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 +- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 ++ movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 ++ pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 + +- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 +- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 ++ psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 ++ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 + +- movq mm3, mm1 ++ movq mm3, mm1 + +- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx +- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx ++ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx ++ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + +- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx +- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 ++ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx ++ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + +- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 +- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx ++ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 ++ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + +- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx +- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 ++ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx ++ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + +- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx +- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 ++ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx ++ pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 + +- paddw mm0, mm1 // added round values +- paddw mm0, mm4 ++ paddw mm0, mm1 // added round values ++ paddw mm0, mm4 + +- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx +- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx ++ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx ++ packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx + +- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 +- paddw mm2, mm3 // ++ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 ++ paddw mm2, mm3 // + +- paddw mm2, mm4 // added round values +- psrlw mm2, 8 ++ paddw mm2, mm4 // added round values ++ psrlw mm2, 8 + +- packuswb mm2, mm7 +- movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 ++ packuswb mm2, mm7 ++ movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 + + +- } ++ } + } + + /**************************************************************************** +@@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx + static + void vertical_band_4_5_scale_mmx + ( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- __asm +- { ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ __asm { + +- mov esi, dest // Get the source and destination pointer +- mov ecx, dest_pitch // Get the pitch size ++ mov esi, dest // Get the source and destination pointer ++ mov ecx, 
dest_pitch // Get the pitch size + +- lea edi, [esi+ecx*2] // tow lines below +- add edi, ecx // three lines below ++ lea edi, [esi+ecx*2] // tow lines below ++ add edi, ecx // three lines below + +- pxor mm7, mm7 // clear out mm7 +- mov edx, dest_width // Loop counter ++ pxor mm7, mm7 // clear out mm7 ++ mov edx, dest_width // Loop counter + +- vs_4_5_loop: ++ vs_4_5_loop: + +- movq mm0, QWORD ptr [esi] // src[0]; +- movq mm1, QWORD ptr [esi+ecx] // src[1]; ++ movq mm0, QWORD ptr [esi] // src[0]; ++ movq mm1, QWORD ptr [esi+ecx] // src[1]; + +- movq mm2, mm0 // Make a copy +- punpcklbw mm0, mm7 // unpack low to word ++ movq mm2, mm0 // Make a copy ++ punpcklbw mm0, mm7 // unpack low to word + +- movq mm5, one_fifth +- punpckhbw mm2, mm7 // unpack high to word ++ movq mm5, one_fifth ++ punpckhbw mm2, mm7 // unpack high to word + +- pmullw mm0, mm5 // a * 1/5 ++ pmullw mm0, mm5 // a * 1/5 + +- movq mm3, mm1 // make a copy +- punpcklbw mm1, mm7 // unpack low to word ++ movq mm3, mm1 // make a copy ++ punpcklbw mm1, mm7 // unpack low to word + +- pmullw mm2, mm5 // a * 1/5 +- movq mm6, four_fifths // constan ++ pmullw mm2, mm5 // a * 1/5 ++ movq mm6, four_fifths // constan + +- movq mm4, mm1 // copy of low b +- pmullw mm4, mm6 // b * 4/5 ++ movq mm4, mm1 // copy of low b ++ pmullw mm4, mm6 // b * 4/5 + +- punpckhbw mm3, mm7 // unpack high to word +- movq mm5, mm3 // copy of high b ++ punpckhbw mm3, mm7 // unpack high to word ++ movq mm5, mm3 // copy of high b + +- pmullw mm5, mm6 // b * 4/5 +- paddw mm0, mm4 // a * 1/5 + b * 4/5 ++ pmullw mm5, mm6 // b * 4/5 ++ paddw mm0, mm4 // a * 1/5 + b * 4/5 + +- paddw mm2, mm5 // a * 1/5 + b * 4/5 +- paddw mm0, round_values // + 128 ++ paddw mm2, mm5 // a * 1/5 + b * 4/5 ++ paddw mm0, round_values // + 128 + +- paddw mm2, round_values // + 128 +- psrlw mm0, 8 ++ paddw mm2, round_values // + 128 ++ psrlw mm0, 8 + +- psrlw mm2, 8 +- packuswb mm0, mm2 // des [1] ++ psrlw mm2, 8 ++ packuswb mm0, mm2 // des [1] + +- movq QWORD ptr [esi+ecx], mm0 // write des[1] +- movq mm0, [esi+ecx*2] // mm0 = src[2] ++ movq QWORD ptr [esi+ecx], mm0 // write des[1] ++ movq mm0, [esi+ecx*2] // mm0 = src[2] + +- // mm1, mm3 --- Src[1] +- // mm0 --- Src[2] +- // mm7 for unpacking ++ // mm1, mm3 --- Src[1] ++ // mm0 --- Src[2] ++ // mm7 for unpacking + +- movq mm5, two_fifths +- movq mm2, mm0 // make a copy ++ movq mm5, two_fifths ++ movq mm2, mm0 // make a copy + +- pmullw mm1, mm5 // b * 2/5 +- movq mm6, three_fifths ++ pmullw mm1, mm5 // b * 2/5 ++ movq mm6, three_fifths + + +- punpcklbw mm0, mm7 // unpack low to word +- pmullw mm3, mm5 // b * 2/5 ++ punpcklbw mm0, mm7 // unpack low to word ++ pmullw mm3, mm5 // b * 2/5 + +- movq mm4, mm0 // make copy of c +- punpckhbw mm2, mm7 // unpack high to word ++ movq mm4, mm0 // make copy of c ++ punpckhbw mm2, mm7 // unpack high to word + +- pmullw mm4, mm6 // c * 3/5 +- movq mm5, mm2 ++ pmullw mm4, mm6 // c * 3/5 ++ movq mm5, mm2 + +- pmullw mm5, mm6 // c * 3/5 +- paddw mm1, mm4 // b * 2/5 + c * 3/5 ++ pmullw mm5, mm6 // c * 3/5 ++ paddw mm1, mm4 // b * 2/5 + c * 3/5 + +- paddw mm3, mm5 // b * 2/5 + c * 3/5 +- paddw mm1, round_values // + 128 ++ paddw mm3, mm5 // b * 2/5 + c * 3/5 ++ paddw mm1, round_values // + 128 + +- paddw mm3, round_values // + 128 +- psrlw mm1, 8 ++ paddw mm3, round_values // + 128 ++ psrlw mm1, 8 + +- psrlw mm3, 8 +- packuswb mm1, mm3 // des[2] ++ psrlw mm3, 8 ++ packuswb mm1, mm3 // des[2] + +- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] +- movq mm1, [edi] // mm1=Src[3]; ++ movq QWORD ptr [esi+ecx*2], 
mm1 // write des[2] ++ movq mm1, [edi] // mm1=Src[3]; + +- // mm0, mm2 --- Src[2] +- // mm1 --- Src[3] +- // mm6 --- 3/5 +- // mm7 for unpacking ++ // mm0, mm2 --- Src[2] ++ // mm1 --- Src[3] ++ // mm6 --- 3/5 ++ // mm7 for unpacking + +- pmullw mm0, mm6 // c * 3/5 +- movq mm5, two_fifths // mm5 = 2/5 ++ pmullw mm0, mm6 // c * 3/5 ++ movq mm5, two_fifths // mm5 = 2/5 + +- movq mm3, mm1 // make a copy +- pmullw mm2, mm6 // c * 3/5 ++ movq mm3, mm1 // make a copy ++ pmullw mm2, mm6 // c * 3/5 + +- punpcklbw mm1, mm7 // unpack low +- movq mm4, mm1 // make a copy ++ punpcklbw mm1, mm7 // unpack low ++ movq mm4, mm1 // make a copy + +- punpckhbw mm3, mm7 // unpack high +- pmullw mm4, mm5 // d * 2/5 ++ punpckhbw mm3, mm7 // unpack high ++ pmullw mm4, mm5 // d * 2/5 + +- movq mm6, mm3 // make a copy +- pmullw mm6, mm5 // d * 2/5 ++ movq mm6, mm3 // make a copy ++ pmullw mm6, mm5 // d * 2/5 + +- paddw mm0, mm4 // c * 3/5 + d * 2/5 +- paddw mm2, mm6 // c * 3/5 + d * 2/5 ++ paddw mm0, mm4 // c * 3/5 + d * 2/5 ++ paddw mm2, mm6 // c * 3/5 + d * 2/5 + +- paddw mm0, round_values // + 128 +- paddw mm2, round_values // + 128 ++ paddw mm0, round_values // + 128 ++ paddw mm2, round_values // + 128 + +- psrlw mm0, 8 +- psrlw mm2, 8 ++ psrlw mm0, 8 ++ psrlw mm2, 8 + +- packuswb mm0, mm2 // des[3] +- movq QWORD ptr [edi], mm0 // write des[3] ++ packuswb mm0, mm2 // des[3] ++ movq QWORD ptr [edi], mm0 // write des[3] + +- // mm1, mm3 --- Src[3] +- // mm7 -- cleared for unpacking ++ // mm1, mm3 --- Src[3] ++ // mm7 -- cleared for unpacking + +- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group ++ movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group + +- movq mm5, four_fifths // mm5 = 4/5 +- pmullw mm1, mm5 // d * 4/5 ++ movq mm5, four_fifths // mm5 = 4/5 ++ pmullw mm1, mm5 // d * 4/5 + +- movq mm6, one_fifth // mm6 = 1/5 +- movq mm2, mm0 // make a copy ++ movq mm6, one_fifth // mm6 = 1/5 ++ movq mm2, mm0 // make a copy + +- pmullw mm3, mm5 // d * 4/5 +- punpcklbw mm0, mm7 // unpack low ++ pmullw mm3, mm5 // d * 4/5 ++ punpcklbw mm0, mm7 // unpack low + +- pmullw mm0, mm6 // an * 1/5 +- punpckhbw mm2, mm7 // unpack high ++ pmullw mm0, mm6 // an * 1/5 ++ punpckhbw mm2, mm7 // unpack high + +- paddw mm1, mm0 // d * 4/5 + an * 1/5 +- pmullw mm2, mm6 // an * 1/5 ++ paddw mm1, mm0 // d * 4/5 + an * 1/5 ++ pmullw mm2, mm6 // an * 1/5 + +- paddw mm3, mm2 // d * 4/5 + an * 1/5 +- paddw mm1, round_values // + 128 ++ paddw mm3, mm2 // d * 4/5 + an * 1/5 ++ paddw mm1, round_values // + 128 + +- paddw mm3, round_values // + 128 +- psrlw mm1, 8 ++ paddw mm3, round_values // + 128 ++ psrlw mm1, 8 + +- psrlw mm3, 8 +- packuswb mm1, mm3 // des[4] ++ psrlw mm3, 8 ++ packuswb mm1, mm3 // des[4] + +- movq QWORD ptr [edi+ecx], mm1 // write des[4] ++ movq QWORD ptr [edi+ecx], mm1 // write des[4] + +- add edi, 8 +- add esi, 8 ++ add edi, 8 ++ add esi, 8 + +- sub edx, 8 +- jg vs_4_5_loop +- } ++ sub edx, 8 ++ jg vs_4_5_loop ++ } + } + + /**************************************************************************** +@@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx + static + void last_vertical_band_4_5_scale_mmx + ( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- __asm +- { +- mov esi, dest // Get the source and destination pointer +- mov ecx, dest_pitch // Get the pitch size ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ __asm { ++ mov esi, dest // Get the source and destination pointer ++ mov ecx, dest_pitch // Get the pitch size + +- lea 
edi, [esi+ecx*2] // tow lines below +- add edi, ecx // three lines below ++ lea edi, [esi+ecx*2] // tow lines below ++ add edi, ecx // three lines below + +- pxor mm7, mm7 // clear out mm7 +- mov edx, dest_width // Loop counter ++ pxor mm7, mm7 // clear out mm7 ++ mov edx, dest_width // Loop counter + +- last_vs_4_5_loop: ++ last_vs_4_5_loop: + +- movq mm0, QWORD ptr [esi] // src[0]; +- movq mm1, QWORD ptr [esi+ecx] // src[1]; ++ movq mm0, QWORD ptr [esi] // src[0]; ++ movq mm1, QWORD ptr [esi+ecx] // src[1]; + +- movq mm2, mm0 // Make a copy +- punpcklbw mm0, mm7 // unpack low to word ++ movq mm2, mm0 // Make a copy ++ punpcklbw mm0, mm7 // unpack low to word + +- movq mm5, one_fifth +- punpckhbw mm2, mm7 // unpack high to word ++ movq mm5, one_fifth ++ punpckhbw mm2, mm7 // unpack high to word + +- pmullw mm0, mm5 // a * 1/5 ++ pmullw mm0, mm5 // a * 1/5 + +- movq mm3, mm1 // make a copy +- punpcklbw mm1, mm7 // unpack low to word ++ movq mm3, mm1 // make a copy ++ punpcklbw mm1, mm7 // unpack low to word + +- pmullw mm2, mm5 // a * 1/5 +- movq mm6, four_fifths // constan ++ pmullw mm2, mm5 // a * 1/5 ++ movq mm6, four_fifths // constan + +- movq mm4, mm1 // copy of low b +- pmullw mm4, mm6 // b * 4/5 ++ movq mm4, mm1 // copy of low b ++ pmullw mm4, mm6 // b * 4/5 + +- punpckhbw mm3, mm7 // unpack high to word +- movq mm5, mm3 // copy of high b ++ punpckhbw mm3, mm7 // unpack high to word ++ movq mm5, mm3 // copy of high b + +- pmullw mm5, mm6 // b * 4/5 +- paddw mm0, mm4 // a * 1/5 + b * 4/5 ++ pmullw mm5, mm6 // b * 4/5 ++ paddw mm0, mm4 // a * 1/5 + b * 4/5 + +- paddw mm2, mm5 // a * 1/5 + b * 4/5 +- paddw mm0, round_values // + 128 ++ paddw mm2, mm5 // a * 1/5 + b * 4/5 ++ paddw mm0, round_values // + 128 + +- paddw mm2, round_values // + 128 +- psrlw mm0, 8 ++ paddw mm2, round_values // + 128 ++ psrlw mm0, 8 + +- psrlw mm2, 8 +- packuswb mm0, mm2 // des [1] ++ psrlw mm2, 8 ++ packuswb mm0, mm2 // des [1] + +- movq QWORD ptr [esi+ecx], mm0 // write des[1] +- movq mm0, [esi+ecx*2] // mm0 = src[2] ++ movq QWORD ptr [esi+ecx], mm0 // write des[1] ++ movq mm0, [esi+ecx*2] // mm0 = src[2] + +- // mm1, mm3 --- Src[1] +- // mm0 --- Src[2] +- // mm7 for unpacking ++ // mm1, mm3 --- Src[1] ++ // mm0 --- Src[2] ++ // mm7 for unpacking + +- movq mm5, two_fifths +- movq mm2, mm0 // make a copy ++ movq mm5, two_fifths ++ movq mm2, mm0 // make a copy + +- pmullw mm1, mm5 // b * 2/5 +- movq mm6, three_fifths ++ pmullw mm1, mm5 // b * 2/5 ++ movq mm6, three_fifths + + +- punpcklbw mm0, mm7 // unpack low to word +- pmullw mm3, mm5 // b * 2/5 ++ punpcklbw mm0, mm7 // unpack low to word ++ pmullw mm3, mm5 // b * 2/5 + +- movq mm4, mm0 // make copy of c +- punpckhbw mm2, mm7 // unpack high to word ++ movq mm4, mm0 // make copy of c ++ punpckhbw mm2, mm7 // unpack high to word + +- pmullw mm4, mm6 // c * 3/5 +- movq mm5, mm2 ++ pmullw mm4, mm6 // c * 3/5 ++ movq mm5, mm2 + +- pmullw mm5, mm6 // c * 3/5 +- paddw mm1, mm4 // b * 2/5 + c * 3/5 ++ pmullw mm5, mm6 // c * 3/5 ++ paddw mm1, mm4 // b * 2/5 + c * 3/5 + +- paddw mm3, mm5 // b * 2/5 + c * 3/5 +- paddw mm1, round_values // + 128 ++ paddw mm3, mm5 // b * 2/5 + c * 3/5 ++ paddw mm1, round_values // + 128 + +- paddw mm3, round_values // + 128 +- psrlw mm1, 8 ++ paddw mm3, round_values // + 128 ++ psrlw mm1, 8 + +- psrlw mm3, 8 +- packuswb mm1, mm3 // des[2] ++ psrlw mm3, 8 ++ packuswb mm1, mm3 // des[2] + +- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] +- movq mm1, [edi] // mm1=Src[3]; ++ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] ++ movq mm1, 
[edi] // mm1=Src[3]; + +- movq QWORD ptr [edi+ecx], mm1 // write des[4]; ++ movq QWORD ptr [edi+ecx], mm1 // write des[4]; + +- // mm0, mm2 --- Src[2] +- // mm1 --- Src[3] +- // mm6 --- 3/5 +- // mm7 for unpacking ++ // mm0, mm2 --- Src[2] ++ // mm1 --- Src[3] ++ // mm6 --- 3/5 ++ // mm7 for unpacking + +- pmullw mm0, mm6 // c * 3/5 +- movq mm5, two_fifths // mm5 = 2/5 ++ pmullw mm0, mm6 // c * 3/5 ++ movq mm5, two_fifths // mm5 = 2/5 + +- movq mm3, mm1 // make a copy +- pmullw mm2, mm6 // c * 3/5 ++ movq mm3, mm1 // make a copy ++ pmullw mm2, mm6 // c * 3/5 + +- punpcklbw mm1, mm7 // unpack low +- movq mm4, mm1 // make a copy ++ punpcklbw mm1, mm7 // unpack low ++ movq mm4, mm1 // make a copy + +- punpckhbw mm3, mm7 // unpack high +- pmullw mm4, mm5 // d * 2/5 ++ punpckhbw mm3, mm7 // unpack high ++ pmullw mm4, mm5 // d * 2/5 + +- movq mm6, mm3 // make a copy +- pmullw mm6, mm5 // d * 2/5 ++ movq mm6, mm3 // make a copy ++ pmullw mm6, mm5 // d * 2/5 + +- paddw mm0, mm4 // c * 3/5 + d * 2/5 +- paddw mm2, mm6 // c * 3/5 + d * 2/5 ++ paddw mm0, mm4 // c * 3/5 + d * 2/5 ++ paddw mm2, mm6 // c * 3/5 + d * 2/5 + +- paddw mm0, round_values // + 128 +- paddw mm2, round_values // + 128 ++ paddw mm0, round_values // + 128 ++ paddw mm2, round_values // + 128 + +- psrlw mm0, 8 +- psrlw mm2, 8 ++ psrlw mm0, 8 ++ psrlw mm2, 8 + +- packuswb mm0, mm2 // des[3] +- movq QWORD ptr [edi], mm0 // write des[3] ++ packuswb mm0, mm2 // des[3] ++ movq QWORD ptr [edi], mm0 // write des[3] + +- // mm1, mm3 --- Src[3] +- // mm7 -- cleared for unpacking +- add edi, 8 +- add esi, 8 ++ // mm1, mm3 --- Src[3] ++ // mm7 -- cleared for unpacking ++ add edi, 8 ++ add esi, 8 + +- sub edx, 8 +- jg last_vs_4_5_loop +- } ++ sub edx, 8 ++ jg last_vs_4_5_loop ++ } + } + + /**************************************************************************** +@@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx + static + void vertical_band_3_5_scale_mmx + ( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- __asm +- { +- mov esi, dest // Get the source and destination pointer +- mov ecx, dest_pitch // Get the pitch size ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ __asm { ++ mov esi, dest // Get the source and destination pointer ++ mov ecx, dest_pitch // Get the pitch size + +- lea edi, [esi+ecx*2] // tow lines below +- add edi, ecx // three lines below ++ lea edi, [esi+ecx*2] // tow lines below ++ add edi, ecx // three lines below + +- pxor mm7, mm7 // clear out mm7 +- mov edx, dest_width // Loop counter ++ pxor mm7, mm7 // clear out mm7 ++ mov edx, dest_width // Loop counter + +- vs_3_5_loop: ++ vs_3_5_loop: + +- movq mm0, QWORD ptr [esi] // src[0]; +- movq mm1, QWORD ptr [esi+ecx] // src[1]; ++ movq mm0, QWORD ptr [esi] // src[0]; ++ movq mm1, QWORD ptr [esi+ecx] // src[1]; + +- movq mm2, mm0 // Make a copy +- punpcklbw mm0, mm7 // unpack low to word ++ movq mm2, mm0 // Make a copy ++ punpcklbw mm0, mm7 // unpack low to word + +- movq mm5, two_fifths // mm5 = 2/5 +- punpckhbw mm2, mm7 // unpack high to word ++ movq mm5, two_fifths // mm5 = 2/5 ++ punpckhbw mm2, mm7 // unpack high to word + +- pmullw mm0, mm5 // a * 2/5 ++ pmullw mm0, mm5 // a * 2/5 + +- movq mm3, mm1 // make a copy +- punpcklbw mm1, mm7 // unpack low to word ++ movq mm3, mm1 // make a copy ++ punpcklbw mm1, mm7 // unpack low to word + +- pmullw mm2, mm5 // a * 2/5 +- movq mm6, three_fifths // mm6 = 3/5 ++ pmullw mm2, mm5 // a * 2/5 ++ movq mm6, three_fifths // mm6 = 3/5 + +- 
movq mm4, mm1 // copy of low b +- pmullw mm4, mm6 // b * 3/5 ++ movq mm4, mm1 // copy of low b ++ pmullw mm4, mm6 // b * 3/5 + +- punpckhbw mm3, mm7 // unpack high to word +- movq mm5, mm3 // copy of high b ++ punpckhbw mm3, mm7 // unpack high to word ++ movq mm5, mm3 // copy of high b + +- pmullw mm5, mm6 // b * 3/5 +- paddw mm0, mm4 // a * 2/5 + b * 3/5 ++ pmullw mm5, mm6 // b * 3/5 ++ paddw mm0, mm4 // a * 2/5 + b * 3/5 + +- paddw mm2, mm5 // a * 2/5 + b * 3/5 +- paddw mm0, round_values // + 128 ++ paddw mm2, mm5 // a * 2/5 + b * 3/5 ++ paddw mm0, round_values // + 128 + +- paddw mm2, round_values // + 128 +- psrlw mm0, 8 ++ paddw mm2, round_values // + 128 ++ psrlw mm0, 8 + +- psrlw mm2, 8 +- packuswb mm0, mm2 // des [1] ++ psrlw mm2, 8 ++ packuswb mm0, mm2 // des [1] + +- movq QWORD ptr [esi+ecx], mm0 // write des[1] +- movq mm0, [esi+ecx*2] // mm0 = src[2] ++ movq QWORD ptr [esi+ecx], mm0 // write des[1] ++ movq mm0, [esi+ecx*2] // mm0 = src[2] + +- // mm1, mm3 --- Src[1] +- // mm0 --- Src[2] +- // mm7 for unpacking ++ // mm1, mm3 --- Src[1] ++ // mm0 --- Src[2] ++ // mm7 for unpacking + +- movq mm4, mm1 // b low +- pmullw mm1, four_fifths // b * 4/5 low ++ movq mm4, mm1 // b low ++ pmullw mm1, four_fifths // b * 4/5 low + +- movq mm5, mm3 // b high +- pmullw mm3, four_fifths // b * 4/5 high ++ movq mm5, mm3 // b high ++ pmullw mm3, four_fifths // b * 4/5 high + +- movq mm2, mm0 // c +- pmullw mm4, one_fifth // b * 1/5 ++ movq mm2, mm0 // c ++ pmullw mm4, one_fifth // b * 1/5 + +- punpcklbw mm0, mm7 // c low +- pmullw mm5, one_fifth // b * 1/5 ++ punpcklbw mm0, mm7 // c low ++ pmullw mm5, one_fifth // b * 1/5 + +- movq mm6, mm0 // make copy of c low +- punpckhbw mm2, mm7 // c high ++ movq mm6, mm0 // make copy of c low ++ punpckhbw mm2, mm7 // c high + +- pmullw mm6, one_fifth // c * 1/5 low +- movq mm7, mm2 // make copy of c high ++ pmullw mm6, one_fifth // c * 1/5 low ++ movq mm7, mm2 // make copy of c high + +- pmullw mm7, one_fifth // c * 1/5 high +- paddw mm1, mm6 // b * 4/5 + c * 1/5 low ++ pmullw mm7, one_fifth // c * 1/5 high ++ paddw mm1, mm6 // b * 4/5 + c * 1/5 low + +- paddw mm3, mm7 // b * 4/5 + c * 1/5 high +- movq mm6, mm0 // make copy of c low ++ paddw mm3, mm7 // b * 4/5 + c * 1/5 high ++ movq mm6, mm0 // make copy of c low + +- pmullw mm6, four_fifths // c * 4/5 low +- movq mm7, mm2 // make copy of c high ++ pmullw mm6, four_fifths // c * 4/5 low ++ movq mm7, mm2 // make copy of c high + +- pmullw mm7, four_fifths // c * 4/5 high ++ pmullw mm7, four_fifths // c * 4/5 high + +- paddw mm4, mm6 // b * 1/5 + c * 4/5 low +- paddw mm5, mm7 // b * 1/5 + c * 4/5 high ++ paddw mm4, mm6 // b * 1/5 + c * 4/5 low ++ paddw mm5, mm7 // b * 1/5 + c * 4/5 high + +- paddw mm1, round_values // + 128 +- paddw mm3, round_values // + 128 ++ paddw mm1, round_values // + 128 ++ paddw mm3, round_values // + 128 + +- psrlw mm1, 8 +- psrlw mm3, 8 ++ psrlw mm1, 8 ++ psrlw mm3, 8 + +- packuswb mm1, mm3 // des[2] +- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] ++ packuswb mm1, mm3 // des[2] ++ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + +- paddw mm4, round_values // + 128 +- paddw mm5, round_values // + 128 ++ paddw mm4, round_values // + 128 ++ paddw mm5, round_values // + 128 + +- psrlw mm4, 8 +- psrlw mm5, 8 ++ psrlw mm4, 8 ++ psrlw mm5, 8 + +- packuswb mm4, mm5 // des[3] +- movq QWORD ptr [edi], mm4 // write des[3] ++ packuswb mm4, mm5 // des[3] ++ movq QWORD ptr [edi], mm4 // write des[3] + +- // mm0, mm2 --- Src[3] ++ // mm0, mm2 --- Src[3] + +- pxor mm7, mm7 // clear mm7 for 
unpacking +- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group ++ pxor mm7, mm7 // clear mm7 for unpacking ++ movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group + +- movq mm5, three_fifths // mm5 = 3/5 +- pmullw mm0, mm5 // d * 3/5 ++ movq mm5, three_fifths // mm5 = 3/5 ++ pmullw mm0, mm5 // d * 3/5 + +- movq mm6, two_fifths // mm6 = 2/5 +- movq mm3, mm1 // make a copy ++ movq mm6, two_fifths // mm6 = 2/5 ++ movq mm3, mm1 // make a copy + +- pmullw mm2, mm5 // d * 3/5 +- punpcklbw mm1, mm7 // unpack low ++ pmullw mm2, mm5 // d * 3/5 ++ punpcklbw mm1, mm7 // unpack low + +- pmullw mm1, mm6 // an * 2/5 +- punpckhbw mm3, mm7 // unpack high ++ pmullw mm1, mm6 // an * 2/5 ++ punpckhbw mm3, mm7 // unpack high + +- paddw mm0, mm1 // d * 3/5 + an * 2/5 +- pmullw mm3, mm6 // an * 2/5 ++ paddw mm0, mm1 // d * 3/5 + an * 2/5 ++ pmullw mm3, mm6 // an * 2/5 + +- paddw mm2, mm3 // d * 3/5 + an * 2/5 +- paddw mm0, round_values // + 128 ++ paddw mm2, mm3 // d * 3/5 + an * 2/5 ++ paddw mm0, round_values // + 128 + +- paddw mm2, round_values // + 128 +- psrlw mm0, 8 ++ paddw mm2, round_values // + 128 ++ psrlw mm0, 8 + +- psrlw mm2, 8 +- packuswb mm0, mm2 // des[4] ++ psrlw mm2, 8 ++ packuswb mm0, mm2 // des[4] + +- movq QWORD ptr [edi+ecx], mm0 // write des[4] ++ movq QWORD ptr [edi+ecx], mm0 // write des[4] + +- add edi, 8 +- add esi, 8 ++ add edi, 8 ++ add esi, 8 + +- sub edx, 8 +- jg vs_3_5_loop +- } ++ sub edx, 8 ++ jg vs_3_5_loop ++ } + } + + /**************************************************************************** +@@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx + static + void last_vertical_band_3_5_scale_mmx + ( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- __asm +- { +- mov esi, dest // Get the source and destination pointer +- mov ecx, dest_pitch // Get the pitch size ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ __asm { ++ mov esi, dest // Get the source and destination pointer ++ mov ecx, dest_pitch // Get the pitch size + +- lea edi, [esi+ecx*2] // tow lines below +- add edi, ecx // three lines below ++ lea edi, [esi+ecx*2] // tow lines below ++ add edi, ecx // three lines below + +- pxor mm7, mm7 // clear out mm7 +- mov edx, dest_width // Loop counter ++ pxor mm7, mm7 // clear out mm7 ++ mov edx, dest_width // Loop counter + + +- last_vs_3_5_loop: ++ last_vs_3_5_loop: + +- movq mm0, QWORD ptr [esi] // src[0]; +- movq mm1, QWORD ptr [esi+ecx] // src[1]; ++ movq mm0, QWORD ptr [esi] // src[0]; ++ movq mm1, QWORD ptr [esi+ecx] // src[1]; + +- movq mm2, mm0 // Make a copy +- punpcklbw mm0, mm7 // unpack low to word ++ movq mm2, mm0 // Make a copy ++ punpcklbw mm0, mm7 // unpack low to word + +- movq mm5, two_fifths // mm5 = 2/5 +- punpckhbw mm2, mm7 // unpack high to word ++ movq mm5, two_fifths // mm5 = 2/5 ++ punpckhbw mm2, mm7 // unpack high to word + +- pmullw mm0, mm5 // a * 2/5 ++ pmullw mm0, mm5 // a * 2/5 + +- movq mm3, mm1 // make a copy +- punpcklbw mm1, mm7 // unpack low to word ++ movq mm3, mm1 // make a copy ++ punpcklbw mm1, mm7 // unpack low to word + +- pmullw mm2, mm5 // a * 2/5 +- movq mm6, three_fifths // mm6 = 3/5 ++ pmullw mm2, mm5 // a * 2/5 ++ movq mm6, three_fifths // mm6 = 3/5 + +- movq mm4, mm1 // copy of low b +- pmullw mm4, mm6 // b * 3/5 ++ movq mm4, mm1 // copy of low b ++ pmullw mm4, mm6 // b * 3/5 + +- punpckhbw mm3, mm7 // unpack high to word +- movq mm5, mm3 // copy of high b ++ punpckhbw mm3, mm7 // unpack high to word ++ movq mm5, mm3 // copy of high b 
+ +- pmullw mm5, mm6 // b * 3/5 +- paddw mm0, mm4 // a * 2/5 + b * 3/5 ++ pmullw mm5, mm6 // b * 3/5 ++ paddw mm0, mm4 // a * 2/5 + b * 3/5 + +- paddw mm2, mm5 // a * 2/5 + b * 3/5 +- paddw mm0, round_values // + 128 ++ paddw mm2, mm5 // a * 2/5 + b * 3/5 ++ paddw mm0, round_values // + 128 + +- paddw mm2, round_values // + 128 +- psrlw mm0, 8 ++ paddw mm2, round_values // + 128 ++ psrlw mm0, 8 + +- psrlw mm2, 8 +- packuswb mm0, mm2 // des [1] ++ psrlw mm2, 8 ++ packuswb mm0, mm2 // des [1] + +- movq QWORD ptr [esi+ecx], mm0 // write des[1] +- movq mm0, [esi+ecx*2] // mm0 = src[2] ++ movq QWORD ptr [esi+ecx], mm0 // write des[1] ++ movq mm0, [esi+ecx*2] // mm0 = src[2] + + + +- // mm1, mm3 --- Src[1] +- // mm0 --- Src[2] +- // mm7 for unpacking ++ // mm1, mm3 --- Src[1] ++ // mm0 --- Src[2] ++ // mm7 for unpacking + +- movq mm4, mm1 // b low +- pmullw mm1, four_fifths // b * 4/5 low ++ movq mm4, mm1 // b low ++ pmullw mm1, four_fifths // b * 4/5 low + +- movq QWORD ptr [edi+ecx], mm0 // write des[4] ++ movq QWORD ptr [edi+ecx], mm0 // write des[4] + +- movq mm5, mm3 // b high +- pmullw mm3, four_fifths // b * 4/5 high ++ movq mm5, mm3 // b high ++ pmullw mm3, four_fifths // b * 4/5 high + +- movq mm2, mm0 // c +- pmullw mm4, one_fifth // b * 1/5 ++ movq mm2, mm0 // c ++ pmullw mm4, one_fifth // b * 1/5 + +- punpcklbw mm0, mm7 // c low +- pmullw mm5, one_fifth // b * 1/5 ++ punpcklbw mm0, mm7 // c low ++ pmullw mm5, one_fifth // b * 1/5 + +- movq mm6, mm0 // make copy of c low +- punpckhbw mm2, mm7 // c high ++ movq mm6, mm0 // make copy of c low ++ punpckhbw mm2, mm7 // c high + +- pmullw mm6, one_fifth // c * 1/5 low +- movq mm7, mm2 // make copy of c high ++ pmullw mm6, one_fifth // c * 1/5 low ++ movq mm7, mm2 // make copy of c high + +- pmullw mm7, one_fifth // c * 1/5 high +- paddw mm1, mm6 // b * 4/5 + c * 1/5 low ++ pmullw mm7, one_fifth // c * 1/5 high ++ paddw mm1, mm6 // b * 4/5 + c * 1/5 low + +- paddw mm3, mm7 // b * 4/5 + c * 1/5 high +- movq mm6, mm0 // make copy of c low ++ paddw mm3, mm7 // b * 4/5 + c * 1/5 high ++ movq mm6, mm0 // make copy of c low + +- pmullw mm6, four_fifths // c * 4/5 low +- movq mm7, mm2 // make copy of c high ++ pmullw mm6, four_fifths // c * 4/5 low ++ movq mm7, mm2 // make copy of c high + +- pmullw mm7, four_fifths // c * 4/5 high ++ pmullw mm7, four_fifths // c * 4/5 high + +- paddw mm4, mm6 // b * 1/5 + c * 4/5 low +- paddw mm5, mm7 // b * 1/5 + c * 4/5 high ++ paddw mm4, mm6 // b * 1/5 + c * 4/5 low ++ paddw mm5, mm7 // b * 1/5 + c * 4/5 high + +- paddw mm1, round_values // + 128 +- paddw mm3, round_values // + 128 ++ paddw mm1, round_values // + 128 ++ paddw mm3, round_values // + 128 + +- psrlw mm1, 8 +- psrlw mm3, 8 ++ psrlw mm1, 8 ++ psrlw mm3, 8 + +- packuswb mm1, mm3 // des[2] +- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] ++ packuswb mm1, mm3 // des[2] ++ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + +- paddw mm4, round_values // + 128 +- paddw mm5, round_values // + 128 ++ paddw mm4, round_values // + 128 ++ paddw mm5, round_values // + 128 + +- psrlw mm4, 8 +- psrlw mm5, 8 ++ psrlw mm4, 8 ++ psrlw mm5, 8 + +- packuswb mm4, mm5 // des[3] +- movq QWORD ptr [edi], mm4 // write des[3] ++ packuswb mm4, mm5 // des[3] ++ movq QWORD ptr [edi], mm4 // write des[3] + +- // mm0, mm2 --- Src[3] ++ // mm0, mm2 --- Src[3] + +- add edi, 8 +- add esi, 8 ++ add edi, 8 ++ add esi, 8 + +- sub edx, 8 +- jg last_vs_3_5_loop +- } ++ sub edx, 8 ++ jg last_vs_3_5_loop ++ } + } + + 
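All of the fractional scalers in this file share one fixed-point scheme: each output sample is a two-tap blend whose n/5 weights are stored as 8-bit fixed point (51 = 1/5*256, 102 = 2/5*256, 154 = 3/5*256, 205 = 4/5*256), with +128 added for rounding before the final shift right by 8. As a reading aid, here is a minimal scalar sketch of the 3-to-5 vertical band blend that the two MMX routines above implement; it assumes, as the asm does, that the band's three source rows already sit at rows 0..2 of dest and that the next band begins at row 5, and the helper name is illustrative rather than a libvpx function:

static void
vertical_band_3_5_scale_ref(unsigned char *dest,
                            unsigned int pitch,
                            unsigned int width) {
  unsigned int x;

  for (x = 0; x < width; x++) {
    /* read all taps before rows 1 and 2 are overwritten in place */
    unsigned int a = dest[0 * pitch + x];  /* src[0]; row 0 is left as-is */
    unsigned int b = dest[1 * pitch + x];  /* src[1] */
    unsigned int c = dest[2 * pitch + x];  /* src[2] */
    unsigned int n = dest[5 * pitch + x];  /* src[0] of the next band */

    /* weights follow the MMX comments: out = (p*w + q*(256-w) + 128) >> 8 */
    dest[1 * pitch + x] = (unsigned char)((a * 102 + b * 154 + 128) >> 8);  /* a*2/5 + b*3/5 */
    dest[2 * pitch + x] = (unsigned char)((b * 205 + c *  51 + 128) >> 8);  /* b*4/5 + c*1/5 */
    dest[3 * pitch + x] = (unsigned char)((b *  51 + c * 205 + 128) >> 8);  /* b*1/5 + c*4/5 */
    dest[4 * pitch + x] = (unsigned char)((c * 154 + n * 102 + 128) >> 8);  /* c*3/5 + n*2/5 */
  }
}

The last_vertical_band_3_5 variant handles the bottom of the frame, where no next band exists: instead of blending toward row 5, it replicates the band's last source row into row 4.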
/**************************************************************************** +@@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx + static + void vertical_band_1_2_scale_mmx + ( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- __asm +- { ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ __asm { + +- mov esi, dest // Get the source and destination pointer +- mov ecx, dest_pitch // Get the pitch size ++ mov esi, dest // Get the source and destination pointer ++ mov ecx, dest_pitch // Get the pitch size + +- pxor mm7, mm7 // clear out mm7 +- mov edx, dest_width // Loop counter ++ pxor mm7, mm7 // clear out mm7 ++ mov edx, dest_width // Loop counter + +- vs_1_2_loop: ++ vs_1_2_loop: + +- movq mm0, [esi] // get Src[0] +- movq mm1, [esi + ecx * 2] // get Src[1] ++ movq mm0, [esi] // get Src[0] ++ movq mm1, [esi + ecx * 2] // get Src[1] + +- movq mm2, mm0 // make copy before unpack +- movq mm3, mm1 // make copy before unpack ++ movq mm2, mm0 // make copy before unpack ++ movq mm3, mm1 // make copy before unpack + +- punpcklbw mm0, mm7 // low Src[0] +- movq mm6, four_ones // mm6= 1, 1, 1, 1 ++ punpcklbw mm0, mm7 // low Src[0] ++ movq mm6, four_ones // mm6= 1, 1, 1, 1 + +- punpcklbw mm1, mm7 // low Src[1] +- paddw mm0, mm1 // low (a + b) ++ punpcklbw mm1, mm7 // low Src[1] ++ paddw mm0, mm1 // low (a + b) + +- punpckhbw mm2, mm7 // high Src[0] +- paddw mm0, mm6 // low (a + b + 1) ++ punpckhbw mm2, mm7 // high Src[0] ++ paddw mm0, mm6 // low (a + b + 1) + +- punpckhbw mm3, mm7 +- paddw mm2, mm3 // high (a + b ) ++ punpckhbw mm3, mm7 ++ paddw mm2, mm3 // high (a + b ) + +- psraw mm0, 1 // low (a + b +1 )/2 +- paddw mm2, mm6 // high (a + b + 1) ++ psraw mm0, 1 // low (a + b +1 )/2 ++ paddw mm2, mm6 // high (a + b + 1) + +- psraw mm2, 1 // high (a + b + 1)/2 +- packuswb mm0, mm2 // pack results ++ psraw mm2, 1 // high (a + b + 1)/2 ++ packuswb mm0, mm2 // pack results + +- movq [esi+ecx], mm0 // write out eight bytes +- add esi, 8 ++ movq [esi+ecx], mm0 // write out eight bytes ++ add esi, 8 + +- sub edx, 8 +- jg vs_1_2_loop +- } ++ sub edx, 8 ++ jg vs_1_2_loop ++ } + + } + +@@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx + static + void last_vertical_band_1_2_scale_mmx + ( +- unsigned char *dest, +- unsigned int dest_pitch, +- unsigned int dest_width +-) +-{ +- __asm +- { +- mov esi, dest // Get the source and destination pointer +- mov ecx, dest_pitch // Get the pitch size ++ unsigned char *dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width ++) { ++ __asm { ++ mov esi, dest // Get the source and destination pointer ++ mov ecx, dest_pitch // Get the pitch size + +- mov edx, dest_width // Loop counter ++ mov edx, dest_width // Loop counter + +- last_vs_1_2_loop: ++ last_vs_1_2_loop: + +- movq mm0, [esi] // get Src[0] +- movq [esi+ecx], mm0 // write out eight bytes ++ movq mm0, [esi] // get Src[0] ++ movq [esi+ecx], mm0 // write out eight bytes + +- add esi, 8 +- sub edx, 8 ++ add esi, 8 ++ sub edx, 8 + +- jg last_vs_1_2_loop +- } ++ jg last_vs_1_2_loop ++ } + } + + /**************************************************************************** +@@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx + static + void horizontal_line_1_2_scale_mmx + ( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- (void) dest_width; ++ const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int 
dest_width
++) {
++ (void) dest_width;
+
+- __asm
+- {
+- mov esi, source
+- mov edi, dest
++ __asm {
++ mov esi, source
++ mov edi, dest
+
+- pxor mm7, mm7
+- movq mm6, four_ones
++ pxor mm7, mm7
++ movq mm6, four_ones
+
+- mov ecx, source_width
++ mov ecx, source_width
+
+- hs_1_2_loop:
++ hs_1_2_loop:
+
+- movq mm0, [esi]
+- movq mm1, [esi+1]
++ movq mm0, [esi]
++ movq mm1, [esi+1]
+
+- movq mm2, mm0
+- movq mm3, mm1
++ movq mm2, mm0
++ movq mm3, mm1
+
+- movq mm4, mm0
+- punpcklbw mm0, mm7
++ movq mm4, mm0
++ punpcklbw mm0, mm7
+
+- punpcklbw mm1, mm7
+- paddw mm0, mm1
++ punpcklbw mm1, mm7
++ paddw mm0, mm1
+
+- paddw mm0, mm6
+- punpckhbw mm2, mm7
++ paddw mm0, mm6
++ punpckhbw mm2, mm7
+
+- punpckhbw mm3, mm7
+- paddw mm2, mm3
++ punpckhbw mm3, mm7
++ paddw mm2, mm3
+
+- paddw mm2, mm6
+- psraw mm0, 1
++ paddw mm2, mm6
++ psraw mm0, 1
+
+- psraw mm2, 1
+- packuswb mm0, mm2
++ psraw mm2, 1
++ packuswb mm0, mm2
+
+- movq mm2, mm4
+- punpcklbw mm2, mm0
++ movq mm2, mm4
++ punpcklbw mm2, mm0
+
+- movq [edi], mm2
+- punpckhbw mm4, mm0
++ movq [edi], mm2
++ punpckhbw mm4, mm0
+
+- movq [edi+8], mm4
+- add esi, 8
++ movq [edi+8], mm4
++ add esi, 8
+
+- add edi, 16
+- sub ecx, 8
++ add edi, 16
++ sub ecx, 8
+
+- cmp ecx, 8
+- jg hs_1_2_loop
++ cmp ecx, 8
++ jg hs_1_2_loop
+
+ // last eight pixel
+
+- movq mm0, [esi]
+- movq mm1, mm0
++ movq mm0, [esi]
++ movq mm1, mm0
+
+- movq mm2, mm0
+- movq mm3, mm1
++ movq mm2, mm0
++ movq mm3, mm1
+
+- psrlq mm1, 8
+- psrlq mm3, 56
++ psrlq mm1, 8
++ psrlq mm3, 56
+
+- psllq mm3, 56
+- por mm1, mm3
++ psllq mm3, 56
++ por mm1, mm3
+
+- movq mm3, mm1
+- movq mm4, mm0
++ movq mm3, mm1
++ movq mm4, mm0
+
+- punpcklbw mm0, mm7
+- punpcklbw mm1, mm7
++ punpcklbw mm0, mm7
++ punpcklbw mm1, mm7
+
+- paddw mm0, mm1
+- paddw mm0, mm6
++ paddw mm0, mm1
++ paddw mm0, mm6
+
+- punpckhbw mm2, mm7
+- punpckhbw mm3, mm7
++ punpckhbw mm2, mm7
++ punpckhbw mm3, mm7
+
+- paddw mm2, mm3
+- paddw mm2, mm6
++ paddw mm2, mm3
++ paddw mm2, mm6
+
+- psraw mm0, 1
+- psraw mm2, 1
++ psraw mm0, 1
++ psraw mm2, 1
+
+- packuswb mm0, mm2
+- movq mm2, mm4
++ packuswb mm0, mm2
++ movq mm2, mm4
+
+- punpcklbw mm2, mm0
+- movq [edi], mm2
++ punpcklbw mm2, mm0
++ movq [edi], mm2
+
+- punpckhbw mm4, mm0
+- movq [edi+8], mm4
+- }
++ punpckhbw mm4, mm0
++ movq [edi+8], mm4
++ }
+ }
+
+
+@@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128,
+ static
+ void horizontal_line_5_4_scale_mmx
+ (
+- const unsigned char *source,
+- unsigned int source_width,
+- unsigned char *dest,
+- unsigned int dest_width
+-)
+-{
+- /*
+- unsigned i;
+- unsigned int a, b, c, d, e;
+- unsigned char *des = dest;
+- const unsigned char *src = source;
++ const unsigned char *source,
++ unsigned int source_width,
++ unsigned char *dest,
++ unsigned int dest_width
++) {
++ /*
++ unsigned i;
++ unsigned int a, b, c, d, e;
++ unsigned char *des = dest;
++ const unsigned char *src = source;
+
+- (void) dest_width;
++ (void) dest_width;
+
+- for ( i=0; i<source_width; i+=5 )
+- {
+- a = src[0];
+- b = src[1];
+- c = src[2];
+- d = src[3];
+- e = src[4];
++ for (i = 0; i < source_width; i += 5) {
++ a = src[0];
++ b = src[1];
++ c = src[2];
++ d = src[3];
++ e = src[4];
+
+- des[0] = a;
+- des[1] = ((b*192 + c* 64 + 128)>>8);
+- des[2] = ((c*128 + d*128 + 128)>>8);
+- des[3] = ((d* 64 + e*192 + 128)>>8);
++ des[0] = a;
++ des[1] = ((b*192 + c* 64 + 128)>>8);
++ des[2] = ((c*128 + d*128 + 128)>>8);
++ des[3] = ((d* 64 + e*192 + 128)>>8);
+
+- src += 5;
+- des += 4;
+- }
+- */
+- (void) dest_width;
++ src += 5;
++ des += 4;
++ }
++ */
++ (void) dest_width;
+
+- __asm
+- {
++ __asm {
+
+- mov esi, source ;
+- mov edi, dest ;
++ mov esi, source;
++ mov edi, dest;
+
+- mov ecx, source_width ;
+- movq mm5, const54_1 ;
++ mov ecx, source_width;
++ movq mm5, const54_1; + +- pxor mm7, mm7 ; +- movq mm6, const54_2 ; ++ pxor mm7, mm7; ++ movq mm6, const54_2; + +- movq mm4, round_values ; +- lea edx, [esi+ecx] ; +- horizontal_line_5_4_loop: ++ movq mm4, round_values; ++ lea edx, [esi+ecx]; ++ horizontal_line_5_4_loop: + +- movq mm0, QWORD PTR [esi] ; +- 00 01 02 03 04 05 06 07 +- movq mm1, mm0 ; +- 00 01 02 03 04 05 06 07 ++ movq mm0, QWORD PTR [esi]; ++ 00 01 02 03 04 05 06 07 ++ movq mm1, mm0; ++ 00 01 02 03 04 05 06 07 + +- psrlq mm0, 8 ; +- 01 02 03 04 05 06 07 xx +- punpcklbw mm1, mm7 ; +- xx 00 xx 01 xx 02 xx 03 ++ psrlq mm0, 8; ++ 01 02 03 04 05 06 07 xx ++ punpcklbw mm1, mm7; ++ xx 00 xx 01 xx 02 xx 03 + +- punpcklbw mm0, mm7 ; +- xx 01 xx 02 xx 03 xx 04 +- pmullw mm1, mm5 ++ punpcklbw mm0, mm7; ++ xx 01 xx 02 xx 03 xx 04 ++ pmullw mm1, mm5 + +- pmullw mm0, mm6 +- add esi, 5 ++ pmullw mm0, mm6 ++ add esi, 5 + +- add edi, 4 +- paddw mm1, mm0 ++ add edi, 4 ++ paddw mm1, mm0 + +- paddw mm1, mm4 +- psrlw mm1, 8 ++ paddw mm1, mm4 ++ psrlw mm1, 8 + +- cmp esi, edx +- packuswb mm1, mm7 ++ cmp esi, edx ++ packuswb mm1, mm7 + +- movd DWORD PTR [edi-4], mm1 ++ movd DWORD PTR [edi-4], mm1 + +- jl horizontal_line_5_4_loop ++ jl horizontal_line_5_4_loop + +- } ++ } + + } + __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; +@@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, + __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; + + static +-void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ ++void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + +- __asm +- { +- push ebx ++ __asm { ++ push ebx + +- mov esi, source // Get the source and destination pointer +- mov ecx, src_pitch // Get the pitch size ++ mov esi, source // Get the source and destination pointer ++ mov ecx, src_pitch // Get the pitch size + +- mov edi, dest // tow lines below +- pxor mm7, mm7 // clear out mm7 ++ mov edi, dest // tow lines below ++ pxor mm7, mm7 // clear out mm7 + +- mov edx, dest_pitch // Loop counter +- mov ebx, dest_width ++ mov edx, dest_pitch // Loop counter ++ mov ebx, dest_width + +- vs_5_4_loop: ++ vs_5_4_loop: + +- movd mm0, DWORD ptr [esi] // src[0]; +- movd mm1, DWORD ptr [esi+ecx] // src[1]; ++ movd mm0, DWORD ptr [esi] // src[0]; ++ movd mm1, DWORD ptr [esi+ecx] // src[1]; + +- movd mm2, DWORD ptr [esi+ecx*2] +- lea eax, [esi+ecx*2] // ++ movd mm2, DWORD ptr [esi+ecx*2] ++ lea eax, [esi+ecx*2] // + +- punpcklbw mm1, mm7 +- punpcklbw mm2, mm7 ++ punpcklbw mm1, mm7 ++ punpcklbw mm2, mm7 + +- movq mm3, mm2 +- pmullw mm1, three_fourths ++ movq mm3, mm2 ++ pmullw mm1, three_fourths + +- pmullw mm2, one_fourths +- movd mm4, [eax+ecx] ++ pmullw mm2, one_fourths ++ movd mm4, [eax+ecx] + +- pmullw mm3, two_fourths +- punpcklbw mm4, mm7 ++ pmullw mm3, two_fourths ++ punpcklbw mm4, mm7 + +- movq mm5, mm4 +- pmullw mm4, two_fourths ++ movq mm5, mm4 ++ pmullw mm4, two_fourths + +- paddw mm1, mm2 +- movd mm6, [eax+ecx*2] ++ paddw mm1, mm2 ++ movd mm6, [eax+ecx*2] + +- pmullw mm5, one_fourths +- paddw mm1, round_values; ++ pmullw mm5, one_fourths ++ paddw mm1, round_values; + +- paddw mm3, mm4 +- psrlw mm1, 8 ++ paddw mm3, mm4 ++ psrlw mm1, 8 + +- punpcklbw mm6, mm7 +- paddw mm3, round_values ++ punpcklbw mm6, mm7 ++ paddw mm3, 
round_values + +- pmullw mm6, three_fourths +- psrlw mm3, 8 ++ pmullw mm6, three_fourths ++ psrlw mm3, 8 + +- packuswb mm1, mm7 +- packuswb mm3, mm7 ++ packuswb mm1, mm7 ++ packuswb mm3, mm7 + +- movd DWORD PTR [edi], mm0 +- movd DWORD PTR [edi+edx], mm1 ++ movd DWORD PTR [edi], mm0 ++ movd DWORD PTR [edi+edx], mm1 + + +- paddw mm5, mm6 +- movd DWORD PTR [edi+edx*2], mm3 ++ paddw mm5, mm6 ++ movd DWORD PTR [edi+edx*2], mm3 + +- lea eax, [edi+edx*2] +- paddw mm5, round_values ++ lea eax, [edi+edx*2] ++ paddw mm5, round_values + +- psrlw mm5, 8 +- add edi, 4 ++ psrlw mm5, 8 ++ add edi, 4 + +- packuswb mm5, mm7 +- movd DWORD PTR [eax+edx], mm5 ++ packuswb mm5, mm7 ++ movd DWORD PTR [eax+edx], mm5 + +- add esi, 4 +- sub ebx, 4 ++ add esi, 4 ++ sub ebx, 4 + +- jg vs_5_4_loop ++ jg vs_5_4_loop + +- pop ebx +- } ++ pop ebx ++ } + } + + +@@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, + static + void horizontal_line_5_3_scale_mmx + ( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ ++ const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width ++) { + +- (void) dest_width; +- __asm +- { ++ (void) dest_width; ++ __asm { + +- mov esi, source ; +- mov edi, dest ; ++ mov esi, source; ++ mov edi, dest; + +- mov ecx, source_width ; +- movq mm5, const53_1 ; ++ mov ecx, source_width; ++ movq mm5, const53_1; + +- pxor mm7, mm7 ; +- movq mm6, const53_2 ; ++ pxor mm7, mm7; ++ movq mm6, const53_2; + +- movq mm4, round_values ; +- lea edx, [esi+ecx-5] ; +- horizontal_line_5_3_loop: ++ movq mm4, round_values; ++ lea edx, [esi+ecx-5]; ++ horizontal_line_5_3_loop: + +- movq mm0, QWORD PTR [esi] ; +- 00 01 02 03 04 05 06 07 +- movq mm1, mm0 ; +- 00 01 02 03 04 05 06 07 ++ movq mm0, QWORD PTR [esi]; ++ 00 01 02 03 04 05 06 07 ++ movq mm1, mm0; ++ 00 01 02 03 04 05 06 07 + +- psllw mm0, 8 ; +- xx 00 xx 02 xx 04 xx 06 +- psrlw mm1, 8 ; +- 01 xx 03 xx 05 xx 07 xx ++ psllw mm0, 8; ++ xx 00 xx 02 xx 04 xx 06 ++ psrlw mm1, 8; ++ 01 xx 03 xx 05 xx 07 xx + +- psrlw mm0, 8 ; +- 00 xx 02 xx 04 xx 06 xx +- psllq mm1, 16 ; +- xx xx 01 xx 03 xx 05 xx ++ psrlw mm0, 8; ++ 00 xx 02 xx 04 xx 06 xx ++ psllq mm1, 16; ++ xx xx 01 xx 03 xx 05 xx + +- pmullw mm0, mm6 ++ pmullw mm0, mm6 + +- pmullw mm1, mm5 +- add esi, 5 ++ pmullw mm1, mm5 ++ add esi, 5 + +- add edi, 3 +- paddw mm1, mm0 ++ add edi, 3 ++ paddw mm1, mm0 + +- paddw mm1, mm4 +- psrlw mm1, 8 ++ paddw mm1, mm4 ++ psrlw mm1, 8 + +- cmp esi, edx +- packuswb mm1, mm7 ++ cmp esi, edx ++ packuswb mm1, mm7 + +- movd DWORD PTR [edi-3], mm1 +- jl horizontal_line_5_3_loop ++ movd DWORD PTR [edi-3], mm1 ++ jl horizontal_line_5_3_loop + +-//exit condition +- movq mm0, QWORD PTR [esi] ; +- 00 01 02 03 04 05 06 07 +- movq mm1, mm0 ; +- 00 01 02 03 04 05 06 07 ++// exit condition ++ movq mm0, QWORD PTR [esi]; ++ 00 01 02 03 04 05 06 07 ++ movq mm1, mm0; ++ 00 01 02 03 04 05 06 07 + +- psllw mm0, 8 ; +- xx 00 xx 02 xx 04 xx 06 +- psrlw mm1, 8 ; +- 01 xx 03 xx 05 xx 07 xx ++ psllw mm0, 8; ++ xx 00 xx 02 xx 04 xx 06 ++ psrlw mm1, 8; ++ 01 xx 03 xx 05 xx 07 xx + +- psrlw mm0, 8 ; +- 00 xx 02 xx 04 xx 06 xx +- psllq mm1, 16 ; +- xx xx 01 xx 03 xx 05 xx ++ psrlw mm0, 8; ++ 00 xx 02 xx 04 xx 06 xx ++ psllq mm1, 16; ++ xx xx 01 xx 03 xx 05 xx + +- pmullw mm0, mm6 ++ pmullw mm0, mm6 + +- pmullw mm1, mm5 +- paddw mm1, mm0 ++ pmullw mm1, mm5 ++ paddw mm1, mm0 + +- paddw mm1, mm4 +- psrlw mm1, 8 ++ paddw mm1, mm4 ++ psrlw mm1, 8 
+ +- packuswb mm1, mm7 +- movd eax, mm1 ++ packuswb mm1, mm7 ++ movd eax, mm1 + +- mov edx, eax +- shr edx, 16 ++ mov edx, eax ++ shr edx, 16 + +- mov WORD PTR[edi], ax +- mov BYTE PTR[edi+2], dl ++ mov WORD PTR[edi], ax ++ mov BYTE PTR[edi+2], dl + +- } ++ } + + } + +@@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85 + __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; + + static +-void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ ++void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + +- __asm +- { +- push ebx ++ __asm { ++ push ebx + +- mov esi, source // Get the source and destination pointer +- mov ecx, src_pitch // Get the pitch size ++ mov esi, source // Get the source and destination pointer ++ mov ecx, src_pitch // Get the pitch size + +- mov edi, dest // tow lines below +- pxor mm7, mm7 // clear out mm7 ++ mov edi, dest // tow lines below ++ pxor mm7, mm7 // clear out mm7 + +- mov edx, dest_pitch // Loop counter +- movq mm5, one_thirds ++ mov edx, dest_pitch // Loop counter ++ movq mm5, one_thirds + +- movq mm6, two_thirds +- mov ebx, dest_width; ++ movq mm6, two_thirds ++ mov ebx, dest_width; + +- vs_5_3_loop: ++ vs_5_3_loop: + +- movd mm0, DWORD ptr [esi] // src[0]; +- movd mm1, DWORD ptr [esi+ecx] // src[1]; ++ movd mm0, DWORD ptr [esi] // src[0]; ++ movd mm1, DWORD ptr [esi+ecx] // src[1]; + +- movd mm2, DWORD ptr [esi+ecx*2] +- lea eax, [esi+ecx*2] // ++ movd mm2, DWORD ptr [esi+ecx*2] ++ lea eax, [esi+ecx*2] // + +- punpcklbw mm1, mm7 +- punpcklbw mm2, mm7 ++ punpcklbw mm1, mm7 ++ punpcklbw mm2, mm7 + +- pmullw mm1, mm5 +- pmullw mm2, mm6 ++ pmullw mm1, mm5 ++ pmullw mm2, mm6 + +- movd mm3, DWORD ptr [eax+ecx] +- movd mm4, DWORD ptr [eax+ecx*2] ++ movd mm3, DWORD ptr [eax+ecx] ++ movd mm4, DWORD ptr [eax+ecx*2] + +- punpcklbw mm3, mm7 +- punpcklbw mm4, mm7 ++ punpcklbw mm3, mm7 ++ punpcklbw mm4, mm7 + +- pmullw mm3, mm6 +- pmullw mm4, mm5 ++ pmullw mm3, mm6 ++ pmullw mm4, mm5 + + +- movd DWORD PTR [edi], mm0 +- paddw mm1, mm2 ++ movd DWORD PTR [edi], mm0 ++ paddw mm1, mm2 + +- paddw mm1, round_values +- psrlw mm1, 8 ++ paddw mm1, round_values ++ psrlw mm1, 8 + +- packuswb mm1, mm7 +- paddw mm3, mm4 ++ packuswb mm1, mm7 ++ paddw mm3, mm4 + +- paddw mm3, round_values +- movd DWORD PTR [edi+edx], mm1 ++ paddw mm3, round_values ++ movd DWORD PTR [edi+edx], mm1 + +- psrlw mm3, 8 +- packuswb mm3, mm7 ++ psrlw mm3, 8 ++ packuswb mm3, mm7 + +- movd DWORD PTR [edi+edx*2], mm3 ++ movd DWORD PTR [edi+edx*2], mm3 + + +- add edi, 4 +- add esi, 4 ++ add edi, 4 ++ add esi, 4 + +- sub ebx, 4 +- jg vs_5_3_loop ++ sub ebx, 4 ++ jg vs_5_3_loop + +- pop ebx +- } ++ pop ebx ++ } + } + + +@@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, + static + void horizontal_line_2_1_scale_mmx + ( +- const unsigned char *source, +- unsigned int source_width, +- unsigned char *dest, +- unsigned int dest_width +-) +-{ +- (void) dest_width; +- (void) source_width; +- __asm +- { +- mov esi, source +- mov edi, dest +- +- pxor mm7, mm7 +- mov ecx, dest_width +- +- xor edx, edx +- hs_2_1_loop: +- +- movq mm0, [esi+edx*2] +- psllw mm0, 8 +- +- psrlw mm0, 8 +- packuswb mm0, mm7 +- +- movd DWORD Ptr [edi+edx], mm0; +- add edx, 4 +- +- cmp edx, ecx +- jl hs_2_1_loop +- +- } 
++ const unsigned char *source, ++ unsigned int source_width, ++ unsigned char *dest, ++ unsigned int dest_width ++) { ++ (void) dest_width; ++ (void) source_width; ++ __asm { ++ mov esi, source ++ mov edi, dest ++ ++ pxor mm7, mm7 ++ mov ecx, dest_width ++ ++ xor edx, edx ++ hs_2_1_loop: ++ ++ movq mm0, [esi+edx*2] ++ psllw mm0, 8 ++ ++ psrlw mm0, 8 ++ packuswb mm0, mm7 ++ ++ movd DWORD Ptr [edi+edx], mm0; ++ add edx, 4 ++ ++ cmp edx, ecx ++ jl hs_2_1_loop ++ ++ } + } + + + + static +-void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ +- (void) dest_pitch; +- (void) src_pitch; +- vpx_memcpy(dest, source, dest_width); ++void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { ++ (void) dest_pitch; ++ (void) src_pitch; ++ vpx_memcpy(dest, source, dest_width); + } + + +@@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4 + __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; + + static +-void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) +-{ ++void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + +- (void) dest_pitch; +- __asm +- { +- mov esi, source +- mov edi, dest ++ (void) dest_pitch; ++ __asm { ++ mov esi, source ++ mov edi, dest + +- mov eax, src_pitch +- mov edx, dest_width ++ mov eax, src_pitch ++ mov edx, dest_width + +- pxor mm7, mm7 +- sub esi, eax //back one line ++ pxor mm7, mm7 ++ sub esi, eax // back one line + + +- lea ecx, [esi+edx]; +- movq mm6, round_values; ++ lea ecx, [esi+edx]; ++ movq mm6, round_values; + +- movq mm5, three_sixteenths; +- movq mm4, ten_sixteenths; ++ movq mm5, three_sixteenths; ++ movq mm4, ten_sixteenths; + +- vs_2_1_i_loop: +- movd mm0, [esi] // +- movd mm1, [esi+eax] // ++ vs_2_1_i_loop: ++ movd mm0, [esi] // ++ movd mm1, [esi+eax] // + +- movd mm2, [esi+eax*2] // +- punpcklbw mm0, mm7 ++ movd mm2, [esi+eax*2] // ++ punpcklbw mm0, mm7 + +- pmullw mm0, mm5 +- punpcklbw mm1, mm7 ++ pmullw mm0, mm5 ++ punpcklbw mm1, mm7 + +- pmullw mm1, mm4 +- punpcklbw mm2, mm7 ++ pmullw mm1, mm4 ++ punpcklbw mm2, mm7 + +- pmullw mm2, mm5 +- paddw mm0, round_values ++ pmullw mm2, mm5 ++ paddw mm0, round_values + +- paddw mm1, mm2 +- paddw mm0, mm1 ++ paddw mm1, mm2 ++ paddw mm0, mm1 + +- psrlw mm0, 8 +- packuswb mm0, mm7 ++ psrlw mm0, 8 ++ packuswb mm0, mm7 + +- movd DWORD PTR [edi], mm0 +- add esi, 4 ++ movd DWORD PTR [edi], mm0 ++ add esi, 4 + +- add edi, 4; +- cmp esi, ecx +- jl vs_2_1_i_loop ++ add edi, 4; ++ cmp esi, ecx ++ jl vs_2_1_i_loop + +- } ++ } + } + + + + void +-register_mmxscalers(void) +-{ +- vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; +- vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; +- vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; +- vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; +- vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; +- vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; +- vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; +- vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; +- vp8_last_vertical_band_4_5_scale = 
last_vertical_band_4_5_scale_mmx; +- +- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; +- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; +- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; +- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; +- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; +- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; +- +- +- +- vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; +- vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; +- vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; +- vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; +- vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; +- vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; +- vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; ++register_mmxscalers(void) { ++ vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; ++ vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; ++ vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; ++ vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; ++ vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; ++ vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; ++ vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; ++ vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; ++ vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; ++ ++ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; ++ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; ++ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; ++ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; ++ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; ++ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; ++ ++ ++ ++ vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; ++ vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; ++ vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; ++ vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; ++ vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; ++ vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; ++ vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; + + + +diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c +index 19e61c3..98913d1 100644 +--- a/vpx_scale/win32/scalesystemdependent.c ++++ b/vpx_scale/win32/scalesystemdependent.c +@@ -46,46 +46,42 @@ extern void register_mmxscalers(void); + * + ****************************************************************************/ + void +-vp8_scale_machine_specific_config(void) +-{ +- // If MMX supported then set to use MMX versions of functions else +- // use original 'C' versions. +- int mmx_enabled; +- int xmm_enabled; +- int wmt_enabled; ++vp8_scale_machine_specific_config(void) { ++ // If MMX supported then set to use MMX versions of functions else ++ // use original 'C' versions. 
++ int mmx_enabled; ++ int xmm_enabled; ++ int wmt_enabled; + +- vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); ++ vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); + +- if (mmx_enabled || xmm_enabled || wmt_enabled) +- { +- register_mmxscalers(); +- } +- else +- { +- vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; +- vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; +- vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; +- vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; +- vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; +- vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; +- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; +- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; +- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; +- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; +- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; +- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; +- vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; +- vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; +- vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; ++ if (mmx_enabled || xmm_enabled || wmt_enabled) { ++ register_mmxscalers(); ++ } else { ++ vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; ++ vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; ++ vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; ++ vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; ++ vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; ++ vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; ++ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; ++ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; ++ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; ++ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; ++ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; ++ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; ++ vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; ++ vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; ++ vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; + + +- vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; +- vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; +- vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; +- vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; +- vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; +- vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; +- vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; ++ vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; ++ vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; ++ vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; ++ vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; ++ vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; ++ vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; ++ vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; + +- } ++ } + } +diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h 
+index 800f700..6a8a1fc 100644 +--- a/vpx_scale/yv12config.h ++++ b/vpx_scale/yv12config.h +@@ -16,54 +16,54 @@ extern "C" + { + #endif + +-#define VP7BORDERINPIXELS 48 + #define VP8BORDERINPIXELS 32 ++#define VP9BORDERINPIXELS 64 ++#define VP9_INTERP_EXTEND 4 + +- /************************************* +- For INT_YUV: ++ /************************************* ++ For INT_YUV: + +- Y = (R+G*2+B)/4; +- U = (R-B)/2; +- V = (G*2 - R - B)/4; +- And +- R = Y+U-V; +- G = Y+V; +- B = Y-U-V; +- ************************************/ +- typedef enum +- { +- REG_YUV = 0, /* Regular yuv */ +- INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ +- } +- YUV_TYPE; ++ Y = (R+G*2+B)/4; ++ U = (R-B)/2; ++ V = (G*2 - R - B)/4; ++ And ++ R = Y+U-V; ++ G = Y+V; ++ B = Y-U-V; ++ ************************************/ ++ typedef enum ++ { ++ REG_YUV = 0, /* Regular yuv */ ++ INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ ++ } ++ YUV_TYPE; + +- typedef struct yv12_buffer_config +- { +- int y_width; +- int y_height; +- int y_stride; +-/* int yinternal_width; */ ++ typedef struct yv12_buffer_config { ++ int y_width; ++ int y_height; ++ int y_stride; ++ /* int yinternal_width; */ + +- int uv_width; +- int uv_height; +- int uv_stride; +-/* int uvinternal_width; */ ++ int uv_width; ++ int uv_height; ++ int uv_stride; ++ /* int uvinternal_width; */ + +- unsigned char *y_buffer; +- unsigned char *u_buffer; +- unsigned char *v_buffer; ++ unsigned char *y_buffer; ++ unsigned char *u_buffer; ++ unsigned char *v_buffer; + +- unsigned char *buffer_alloc; +- int border; +- int frame_size; +- YUV_TYPE clrtype; ++ unsigned char *buffer_alloc; ++ int border; ++ int frame_size; ++ YUV_TYPE clrtype; + +- int corrupted; +- int flags; +- } YV12_BUFFER_CONFIG; ++ int corrupted; ++ int flags; ++ } YV12_BUFFER_CONFIG; + +- int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); +- int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); ++ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); ++ int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); + + #ifdef __cplusplus + } +diff --git a/vpxdec.c b/vpxdec.c +index 4482f3d..9b728bf 100644 +--- a/vpxdec.c ++++ b/vpxdec.c +@@ -52,7 +52,7 @@ static const char *exec_name; + static const struct + { + char const *name; +- const vpx_codec_iface_t *iface; ++ vpx_codec_iface_t *iface; + unsigned int fourcc; + unsigned int fourcc_mask; + } ifaces[] = +@@ -152,7 +152,8 @@ static void usage_exit() + "write to. If the\n argument does not include any escape " + "characters, the output will be\n written to a single file. " + "Otherwise, the filename will be calculated by\n expanding " +- "the following escape characters:\n" ++ "the following escape characters:\n"); ++ fprintf(stderr, + "\n\t%%w - Frame width" + "\n\t%%h - Frame height" + "\n\t%% - Frame number, zero padded to places (1..9)" +@@ -356,7 +357,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) + } + else + { +- if(fwrite(buf, 1, len, out)); ++ (void) fwrite(buf, 1, len, out); + } + } + +@@ -502,7 +503,7 @@ nestegg_seek_cb(int64_t offset, int whence, void * userdata) + case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break; + case NESTEGG_SEEK_END: whence = SEEK_END; break; + }; +- return fseek(userdata, offset, whence)? -1 : 0; ++ return fseek(userdata, (long)offset, whence)? 
-1 : 0; + } + + +@@ -559,7 +560,7 @@ webm_guess_framerate(struct input_ctx *input, + goto fail; + + *fps_num = (i - 1) * 1000000; +- *fps_den = tstamp / 1000; ++ *fps_den = (unsigned int)(tstamp / 1000); + return 0; + fail: + nestegg_destroy(input->nestegg_ctx); +@@ -580,10 +581,10 @@ file_is_webm(struct input_ctx *input, + unsigned int i, n; + int track_type = -1; + +- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, +- input->infile}; ++ nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0}; + nestegg_video_params params; + ++ io.userdata = input->infile; + if(nestegg_init(&input->nestegg_ctx, io, NULL)) + goto fail; + +@@ -647,7 +648,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, + { + size_t pat_len; + +- // parse the pattern ++ /* parse the pattern */ + q[q_len - 1] = '\0'; + switch(p[1]) + { +@@ -677,7 +678,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, + { + size_t copy_len; + +- // copy the next segment ++ /* copy the next segment */ + if(!next_pat) + copy_len = strlen(p); + else +@@ -922,7 +923,7 @@ int main(int argc, const char **argv_) + p = strchr(p, '%'); + if(p && p[1] >= '1' && p[1] <= '9') + { +- // pattern contains sequence number, so it's not unique. ++ /* pattern contains sequence number, so it's not unique. */ + single_file = 0; + break; + } +@@ -962,7 +963,8 @@ int main(int argc, const char **argv_) + That will have to wait until these tools support WebM natively.*/ + sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", + "420jpeg", width, height, fps_num, fps_den, 'p'); +- out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5); ++ out_put(out, (unsigned char *)buffer, ++ (unsigned int)strlen(buffer), do_md5); + } + + /* Try to determine the codec from the fourcc. 
*/ +@@ -1040,7 +1042,7 @@ int main(int argc, const char **argv_) + + vpx_usec_timer_start(&timer); + +- if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0)) ++ if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) + { + const char *detail = vpx_codec_error_detail(&decoder); + fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); +@@ -1052,7 +1054,7 @@ int main(int argc, const char **argv_) + } + + vpx_usec_timer_mark(&timer); +- dx_time += vpx_usec_timer_elapsed(&timer); ++ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); + + ++frame_in; + +@@ -1064,9 +1066,14 @@ int main(int argc, const char **argv_) + } + frames_corrupted += corrupted; + ++ vpx_usec_timer_start(&timer); ++ + if ((img = vpx_codec_get_frame(&decoder, &iter))) + ++frame_out; + ++ vpx_usec_timer_mark(&timer); ++ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); ++ + if (progress) + show_progress(frame_in, frame_out, dx_time); + +diff --git a/vpxenc.c b/vpxenc.c +index d32b21b..c9547ea 100644 +--- a/vpxenc.c ++++ b/vpxenc.c +@@ -54,11 +54,7 @@ typedef __int64 off_t; + #define off_t off64_t + #endif + +-#if defined(_MSC_VER) +-#define LITERALU64(n) n +-#else +-#define LITERALU64(n) n##LLU +-#endif ++#define LITERALU64(hi,lo) ((((uint64_t)hi)<<32)|lo) + + /* We should use 32-bit file operations in WebM file format + * when building ARM executable file (.axf) with RVCT */ +@@ -68,12 +64,28 @@ typedef long off_t; + #define ftello ftell + #endif + ++/* Swallow warnings about unused results of fread/fwrite */ ++static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, ++ FILE *stream) ++{ ++ return fread(ptr, size, nmemb, stream); ++} ++#define fread wrap_fread ++ ++static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, ++ FILE *stream) ++{ ++ return fwrite(ptr, size, nmemb, stream); ++} ++#define fwrite wrap_fwrite ++ ++ + static const char *exec_name; + + static const struct codec_item + { + char const *name; +- const vpx_codec_iface_t *iface; ++ vpx_codec_iface_t *iface; + unsigned int fourcc; + } codecs[] = + { +@@ -245,7 +257,7 @@ void stats_write(stats_io_t *stats, const void *pkt, size_t len) + { + if (stats->file) + { +- if(fwrite(pkt, 1, len, stats->file)); ++ (void) fwrite(pkt, 1, len, stats->file); + } + else + { +@@ -338,7 +350,7 @@ static int read_frame(struct input_state *input, vpx_image_t *img) + * write_ivf_frame_header() for documentation on the frame header + * layout. 
+ */ +- if(fread(junk, 1, IVF_FRAME_HDR_SZ, f)); ++ (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f); + } + + for (plane = 0; plane < 3; plane++) +@@ -468,7 +480,7 @@ static void write_ivf_file_header(FILE *outfile, + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ + +- if(fwrite(header, 1, 32, outfile)); ++ (void) fwrite(header, 1, 32, outfile); + } + + +@@ -482,18 +494,18 @@ static void write_ivf_frame_header(FILE *outfile, + return; + + pts = pkt->data.frame.pts; +- mem_put_le32(header, pkt->data.frame.sz); ++ mem_put_le32(header, (int)pkt->data.frame.sz); + mem_put_le32(header + 4, pts & 0xFFFFFFFF); + mem_put_le32(header + 8, pts >> 32); + +- if(fwrite(header, 1, 12, outfile)); ++ (void) fwrite(header, 1, 12, outfile); + } + + static void write_ivf_frame_size(FILE *outfile, size_t size) + { + char header[4]; +- mem_put_le32(header, size); +- fwrite(header, 1, 4, outfile); ++ mem_put_le32(header, (int)size); ++ (void) fwrite(header, 1, 4, outfile); + } + + +@@ -541,13 +553,13 @@ struct EbmlGlobal + + void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) + { +- if(fwrite(buffer_in, 1, len, glob->stream)); ++ (void) fwrite(buffer_in, 1, len, glob->stream); + } + + #define WRITE_BUFFER(s) \ + for(i = len-1; i>=0; i--)\ + { \ +- x = *(const s *)buffer_in >> (i * CHAR_BIT); \ ++ x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \ + Ebml_Write(glob, &x, 1); \ + } + void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) +@@ -597,9 +609,9 @@ static void + Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, + unsigned long class_id) + { +- //todo this is always taking 8 bytes, this may need later optimization +- //this is a key that says length unknown +- uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF); ++ /* todo this is always taking 8 bytes, this may need later optimization */ ++ /* this is a key that says length unknown */ ++ uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF); + + Ebml_WriteID(glob, class_id); + *ebmlLoc = ftello(glob->stream); +@@ -617,7 +629,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) + + /* Calculate the size of this element */ + size = pos - *ebmlLoc - 8; +- size |= LITERALU64(0x0100000000000000); ++ size |= LITERALU64(0x01000000,0x00000000); + + /* Seek back to the beginning of the element and write the new size */ + fseeko(glob->stream, *ebmlLoc, SEEK_SET); +@@ -664,7 +676,7 @@ write_webm_seek_info(EbmlGlobal *ebml) + Ebml_EndSubElement(ebml, &start); + } + { +- //segment info ++ /* segment info */ + EbmlLoc startInfo; + uint64_t frame_time; + char version_string[64]; +@@ -686,7 +698,7 @@ write_webm_seek_info(EbmlGlobal *ebml) + Ebml_StartSubElement(ebml, &startInfo, Info); + Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); + Ebml_SerializeFloat(ebml, Segment_Duration, +- ebml->last_pts_ms + frame_time); ++ (double)(ebml->last_pts_ms + frame_time)); + Ebml_SerializeString(ebml, 0x4D80, version_string); + Ebml_SerializeString(ebml, 0x5741, version_string); + Ebml_EndSubElement(ebml, &startInfo); +@@ -704,16 +716,16 @@ write_webm_file_header(EbmlGlobal *glob, + EbmlLoc start; + Ebml_StartSubElement(glob, &start, EBML); + Ebml_SerializeUnsigned(glob, EBMLVersion, 1); +- Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version +- Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length +- Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length +- Ebml_SerializeString(glob, DocType, 
"webm"); //Doc Type +- Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version +- Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version ++ Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); ++ Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); ++ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); ++ Ebml_SerializeString(glob, DocType, "webm"); ++ Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); ++ Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); + Ebml_EndSubElement(glob, &start); + } + { +- Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment ++ Ebml_StartSubElement(glob, &glob->startSegment, Segment); + glob->position_reference = ftello(glob->stream); + glob->framerate = *fps; + write_webm_seek_info(glob); +@@ -731,7 +743,7 @@ write_webm_file_header(EbmlGlobal *glob, + Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); + glob->track_id_pos = ftello(glob->stream); + Ebml_SerializeUnsigned32(glob, TrackUID, trackID); +- Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1 ++ Ebml_SerializeUnsigned(glob, TrackType, 1); + Ebml_SerializeString(glob, CodecID, "V_VP8"); + { + unsigned int pixelWidth = cfg->g_w; +@@ -744,13 +756,13 @@ write_webm_file_header(EbmlGlobal *glob, + Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); + Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); + Ebml_SerializeFloat(glob, FrameRate, frameRate); +- Ebml_EndSubElement(glob, &videoStart); //Video ++ Ebml_EndSubElement(glob, &videoStart); + } +- Ebml_EndSubElement(glob, &start); //Track Entry ++ Ebml_EndSubElement(glob, &start); /* Track Entry */ + } + Ebml_EndSubElement(glob, &trackStart); + } +- // segment element is open ++ /* segment element is open */ + } + } + +@@ -778,7 +790,7 @@ write_webm_block(EbmlGlobal *glob, + if(pts_ms - glob->cluster_timecode > SHRT_MAX) + start_cluster = 1; + else +- block_timecode = pts_ms - glob->cluster_timecode; ++ block_timecode = (unsigned short)pts_ms - glob->cluster_timecode; + + is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); + if(start_cluster || is_keyframe) +@@ -789,9 +801,9 @@ write_webm_block(EbmlGlobal *glob, + /* Open the new cluster */ + block_timecode = 0; + glob->cluster_open = 1; +- glob->cluster_timecode = pts_ms; ++ glob->cluster_timecode = (uint32_t)pts_ms; + glob->cluster_pos = ftello(glob->stream); +- Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster ++ Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */ + Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode); + + /* Save a cue point if this is a keyframe. 
*/ +@@ -816,7 +828,7 @@ write_webm_block(EbmlGlobal *glob, + /* Write the Simple Block */ + Ebml_WriteID(glob, SimpleBlock); + +- block_length = pkt->data.frame.sz + 4; ++ block_length = (unsigned long)pkt->data.frame.sz + 4; + block_length |= 0x10000000; + Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); + +@@ -833,7 +845,7 @@ write_webm_block(EbmlGlobal *glob, + flags |= 0x08; + Ebml_Write(glob, &flags, 1); + +- Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz); ++ Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz); + } + + +@@ -865,7 +877,6 @@ write_webm_file_footer(EbmlGlobal *glob, long hash) + Ebml_SerializeUnsigned(glob, CueTrack, 1); + Ebml_SerializeUnsigned64(glob, CueClusterPosition, + cue->loc - glob->position_reference); +- //Ebml_SerializeUnsigned(glob, CueBlockNumber, cue->blockNumber); + Ebml_EndSubElement(glob, &start); + } + Ebml_EndSubElement(glob, &start); +@@ -942,7 +953,7 @@ static double vp8_mse2psnr(double Samples, double Peak, double Mse) + if ((double)Mse > 0.0) + psnr = 10.0 * log10(Peak * Peak * Samples / Mse); + else +- psnr = 60; // Limit to prevent / 0 ++ psnr = 60; /* Limit to prevent / 0 */ + + if (psnr > 60) + psnr = 60; +@@ -978,6 +989,8 @@ static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, + "Use Good Quality Deadline"); + static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0, + "Use Realtime Quality Deadline"); ++static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, ++ "Do not print encode progress"); + static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, + "Show encoder parameters"); + static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0, +@@ -997,7 +1010,7 @@ static const arg_def_t *main_args[] = + &debugmode, + &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, + &best_dl, &good_dl, &rt_dl, +- &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, ++ &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, + NULL + }; + +@@ -1225,7 +1238,7 @@ static int merge_hist_buckets(struct hist_bucket *bucket, + { + int last_bucket = buckets - 1; + +- // merge the small bucket with an adjacent one. ++ /* merge the small bucket with an adjacent one. 
*/ + if(small_bucket == 0) + merge_bucket = 1; + else if(small_bucket == last_bucket) +@@ -1325,7 +1338,7 @@ static void show_histogram(const struct hist_bucket *bucket, + int j; + float pct; + +- pct = 100.0 * (float)bucket[i].count / (float)total; ++ pct = (float)(100.0 * bucket[i].count / total); + len = HIST_BAR_MAX * bucket[i].count / scale; + if(len < 1) + len = 1; +@@ -1393,7 +1406,7 @@ static void init_rate_histogram(struct rate_hist *hist, + */ + hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000; + +- // prevent division by zero ++ /* prevent division by zero */ + if (hist->samples == 0) + hist->samples=1; + +@@ -1427,7 +1440,7 @@ static void update_rate_histogram(struct rate_hist *hist, + + idx = hist->frames++ % hist->samples; + hist->pts[idx] = now; +- hist->sz[idx] = pkt->data.frame.sz; ++ hist->sz[idx] = (int)pkt->data.frame.sz; + + if(now < cfg->rc_buf_initial_sz) + return; +@@ -1449,15 +1462,15 @@ static void update_rate_histogram(struct rate_hist *hist, + return; + + avg_bitrate = sum_sz * 8 * 1000 / (now - then); +- idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000); ++ idx = (int)(avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000)); + if(idx < 0) + idx = 0; + if(idx > RATE_BINS-1) + idx = RATE_BINS-1; + if(hist->bucket[idx].low > avg_bitrate) +- hist->bucket[idx].low = avg_bitrate; ++ hist->bucket[idx].low = (int)avg_bitrate; + if(hist->bucket[idx].high < avg_bitrate) +- hist->bucket[idx].high = avg_bitrate; ++ hist->bucket[idx].high = (int)avg_bitrate; + hist->bucket[idx].count++; + hist->total++; + } +@@ -1495,6 +1508,7 @@ struct global_config + int usage; + int deadline; + int use_i420; ++ int quiet; + int verbose; + int limit; + int show_psnr; +@@ -1619,6 +1633,8 @@ static void parse_global_config(struct global_config *global, char **argv) + global->use_i420 = 0; + else if (arg_match(&arg, &use_i420, argi)) + global->use_i420 = 1; ++ else if (arg_match(&arg, &quietarg, argi)) ++ global->quiet = 1; + else if (arg_match(&arg, &verbosearg, argi)) + global->verbose = 1; + else if (arg_match(&arg, &limit, argi)) +@@ -2000,7 +2016,7 @@ static void set_default_kf_interval(struct stream_state *stream, + { + double framerate = (double)global->framerate.num/global->framerate.den; + if (framerate > 0.0) +- stream->config.cfg.kf_max_dist = 5.0*framerate; ++ stream->config.cfg.kf_max_dist = (unsigned int)(5.0*framerate); + } + } + +@@ -2180,7 +2196,7 @@ static void encode_frame(struct stream_state *stream, + / cfg->g_timebase.num / global->framerate.num; + vpx_usec_timer_start(&timer); + vpx_codec_encode(&stream->encoder, img, frame_start, +- next_frame_start - frame_start, ++ (unsigned long)(next_frame_start - frame_start), + 0, global->deadline); + vpx_usec_timer_mark(&timer); + stream->cx_time += vpx_usec_timer_elapsed(&timer); +@@ -2224,8 +2240,9 @@ static void get_cx_data(struct stream_state *stream, + { + stream->frames_out++; + } +- fprintf(stderr, " %6luF", +- (unsigned long)pkt->data.frame.sz); ++ if (!global->quiet) ++ fprintf(stderr, " %6luF", ++ (unsigned long)pkt->data.frame.sz); + + update_rate_histogram(&stream->rate_hist, cfg, pkt); + if(stream->config.write_webm) +@@ -2233,7 +2250,8 @@ static void get_cx_data(struct stream_state *stream, + /* Update the hash */ + if(!stream->ebml.debug) + stream->hash = murmur(pkt->data.frame.buf, +- pkt->data.frame.sz, stream->hash); ++ (int)pkt->data.frame.sz, ++ stream->hash); + + write_webm_block(&stream->ebml, cfg, pkt); + } +@@ -2259,15 +2277,16 @@ static void get_cx_data(struct 
stream_state *stream, + } + } + +- fwrite(pkt->data.frame.buf, 1, +- pkt->data.frame.sz, stream->file); ++ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, ++ stream->file); + } + stream->nbytes += pkt->data.raw.sz; + break; + case VPX_CODEC_STATS_PKT: + stream->frames_out++; +- fprintf(stderr, " %6luS", +- (unsigned long)pkt->data.twopass_stats.sz); ++ if (!global->quiet) ++ fprintf(stderr, " %6luS", ++ (unsigned long)pkt->data.twopass_stats.sz); + stats_write(&stream->stats, + pkt->data.twopass_stats.buf, + pkt->data.twopass_stats.sz); +@@ -2283,7 +2302,8 @@ static void get_cx_data(struct stream_state *stream, + stream->psnr_samples_total += pkt->data.psnr.samples[0]; + for (i = 0; i < 4; i++) + { +- fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]); ++ if (!global->quiet) ++ fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]); + stream->psnr_totals[i] += pkt->data.psnr.psnr[i]; + } + stream->psnr_count++; +@@ -2306,13 +2326,13 @@ static void show_psnr(struct stream_state *stream) + return; + + fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index); +- ovpsnr = vp8_mse2psnr(stream->psnr_samples_total, 255.0, +- stream->psnr_sse_total); +- fprintf(stderr, " %.3lf", ovpsnr); ++ ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0, ++ (double)stream->psnr_sse_total); ++ fprintf(stderr, " %.3f", ovpsnr); + + for (i = 0; i < 4; i++) + { +- fprintf(stderr, " %.3lf", stream->psnr_totals[i]/stream->psnr_count); ++ fprintf(stderr, " %.3f", stream->psnr_totals[i]/stream->psnr_count); + } + fprintf(stderr, "\n"); + } +@@ -2320,7 +2340,7 @@ static void show_psnr(struct stream_state *stream) + + float usec_to_fps(uint64_t usec, unsigned int frames) + { +- return usec > 0 ? (float)frames * 1000000.0 / (float)usec : 0; ++ return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0); + } + + +@@ -2437,7 +2457,7 @@ int main(int argc, const char **argv_) + vpx_img_alloc(&raw, + input.use_i420 ? VPX_IMG_FMT_I420 + : VPX_IMG_FMT_YV12, +- input.w, input.h, 1); ++ input.w, input.h, 32); + + FOREACH_STREAM(init_rate_histogram(&stream->rate_hist, + &stream->config.cfg, +@@ -2462,18 +2482,21 @@ int main(int argc, const char **argv_) + if (frame_avail) + frames_in++; + +- if(stream_cnt == 1) +- fprintf(stderr, +- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", +- pass + 1, global.passes, frames_in, +- streams->frames_out, (int64_t)streams->nbytes); +- else +- fprintf(stderr, +- "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", +- pass + 1, global.passes, frames_in, +- cx_time > 9999999 ? cx_time / 1000 : cx_time, +- cx_time > 9999999 ? "ms" : "us", +- usec_to_fps(cx_time, frames_in)); ++ if (!global.quiet) ++ { ++ if(stream_cnt == 1) ++ fprintf(stderr, ++ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", ++ pass + 1, global.passes, frames_in, ++ streams->frames_out, (int64_t)streams->nbytes); ++ else ++ fprintf(stderr, ++ "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", ++ pass + 1, global.passes, frames_in, ++ cx_time > 9999999 ? cx_time / 1000 : cx_time, ++ cx_time > 9999999 ? "ms" : "us", ++ usec_to_fps(cx_time, frames_in)); ++ } + + } + else +@@ -2484,7 +2507,7 @@ int main(int argc, const char **argv_) + frame_avail ? 
&raw : NULL, + frames_in)); + vpx_usec_timer_mark(&timer); +- cx_time += vpx_usec_timer_elapsed(&timer); ++ cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer); + + FOREACH_STREAM(update_quantizer_histogram(stream)); + +@@ -2497,20 +2520,21 @@ int main(int argc, const char **argv_) + if(stream_cnt > 1) + fprintf(stderr, "\n"); + +- FOREACH_STREAM(fprintf( +- stderr, +- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" +- " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, +- global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, +- frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, +- frames_in ? (int64_t)stream->nbytes * 8 +- * (int64_t)global.framerate.num / global.framerate.den +- / frames_in +- : 0, +- stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, +- stream->cx_time > 9999999 ? "ms" : "us", +- usec_to_fps(stream->cx_time, frames_in)); +- ); ++ if (!global.quiet) ++ FOREACH_STREAM(fprintf( ++ stderr, ++ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" ++ " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, ++ global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, ++ frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, ++ frames_in ? (int64_t)stream->nbytes * 8 ++ * (int64_t)global.framerate.num / global.framerate.den ++ / frames_in ++ : 0, ++ stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, ++ stream->cx_time > 9999999 ? "ms" : "us", ++ usec_to_fps(stream->cx_time, frames_in)); ++ ); + + if (global.show_psnr) + FOREACH_STREAM(show_psnr(stream)); +diff --git a/y4minput.c b/y4minput.c +index dd51421..ff9ffbc 100644 +--- a/y4minput.c ++++ b/y4minput.c +@@ -662,7 +662,7 @@ int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){ + _nskip--; + } + else{ +- ret=fread(buffer+i,1,1,_fin); ++ ret=(int)fread(buffer+i,1,1,_fin); + if(ret<1)return -1; + } + if(buffer[i]=='\n')break; +@@ -818,7 +818,7 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){ + int c_sz; + int ret; + /*Read and skip the frame header.*/ +- ret=fread(frame,1,6,_fin); ++ ret=(int)fread(frame,1,6,_fin); + if(ret<6)return 0; + if(memcmp(frame,"FRAME",5)){ + fprintf(stderr,"Loss of framing in Y4M input data\n");
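
The two portability idioms the diff introduces in vpxenc.c can be seen in isolation in the following minimal C sketch. The LITERALU64 macro and the wrap_fwrite wrapper mirror the patch; the main() driver and the sample values are illustrative assumptions, not part of the patch itself.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Older MSVC lacks the LLU suffix, so the patch builds 64-bit
     * literals from two 32-bit halves instead of token-pasting a suffix. */
    #define LITERALU64(hi, lo) ((((uint64_t)(hi)) << 32) | (lo))

    /* Wrapping fwrite in a plain function swallows glibc's
     * warn_unused_result attribute without the old empty "if(fwrite(...));" body. */
    static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
                              FILE *stream) {
        return fwrite(ptr, size, nmemb, stream);
    }
    #define fwrite wrap_fwrite

    int main(void) {
        /* The EBML "length unknown" key used in Ebml_StartSubElement */
        uint64_t unknown_len = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
        printf("0x%016" PRIx64 "\n", unknown_len);  /* 0x01ffffffffffffff */

        /* Result deliberately ignored; the (void) cast documents intent */
        (void)fwrite("example\n", 1, 8, stdout);
        return 0;
    }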
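
Similarly, the INT_YUV comment block carried over into yv12config.h documents an integer RGB/YUV transform whose stated inverse is exact in rational arithmetic. A small self-contained check is sketched below; the sample values are chosen so the integer divisions are exact (arbitrary inputs would truncate), and the variable names are illustrative only.

    #include <stdio.h>

    int main(void) {
        int r = 200, g = 120, b = 40;

        /* Forward transform from the yv12config.h comment */
        int y = (r + g * 2 + b) / 4;   /* 120 */
        int u = (r - b) / 2;           /*  80 */
        int v = (g * 2 - r - b) / 4;   /*   0 */

        /* Inverse from the same comment: recovers R=200 G=120 B=40 */
        printf("R=%d G=%d B=%d\n", y + u - v, y + v, y - u - v);
        return 0;
    }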